1145
1165
# The old API returned a list, should this actually be a set?
1146
1166
return parent_map.keys()
1168
def _check_inventories(self, checker):
1169
"""Check the inventories found from the revision scan.
1171
This is responsible for verifying the sha1 of inventories and
1172
creating a pending_keys set that covers data referenced by inventories.
1174
bar = ui.ui_factory.nested_progress_bar()
1176
self._do_check_inventories(checker, bar)
1180
def _do_check_inventories(self, checker, bar):
1181
"""Helper for _check_inventories."""
1183
keys = {'chk_bytes':set(), 'inventories':set(), 'texts':set()}
1184
kinds = ['chk_bytes', 'texts']
1185
count = len(checker.pending_keys)
1186
bar.update("inventories", 0, 2)
1187
current_keys = checker.pending_keys
1188
checker.pending_keys = {}
1189
# Accumulate current checks.
1190
for key in current_keys:
1191
if key[0] != 'inventories' and key[0] not in kinds:
1192
checker._report_items.append('unknown key type %r' % (key,))
1193
keys[key[0]].add(key[1:])
1194
if keys['inventories']:
1195
# NB: output order *should* be roughly sorted - topo or
1196
# inverse topo depending on repository - either way decent
1197
# to just delta against. However, pre-CHK formats didn't
1198
# try to optimise inventory layout on disk. As such the
1199
# pre-CHK code path does not use inventory deltas.
1201
for record in self.inventories.check(keys=keys['inventories']):
1202
if record.storage_kind == 'absent':
1203
checker._report_items.append(
1204
'Missing inventory {%s}' % (record.key,))
1206
last_object = self._check_record('inventories', record,
1207
checker, last_object,
1208
current_keys[('inventories',) + record.key])
1209
del keys['inventories']
1212
bar.update("texts", 1)
1213
while (checker.pending_keys or keys['chk_bytes']
1215
# Something to check.
1216
current_keys = checker.pending_keys
1217
checker.pending_keys = {}
1218
# Accumulate current checks.
1219
for key in current_keys:
1220
if key[0] not in kinds:
1221
checker._report_items.append('unknown key type %r' % (key,))
1222
keys[key[0]].add(key[1:])
1223
# Check the outermost kind only - inventories || chk_bytes || texts
1227
for record in getattr(self, kind).check(keys=keys[kind]):
1228
if record.storage_kind == 'absent':
1229
checker._report_items.append(
1230
'Missing %s {%s}' % (kind, record.key,))
1232
last_object = self._check_record(kind, record,
1233
checker, last_object, current_keys[(kind,) + record.key])
1237
def _check_record(self, kind, record, checker, last_object, item_data):
1238
"""Check a single text from this repository."""
1239
if kind == 'inventories':
1240
rev_id = record.key[0]
1241
inv = self.deserialise_inventory(rev_id,
1242
record.get_bytes_as('fulltext'))
1243
if last_object is not None:
1244
delta = inv._make_delta(last_object)
1245
for old_path, path, file_id, ie in delta:
1248
ie.check(checker, rev_id, inv)
1250
for path, ie in inv.iter_entries():
1251
ie.check(checker, rev_id, inv)
1252
if self._format.fast_deltas:
1254
elif kind == 'chk_bytes':
1255
# No code written to check chk_bytes for this repo format.
1256
checker._report_items.append(
1257
'unsupported key type chk_bytes for %s' % (record.key,))
1258
elif kind == 'texts':
1259
self._check_text(record, checker, item_data)
1261
checker._report_items.append(
1262
'unknown key type %s for %s' % (kind, record.key))
1264
def _check_text(self, record, checker, item_data):
1265
"""Check a single text."""
1266
# Check it is extractable.
1267
# TODO: check length.
1268
if record.storage_kind == 'chunked':
1269
chunks = record.get_bytes_as(record.storage_kind)
1270
sha1 = osutils.sha_strings(chunks)
1271
length = sum(map(len, chunks))
1273
content = record.get_bytes_as('fulltext')
1274
sha1 = osutils.sha_string(content)
1275
length = len(content)
1276
if item_data and sha1 != item_data[1]:
1277
checker._report_items.append(
1278
'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' %
1279
(record.key, sha1, item_data[1], item_data[2]))
1149
1282
def create(a_bzrdir):
1150
1283
"""Construct the current default format repository in a_bzrdir."""
1693
1856
@needs_read_lock
1694
1857
def get_revisions(self, revision_ids):
1695
"""Get many revisions at once."""
1858
"""Get many revisions at once.
1860
Repositories that need to check data on every revision read should
1861
subclass this method.
1696
1863
return self._get_revisions(revision_ids)
1698
1865
@needs_read_lock
1699
1866
def _get_revisions(self, revision_ids):
1700
1867
"""Core work logic to get many revisions without sanity checks."""
1701
for rev_id in revision_ids:
1702
if not rev_id or not isinstance(rev_id, basestring):
1703
raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
1869
for revid, rev in self._iter_revisions(revision_ids):
1871
raise errors.NoSuchRevision(self, revid)
1873
return [revs[revid] for revid in revision_ids]
1875
def _iter_revisions(self, revision_ids):
1876
"""Iterate over revision objects.
1878
:param revision_ids: An iterable of revisions to examine. None may be
1879
passed to request all revisions known to the repository. Note that
1880
not all repositories can find unreferenced revisions; for those
1881
repositories only referenced ones will be returned.
1882
:return: An iterator of (revid, revision) tuples. Absent revisions (
1883
those asked for but not available) are returned as (revid, None).
1885
if revision_ids is None:
1886
revision_ids = self.all_revision_ids()
1888
for rev_id in revision_ids:
1889
if not rev_id or not isinstance(rev_id, basestring):
1890
raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
1704
1891
keys = [(key,) for key in revision_ids]
1705
1892
stream = self.revisions.get_record_stream(keys, 'unordered', True)
1707
1893
for record in stream:
1894
revid = record.key[0]
1708
1895
if record.storage_kind == 'absent':
1709
raise errors.NoSuchRevision(self, record.key[0])
1710
text = record.get_bytes_as('fulltext')
1711
rev = self._serializer.read_revision_from_string(text)
1712
revs[record.key[0]] = rev
1713
return [revs[revid] for revid in revision_ids]
1898
text = record.get_bytes_as('fulltext')
1899
rev = self._serializer.read_revision_from_string(text)
1715
1902
@needs_read_lock
1716
1903
def get_revision_xml(self, revision_id):
2188
2374
:param revision_ids: The expected revision ids of the inventories.
2375
:param ordering: optional ordering, e.g. 'topological'. If not
2376
specified, the order of revision_ids will be preserved (by
2377
buffering if necessary).
2189
2378
:return: An iterator of inventories.
2191
2380
if ((None in revision_ids)
2192
2381
or (_mod_revision.NULL_REVISION in revision_ids)):
2193
2382
raise ValueError('cannot get null revision inventory')
2194
return self._iter_inventories(revision_ids)
2383
return self._iter_inventories(revision_ids, ordering)
2196
def _iter_inventories(self, revision_ids):
2385
def _iter_inventories(self, revision_ids, ordering):
2197
2386
"""single-document based inventory iteration."""
2198
for text, revision_id in self._iter_inventory_xmls(revision_ids):
2387
inv_xmls = self._iter_inventory_xmls(revision_ids, ordering)
2388
for text, revision_id in inv_xmls:
2199
2389
yield self.deserialise_inventory(revision_id, text)
2201
def _iter_inventory_xmls(self, revision_ids):
2391
def _iter_inventory_xmls(self, revision_ids, ordering):
2392
if ordering is None:
2393
order_as_requested = True
2394
ordering = 'unordered'
2396
order_as_requested = False
2202
2397
keys = [(revision_id,) for revision_id in revision_ids]
2203
stream = self.inventories.get_record_stream(keys, 'unordered', True)
2400
if order_as_requested:
2401
key_iter = iter(keys)
2402
next_key = key_iter.next()
2403
stream = self.inventories.get_record_stream(keys, ordering, True)
2204
2404
text_chunks = {}
2205
2405
for record in stream:
2206
2406
if record.storage_kind != 'absent':
2207
text_chunks[record.key] = record.get_bytes_as('chunked')
2407
chunks = record.get_bytes_as('chunked')
2408
if order_as_requested:
2409
text_chunks[record.key] = chunks
2411
yield ''.join(chunks), record.key[-1]
2209
2413
raise errors.NoSuchRevision(self, record.key)
2211
chunks = text_chunks.pop(key)
2212
yield ''.join(chunks), key[-1]
2414
if order_as_requested:
2415
# Yield as many results as we can while preserving order.
2416
while next_key in text_chunks:
2417
chunks = text_chunks.pop(next_key)
2418
yield ''.join(chunks), next_key[-1]
2420
next_key = key_iter.next()
2421
except StopIteration:
2422
# We still want to fully consume the get_record_stream,
2423
# just in case it is not actually finished at this point
2214
2427
def deserialise_inventory(self, revision_id, xml):
2215
2428
"""Transform the xml into an inventory object.
3492
3724
# This is redundant with format.check_conversion_target(), however that
3493
3725
# raises an exception, and we just want to say "False" as in we won't
3494
3726
# support converting between these formats.
3727
if 'IDS_never' in debug.debug_flags:
3495
3729
if source.supports_rich_root() and not target.supports_rich_root():
3497
3731
if (source._format.supports_tree_reference
3498
3732
and not target._format.supports_tree_reference):
3734
if target._fallback_repositories and target._format.supports_chks:
3735
# IDS doesn't know how to copy CHKs for the parent inventories it
3736
# adds to stacked repos.
3738
if 'IDS_always' in debug.debug_flags:
3740
# Only use this code path for local source and target. IDS does far
3741
# too much IO (both bandwidth and roundtrips) over a network.
3742
if not source.bzrdir.transport.base.startswith('file:///'):
3744
if not target.bzrdir.transport.base.startswith('file:///'):
3502
def _get_delta_for_revision(self, tree, parent_ids, basis_id, cache):
3748
def _get_trees(self, revision_ids, cache):
3750
for rev_id in revision_ids:
3752
possible_trees.append((rev_id, cache[rev_id]))
3754
# Not cached, but inventory might be present anyway.
3756
tree = self.source.revision_tree(rev_id)
3757
except errors.NoSuchRevision:
3758
# Nope, parent is ghost.
3761
cache[rev_id] = tree
3762
possible_trees.append((rev_id, tree))
3763
return possible_trees
3765
def _get_delta_for_revision(self, tree, parent_ids, possible_trees):
3503
3766
"""Get the best delta and base for this revision.
3505
3768
:return: (basis_id, delta)
3507
possible_trees = [(parent_id, cache[parent_id])
3508
for parent_id in parent_ids
3509
if parent_id in cache]
3510
if len(possible_trees) == 0:
3511
# There either aren't any parents, or the parents aren't in the
3512
# cache, so just use the last converted tree
3513
possible_trees.append((basis_id, cache[basis_id]))
3771
# Generate deltas against each tree, to find the shortest.
3772
texts_possibly_new_in_tree = set()
3515
3773
for basis_id, basis_tree in possible_trees:
3516
3774
delta = tree.inventory._make_delta(basis_tree.inventory)
3775
for old_path, new_path, file_id, new_entry in delta:
3776
if new_path is None:
3777
# This file_id isn't present in the new rev, so we don't
3781
# Rich roots are handled elsewhere...
3783
kind = new_entry.kind
3784
if kind != 'directory' and kind != 'file':
3785
# No text record associated with this inventory entry.
3787
# This is a directory or file that has changed somehow.
3788
texts_possibly_new_in_tree.add((file_id, new_entry.revision))
3517
3789
deltas.append((len(delta), basis_id, delta))
3519
3791
return deltas[0][1:]
3521
def _get_parent_keys(self, root_key, parent_map):
3522
"""Get the parent keys for a given root id."""
3523
root_id, rev_id = root_key
3524
# Include direct parents of the revision, but only if they used
3525
# the same root_id and are heads.
3527
for parent_id in parent_map[rev_id]:
3528
if parent_id == _mod_revision.NULL_REVISION:
3530
if parent_id not in self._revision_id_to_root_id:
3531
# We probably didn't read this revision, go spend the
3532
# extra effort to actually check
3534
tree = self.source.revision_tree(parent_id)
3535
except errors.NoSuchRevision:
3536
# Ghost, fill out _revision_id_to_root_id in case we
3537
# encounter this again.
3538
# But set parent_root_id to None since we don't really know
3539
parent_root_id = None
3541
parent_root_id = tree.get_root_id()
3542
self._revision_id_to_root_id[parent_id] = None
3544
parent_root_id = self._revision_id_to_root_id[parent_id]
3545
if root_id == parent_root_id:
3546
# With stacking we _might_ want to refer to a non-local
3547
# revision, but this code path only applies when we have the
3548
# full content available, so ghosts really are ghosts, not just
3549
# the edge of local data.
3550
parent_keys.append((parent_id,))
3552
# root_id may be in the parent anyway.
3554
tree = self.source.revision_tree(parent_id)
3555
except errors.NoSuchRevision:
3556
# ghost, can't refer to it.
3560
parent_keys.append((tree.inventory[root_id].revision,))
3561
except errors.NoSuchId:
3564
g = graph.Graph(self.source.revisions)
3565
heads = g.heads(parent_keys)
3567
for key in parent_keys:
3568
if key in heads and key not in selected_keys:
3569
selected_keys.append(key)
3570
return tuple([(root_id,)+ key for key in selected_keys])
3572
def _new_root_data_stream(self, root_keys_to_create, parent_map):
3573
for root_key in root_keys_to_create:
3574
parent_keys = self._get_parent_keys(root_key, parent_map)
3575
yield versionedfile.FulltextContentFactory(root_key,
3576
parent_keys, None, '')
3578
def _fetch_batch(self, revision_ids, basis_id, cache):
3793
def _fetch_parent_invs_for_stacking(self, parent_map, cache):
3794
"""Find all parent revisions that are absent, but for which the
3795
inventory is present, and copy those inventories.
3797
This is necessary to preserve correctness when the source is stacked
3798
without fallbacks configured. (Note that in cases like upgrade the
3799
source may not have _fallback_repositories even though it is
3803
for parents in parent_map.values():
3804
parent_revs.update(parents)
3805
present_parents = self.source.get_parent_map(parent_revs)
3806
absent_parents = set(parent_revs).difference(present_parents)
3807
parent_invs_keys_for_stacking = self.source.inventories.get_parent_map(
3808
(rev_id,) for rev_id in absent_parents)
3809
parent_inv_ids = [key[-1] for key in parent_invs_keys_for_stacking]
3810
for parent_tree in self.source.revision_trees(parent_inv_ids):
3811
current_revision_id = parent_tree.get_revision_id()
3812
parents_parents_keys = parent_invs_keys_for_stacking[
3813
(current_revision_id,)]
3814
parents_parents = [key[-1] for key in parents_parents_keys]
3815
basis_id = _mod_revision.NULL_REVISION
3816
basis_tree = self.source.revision_tree(basis_id)
3817
delta = parent_tree.inventory._make_delta(basis_tree.inventory)
3818
self.target.add_inventory_by_delta(
3819
basis_id, delta, current_revision_id, parents_parents)
3820
cache[current_revision_id] = parent_tree
3822
def _fetch_batch(self, revision_ids, basis_id, cache, a_graph=None):
3579
3823
"""Fetch across a few revisions.
3581
3825
:param revision_ids: The revisions to copy
3582
3826
:param basis_id: The revision_id of a tree that must be in cache, used
3583
3827
as a basis for delta when no other base is available
3584
3828
:param cache: A cache of RevisionTrees that we can use.
3829
:param a_graph: A Graph object to determine the heads() of the
3830
rich-root data stream.
3585
3831
:return: The revision_id of the last converted tree. The RevisionTree
3586
3832
for it will be in cache
3593
3839
pending_deltas = []
3594
3840
pending_revisions = []
3595
3841
parent_map = self.source.get_parent_map(revision_ids)
3842
self._fetch_parent_invs_for_stacking(parent_map, cache)
3596
3843
for tree in self.source.revision_trees(revision_ids):
3844
# Find an inventory delta for this revision.
3845
# Find text entries that need to be copied, too.
3597
3846
current_revision_id = tree.get_revision_id()
3598
3847
parent_ids = parent_map.get(current_revision_id, ())
3848
parent_trees = self._get_trees(parent_ids, cache)
3849
possible_trees = list(parent_trees)
3850
if len(possible_trees) == 0:
3851
# There either aren't any parents, or the parents are ghosts,
3852
# so just use the last converted tree.
3853
possible_trees.append((basis_id, cache[basis_id]))
3599
3854
basis_id, delta = self._get_delta_for_revision(tree, parent_ids,
3856
revision = self.source.get_revision(current_revision_id)
3857
pending_deltas.append((basis_id, delta,
3858
current_revision_id, revision.parent_ids))
3601
3859
if self._converting_to_rich_root:
3602
3860
self._revision_id_to_root_id[current_revision_id] = \
3603
3861
tree.get_root_id()
3604
# Find text entries that need to be copied
3862
# Determine which texts are present in this revision but not in
3863
# any of the available parents.
3864
texts_possibly_new_in_tree = set()
3605
3865
for old_path, new_path, file_id, entry in delta:
3606
if new_path is not None:
3609
if not self.target.supports_rich_root():
3610
# The target doesn't support rich root, so we don't
3613
if self._converting_to_rich_root:
3614
# This can't be copied normally, we have to insert
3616
root_keys_to_create.add((file_id, entry.revision))
3618
text_keys.add((file_id, entry.revision))
3619
revision = self.source.get_revision(current_revision_id)
3620
pending_deltas.append((basis_id, delta,
3621
current_revision_id, revision.parent_ids))
3866
if new_path is None:
3867
# This file_id isn't present in the new rev
3871
if not self.target.supports_rich_root():
3872
# The target doesn't support rich root, so we don't
3875
if self._converting_to_rich_root:
3876
# This can't be copied normally, we have to insert
3878
root_keys_to_create.add((file_id, entry.revision))
3881
texts_possibly_new_in_tree.add((file_id, entry.revision))
3882
for basis_id, basis_tree in possible_trees:
3883
basis_inv = basis_tree.inventory
3884
for file_key in list(texts_possibly_new_in_tree):
3885
file_id, file_revision = file_key
3887
entry = basis_inv[file_id]
3888
except errors.NoSuchId:
3890
if entry.revision == file_revision:
3891
texts_possibly_new_in_tree.remove(file_key)
3892
text_keys.update(texts_possibly_new_in_tree)
3622
3893
pending_revisions.append(revision)
3623
3894
cache[current_revision_id] = tree
3624
3895
basis_id = current_revision_id
4049
4357
self.target_repo.pack(hint=hint)
4050
4358
return [], set()
4052
def _extract_and_insert_inventories(self, substream, serializer):
4360
def _extract_and_insert_inventory_deltas(self, substream, serializer):
4361
target_rich_root = self.target_repo._format.rich_root_data
4362
target_tree_refs = self.target_repo._format.supports_tree_reference
4363
for record in substream:
4364
# Insert the delta directly
4365
inventory_delta_bytes = record.get_bytes_as('fulltext')
4366
deserialiser = inventory_delta.InventoryDeltaDeserializer()
4368
parse_result = deserialiser.parse_text_bytes(
4369
inventory_delta_bytes)
4370
except inventory_delta.IncompatibleInventoryDelta, err:
4371
trace.mutter("Incompatible delta: %s", err.msg)
4372
raise errors.IncompatibleRevision(self.target_repo._format)
4373
basis_id, new_id, rich_root, tree_refs, inv_delta = parse_result
4374
revision_id = new_id
4375
parents = [key[0] for key in record.parents]
4376
self.target_repo.add_inventory_by_delta(
4377
basis_id, inv_delta, revision_id, parents)
4379
def _extract_and_insert_inventories(self, substream, serializer,
4053
4381
"""Generate a new inventory versionedfile in target, converting data.
4055
4383
The inventory is retrieved from the source, (deserializing it), and
4056
4384
stored in the target (reserializing it in a different format).
4386
target_rich_root = self.target_repo._format.rich_root_data
4387
target_tree_refs = self.target_repo._format.supports_tree_reference
4058
4388
for record in substream:
4389
# It's not a delta, so it must be a fulltext in the source
4390
# serializer's format.
4059
4391
bytes = record.get_bytes_as('fulltext')
4060
4392
revision_id = record.key[0]
4061
4393
inv = serializer.read_inventory_from_string(bytes, revision_id)
4062
4394
parents = [key[0] for key in record.parents]
4063
4395
self.target_repo.add_inventory(revision_id, inv, parents)
4396
# No need to keep holding this full inv in memory when the rest of
4397
# the substream is likely to be all deltas.
4065
4400
def _extract_and_insert_revisions(self, substream, serializer):
4066
4401
for record in substream:
4224
4564
return (not self.from_repository._format.rich_root_data and
4225
4565
self.to_format.rich_root_data)
4227
def _get_inventory_stream(self, revision_ids):
4567
def _get_inventory_stream(self, revision_ids, missing=False):
4228
4568
from_format = self.from_repository._format
4229
if (from_format.supports_chks and self.to_format.supports_chks
4230
and (from_format._serializer == self.to_format._serializer)):
4231
# Both sides support chks, and they use the same serializer, so it
4232
# is safe to transmit the chk pages and inventory pages across
4234
return self._get_chk_inventory_stream(revision_ids)
4235
elif (not from_format.supports_chks):
4236
# Source repository doesn't support chks. So we can transmit the
4237
# inventories 'as-is' and either they are just accepted on the
4238
# target, or the Sink will properly convert it.
4239
return self._get_simple_inventory_stream(revision_ids)
4569
if (from_format.supports_chks and self.to_format.supports_chks and
4570
from_format.network_name() == self.to_format.network_name()):
4571
raise AssertionError(
4572
"this case should be handled by GroupCHKStreamSource")
4573
elif 'forceinvdeltas' in debug.debug_flags:
4574
return self._get_convertable_inventory_stream(revision_ids,
4575
delta_versus_null=missing)
4576
elif from_format.network_name() == self.to_format.network_name():
4578
return self._get_simple_inventory_stream(revision_ids,
4580
elif (not from_format.supports_chks and not self.to_format.supports_chks
4581
and from_format._serializer == self.to_format._serializer):
4582
# Essentially the same format.
4583
return self._get_simple_inventory_stream(revision_ids,
4241
# XXX: Hack to make not-chk->chk fetch: copy the inventories as
4242
# inventories. Note that this should probably be done somehow
4243
# as part of bzrlib.repository.StreamSink. Except JAM couldn't
4244
# figure out how a non-chk repository could possibly handle
4245
# deserializing an inventory stream from a chk repo, as it
4246
# doesn't have a way to understand individual pages.
4247
return self._get_convertable_inventory_stream(revision_ids)
4586
# Any time we switch serializations, we want to use an
4587
# inventory-delta based approach.
4588
return self._get_convertable_inventory_stream(revision_ids,
4589
delta_versus_null=missing)
4249
def _get_simple_inventory_stream(self, revision_ids):
4591
def _get_simple_inventory_stream(self, revision_ids, missing=False):
4592
# NB: This currently reopens the inventory weave in source;
4593
# using a single stream interface instead would avoid this.
4250
4594
from_weave = self.from_repository.inventories
4596
delta_closure = True
4598
delta_closure = not self.delta_on_metadata()
4251
4599
yield ('inventories', from_weave.get_record_stream(
4252
4600
[(rev_id,) for rev_id in revision_ids],
4253
self.inventory_fetch_order(),
4254
not self.delta_on_metadata()))
4256
def _get_chk_inventory_stream(self, revision_ids):
4257
"""Fetch the inventory texts, along with the associated chk maps."""
4258
# We want an inventory outside of the search set, so that we can filter
4259
# out uninteresting chk pages. For now we use
4260
# _find_revision_outside_set, but if we had a Search with cut_revs, we
4261
# could use that instead.
4262
start_rev_id = self.from_repository._find_revision_outside_set(
4264
start_rev_key = (start_rev_id,)
4265
inv_keys_to_fetch = [(rev_id,) for rev_id in revision_ids]
4266
if start_rev_id != _mod_revision.NULL_REVISION:
4267
inv_keys_to_fetch.append((start_rev_id,))
4268
# Any repo that supports chk_bytes must also support out-of-order
4269
# insertion. At least, that is how we expect it to work
4270
# We use get_record_stream instead of iter_inventories because we want
4271
# to be able to insert the stream as well. We could instead fetch
4272
# allowing deltas, and then iter_inventories, but we don't know whether
4273
# source or target is more 'local' anyway.
4274
inv_stream = self.from_repository.inventories.get_record_stream(
4275
inv_keys_to_fetch, 'unordered',
4276
True) # We need them as full-texts so we can find their references
4277
uninteresting_chk_roots = set()
4278
interesting_chk_roots = set()
4279
def filter_inv_stream(inv_stream):
4280
for idx, record in enumerate(inv_stream):
4281
### child_pb.update('fetch inv', idx, len(inv_keys_to_fetch))
4282
bytes = record.get_bytes_as('fulltext')
4283
chk_inv = inventory.CHKInventory.deserialise(
4284
self.from_repository.chk_bytes, bytes, record.key)
4285
if record.key == start_rev_key:
4286
uninteresting_chk_roots.add(chk_inv.id_to_entry.key())
4287
p_id_map = chk_inv.parent_id_basename_to_file_id
4288
if p_id_map is not None:
4289
uninteresting_chk_roots.add(p_id_map.key())
4292
interesting_chk_roots.add(chk_inv.id_to_entry.key())
4293
p_id_map = chk_inv.parent_id_basename_to_file_id
4294
if p_id_map is not None:
4295
interesting_chk_roots.add(p_id_map.key())
4296
### pb.update('fetch inventory', 0, 2)
4297
yield ('inventories', filter_inv_stream(inv_stream))
4298
# Now that we have worked out all of the interesting root nodes, grab
4299
# all of the interesting pages and insert them
4300
### pb.update('fetch inventory', 1, 2)
4301
interesting = chk_map.iter_interesting_nodes(
4302
self.from_repository.chk_bytes, interesting_chk_roots,
4303
uninteresting_chk_roots)
4304
def to_stream_adapter():
4305
"""Adapt the iter_interesting_nodes result to a single stream.
4307
iter_interesting_nodes returns records as it processes them, along
4308
with keys. However, we only want to return the records themselves.
4310
for record, items in interesting:
4311
if record is not None:
4313
# XXX: We could instead call get_record_stream(records.keys())
4314
# ATM, this will always insert the records as fulltexts, and
4315
# requires that you can hang on to records once you have gone
4316
# on to the next one. Further, it causes the target to
4317
# recompress the data. Testing shows it to be faster than
4318
# requesting the records again, though.
4319
yield ('chk_bytes', to_stream_adapter())
4320
### pb.update('fetch inventory', 2, 2)
4322
def _get_convertable_inventory_stream(self, revision_ids):
4323
# XXX: One of source or target is using chks, and they don't have
4324
# compatible serializations. The StreamSink code expects to be
4325
# able to convert on the target, so we need to put
4326
# bytes-on-the-wire that can be converted
4327
yield ('inventories', self._stream_invs_as_fulltexts(revision_ids))
4329
def _stream_invs_as_fulltexts(self, revision_ids):
4601
self.inventory_fetch_order(), delta_closure))
4603
def _get_convertable_inventory_stream(self, revision_ids,
4604
delta_versus_null=False):
4605
# The source is using CHKs, but the target either doesn't or it has a
4606
# different serializer. The StreamSink code expects to be able to
4607
# convert on the target, so we need to put bytes-on-the-wire that can
4608
# be converted. That means inventory deltas (if the remote is <1.19,
4609
# RemoteStreamSink will fall back to VFS to insert the deltas).
4610
yield ('inventory-deltas',
4611
self._stream_invs_as_deltas(revision_ids,
4612
delta_versus_null=delta_versus_null))
4614
def _stream_invs_as_deltas(self, revision_ids, delta_versus_null=False):
4615
"""Return a stream of inventory-deltas for the given rev ids.
4617
:param revision_ids: The list of inventories to transmit
4618
:param delta_versus_null: Don't try to find a minimal delta for this
4619
entry, instead compute the delta versus the NULL_REVISION. This
4620
effectively streams a complete inventory. Used for stuff like
4621
filling in missing parents, etc.
4330
4623
from_repo = self.from_repository
4331
from_serializer = from_repo._format._serializer
4332
4624
revision_keys = [(rev_id,) for rev_id in revision_ids]
4333
4625
parent_map = from_repo.inventories.get_parent_map(revision_keys)
4334
for inv in self.from_repository.iter_inventories(revision_ids):
4335
# XXX: This is a bit hackish, but it works. Basically,
4336
# CHKSerializer 'accidentally' supports
4337
# read/write_inventory_to_string, even though that is never
4338
# the format that is stored on disk. It *does* give us a
4339
# single string representation for an inventory, so live with
4341
# This would be far better if we had a 'serialized inventory
4342
# delta' form. Then we could use 'inventory._make_delta', and
4343
# transmit that. This would both be faster to generate, and
4344
# result in fewer bytes-on-the-wire.
4345
as_bytes = from_serializer.write_inventory_to_string(inv)
4626
# XXX: possibly repos could implement a more efficient iter_inv_deltas
4628
inventories = self.from_repository.iter_inventories(
4629
revision_ids, 'topological')
4630
format = from_repo._format
4631
invs_sent_so_far = set([_mod_revision.NULL_REVISION])
4632
inventory_cache = lru_cache.LRUCache(50)
4633
null_inventory = from_repo.revision_tree(
4634
_mod_revision.NULL_REVISION).inventory
4635
# XXX: ideally the rich-root/tree-refs flags would be per-revision, not
4636
# per-repo (e.g. streaming a non-rich-root revision out of a rich-root
4637
# repo back into a non-rich-root repo ought to be allowed)
4638
serializer = inventory_delta.InventoryDeltaSerializer(
4639
versioned_root=format.rich_root_data,
4640
tree_references=format.supports_tree_reference)
4641
for inv in inventories:
4346
4642
key = (inv.revision_id,)
4347
4643
parent_keys = parent_map.get(key, ())
4645
if not delta_versus_null and parent_keys:
4646
# The caller did not ask for complete inventories and we have
4647
# some parents that we can delta against. Make a delta against
4648
# each parent so that we can find the smallest.
4649
parent_ids = [parent_key[0] for parent_key in parent_keys]
4650
for parent_id in parent_ids:
4651
if parent_id not in invs_sent_so_far:
4652
# We don't know that the remote side has this basis, so
4655
if parent_id == _mod_revision.NULL_REVISION:
4656
parent_inv = null_inventory
4658
parent_inv = inventory_cache.get(parent_id, None)
4659
if parent_inv is None:
4660
parent_inv = from_repo.get_inventory(parent_id)
4661
candidate_delta = inv._make_delta(parent_inv)
4662
if (delta is None or
4663
len(delta) > len(candidate_delta)):
4664
delta = candidate_delta
4665
basis_id = parent_id
4667
# Either none of the parents ended up being suitable, or we
4668
# were asked to delta against NULL
4669
basis_id = _mod_revision.NULL_REVISION
4670
delta = inv._make_delta(null_inventory)
4671
invs_sent_so_far.add(inv.revision_id)
4672
inventory_cache[inv.revision_id] = inv
4673
delta_serialized = ''.join(
4674
serializer.delta_to_lines(basis_id, key[-1], delta))
4348
4675
yield versionedfile.FulltextContentFactory(
4349
key, parent_keys, None, as_bytes)
4676
key, parent_keys, None, delta_serialized)
4352
4679
def _iter_for_revno(repo, partial_history_cache, stop_index=None,