241
241
# know for unselected inventories whether all their required
242
242
# texts are present in the other repository - it could be
244
if (self.from_repository._format.supports_chks or
245
self.to_repository._format.supports_chks):
246
# Hack to make chk->chk fetch: copy the inventories as
244
if (self.from_repository._format.supports_chks and
245
self.to_repository._format.supports_chks):
246
self._fetch_chk_inventories(revs, pb)
247
elif (self.from_repository._format.supports_chks or
248
self.to_repository._format.supports_chks):
249
# Hack to make not-chk->chk fetch: copy the inventories as
248
251
total = len(revs)
249
252
for pos, inv in enumerate(
283
286
# revision stream, when you weren't ever supposed to have deltas.
284
287
# So we now *force* fulltext copying for signatures and revisions
289
def _fetch_chk_inventories(self, revs, pb):
    """Fetch the inventory texts, along with the associated chk maps.

    Inventories for ``revs`` are copied from ``self.from_repository`` to
    ``self.to_repository`` as full texts, and then the chk pages reachable
    from those inventories (id_to_entry and, when present,
    parent_id_basename_to_file_id maps) are copied as well, filtered
    against a single revision outside the fetch set so already-present
    pages are not re-transmitted.

    :param revs: The revision ids whose inventories should be fetched.
    :param pb: A progress bar, passed through to
        chk_map.iter_interesting_nodes.
    """
    from bzrlib import inventory, chk_map
    # We want an inventory outside of the search set, so that we can filter
    # out uninteresting chk pages. For now we use
    # _find_revision_outside_set, but if we had a Search with cut_revs, we
    # could use that instead.
    start_rev_id = self.from_repository._find_revision_outside_set(revs)
    start_rev_key = (start_rev_id,)
    inv_keys_to_fetch = [(rev_id,) for rev_id in revs]
    if start_rev_id != NULL_REVISION:
        inv_keys_to_fetch.append((start_rev_id,))
    # Any repo that supports chk_bytes must also support out-of-order
    # insertion. At least, that is how we expect it to work
    # We use get_record_stream instead of iter_inventories because we want
    # to be able to insert the stream as well. We could instead fetch
    # allowing deltas, and then iter_inventories, but we don't know whether
    # source or target is more 'local' anyway.
    inv_stream = self.from_repository.inventories.get_record_stream(
        inv_keys_to_fetch, 'unordered',
        True) # We need them as full-texts so we can find their references
    uninteresting_chk_roots = set()
    interesting_chk_roots = set()
    for record in inv_stream:
        bytes = record.get_bytes_as('fulltext')
        chk_inv = inventory.CHKInventory.deserialise(
            self.from_repository.chk_bytes, bytes, record.key)
        if record.key == start_rev_key:
            # The boundary revision only supplies the 'uninteresting'
            # (already present) chk roots; its inventory is not copied.
            uninteresting_chk_roots.add(chk_inv.id_to_entry.key())
            p_id_map = chk_inv.parent_id_basename_to_file_id
            if p_id_map is not None:
                uninteresting_chk_roots.add(p_id_map.key())
        else:
            self.to_repository.inventories.insert_record_stream([record])
            interesting_chk_roots.add(chk_inv.id_to_entry.key())
            p_id_map = chk_inv.parent_id_basename_to_file_id
            if p_id_map is not None:
                interesting_chk_roots.add(p_id_map.key())
    # Now that we have worked out all of the interesting root nodes, grab
    # all of the interesting pages and insert them
    interesting = chk_map.iter_interesting_nodes(
        self.from_repository.chk_bytes, interesting_chk_roots,
        uninteresting_chk_roots, pb=pb)
    def to_stream_adapter():
        """Adapt the iter_interesting_nodes result to a single stream.

        iter_interesting_nodes returns records as it processes them, which
        can be in batches. But we only want a single stream to be inserted.
        """
        for record, items in interesting:
            for value in record.itervalues():
                yield value
    # XXX: We could instead call get_record_stream(records.keys())
    #      ATM, this will always insert the records as fulltexts, and
    #      requires that you can hang on to records once you have gone
    #      on to the next one. Further, it causes the target to
    #      recompress the data. Testing shows it to be faster than
    #      requesting the records again, though.
    self.to_repository.chk_bytes.insert_record_stream(
        to_stream_adapter())

def _generate_root_texts(self, revs):
287
351
"""This will be called by __fetch between fetching weave texts and
288
352
fetching the inventory weave.