# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

from __future__ import absolute_import

from .... import (
    bencode,
    errors,
    lru_cache,
    multiparent,
    osutils,
    repository as _mod_repository,
    revision as _mod_revision,
    ui,
    )
from ... import (
    versionedfile as _mod_versionedfile,
    )
from .. import bundle_data, serializer as bundle_serializer
from ....i18n import ngettext
from ....sixish import (
    BytesIO,
    )


class _MPDiffInventoryGenerator(_mod_versionedfile._MPDiffGenerator):
    """Generate inventory diffs from serialized inventories."""

    def __init__(self, repo, inventory_keys):
        super(_MPDiffInventoryGenerator, self).__init__(repo.inventories,
            inventory_keys)
        self.repo = repo

    def iter_diffs(self):
        """Compute the diffs one at a time."""
        # This is used instead of compute_diffs() because we guarantee our
        # ordering of inventories, so we don't have to do any buffering.
        self._find_needed_keys()
        # We actually use a slightly different ordering. We grab all of the
        # parents first, and then grab the ordered requests.
        needed_ids = [k[-1] for k in self.present_parents]
        needed_ids.extend([k[-1] for k in self.ordered_keys])
        inv_to_chunks = self.repo._serializer.write_inventory_to_chunks
        for inv in self.repo.iter_inventories(needed_ids):
            revision_id = inv.revision_id
            key = (revision_id,)
            if key in self.present_parents:
                # Not a key we will transmit, which is a shame, since it
                # means bundles don't work with stacked branches.
                parent_ids = None
            else:
                parent_ids = [k[-1] for k in self.parent_map[key]]
            as_chunks = inv_to_chunks(inv)
            self._process_one_record(key, as_chunks)
            if parent_ids is None:
                continue
            diff = self.diffs.pop(key)
            sha1 = osutils.sha_strings(as_chunks)
            yield revision_id, parent_ids, sha1, diff
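
    # A minimal sketch of how this generator is driven (hypothetical key;
    # see _add_inventory_mpdiffs_from_serializer below for the real caller):
    #
    #   gen = _MPDiffInventoryGenerator(repo, [(b'rev-id',)])
    #   for revision_id, parent_ids, sha1, diff in gen.iter_diffs():
    #       patch_bytes = b''.join(diff.to_patch())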


class BundleWriter(object):

    def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
        """Add a record for a fulltext

        :revision_id: The revision id of the fulltext being added.
        """
        self._add_record(bytes, {b'parents': parents,
                                 b'storage_kind': b'fulltext'},
                         repo_kind, revision_id, None)

    def add_info_record(self, kwargs):
        """Add an info record to the bundle

        Any entries may be supplied, except 'storage_kind'.
        Values must be lists, strings, integers, dicts, or a combination.
        """
        kwargs[b'storage_kind'] = b'header'
        self._add_record(None, kwargs, 'info', None, None)
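
    # For example (made-up values; the real caller supplies the source
    # serializer format and the rich-root flag consumed by _handle_info):
    #
    #   writer.add_info_record({b'serializer': b'10',
    #                           b'supports_rich_root': 1})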

    @staticmethod
    def encode_name(content_kind, revision_id, file_id=None):
        """Encode semantic ids as a container name"""
        if content_kind not in ('revision', 'file', 'inventory', 'signature',
                                'info'):
            raise ValueError(content_kind)
        if content_kind == 'file':
            if file_id is None:
                raise AssertionError()
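
    # The elided tail of this method joins the kind and ids into the
    # container name, e.g. roughly b'file/<revision_id>/<file_id>';
    # decode_name() on the read side reverses the encoding.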

                raise errors.BadBundle('Record has %d names instead of 1'
                                       % len(names))
            metadata = bencode.bdecode(bytes)
            if metadata[b'storage_kind'] == b'header':
                bytes = None
            else:
                _unused, bytes = next(iterator)
            yield (bytes, metadata) + self.decode_name(names[0][0])
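
            # Layout note: every record starts as a bencoded metadata dict,
            # and all storage kinds except 'header' are followed by a second
            # container record carrying the body, which is why a non-header
            # read pulls one more record from the iterator above.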


class BundleSerializerV4(bundle_serializer.BundleSerializer):
    """Implement the high-level bundle interface"""

    def write(self, repository, revision_ids, forced_bases, fileobj):
        """Write a bundle to a file-like object

        For backwards-compatibility only
        """
        write_op = BundleWriteOperation.from_old_args(repository, revision_ids,
                                                      forced_bases, fileobj)
        return write_op.do_write()

    def write_bundle(self, repository, target, base, fileobj):
        """Write a bundle to a file object"""
        write_op = BundleWriteOperation(base, target, repository, fileobj)
        return write_op.do_write()

    def write_revisions(self):
        """Write bundle records for all revisions and signatures"""
        inv_vf = self.repository.inventories
        topological_order = [key[-1] for key in multiparent.topo_iter_keys(
            inv_vf, self.revision_keys)]
        revision_order = topological_order
        if self.target is not None and self.target in self.revision_ids:
            # Make sure the target revision is always the last entry
            revision_order = list(topological_order)
            revision_order.remove(self.target)
            revision_order.append(self.target)
        if self.repository._serializer.support_altered_by_hack:
            # For repositories that support_altered_by_hack,
            # inventories.make_mpdiffs() contains all the data about the tree
            # shape. Formats without support_altered_by_hack require
            # chk_bytes/etc, so we use a different code path.
            self._add_mp_records_keys('inventory', inv_vf,
                                      [(revid,) for revid in topological_order])
        else:
            # Inventories should always be added in pure-topological order, so
            # that we can apply the mpdiff for the child to the parent texts.
            self._add_inventory_mpdiffs_from_serializer(topological_order)
        self._add_revision_texts(revision_order)

    def _add_inventory_mpdiffs_from_serializer(self, revision_order):
        """Generate mpdiffs by serializing inventories.

        The current repository only has part of the tree shape information in
        the 'inventories' vf. So we use serializer.write_inventory_to_chunks
        to get a 'full' representation of the tree shape, and then generate
        mpdiffs on that data stream. This stream can then be reconstructed on
        the other side.
        """
        inventory_key_order = [(r,) for r in revision_order]
        generator = _MPDiffInventoryGenerator(self.repository,
                                              inventory_key_order)
        for revision_id, parent_ids, sha1, diff in generator.iter_diffs():
            text = b''.join(diff.to_patch())
            self.bundle.add_multiparent_record(text, sha1, parent_ids,
                                               'inventory', revision_id, None)
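
    # Round-trip sketch of the mpdiff encoding used above (made-up texts;
    # multiparent is breezy's own module, used throughout this file):
    #
    #   diff = multiparent.MultiParent.from_lines(lines, [parent_lines])
    #   patch = b''.join(diff.to_patch())   # the bytes stored in the bundle
    #   restored = multiparent.MultiParent.from_patch(patch)
    #   assert restored.to_lines([parent_lines]) == lines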

    def _add_revision_texts(self, revision_order):
        parent_map = self.repository.get_parent_map(revision_order)
        revision_to_bytes = self.repository._serializer.write_revision_to_string
        revisions = self.repository.get_revisions(revision_order)
        for revision in revisions:
            revision_id = revision.revision_id
            parents = parent_map.get(revision_id, None)
            revision_text = revision_to_bytes(revision)
            self.bundle.add_fulltext_record(revision_text, parents,
                                            'revision', revision_id)
            try:
                self.bundle.add_fulltext_record(
                    self.repository.get_signature_text(
                        revision_id), parents, 'signature', revision_id)
            except errors.NoSuchRevision:
                # This revision has no signature; nothing to add.
                pass

        added_inv = set()
        target_revision = None
        for bytes, metadata, repo_kind, revision_id, file_id in\
                self._container.iter_records():
            if repo_kind == 'info':
                if self._info is not None:
                    raise AssertionError()
                self._handle_info(metadata)
            if (pending_file_records and
                    (repo_kind, file_id) != ('file', current_file)):
                # Flush the data for a single file - prevents memory
                # spiking due to buffering all files in memory.
                self._install_mp_records_keys(self._repository.texts,
                                              pending_file_records)
                current_file = None
                del pending_file_records[:]
            if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
                self._install_inventory_records(pending_inventory_records)
                pending_inventory_records = []
            if repo_kind == 'inventory':
                pending_inventory_records.append(
                    ((revision_id,), metadata, bytes))
            if repo_kind == 'revision':
                target_revision = revision_id
                self._install_revision(revision_id, metadata, bytes)
            if repo_kind == 'signature':
                self._install_signature(revision_id, metadata, bytes)
            if repo_kind == 'file':
                current_file = file_id
                pending_file_records.append(
                    ((file_id, revision_id), metadata, bytes))
        self._install_mp_records_keys(
            self._repository.texts, pending_file_records)
        return target_revision

    def _handle_info(self, info):
        """Extract data from an info record"""
        self._info = info
        self._source_serializer = self._serializer.get_source_serializer(info)
        if (info[b'supports_rich_root'] == 0 and
                self._repository.supports_rich_root()):
            self.update_root = True
        else:
            self.update_root = False

            parents = [prefix + (parent,) for parent in meta[b'parents']]
            vf_records.append((key, parents, meta[b'sha1'], d_func(text)))
        versionedfile.add_mpdiffs(vf_records)
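
        # Each entry passed to add_mpdiffs() is a (key, parents,
        # expected_sha1, mpdiff) tuple; the versionedfile validates the
        # reconstructed fulltext against expected_sha1 when applying.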

    def _get_parent_inventory_texts(self, inventory_text_cache,
                                    inventory_cache, parent_ids):
        cached_parent_texts = {}
        remaining_parent_ids = []
        for parent_id in parent_ids:
            p_text = inventory_text_cache.get(parent_id, None)
            if p_text is None:
                remaining_parent_ids.append(parent_id)
            else:
                cached_parent_texts[parent_id] = p_text
        ghosts = ()
        # TODO: Use inventory_cache to grab inventories we already have in
        #       memory
        if remaining_parent_ids:
            # first determine what keys are actually present in the local
            # inventories object (don't use revisions as they haven't been
            # installed yet)
            parent_keys = [(r,) for r in remaining_parent_ids]
            present_parent_map = self._repository.inventories.get_parent_map(
                parent_keys)
            present_parent_ids = []
            ghosts = set()
            for p_id in remaining_parent_ids:
                if (p_id,) in present_parent_map:
                    present_parent_ids.append(p_id)
                else:
                    ghosts.add(p_id)
            to_chunks = self._source_serializer.write_inventory_to_chunks
            for parent_inv in self._repository.iter_inventories(
                    present_parent_ids):
                p_text = b''.join(to_chunks(parent_inv))
                inventory_cache[parent_inv.revision_id] = parent_inv
                cached_parent_texts[parent_inv.revision_id] = p_text
                inventory_text_cache[parent_inv.revision_id] = p_text

        parent_texts = [cached_parent_texts[parent_id]
                        for parent_id in parent_ids
                        if parent_id not in ghosts]
        return parent_texts

    def _install_inventory_records(self, records):
        if (self._info[b'serializer'] == self._repository._serializer.format_num
                and self._repository._serializer.support_altered_by_hack):
            return self._install_mp_records_keys(self._repository.inventories,
                                                 records)
        # Use a 10MB text cache, since these are string xml inventories. Note
        # that 10MB is fairly small for large projects (a single inventory can
        # be >5MB). Another possibility is to cache 10-20 inventory texts
        # instead.
        inventory_text_cache = lru_cache.LRUSizeCache(10 * 1024 * 1024)
        # Also cache the in-memory representation. This allows us to create
        # inventory deltas to apply rather than calling add_inventory from
        # scratch each time.
        inventory_cache = lru_cache.LRUCache(10)
        with ui.ui_factory.nested_progress_bar() as pb:
            num_records = len(records)
            for idx, (key, metadata, bytes) in enumerate(records):
                pb.update('installing inventory', idx, num_records)
                revision_id = key[-1]
                parent_ids = metadata[b'parents']
                # Note: This assumes the local ghosts are identical to the
                #       ghosts in the source, as the Bundle serialization
                #       format doesn't record ghosts.
                p_texts = self._get_parent_inventory_texts(inventory_text_cache,
                                                           inventory_cache,
                                                           parent_ids)
                # Why does to_lines() take strings as the source? It seems
                # that it would have to cast to a list of lines, which we get
                # back as lines and then cast back to a string.
                target_lines = multiparent.MultiParent.from_patch(bytes
                    ).to_lines(p_texts)
                sha1 = osutils.sha_strings(target_lines)
                if sha1 != metadata[b'sha1']:
                    raise errors.BadBundle("Can't convert to target format")
                # Add this to the cache so we don't have to extract it again.
                inventory_text_cache[revision_id] = b''.join(target_lines)
                target_inv = self._source_serializer.read_inventory_from_lines(
                    target_lines)
                self._handle_root(target_inv, parent_ids)
                parent_inv = None
                if parent_ids:
                    parent_inv = inventory_cache.get(parent_ids[0], None)
                try:
                    if parent_inv is None:
                        self._repository.add_inventory(revision_id, target_inv,
                                                       parent_ids)
                    else:
                        delta = target_inv._make_delta(parent_inv)
                        self._repository.add_inventory_by_delta(parent_ids[0],
                            delta, revision_id, parent_ids)
                except errors.UnsupportedInventoryKind:
                    raise errors.IncompatibleRevision(repr(self._repository))
                inventory_cache[revision_id] = target_inv
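
    # Design note: applying an inventory delta against a cached parent
    # (add_inventory_by_delta) avoids re-adding the full inventory for every
    # record; only records whose parent inventory has fallen out of the small
    # LRU cache pay the full add_inventory cost.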

    def _handle_root(self, target_inv, parent_ids):
        revision_id = target_inv.revision_id
        if self.update_root:
            text_key = (target_inv.root.file_id, revision_id)
            parent_keys = [(target_inv.root.file_id, parent) for
                           parent in parent_ids]
            self._repository.texts.add_lines(text_key, parent_keys, [])
        elif not self._repository.supports_rich_root():
            if target_inv.root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self._repository))