# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

from __future__ import absolute_import

from io import BytesIO

from .... import (
    bencode,
    errors,
    lru_cache,
    multiparent,
    osutils,
    pack,
    repository as _mod_repository,
    revision as _mod_revision,
    serializer,
    trace,
    ui,
    versionedfile as _mod_versionedfile,
    )
from .. import bundle_data, serializer as bundle_serializer
from ....i18n import ngettext
from ....sixish import (
    viewitems,
    )


class _MPDiffInventoryGenerator(_mod_versionedfile._MPDiffGenerator):
    """Generate Inventory diffs from serialized inventories."""

    def __init__(self, repo, inventory_keys):
        super(_MPDiffInventoryGenerator, self).__init__(repo.inventories,
                                                        inventory_keys)
        self.repo = repo

    def iter_diffs(self):
        """Compute the diffs one at a time."""
        # This is used instead of compute_diffs() because we guarantee our
        # ordering of inventories, so we don't have to do any buffering.
        self._find_needed_keys()
        # We actually use a slightly different ordering. We grab all of the
        # parents first, and then grab the ordered requests.
        needed_ids = [k[-1] for k in self.present_parents]
        needed_ids.extend([k[-1] for k in self.ordered_keys])
        inv_to_lines = self.repo._serializer.write_inventory_to_chunks
        for inv in self.repo.iter_inventories(needed_ids):
            revision_id = inv.revision_id
            key = (revision_id,)
            if key in self.present_parents:
                # Not a key we will transmit, which is a shame, since it
                # means bundles don't work with stacked branches.
                parent_ids = None
            else:
                parent_ids = [k[-1] for k in self.parent_map[key]]
            as_chunks = inv_to_lines(inv)
            self._process_one_record(key, as_chunks)
            if parent_ids is None:
                continue
            diff = self.diffs.pop(key)
            sha1 = osutils.sha_strings(as_chunks)
            yield revision_id, parent_ids, sha1, diff
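
# Note (mine, inferred from the usage above): the _MPDiffGenerator base class
# is expected to provide _find_needed_keys(), present_parents, ordered_keys,
# parent_map, diffs and _process_one_record(); iter_diffs() only changes the
# order in which the inventory texts are fetched.
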
class BundleWriter(object):
    """Writer for bundle-format files.

    This is a layer on top of the container writer that records the specific
    record types used by this bundle format.
    """

    def add_multiparent_record(self, mp_bytes, sha1, parents, repo_kind,
                               revision_id, file_id):
        """Add a record for a multi-parent diff

        :mp_bytes: A multi-parent diff, as a bytestring
        :sha1: The sha1 hash of the fulltext
        :parents: a list of revision-ids of the parents
        :repo_kind: The kind of object in the repository.  May be 'file' or
            'inventory'
        :revision_id: The revision id of the mpdiff being added.
        :file_id: The file-id of the file, or None for inventories.
        """
        metadata = {b'parents': parents,
                    b'storage_kind': b'mpdiff',
                    b'sha1': sha1}
        self._add_record(mp_bytes, metadata, repo_kind, revision_id, file_id)

    def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
        """Add a record for a fulltext

        :bytes: The fulltext, as a bytestring
        :parents: a list of revision-ids of the parents
        :repo_kind: The kind of object in the repository.  May be 'revision'
            or 'signature'
        :revision_id: The revision id of the fulltext being added.
        """
        metadata = {b'parents': parents,
                    b'storage_kind': b'fulltext'}
        self._add_record(bytes, metadata, repo_kind, revision_id, None)

    def add_info_record(self, kwargs):
        """Add an info record to the bundle

        Any entries may be supplied, except 'storage_kind', which is set by
        this method.  Values must be lists, strings, integers, dicts, or a
        combination.
        """
        kwargs[b'storage_kind'] = b'header'
        self._add_record(None, kwargs, 'info', None, None)

    @staticmethod
    def encode_name(content_kind, revision_id, file_id=None):
        """Encode semantic ids as a container name"""
        if content_kind not in ('revision', 'file', 'inventory', 'signature',
                                'info'):
            raise ValueError(content_kind)
        if content_kind == 'file':
            if file_id is None:
                raise AssertionError()
        else:
            if file_id is not None:
                raise AssertionError()
        if content_kind == 'info':
            if revision_id is not None:
                raise AssertionError()
        elif revision_id is None:
            raise AssertionError()
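        # Illustrative example (mine, not from the original source):
        #   encode_name('file', b'rev-1', b'file-id') == b'file/rev-1/file-id'
        # A '/' inside any id is escaped by doubling it, so a reader can
        # split the name unambiguously on single '/' characters.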
        names = [n.replace(b'/', b'//') for n in
                 (content_kind.encode('ascii'), revision_id, file_id)
                 if n is not None]
        return b'/'.join(names)

    def _add_record(self, bytes, metadata, repo_kind, revision_id, file_id):
        """Add a bundle record to the container."""
        name = self.encode_name(repo_kind, revision_id, file_id)
        encoded_metadata = bencode.bencode(metadata)
        self._container.add_bytes_record([encoded_metadata],
                                         len(encoded_metadata), [(name, )])
        if metadata[b'storage_kind'] != b'header':
            self._container.add_bytes_record([bytes], len(bytes), [])
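        # Wire layout note (mine, from reading the calls above): each bundle
        # record becomes a named container record holding the bencoded
        # metadata, followed by a nameless record holding the raw body bytes;
        # 'header' (info) records have no body record.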


class BundleReader(object):
    """Read bundle records from a container file.

    This is a layer on top of the container reader that yields each record's
    metadata together with its semantic name and body.
    """

    def iter_records(self):
        """Iterate through bundle records

        :return: a generator of (bytes, metadata, content_kind, revision_id,
            file_id)
        """
        iterator = pack.iter_records_from_file(self._container_file)
        for names, bytes in iterator:
            if len(names) != 1:
                raise errors.BadBundle('Record has %d names instead of 1'
                                       % len(names))
            metadata = bencode.bdecode(bytes)
            if metadata[b'storage_kind'] == b'header':
                bytes = None
            else:
                _unused, bytes = next(iterator)
            yield (bytes, metadata) + self.decode_name(names[0][0])
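    # Note (mine): metadata and body records alternate in the container, so a
    # non-header record pulls its body by advancing the same iterator one
    # more step.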


class BundleSerializerV4(bundle_serializer.BundleSerializer):
    """Implement the high-level bundle interface"""

    def write(self, repository, revision_ids, forced_bases, fileobj):
        """Write a bundle to a file-like object

        For backwards-compatibility only
        """
        write_op = BundleWriteOperation.from_old_args(repository, revision_ids,
                                                      forced_bases, fileobj)
        return write_op.do_write()

    def write_bundle(self, repository, target, base, fileobj):
        """Write a bundle to a file object

        :param repository: The repository to retrieve revision data from
        :param target: The head revision to include ancestors of
        :param base: The ancestor of the target to stop including ancestors
            at.
        :param fileobj: The file-like object to write to
        """
        write_op = BundleWriteOperation(base, target, repository, fileobj)
        return write_op.do_write()

    @staticmethod
    def get_source_serializer(info):
        """Retrieve the serializer for a given info object"""
        return serializer.format_registry.get(
            info[b'serializer'].decode('ascii'))
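    # Note (mine): write() keeps the old (revision_ids, forced_bases) calling
    # convention and maps it onto the (base, target) convention that
    # write_bundle() uses.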


class BundleWriteOperation(object):
    """Perform the operation of writing revisions to a bundle"""

    @classmethod
    def from_old_args(cls, repository, revision_ids, forced_bases, fileobj):
        """Create a BundleWriteOperation from old-style arguments"""
        base, target = cls.get_base_target(revision_ids, forced_bases,
                                           repository)
        return BundleWriteOperation(base, target, repository, fileobj,
                                    revision_ids=revision_ids)

    def __init__(self, base, target, repository, fileobj, revision_ids=None):
        self.base = base
        self.target = target
        self.repository = repository
        self.bundle = BundleWriter(fileobj)
        if revision_ids is not None:
            self.revision_ids = revision_ids
        else:
            graph = repository.get_graph()
            revision_ids = graph.find_unique_ancestors(target, [base])
            # Strip ghosts
            parents = graph.get_parent_map(revision_ids)
            self.revision_ids = [r for r in revision_ids if r in parents]
        self.revision_keys = {(revid,) for revid in self.revision_ids}

    def do_write(self):
        """Write all data to the bundle"""
        trace.note(ngettext('Bundling %d revision.', 'Bundling %d revisions.',
                            len(self.revision_ids)), len(self.revision_ids))
        with self.repository.lock_read():
            self.bundle.begin()
            self.write_info()
            self.write_files()
            self.write_revisions()
            self.bundle.end()
        return self.revision_ids
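    # Ordering note (mine): the info record is written first; the reader's
    # install loop (_install_in_write_group below) relies on record-kind
    # transitions to flush pending file and inventory records, so the write
    # order here is significant.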

    def write_info(self):
        """Write format info"""
        serializer_format = self.repository.get_serializer_format()
        supports_rich_root = {True: 1, False: 0}[
            self.repository.supports_rich_root()]
        self.bundle.add_info_record({b'serializer': serializer_format,
                                     b'supports_rich_root': supports_rich_root})
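    # Note (mine): rich-root support is stored as 0/1 rather than a bool
    # because the bencoded metadata can only hold integers, byte strings,
    # lists and dicts.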

    def write_files(self):
        """Write bundle records for all revisions of all files"""
        text_keys = []
        altered_fileids = self.repository.fileids_altered_by_revision_ids(
            self.revision_ids)
        for file_id, revision_ids in viewitems(altered_fileids):
            for revision_id in revision_ids:
                text_keys.append((file_id, revision_id))
        self._add_mp_records_keys('file', self.repository.texts, text_keys)

    def _add_inventory_mpdiffs_from_serializer(self, revision_order):
        """Generate mpdiffs by serializing inventories.

        The current repository only has part of the tree shape information in
        the 'inventories' vf. So we use serializer.write_inventory_to_chunks
        to get a 'full' representation of the tree shape, and then generate
        mpdiffs on that data stream. This stream can then be reconstructed on
        the other side.
        """
        inventory_key_order = [(r,) for r in revision_order]
        generator = _MPDiffInventoryGenerator(self.repository,
                                              inventory_key_order)
        for revision_id, parent_ids, sha1, diff in generator.iter_diffs():
            text = b''.join(diff.to_patch())
            self.bundle.add_multiparent_record(text, sha1, parent_ids,
                                               'inventory', revision_id, None)
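    # Illustrative round trip (mine, with made-up texts) of the mpdiff
    # mechanism used above and when installing inventories:
    #
    #   parent_lines = [b'a\n', b'b\n']
    #   new_lines = [b'a\n', b'c\n']
    #   diff = multiparent.MultiParent.from_lines(new_lines, [parent_lines])
    #   patch = b''.join(diff.to_patch())
    #   # to_lines() takes full parent texts, not lists of lines:
    #   restored = multiparent.MultiParent.from_patch(patch).to_lines(
    #       [b''.join(parent_lines)])
    #   assert restored == new_lines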

    def _add_revision_texts(self, revision_order):
        parent_map = self.repository.get_parent_map(revision_order)
        revision_to_bytes = self.repository._serializer.write_revision_to_string
        revisions = self.repository.get_revisions(revision_order)
        for revision in revisions:
            revision_id = revision.revision_id
            parents = parent_map.get(revision_id, None)
            revision_text = revision_to_bytes(revision)
            self.bundle.add_fulltext_record(revision_text, parents,
                                            'revision', revision_id)
            try:
                self.bundle.add_fulltext_record(
                    self.repository.get_signature_text(
                        revision_id), parents, 'signature', revision_id)
            except errors.NoSuchRevision:
                # If we have no signature for this revision, skip it.
                pass
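    # Note (mine): signatures are optional per revision; NoSuchRevision from
    # get_signature_text() just means the revision is unsigned, so no
    # signature record is emitted for it.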

    def _install_in_write_group(self):
        current_file = None
        pending_file_records = []
        pending_inventory_records = []
        added_inv = set()
        target_revision = None
        for bytes, metadata, repo_kind, revision_id, file_id in\
                self._container.iter_records():
            if repo_kind == 'info':
                if self._info is not None:
                    raise AssertionError()
                self._handle_info(metadata)
            if (pending_file_records and
                    (repo_kind, file_id) != ('file', current_file)):
                # Flush the data for a single file - prevents memory
                # spiking due to buffering all files in memory.
                self._install_mp_records_keys(self._repository.texts,
                                              pending_file_records)
                current_file = None
                del pending_file_records[:]
            if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
                self._install_inventory_records(pending_inventory_records)
                pending_inventory_records = []
            if repo_kind == 'inventory':
                pending_inventory_records.append(
                    ((revision_id,), metadata, bytes))
            if repo_kind == 'revision':
                target_revision = revision_id
                self._install_revision(revision_id, metadata, bytes)
            if repo_kind == 'signature':
                self._install_signature(revision_id, metadata, bytes)
            if repo_kind == 'file':
                current_file = file_id
                pending_file_records.append(
                    ((file_id, revision_id), metadata, bytes))
        self._install_mp_records_keys(
            self._repository.texts, pending_file_records)
        return target_revision

    def _handle_info(self, info):
        """Extract data from an info record"""
        self._info = info
        self._source_serializer = self._serializer.get_source_serializer(info)
        if (info[b'supports_rich_root'] == 0 and
                self._repository.supports_rich_root()):
            self.update_root = True
        else:
            self.update_root = False

    def _install_mp_records_keys(self, versionedfile, records):
        d_func = multiparent.MultiParent.from_patch
        vf_records = []
        for key, meta, text in records:
            # A length-two key is a (file_id, revision_id) pair; a length-one
            # key is a revision/signature/inventory key.
            if len(key) == 2:
                prefix = key[:1]
            else:
                prefix = ()
            parents = [prefix + (parent,) for parent in meta[b'parents']]
            vf_records.append((key, parents, meta[b'sha1'], d_func(text)))
        versionedfile.add_mpdiffs(vf_records)

    def _get_parent_inventory_texts(self, inventory_text_cache,
                                    inventory_cache, parent_ids):
        cached_parent_texts = {}
        remaining_parent_ids = []
        for parent_id in parent_ids:
            p_text = inventory_text_cache.get(parent_id, None)
            if p_text is None:
                remaining_parent_ids.append(parent_id)
            else:
                cached_parent_texts[parent_id] = p_text
        ghosts = set()
        if remaining_parent_ids:
            # Determine which parents are present in the local inventories;
            # anything else is a ghost.
            parent_keys = [(r,) for r in remaining_parent_ids]
            present_parent_map = self._repository.inventories.get_parent_map(
                parent_keys)
            present_parent_ids = []
            for p_id in remaining_parent_ids:
                if (p_id,) in present_parent_map:
                    present_parent_ids.append(p_id)
                else:
                    ghosts.add(p_id)
            to_lines = self._source_serializer.write_inventory_to_chunks
            for parent_inv in self._repository.iter_inventories(
                    present_parent_ids):
                p_text = b''.join(to_lines(parent_inv))
                inventory_cache[parent_inv.revision_id] = parent_inv
                cached_parent_texts[parent_inv.revision_id] = p_text
                inventory_text_cache[parent_inv.revision_id] = p_text
        parent_texts = [cached_parent_texts[parent_id]
                        for parent_id in parent_ids
                        if parent_id not in ghosts]
        return parent_texts

    def _install_inventory_records(self, records):
        if (self._info[b'serializer'] == self._repository._serializer.format_num
                and self._repository._serializer.support_altered_by_hack):
            return self._install_mp_records_keys(self._repository.inventories,
                                                 records)
        # Use a 10MB text cache, since these are string xml inventories. Note
        # that 10MB is fairly small for large projects (a single inventory can
        # be >5MB). Another possibility is to cache 10-20 inventory texts
        # instead.
        inventory_text_cache = lru_cache.LRUSizeCache(10 * 1024 * 1024)
        # Also cache the in-memory representation. This allows us to create
        # inventory deltas to apply rather than calling add_inventory from
        # scratch each time.
        inventory_cache = lru_cache.LRUCache(10)
        with ui.ui_factory.nested_progress_bar() as pb:
            num_records = len(records)
            for idx, (key, metadata, bytes) in enumerate(records):
                pb.update('installing inventory', idx, num_records)
                revision_id = key[-1]
                parent_ids = metadata[b'parents']
                # Note: This assumes the local ghosts are identical to the
                #       ghosts in the source, as the Bundle serialization
                #       format doesn't record ghosts.
                p_texts = self._get_parent_inventory_texts(
                    inventory_text_cache, inventory_cache, parent_ids)
                # Why does to_lines() take strings as the source? It seems it
                # would have to cast to a list of lines, which we get back as
                # lines and then cast back to a string.
                target_lines = multiparent.MultiParent.from_patch(
                    bytes).to_lines(p_texts)
                # The sha1 is validated against the xml/textual form, not the
                # form-in-the-repository.
                sha1 = osutils.sha_strings(target_lines)
                if sha1 != metadata[b'sha1']:
                    raise errors.BadBundle("Can't convert to target format")
                # Add this to the cache so we don't have to extract it again.
                inventory_text_cache[revision_id] = b''.join(target_lines)
                target_inv = self._source_serializer.read_inventory_from_lines(
                    target_lines)
                self._handle_root(target_inv, parent_ids)
                parent_inv = None
                if parent_ids:
                    parent_inv = inventory_cache.get(parent_ids[0], None)
                try:
                    if parent_inv is None:
                        self._repository.add_inventory(
                            revision_id, target_inv, parent_ids)
                    else:
                        delta = target_inv._make_delta(parent_inv)
                        self._repository.add_inventory_by_delta(
                            parent_ids[0], delta, revision_id, parent_ids)
                except errors.UnsupportedInventoryKind:
                    raise errors.IncompatibleRevision(repr(self._repository))
                inventory_cache[revision_id] = target_inv

    def _handle_root(self, target_inv, parent_ids):
        revision_id = target_inv.revision_id
        if self.update_root:
            text_key = (target_inv.root.file_id, revision_id)
            parent_keys = [(target_inv.root.file_id, parent) for
                           parent in parent_ids]
            self._repository.texts.add_lines(text_key, parent_keys, [])
        elif not self._repository.supports_rich_root():
            if target_inv.root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self._repository))