# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
    repository as _mod_repository,
    revision as _mod_revision,
    versionedfile as _mod_versionedfile,
from .. import bundle_data, serializer as bundle_serializer
from ....i18n import ngettext


class _MPDiffInventoryGenerator(_mod_versionedfile._MPDiffGenerator):
        # parents first, and then grab the ordered requests.
        needed_ids = [k[-1] for k in self.present_parents]
        needed_ids.extend([k[-1] for k in self.ordered_keys])
        inv_to_lines = self.repo._serializer.write_inventory_to_chunks
        for inv in self.repo.iter_inventories(needed_ids):
            revision_id = inv.revision_id
            key = (revision_id,)
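            # VersionedFiles keys are tuples; inventories are keyed by the
            # revision id alone, hence the 1-tuple.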
            parent_ids = [k[-1] for k in self.parent_map[key]]
            as_chunks = inv_to_lines(inv)
            self._process_one_record(key, as_chunks)
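            # _process_one_record() is inherited from _MPDiffGenerator: it
            # buffers the serialized text so a multi-parent diff can be built
            # once the parent texts gathered above are available.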
            if parent_ids is None:
                continue
            diff = self.diffs.pop(key)
            sha1 = osutils.sha_strings(as_chunks)
            yield revision_id, parent_ids, sha1, diff
        """Start writing the bundle"""
        self._fileobj.write(bundle_serializer._get_bundle_header('4'))
        self._fileobj.write(b'#\n')
        self._container.begin()
        :revision_id: The revision id of the mpdiff being added.
        :file_id: The file-id of the file, or None for inventories.
        metadata = {b'parents': parents,
                    b'storage_kind': b'mpdiff',
                    b'sha1': sha1}
        self._add_record(mp_bytes, metadata, repo_kind, revision_id, file_id)
    def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
        :revision_id: The revision id of the fulltext being added.
        self._add_record(bytes, {b'parents': parents,
                                 b'storage_kind': b'fulltext'}, repo_kind, revision_id, None)
    def add_info_record(self, kwargs):
        """Add an info record to the bundle

        Any entries may be supplied, except 'storage_kind'.
        Values must be lists, strings, integers, dicts, or a combination.
        """
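        # Illustrative call, mirroring the write_info code in
        # BundleWriteOperation below:
        #   add_info_record({b'serializer': b'10', b'supports_rich_root': 1})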
        kwargs[b'storage_kind'] = b'header'
        self._add_record(None, kwargs, 'info', None, None)
    def encode_name(content_kind, revision_id, file_id=None):
        """Encode semantic ids as a container name"""
        if content_kind not in ('revision', 'file', 'inventory', 'signature',
                                'info'):
            raise ValueError(content_kind)
        if content_kind == 'file':
            if file_id is None:
                raise AssertionError()
        elif revision_id is None:
            raise AssertionError()
        names = [n.replace(b'/', b'//') for n in
                 (content_kind.encode('ascii'), revision_id, file_id) if n is not None]
        return b'/'.join(names)
    def _add_record(self, bytes, metadata, repo_kind, revision_id, file_id):
        """Add a bundle record to the container.
        name = self.encode_name(repo_kind, revision_id, file_id)
        encoded_metadata = bencode.bencode(metadata)
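        # Each bundle record becomes two container records: a named record
        # holding the bencoded metadata, then an unnamed record with the raw
        # body (the bodiless 'info' header skips the second).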
        self._container.add_bytes_record([encoded_metadata], len(encoded_metadata), [(name, )])
        if metadata[b'storage_kind'] != b'header':
            self._container.add_bytes_record([bytes], len(bytes), [])
class BundleReader(object):
            raise errors.BadBundle('Record has %d names instead of 1'
                                   % len(names))
            metadata = bencode.bdecode(bytes)
            if metadata[b'storage_kind'] == b'header':
                bytes = None
            else:
                _unused, bytes = next(iterator)
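                # Non-header records carry their body in the following
                # unnamed container record, matching the layout written by
                # BundleWriter._add_record() above.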
class BundleSerializerV4(bundle_serializer.BundleSerializer):
    """Implement the high-level bundle interface"""
    def write_bundle(self, repository, target, base, fileobj):
        """Write a bundle to a file object
        :param fileobj: The file-like object to write to
        """
        write_op = BundleWriteOperation(base, target, repository, fileobj)
        return write_op.do_write()
    def read(self, file):
    def get_source_serializer(info):
        """Retrieve the serializer for a given info object"""
        return serializer.format_registry.get(info[b'serializer'].decode('ascii'))
class BundleWriteOperation(object):
    """Perform the operation of writing revisions to a bundle"""
    def __init__(self, base, target, repository, fileobj, revision_ids=None):
        self.base = base
        self.target = target
        serializer_format = self.repository.get_serializer_format()
        supports_rich_root = {True: 1, False: 0}[
            self.repository.supports_rich_root()]
        self.bundle.add_info_record({b'serializer': serializer_format,
                                     b'supports_rich_root': supports_rich_root})
    def write_files(self):
        """Write bundle records for all revisions of all files"""
        text_keys = []
        altered_fileids = self.repository.fileids_altered_by_revision_ids(
            self.revision_ids)
        for file_id, revision_ids in altered_fileids.items():
            for revision_id in revision_ids:
                text_keys.append((file_id, revision_id))
        self._add_mp_records_keys('file', self.repository.texts, text_keys)
        """Write bundle records for all revisions and signatures"""
        inv_vf = self.repository.inventories
        topological_order = [key[-1] for key in multiparent.topo_iter_keys(
            inv_vf, self.revision_keys)]
        revision_order = topological_order
        if self.target is not None and self.target in self.revision_ids:
            # Make sure the target revision is always the last entry
        """Generate mpdiffs by serializing inventories.

        The current repository only has part of the tree shape information in
        the 'inventories' vf. So we use serializer.write_inventory_to_chunks to
        get a 'full' representation of the tree shape, and then generate
        mpdiffs on that data stream. This stream can then be reconstructed on
        the other side.
        """
        generator = _MPDiffInventoryGenerator(self.repository,
                                              inventory_key_order)
        for revision_id, parent_ids, sha1, diff in generator.iter_diffs():
            text = b''.join(diff.to_patch())
            self.bundle.add_multiparent_record(text, sha1, parent_ids,
                                               'inventory', revision_id, None)
    def _add_revision_texts(self, revision_order):
        parent_map = self.repository.get_parent_map(revision_order)
        revision_to_bytes = self.repository._serializer.write_revision_to_string
        revisions = self.repository.get_revisions(revision_order)
        for revision in revisions:
            revision_id = revision.revision_id
            parents = parent_map.get(revision_id, None)
            revision_text = revision_to_bytes(revision)
            self.bundle.add_fulltext_record(revision_text, parents,
                                            'revision', revision_id)
            try:
                self.bundle.add_fulltext_record(
                    self.repository.get_signature_text(
                        revision_id), parents, 'signature', revision_id)
            except errors.NoSuchRevision:
        for mpdiff, item_key in zip(mpdiffs, ordered_keys):
            sha1 = sha1s[item_key]
            parents = [key[-1] for key in parent_map[item_key]]
            text = b''.join(mpdiff.to_patch())
            # Infer file id records as appropriate.
            if len(item_key) == 2:
                file_id = item_key[0]
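                # Text keys are (file_id, revision_id) pairs, while inventory
                # keys are bare (revision_id,) tuples, so only the former
                # carry a file id.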
        all into memory at once. Reading it into memory all at once is
        (currently) faster.
        """
        with repository.lock_write():
            ri = RevisionInstaller(self.get_bundle_reader(stream_input),
                                   self._serializer, repository)
            return ri.install()
    def get_merge_request(self, target_repo):
        """Provide data for performing a merge
            self.__real_revisions = []
            bundle_reader = self.get_bundle_reader()
            for bytes, metadata, repo_kind, revision_id, file_id in \
                    bundle_reader.iter_records():
                if repo_kind == 'info':
                    serializer = \
                        self._serializer.get_source_serializer(metadata)
        Must be called with the Repository locked.
        """
        with _mod_repository.WriteGroup(self._repository):
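            # WriteGroup is a context manager: it commits the repository
            # write group when the body completes and aborts it if an
            # exception escapes.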
            return self._install_in_write_group()
    def _install_in_write_group(self):
        current_file = None
        added_inv = set()
        target_revision = None
        for bytes, metadata, repo_kind, revision_id, file_id in \
                self._container.iter_records():
            if repo_kind == 'info':
                if self._info is not None:
                    raise AssertionError()
                self._handle_info(metadata)
            if (pending_file_records and
                    (repo_kind, file_id) != ('file', current_file)):
                # Flush the data for a single file - prevents memory
                # spiking due to buffering all files in memory.
                self._install_mp_records_keys(self._repository.texts,
                                              pending_file_records)
                current_file = None
                del pending_file_records[:]
            if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
                self._install_inventory_records(pending_inventory_records)
                pending_inventory_records = []
            if repo_kind == 'inventory':
                pending_inventory_records.append(
                    ((revision_id,), metadata, bytes))
            if repo_kind == 'revision':
                target_revision = revision_id
                self._install_revision(revision_id, metadata, bytes)
            if repo_kind == 'signature':
                self._install_signature(revision_id, metadata, bytes)
            if repo_kind == 'file':
                current_file = file_id
                pending_file_records.append(
                    ((file_id, revision_id), metadata, bytes))
        self._install_mp_records_keys(
            self._repository.texts, pending_file_records)
        return target_revision
    def _handle_info(self, info):
        """Extract data from an info record"""
        self._info = info
        self._source_serializer = self._serializer.get_source_serializer(info)
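        # A bundle created without rich-root data cannot supply root text
        # records; when the target repository is rich-root, _handle_root()
        # synthesizes them.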
        if (info[b'supports_rich_root'] == 0 and
                self._repository.supports_rich_root()):
            self.update_root = True
        else:
            self.update_root = False
            parents = [prefix + (parent,) for parent in meta[b'parents']]
            vf_records.append((key, parents, meta[b'sha1'], d_func(text)))
        versionedfile.add_mpdiffs(vf_records)
    def _get_parent_inventory_texts(self, inventory_text_cache,
                    present_parent_ids.append(p_id)
            to_lines = self._source_serializer.write_inventory_to_chunks
            for parent_inv in self._repository.iter_inventories(
                    present_parent_ids):
                p_text = b''.join(to_lines(parent_inv))
                inventory_cache[parent_inv.revision_id] = parent_inv
                cached_parent_texts[parent_inv.revision_id] = p_text
                inventory_text_cache[parent_inv.revision_id] = p_text
        parent_texts = [cached_parent_texts[parent_id]
                        for parent_id in parent_ids
                        if parent_id not in ghosts]
        return parent_texts
    def _install_inventory_records(self, records):
        if (self._info[b'serializer'] == self._repository._serializer.format_num
                and self._repository._serializer.support_altered_by_hack):
            return self._install_mp_records_keys(self._repository.inventories,
                                                 records)
        # Use a 10MB text cache, since these are string xml inventories. Note
        # that 10MB is fairly small for large projects (a single inventory can
        # be >5MB). Another possibility is to cache 10-20 inventory texts
        # instead.
        inventory_text_cache = lru_cache.LRUSizeCache(10 * 1024 * 1024)
        # Also cache the in-memory representation. This allows us to create
        # inventory deltas to apply rather than calling add_inventory from
        # scratch each time.
        inventory_cache = lru_cache.LRUCache(10)
        with ui.ui_factory.nested_progress_bar() as pb:
            num_records = len(records)
            for idx, (key, metadata, bytes) in enumerate(records):
                pb.update('installing inventory', idx, num_records)
                revision_id = key[-1]
                parent_ids = metadata[b'parents']
                # Note: This assumes the local ghosts are identical to the
                # ghosts in the source, as the Bundle serialization
                # format doesn't record ghosts.
                # it would have to cast to a list of lines, which we get back
                # as lines and then cast back to a string.
                target_lines = multiparent.MultiParent.from_patch(bytes
                    ).to_lines(parent_texts)
                sha1 = osutils.sha_strings(target_lines)
                if sha1 != metadata[b'sha1']:
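                    # The sha1 was recorded over the serialized inventory when
                    # the bundle was written; a mismatch means the text cannot
                    # be reproduced byte-for-byte with this serializer.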
                    raise errors.BadBundle("Can't convert to target format")
                # Add this to the cache so we don't have to extract it again.
                inventory_text_cache[revision_id] = b''.join(target_lines)
                target_inv = self._source_serializer.read_inventory_from_lines(
                    target_lines)
                self._handle_root(target_inv, parent_ids)
                parent_inv = None
                        delta = target_inv._make_delta(parent_inv)
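                        # Applying a delta against the cached first-parent
                        # inventory is much cheaper than building the whole
                        # inventory from scratch (see the inventory_cache
                        # note above).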
                        self._repository.add_inventory_by_delta(parent_ids[0],
                                                                delta, revision_id, parent_ids)
                except serializer.UnsupportedInventoryKind:
                    raise errors.IncompatibleRevision(repr(self._repository))
                inventory_cache[revision_id] = target_inv
    def _handle_root(self, target_inv, parent_ids):
        revision_id = target_inv.revision_id
        if self.update_root:
            text_key = (target_inv.root.file_id, revision_id)
            parent_keys = [(target_inv.root.file_id, parent) for
                           parent in parent_ids]
            self._repository.texts.add_lines(text_key, parent_keys, [])
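            # An empty text suffices here: the rich-root repository only
            # needs a per-revision text record to exist for the root
            # directory.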
        elif not self._repository.supports_rich_root():
            if target_inv.root.revision != revision_id: