# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
    repository as _mod_repository,
    revision as _mod_revision,
    versionedfile as _mod_versionedfile,
from .. import bundle_data, serializer as bundle_serializer
from ....i18n import ngettext


class _MPDiffInventoryGenerator(_mod_versionedfile._MPDiffGenerator):
        # parents first, and then grab the ordered requests.
        needed_ids = [k[-1] for k in self.present_parents]
        needed_ids.extend([k[-1] for k in self.ordered_keys])
        inv_to_lines = self.repo._serializer.write_inventory_to_chunks
        for inv in self.repo.iter_inventories(needed_ids):
            revision_id = inv.revision_id
            key = (revision_id,)
            if key in self.present_parents:
                # This inventory is only needed as a diff base, not
                # transmitted itself, so it carries no parent list.
                parent_ids = None
            else:
                parent_ids = [k[-1] for k in self.parent_map[key]]
            as_chunks = inv_to_lines(inv)
            self._process_one_record(key, as_chunks)
            if parent_ids is None:
                continue
            diff = self.diffs.pop(key)
            sha1 = osutils.sha_strings(as_chunks)
            yield revision_id, parent_ids, sha1, diff
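        # Each yielded tuple is (revision_id, parent_ids, sha1, diff); the
        # sha1 covers the full serialized inventory, letting the reader
        # verify the text it reconstructs from the multiparent diff.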
        """Start writing the bundle"""
        self._fileobj.write(bundle_serializer._get_bundle_header('4'))
        self._fileobj.write(b'#\n')
        self._container.begin()
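        # The output now starts with the textual bundle header, e.g.
        #     # Bazaar revision bundle v4
        #     #
        # before the pack container writes its own format marker.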
        :revision_id: The revision id of the mpdiff being added.
        :file_id: The file-id of the file, or None for inventories.
        """
        metadata = {b'parents': parents,
                    b'storage_kind': b'mpdiff',
                    b'sha1': sha1}
        self._add_record(mp_bytes, metadata, repo_kind, revision_id, file_id)
    def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
        """Add a record for a fulltext

        :revision_id: The revision id of the fulltext being added.
        """
        self._add_record(bytes, {b'parents': parents,
                                 b'storage_kind': b'fulltext'},
                         repo_kind, revision_id, None)
    def add_info_record(self, kwargs):
        """Add an info record to the bundle

        Any parameters may be supplied, except 'self' and 'storage_kind'.
        Values must be lists, strings, integers, dicts, or a combination.
        """
        kwargs[b'storage_kind'] = b'header'
        self._add_record(None, kwargs, 'info', None, None)
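        # Illustrative call (cf. BundleWriteOperation.write_info below;
        # b'10' stands in for the repository's serializer format):
        #     bundle.add_info_record({b'serializer': b'10',
        #                             b'supports_rich_root': 1})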
    @staticmethod
    def encode_name(content_kind, revision_id, file_id=None):
        """Encode semantic ids as a container name"""
        if content_kind not in ('revision', 'file', 'inventory', 'signature',
                                'info'):
            raise ValueError(content_kind)
        if content_kind == 'file':
            if file_id is None:
                raise AssertionError()
        else:
            if file_id is not None:
                raise AssertionError()
        if content_kind == 'info':
            if revision_id is not None:
                raise AssertionError()
        elif revision_id is None:
            raise AssertionError()
        names = [n.replace(b'/', b'//') for n in
                 (content_kind.encode('ascii'), revision_id, file_id)
                 if n is not None]
        return b'/'.join(names)
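    # Example (values illustrative):
    #     encode_name('file', b'rev-1', b'file-42') == b'file/rev-1/file-42'
    # A '/' embedded in an id is escaped by doubling it, so the joined name
    # can still be split unambiguously on single slashes.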
    def _add_record(self, bytes, metadata, repo_kind, revision_id, file_id):
        """Add a bundle record to the container."""
        name = self.encode_name(repo_kind, revision_id, file_id)
        encoded_metadata = bencode.bencode(metadata)
        self._container.add_bytes_record(
            [encoded_metadata], len(encoded_metadata), [(name, )])
        if metadata[b'storage_kind'] != b'header':
            self._container.add_bytes_record([bytes], len(bytes), [])
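        # Layout note: each logical record is written as a named bytes record
        # holding the bencoded metadata, followed (except for 'header'
        # records) by an anonymous bytes record holding the body.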
class BundleReader(object):
            if len(names) != 1:
                raise errors.BadBundle('Record has %d names instead of 1'
                                       % len(names))
            metadata = bencode.bdecode(bytes)
            if metadata[b'storage_kind'] == b'header':
                bytes = None
            else:
                _unused, bytes = next(iterator)
class BundleSerializerV4(bundle_serializer.BundleSerializer):
    """Implement the high-level bundle interface"""
    def write_bundle(self, repository, target, base, fileobj):
        """Write a bundle to a file object

        :param fileobj: The file-like object to write to
        """
        write_op = BundleWriteOperation(base, target, repository, fileobj)
        return write_op.do_write()
    def read(self, file):

    @staticmethod
    def get_source_serializer(info):
        """Retrieve the serializer for a given info object"""
        return serializer.format_registry.get(
            info[b'serializer'].decode('ascii'))
class BundleWriteOperation(object):
    """Perform the operation of writing revisions to a bundle"""

    def __init__(self, base, target, repository, fileobj, revision_ids=None):
        self.base = base
        self.target = target
    def do_write(self):
        """Write all data to the bundle"""
        trace.note(ngettext('Bundling %d revision.', 'Bundling %d revisions.',
                            len(self.revision_ids)), len(self.revision_ids))
        with self.repository.lock_read():
            self.bundle.begin()
            self.write_info()
            self.write_files()
            self.write_revisions()
            self.bundle.end()
        return self.revision_ids
    def write_info(self):
        serializer_format = self.repository.get_serializer_format()
        supports_rich_root = {True: 1, False: 0}[
            self.repository.supports_rich_root()]
        self.bundle.add_info_record({b'serializer': serializer_format,
                                     b'supports_rich_root': supports_rich_root})
    def write_files(self):
        """Write bundle records for all revisions of all files"""
        text_keys = []
        altered_fileids = self.repository.fileids_altered_by_revision_ids(
            self.revision_ids)
        for file_id, revision_ids in altered_fileids.items():
            for revision_id in revision_ids:
                text_keys.append((file_id, revision_id))
        self._add_mp_records_keys('file', self.repository.texts, text_keys)
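        # text_keys ends up as [(file_id, revision_id), ...]; the same pairs
        # are what encode_name() later turns into the bundle's 'file' record
        # names.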
    def write_revisions(self):
        """Write bundle records for all revisions and signatures"""
        inv_vf = self.repository.inventories
        topological_order = [key[-1] for key in multiparent.topo_iter_keys(
            inv_vf, self.revision_keys)]
        revision_order = topological_order
        if self.target is not None and self.target in self.revision_ids:
            # Make sure the target revision is always the last entry
    def _add_inventory_mpdiffs_from_serializer(self, inventory_key_order):
        """Generate mpdiffs by serializing inventories.

        The current repository only has part of the tree shape information in
        the 'inventories' vf. So we use serializer.write_inventory_to_chunks
        to get a 'full' representation of the tree shape, and then generate
        mpdiffs on that data stream. This stream can then be reconstructed on
        the other side.
        """
        generator = _MPDiffInventoryGenerator(self.repository,
                                              inventory_key_order)
        for revision_id, parent_ids, sha1, diff in generator.iter_diffs():
            text = b''.join(diff.to_patch())
            self.bundle.add_multiparent_record(text, sha1, parent_ids,
                                               'inventory', revision_id, None)
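            # diff.to_patch() yields the multiparent hunks as bytes lines;
            # joined, they are the exact payload stored in the bundle record.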
    def _add_revision_texts(self, revision_order):
        parent_map = self.repository.get_parent_map(revision_order)
        revision_to_bytes = self.repository._serializer.write_revision_to_string
        revisions = self.repository.get_revisions(revision_order)
        for revision in revisions:
            revision_id = revision.revision_id
            parents = parent_map.get(revision_id, None)
            revision_text = revision_to_bytes(revision)
            self.bundle.add_fulltext_record(revision_text, parents,
                                            'revision', revision_id)
            try:
                self.bundle.add_fulltext_record(
                    self.repository.get_signature_text(revision_id),
                    parents, 'signature', revision_id)
            except errors.NoSuchRevision:
                pass
        for mpdiff, item_key in zip(mpdiffs, ordered_keys):
            sha1 = sha1s[item_key]
            parents = [key[-1] for key in parent_map[item_key]]
            text = b''.join(mpdiff.to_patch())
            # Infer file id records as appropriate.
            if len(item_key) == 2:
                file_id = item_key[0]
            all into memory at once. Reading it into memory all at once is
            (currently) faster.
        """
        with repository.lock_write():
            ri = RevisionInstaller(self.get_bundle_reader(stream_input),
                                   self._serializer, repository)
            return ri.install()
    def get_merge_request(self, target_repo):
        """Provide data for performing a merge
        self.__real_revisions = []
        bundle_reader = self.get_bundle_reader()
        for bytes, metadata, repo_kind, revision_id, file_id in \
                bundle_reader.iter_records():
            if repo_kind == 'info':
                serializer = \
                    self._serializer.get_source_serializer(metadata)
        Must be called with the Repository locked.
        """
        with _mod_repository.WriteGroup(self._repository):
            return self._install_in_write_group()
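        # WriteGroup is a context manager: it starts a write group on entry,
        # commits it on a clean exit, and aborts it if an exception escapes.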
    def _install_in_write_group(self):
        current_file = None
        pending_file_records = []
        pending_inventory_records = []
        added_inv = set()
        target_revision = None
        for bytes, metadata, repo_kind, revision_id, file_id in \
                self._container.iter_records():
            if repo_kind == 'info':
                if self._info is not None:
                    raise AssertionError()
                self._handle_info(metadata)
            if (pending_file_records and
                    (repo_kind, file_id) != ('file', current_file)):
                # Flush the data for a single file - prevents memory
                # spiking due to buffering all files in memory.
                self._install_mp_records_keys(self._repository.texts,
                                              pending_file_records)
                current_file = None
                del pending_file_records[:]
            if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
                self._install_inventory_records(pending_inventory_records)
                pending_inventory_records = []
            if repo_kind == 'inventory':
                pending_inventory_records.append(
                    ((revision_id,), metadata, bytes))
            if repo_kind == 'revision':
                target_revision = revision_id
                self._install_revision(revision_id, metadata, bytes)
            if repo_kind == 'signature':
                self._install_signature(revision_id, metadata, bytes)
            if repo_kind == 'file':
                current_file = file_id
                pending_file_records.append(
                    ((file_id, revision_id), metadata, bytes))
        self._install_mp_records_keys(
            self._repository.texts, pending_file_records)
        return target_revision
    def _handle_info(self, info):
        """Extract data from an info record"""
        self._info = info
        self._source_serializer = self._serializer.get_source_serializer(info)
        if (info[b'supports_rich_root'] == 0 and
                self._repository.supports_rich_root()):
            self.update_root = True
        else:
            self.update_root = False
            parents = [prefix + (parent,) for parent in meta[b'parents']]
            vf_records.append((key, parents, meta[b'sha1'], d_func(text)))
        versionedfile.add_mpdiffs(vf_records)
    def _get_parent_inventory_texts(self, inventory_text_cache,
                                    inventory_cache, parent_ids):
                    present_parent_ids.append(p_id)
            to_lines = self._source_serializer.write_inventory_to_chunks
            for parent_inv in self._repository.iter_inventories(
                    present_parent_ids):
                p_text = b''.join(to_lines(parent_inv))
                inventory_cache[parent_inv.revision_id] = parent_inv
                cached_parent_texts[parent_inv.revision_id] = p_text
                inventory_text_cache[parent_inv.revision_id] = p_text

        parent_texts = [cached_parent_texts[parent_id]
                        for parent_id in parent_ids
                        if parent_id not in ghosts]
        return parent_texts
    def _install_inventory_records(self, records):
        if (self._info[b'serializer'] == self._repository._serializer.format_num
                and self._repository._serializer.support_altered_by_hack):
            return self._install_mp_records_keys(
                self._repository.inventories, records)
        # Use a 10MB text cache, since these are string xml inventories. Note
        # that 10MB is fairly small for large projects (a single inventory can
        # be >5MB). Another possibility is to cache 10-20 inventory texts
        # instead.
        inventory_text_cache = lru_cache.LRUSizeCache(10 * 1024 * 1024)
        # Also cache the in-memory representation. This allows us to create
        # inventory deltas to apply rather than calling add_inventory from
        # scratch each time.
        inventory_cache = lru_cache.LRUCache(10)
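        # LRUSizeCache evicts by total stored byte size rather than by entry
        # count, which suits the widely varying sizes of inventory texts.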
        with ui.ui_factory.nested_progress_bar() as pb:
            num_records = len(records)
            for idx, (key, metadata, bytes) in enumerate(records):
                pb.update('installing inventory', idx, num_records)
                revision_id = key[-1]
                parent_ids = metadata[b'parents']
                # Note: This assumes the local ghosts are identical to the
                # ghosts in the source, as the Bundle serialization
                # format doesn't record ghosts.
                p_texts = self._get_parent_inventory_texts(
                    inventory_text_cache, inventory_cache, parent_ids)
                # Why does to_lines() take strings as the source? It seems
                # it would have to cast to a list of lines, which we get back
                # as lines and then cast back to a string.
                target_lines = multiparent.MultiParent.from_patch(
                    bytes).to_lines(p_texts)
                sha1 = osutils.sha_strings(target_lines)
                if sha1 != metadata[b'sha1']:
                    raise errors.BadBundle("Can't convert to target format")
                # Add this to the cache so we don't have to extract it again.
                inventory_text_cache[revision_id] = b''.join(target_lines)
                target_inv = self._source_serializer.read_inventory_from_lines(
                    target_lines)
                self._handle_root(target_inv, parent_ids)
                parent_inv = None
                    try:
                        delta = target_inv._make_delta(parent_inv)
                        self._repository.add_inventory_by_delta(
                            parent_ids[0], delta, revision_id, parent_ids)
                    except serializer.UnsupportedInventoryKind:
                        raise errors.IncompatibleRevision(repr(self._repository))
                inventory_cache[revision_id] = target_inv
    def _handle_root(self, target_inv, parent_ids):
        revision_id = target_inv.revision_id
        if self.update_root:
            text_key = (target_inv.root.file_id, revision_id)
            parent_keys = [(target_inv.root.file_id, parent) for
                           parent in parent_ids]
            self._repository.texts.add_lines(text_key, parent_keys, [])
        elif not self._repository.supports_rich_root():
            if target_inv.root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self._repository))
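        # update_root (set in _handle_info) is True when the source bundle
        # lacks rich-root data but the target repository supports it; in
        # that case an empty root text is recorded for each installed
        # revision.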