14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
from __future__ import absolute_import
19
from cStringIO import StringIO
30
repository as _mod_repository,
31
30
revision as _mod_revision,
38
34
versionedfile as _mod_versionedfile,
40
from .. import bundle_data, serializer as bundle_serializer
41
from ....i18n import ngettext
36
from brzlib.bundle import bundle_data, serializer as bundle_serializer
37
from brzlib.i18n import ngettext
38
from brzlib import bencode
44
41
class _MPDiffInventoryGenerator(_mod_versionedfile._MPDiffGenerator):
59
56
# parents first, and then grab the ordered requests.
60
57
needed_ids = [k[-1] for k in self.present_parents]
61
58
needed_ids.extend([k[-1] for k in self.ordered_keys])
62
inv_to_lines = self.repo._serializer.write_inventory_to_chunks
59
inv_to_str = self.repo._serializer.write_inventory_to_string
63
60
for inv in self.repo.iter_inventories(needed_ids):
64
61
revision_id = inv.revision_id
65
62
key = (revision_id,)
71
68
parent_ids = [k[-1] for k in self.parent_map[key]]
72
as_chunks = inv_to_lines(inv)
73
self._process_one_record(key, as_chunks)
69
as_bytes = inv_to_str(inv)
70
self._process_one_record(key, (as_bytes,))
74
71
if parent_ids is None:
76
73
diff = self.diffs.pop(key)
77
sha1 = osutils.sha_strings(as_chunks)
74
sha1 = osutils.sha_string(as_bytes)
78
75
yield revision_id, parent_ids, sha1, diff
101
98
"""Start writing the bundle"""
102
self._fileobj.write(bundle_serializer._get_bundle_header('4'))
103
self._fileobj.write(b'#\n')
99
self._fileobj.write(bundle_serializer._get_bundle_header(
100
bundle_serializer.v4_string))
101
self._fileobj.write('#\n')
104
102
self._container.begin()
120
118
:revision_id: The revision id of the mpdiff being added.
121
119
:file_id: The file-id of the file, or None for inventories.
123
metadata = {b'parents': parents,
124
b'storage_kind': b'mpdiff',
121
metadata = {'parents': parents,
122
'storage_kind': 'mpdiff',
126
124
self._add_record(mp_bytes, metadata, repo_kind, revision_id, file_id)
128
126
def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
135
133
:revision_id: The revision id of the fulltext being added.
137
metadata = {b'parents': parents,
138
b'storage_kind': b'mpdiff'}
139
self._add_record(bytes, {b'parents': parents,
140
b'storage_kind': b'fulltext'}, repo_kind, revision_id, None)
135
metadata = {'parents': parents,
136
'storage_kind': 'mpdiff'}
137
self._add_record(bytes, {'parents': parents,
138
'storage_kind': 'fulltext'}, repo_kind, revision_id, None)
142
def add_info_record(self, kwargs):
140
def add_info_record(self, **kwargs):
143
141
"""Add an info record to the bundle
145
143
Any parameters may be supplied, except 'self' and 'storage_kind'.
146
144
Values must be lists, strings, integers, dicts, or a combination.
148
kwargs[b'storage_kind'] = b'header'
146
kwargs['storage_kind'] = 'header'
149
147
self._add_record(None, kwargs, 'info', None, None)
152
150
def encode_name(content_kind, revision_id, file_id=None):
153
151
"""Encode semantic ids as a container name"""
154
152
if content_kind not in ('revision', 'file', 'inventory', 'signature',
156
154
raise ValueError(content_kind)
157
155
if content_kind == 'file':
158
156
if file_id is None:
165
163
raise AssertionError()
166
164
elif revision_id is None:
167
165
raise AssertionError()
168
names = [n.replace(b'/', b'//') for n in
169
(content_kind.encode('ascii'), revision_id, file_id) if n is not None]
170
return b'/'.join(names)
166
names = [n.replace('/', '//') for n in
167
(content_kind, revision_id, file_id) if n is not None]
168
return '/'.join(names)
172
170
def _add_record(self, bytes, metadata, repo_kind, revision_id, file_id):
173
171
"""Add a bundle record to the container.
179
177
name = self.encode_name(repo_kind, revision_id, file_id)
180
178
encoded_metadata = bencode.bencode(metadata)
181
self._container.add_bytes_record([encoded_metadata], len(encoded_metadata), [(name, )])
182
if metadata[b'storage_kind'] != b'header':
183
self._container.add_bytes_record([bytes], len(bytes), [])
179
self._container.add_bytes_record(encoded_metadata, [(name, )])
180
if metadata['storage_kind'] != 'header':
181
self._container.add_bytes_record(bytes, [])
186
184
class BundleReader(object):
255
253
raise errors.BadBundle('Record has %d names instead of 1'
257
255
metadata = bencode.bdecode(bytes)
258
if metadata[b'storage_kind'] == b'header':
256
if metadata['storage_kind'] == 'header':
261
_unused, bytes = next(iterator)
259
_unused, bytes = iterator.next()
262
260
yield (bytes, metadata) + self.decode_name(names[0][0])
265
263
class BundleSerializerV4(bundle_serializer.BundleSerializer):
266
264
"""Implement the high-level bundle interface"""
266
def write(self, repository, revision_ids, forced_bases, fileobj):
267
"""Write a bundle to a file-like object
269
For backwards-compatibility only
271
write_op = BundleWriteOperation.from_old_args(repository, revision_ids,
272
forced_bases, fileobj)
273
return write_op.do_write()
268
275
def write_bundle(self, repository, target, base, fileobj):
269
276
"""Write a bundle to a file object
275
282
:param fileobj: The file-like object to write to
277
write_op = BundleWriteOperation(base, target, repository, fileobj)
284
write_op = BundleWriteOperation(base, target, repository, fileobj)
278
285
return write_op.do_write()
280
287
def read(self, file):
286
293
def get_source_serializer(info):
287
294
"""Retrieve the serializer for a given info object"""
288
return serializer.format_registry.get(info[b'serializer'].decode('ascii'))
295
return serializer.format_registry.get(info['serializer'])
291
298
class BundleWriteOperation(object):
292
299
"""Perform the operation of writing revisions to a bundle"""
302
def from_old_args(cls, repository, revision_ids, forced_bases, fileobj):
303
"""Create a BundleWriteOperation from old-style arguments"""
304
base, target = cls.get_base_target(revision_ids, forced_bases,
306
return BundleWriteOperation(base, target, repository, fileobj,
294
309
def __init__(self, base, target, repository, fileobj, revision_ids=None):
296
311
self.target = target
306
321
parents = graph.get_parent_map(revision_ids)
307
322
self.revision_ids = [r for r in revision_ids if r in parents]
308
self.revision_keys = {(revid,) for revid in self.revision_ids}
323
self.revision_keys = set([(revid,) for revid in self.revision_ids])
310
325
def do_write(self):
311
326
"""Write all data to the bundle"""
312
327
trace.note(ngettext('Bundling %d revision.', 'Bundling %d revisions.',
313
328
len(self.revision_ids)), len(self.revision_ids))
314
with self.repository.lock_read():
329
self.repository.lock_read()
315
331
self.bundle.begin()
316
332
self.write_info()
317
333
self.write_files()
318
334
self.write_revisions()
319
335
self.bundle.end()
337
self.repository.unlock()
320
338
return self.revision_ids
322
340
def write_info(self):
324
342
serializer_format = self.repository.get_serializer_format()
325
343
supports_rich_root = {True: 1, False: 0}[
326
344
self.repository.supports_rich_root()]
327
self.bundle.add_info_record({b'serializer': serializer_format,
328
b'supports_rich_root': supports_rich_root})
345
self.bundle.add_info_record(serializer=serializer_format,
346
supports_rich_root=supports_rich_root)
330
348
def write_files(self):
331
349
"""Write bundle records for all revisions of all files"""
333
351
altered_fileids = self.repository.fileids_altered_by_revision_ids(
335
for file_id, revision_ids in altered_fileids.items():
353
for file_id, revision_ids in altered_fileids.iteritems():
336
354
for revision_id in revision_ids:
337
355
text_keys.append((file_id, revision_id))
338
356
self._add_mp_records_keys('file', self.repository.texts, text_keys)
341
359
"""Write bundle records for all revisions and signatures"""
342
360
inv_vf = self.repository.inventories
343
361
topological_order = [key[-1] for key in multiparent.topo_iter_keys(
344
inv_vf, self.revision_keys)]
362
inv_vf, self.revision_keys)]
345
363
revision_order = topological_order
346
364
if self.target is not None and self.target in self.revision_ids:
347
365
# Make sure the target revision is always the last entry
365
383
"""Generate mpdiffs by serializing inventories.
367
385
The current repository only has part of the tree shape information in
368
the 'inventories' vf. So we use serializer.write_inventory_to_lines to
386
the 'inventories' vf. So we use serializer.write_inventory_to_string to
369
387
get a 'full' representation of the tree shape, and then generate
370
388
mpdiffs on that data stream. This stream can then be reconstructed on
374
392
generator = _MPDiffInventoryGenerator(self.repository,
375
393
inventory_key_order)
376
394
for revision_id, parent_ids, sha1, diff in generator.iter_diffs():
377
text = b''.join(diff.to_patch())
395
text = ''.join(diff.to_patch())
378
396
self.bundle.add_multiparent_record(text, sha1, parent_ids,
379
397
'inventory', revision_id, None)
381
399
def _add_revision_texts(self, revision_order):
382
400
parent_map = self.repository.get_parent_map(revision_order)
383
revision_to_bytes = self.repository._serializer.write_revision_to_string
401
revision_to_str = self.repository._serializer.write_revision_to_string
384
402
revisions = self.repository.get_revisions(revision_order)
385
403
for revision in revisions:
386
404
revision_id = revision.revision_id
387
405
parents = parent_map.get(revision_id, None)
388
revision_text = revision_to_bytes(revision)
406
revision_text = revision_to_str(revision)
389
407
self.bundle.add_fulltext_record(revision_text, parents,
390
'revision', revision_id)
408
'revision', revision_id)
392
410
self.bundle.add_fulltext_record(
393
411
self.repository.get_signature_text(
394
revision_id), parents, 'signature', revision_id)
412
revision_id), parents, 'signature', revision_id)
395
413
except errors.NoSuchRevision:
419
437
for mpdiff, item_key, in zip(mpdiffs, ordered_keys):
420
438
sha1 = sha1s[item_key]
421
439
parents = [key[-1] for key in parent_map[item_key]]
422
text = b''.join(mpdiff.to_patch())
440
text = ''.join(mpdiff.to_patch())
423
441
# Infer file id records as appropriate.
424
442
if len(item_key) == 2:
425
443
file_id = item_key[0]
450
467
all into memory at once. Reading it into memory all at once is
451
468
(currently) faster.
453
with repository.lock_write():
470
repository.lock_write()
454
472
ri = RevisionInstaller(self.get_bundle_reader(stream_input),
455
473
self._serializer, repository)
456
474
return ri.install()
458
478
def get_merge_request(self, target_repo):
459
479
"""Provide data for performing a merge
477
497
self.__real_revisions = []
478
498
bundle_reader = self.get_bundle_reader()
479
499
for bytes, metadata, repo_kind, revision_id, file_id in \
480
bundle_reader.iter_records():
500
bundle_reader.iter_records():
481
501
if repo_kind == 'info':
483
503
self._serializer.get_source_serializer(metadata)
518
538
Must be called with the Repository locked.
520
with _mod_repository.WriteGroup(self._repository):
521
return self._install_in_write_group()
540
self._repository.start_write_group()
542
result = self._install_in_write_group()
544
self._repository.abort_write_group()
546
self._repository.commit_write_group()
523
549
def _install_in_write_group(self):
524
550
current_file = None
529
555
added_inv = set()
530
556
target_revision = None
531
557
for bytes, metadata, repo_kind, revision_id, file_id in\
532
self._container.iter_records():
558
self._container.iter_records():
533
559
if repo_kind == 'info':
534
560
if self._info is not None:
535
561
raise AssertionError()
536
562
self._handle_info(metadata)
537
563
if (pending_file_records and
538
(repo_kind, file_id) != ('file', current_file)):
564
(repo_kind, file_id) != ('file', current_file)):
539
565
# Flush the data for a single file - prevents memory
540
566
# spiking due to buffering all files in memory.
541
567
self._install_mp_records_keys(self._repository.texts,
542
pending_file_records)
568
pending_file_records)
543
569
current_file = None
544
570
del pending_file_records[:]
545
571
if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
546
572
self._install_inventory_records(pending_inventory_records)
547
573
pending_inventory_records = []
548
574
if repo_kind == 'inventory':
549
pending_inventory_records.append(
550
((revision_id,), metadata, bytes))
575
pending_inventory_records.append(((revision_id,), metadata, bytes))
551
576
if repo_kind == 'revision':
552
577
target_revision = revision_id
553
578
self._install_revision(revision_id, metadata, bytes)
555
580
self._install_signature(revision_id, metadata, bytes)
556
581
if repo_kind == 'file':
557
582
current_file = file_id
558
pending_file_records.append(
559
((file_id, revision_id), metadata, bytes))
560
self._install_mp_records_keys(
561
self._repository.texts, pending_file_records)
583
pending_file_records.append(((file_id, revision_id), metadata, bytes))
584
self._install_mp_records_keys(self._repository.texts, pending_file_records)
562
585
return target_revision
564
587
def _handle_info(self, info):
565
588
"""Extract data from an info record"""
566
589
self._info = info
567
590
self._source_serializer = self._serializer.get_source_serializer(info)
568
if (info[b'supports_rich_root'] == 0 and
569
self._repository.supports_rich_root()):
591
if (info['supports_rich_root'] == 0 and
592
self._repository.supports_rich_root()):
570
593
self.update_root = True
572
595
self.update_root = False
595
parents = [prefix + (parent,) for parent in meta[b'parents']]
596
vf_records.append((key, parents, meta[b'sha1'], d_func(text)))
618
parents = [prefix + (parent,) for parent in meta['parents']]
619
vf_records.append((key, parents, meta['sha1'], d_func(text)))
597
620
versionedfile.add_mpdiffs(vf_records)
599
622
def _get_parent_inventory_texts(self, inventory_text_cache,
623
646
present_parent_ids.append(p_id)
626
to_lines = self._source_serializer.write_inventory_to_chunks
649
to_string = self._source_serializer.write_inventory_to_string
627
650
for parent_inv in self._repository.iter_inventories(
629
p_text = b''.join(to_lines(parent_inv))
652
p_text = to_string(parent_inv)
630
653
inventory_cache[parent_inv.revision_id] = parent_inv
631
654
cached_parent_texts[parent_inv.revision_id] = p_text
632
655
inventory_text_cache[parent_inv.revision_id] = p_text
634
657
parent_texts = [cached_parent_texts[parent_id]
635
658
for parent_id in parent_ids
636
if parent_id not in ghosts]
659
if parent_id not in ghosts]
637
660
return parent_texts
639
662
def _install_inventory_records(self, records):
640
if (self._info[b'serializer'] == self._repository._serializer.format_num
641
and self._repository._serializer.support_altered_by_hack):
663
if (self._info['serializer'] == self._repository._serializer.format_num
664
and self._repository._serializer.support_altered_by_hack):
642
665
return self._install_mp_records_keys(self._repository.inventories,
644
667
# Use a 10MB text cache, since these are string xml inventories. Note
645
668
# that 10MB is fairly small for large projects (a single inventory can
646
669
# be >5MB). Another possibility is to cache 10-20 inventory texts
648
inventory_text_cache = lru_cache.LRUSizeCache(10 * 1024 * 1024)
671
inventory_text_cache = lru_cache.LRUSizeCache(10*1024*1024)
649
672
# Also cache the in-memory representation. This allows us to create
650
673
# inventory deltas to apply rather than calling add_inventory from
651
674
# scratch each time.
652
675
inventory_cache = lru_cache.LRUCache(10)
653
with ui.ui_factory.nested_progress_bar() as pb:
676
pb = ui.ui_factory.nested_progress_bar()
654
678
num_records = len(records)
655
679
for idx, (key, metadata, bytes) in enumerate(records):
656
680
pb.update('installing inventory', idx, num_records)
657
681
revision_id = key[-1]
658
parent_ids = metadata[b'parents']
682
parent_ids = metadata['parents']
659
683
# Note: This assumes the local ghosts are identical to the
660
684
# ghosts in the source, as the Bundle serialization
661
685
# format doesn't record ghosts.
666
690
# it would have to cast to a list of lines, which we get back
667
691
# as lines and then cast back to a string.
668
692
target_lines = multiparent.MultiParent.from_patch(bytes
670
sha1 = osutils.sha_strings(target_lines)
671
if sha1 != metadata[b'sha1']:
694
inv_text = ''.join(target_lines)
696
sha1 = osutils.sha_string(inv_text)
697
if sha1 != metadata['sha1']:
672
698
raise errors.BadBundle("Can't convert to target format")
673
699
# Add this to the cache so we don't have to extract it again.
674
inventory_text_cache[revision_id] = b''.join(target_lines)
675
target_inv = self._source_serializer.read_inventory_from_lines(
700
inventory_text_cache[revision_id] = inv_text
701
target_inv = self._source_serializer.read_inventory_from_string(
678
703
self._handle_root(target_inv, parent_ids)
679
704
parent_inv = None
687
712
delta = target_inv._make_delta(parent_inv)
688
713
self._repository.add_inventory_by_delta(parent_ids[0],
689
delta, revision_id, parent_ids)
714
delta, revision_id, parent_ids)
690
715
except errors.UnsupportedInventoryKind:
691
716
raise errors.IncompatibleRevision(repr(self._repository))
692
717
inventory_cache[revision_id] = target_inv
694
721
def _handle_root(self, target_inv, parent_ids):
695
722
revision_id = target_inv.revision_id
696
723
if self.update_root:
697
724
text_key = (target_inv.root.file_id, revision_id)
698
725
parent_keys = [(target_inv.root.file_id, parent) for
699
parent in parent_ids]
726
parent in parent_ids]
700
727
self._repository.texts.add_lines(text_key, parent_keys, [])
701
728
elif not self._repository.supports_rich_root():
702
729
if target_inv.root.revision != revision_id: