1
# Copyright (C) 2007-2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
from __future__ import absolute_import
29
revision as _mod_revision,
36
versionedfile as _mod_versionedfile,
38
from ...bundle import bundle_data, serializer as bundle_serializer
39
from ...i18n import ngettext
40
from ...sixish import (
46
class _MPDiffInventoryGenerator(_mod_versionedfile._MPDiffGenerator):
47
"""Generate Inventory diffs serialized inventories."""
49
def __init__(self, repo, inventory_keys):
# NOTE(review): extraction artifact — the bare "50" below is not code, and
# the continuation of this super() call (the remaining constructor lines)
# is missing from this capture.
50
super(_MPDiffInventoryGenerator, self).__init__(repo.inventories,
56
"""Compute the diffs one at a time."""
57
# This is instead of compute_diffs() since we guarantee our ordering of
58
# inventories, we don't have to do any buffering
59
self._find_needed_keys()
60
# We actually use a slightly different ordering. We grab all of the
61
# parents first, and then grab the ordered requests.
62
needed_ids = [k[-1] for k in self.present_parents]
63
needed_ids.extend([k[-1] for k in self.ordered_keys])
64
inv_to_bytes = self.repo._serializer.write_inventory_to_string
65
for inv in self.repo.iter_inventories(needed_ids):
66
revision_id = inv.revision_id
68
if key in self.present_parents:
69
# Not a key we will transmit, which is a shame, since because
70
# of that bundles don't work with stacked branches
73
parent_ids = [k[-1] for k in self.parent_map[key]]
74
as_bytes = inv_to_bytes(inv)
75
self._process_one_record(key, (as_bytes,))
76
if parent_ids is None:
78
diff = self.diffs.pop(key)
79
sha1 = osutils.sha_string(as_bytes)
80
yield revision_id, parent_ids, sha1, diff
83
class BundleWriter(object):
84
"""Writer for bundle-format files.
86
This serves roughly the same purpose as ContainerReader, but acts as a
89
Provides ways of writing the specific record types supported this bundle
93
def __init__(self, fileobj):
    """Prepare a bundle writer that bzip2-compresses container output.

    :param fileobj: file-like object the encoded bundle is written to
    """
    self._fileobj = fileobj
    self._compressor = bz2.BZ2Compressor()
    # All container output is routed through _write_encoded for compression.
    self._container = pack.ContainerWriter(self._write_encoded)
98
def _write_encoded(self, bytes):
99
"""Write bzip2-encoded bytes to the file"""
100
self._fileobj.write(self._compressor.compress(bytes))
103
"""Start writing the bundle"""
104
self._fileobj.write(bundle_serializer._get_bundle_header('4'))
105
self._fileobj.write(b'#\n')
106
self._container.begin()
109
"""Finish writing the bundle"""
110
self._container.end()
111
self._fileobj.write(self._compressor.flush())
113
def add_multiparent_record(self, mp_bytes, sha1, parents, repo_kind,
114
revision_id, file_id):
115
# NOTE(review): interleaved bare numbers are extraction artifacts; the
# docstring close and (apparently) a b'sha1' metadata entry are missing
# from this capture — the readers below consume metadata[b'sha1'].
"""Add a record for a multi-parent diff
117
:mp_bytes: A multi-parent diff, as a bytestring
118
:sha1: The sha1 hash of the fulltext
119
:parents: a list of revision-ids of the parents
120
:repo_kind: The kind of object in the repository. May be 'file' or
122
:revision_id: The revision id of the mpdiff being added.
123
:file_id: The file-id of the file, or None for inventories.
125
metadata = {b'parents': parents,
126
b'storage_kind': b'mpdiff',
128
self._add_record(mp_bytes, metadata, repo_kind, revision_id, file_id)
130
def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
    """Add a record for a fulltext

    :bytes: The fulltext, as a bytestring
    :parents: a list of revision-ids of the parents
    :repo_kind: The kind of object in the repository. May be 'revision' or
        'signature' — TODO confirm; the tail of this doc line was elided in
        the source capture.
    :revision_id: The revision id of the fulltext being added.
    """
    # Fix: the original built an unused 'mpdiff' metadata dict here (dead
    # store) and then passed a fresh 'fulltext' dict anyway; only the
    # 'fulltext' metadata is ever sent.
    self._add_record(bytes, {b'parents': parents,
                             b'storage_kind': b'fulltext'},
                     repo_kind, revision_id, None)
144
def add_info_record(self, kwargs):
# NOTE(review): takes the metadata as a single dict argument as captured —
# confirm (this may have been **kwargs originally).
145
"""Add an info record to the bundle
147
Any parameters may be supplied, except 'self' and 'storage_kind'.
148
Values must be lists, strings, integers, dicts, or a combination.
150
# storage_kind b'header' makes _add_record skip emitting a body record.
kwargs[b'storage_kind'] = b'header'
151
self._add_record(None, kwargs, 'info', None, None)
154
def encode_name(content_kind, revision_id, file_id=None):
155
"""Encode semantic ids as a container name"""
156
# NOTE(review): several original lines are elided in this capture (the
# tail of the allowed-kinds tuple and the file/non-file file_id
# validation branches), so the conditionals below read as incomplete.
if content_kind not in ('revision', 'file', 'inventory', 'signature',
158
raise ValueError(content_kind)
159
if content_kind == 'file':
161
raise AssertionError()
163
if file_id is not None:
164
raise AssertionError()
165
if content_kind == 'info':
166
if revision_id is not None:
167
raise AssertionError()
168
elif revision_id is None:
169
raise AssertionError()
170
# '/' separates name segments, so literal slashes are escaped by doubling
# (decode_name splits on the pattern b'(//?)').
names = [n.replace(b'/', b'//') for n in
171
(content_kind.encode('ascii'), revision_id, file_id) if n is not None]
172
return b'/'.join(names)
174
def _add_record(self, bytes, metadata, repo_kind, revision_id, file_id):
    """Write one bundle record into the container.

    A record is normally a named metadata entry followed by a nameless
    body entry; records whose storage_kind is 'header' carry no body.
    """
    record_name = self.encode_name(repo_kind, revision_id, file_id)
    self._container.add_bytes_record(bencode.bencode(metadata),
                                     [(record_name,)])
    has_body = metadata[b'storage_kind'] != b'header'
    if has_body:
        self._container.add_bytes_record(bytes, [])
188
class BundleReader(object):
189
"""Reader for bundle-format files.
191
This serves roughly the same purpose as ContainerReader, but acts as a
192
layer on top of it, providing metadata, a semantic name, and a record
196
def __init__(self, fileobj, stream_input=True):
199
:param fileobj: a file containing a bzip-encoded container
200
:param stream_input: If True, the BundleReader stream input rather than
201
reading it all into memory at once. Reading it into memory all at
202
once is (currently) faster.
204
line = fileobj.readline()
207
self.patch_lines = []
209
source_file = iterablefile.IterableFile(self.iter_decode(fileobj))
211
source_file = BytesIO(bz2.decompress(fileobj.read()))
212
self._container_file = source_file
215
def iter_decode(fileobj):
216
"""Iterate through decoded fragments of the file"""
217
decompressor = bz2.BZ2Decompressor()
220
yield decompressor.decompress(line)
225
def decode_name(name):
226
"""Decode a name from its container form into a semantic form
228
:retval: content_kind, revision_id, file_id
230
segments = re.split(b'(//?)', name)
232
for segment in segments:
235
elif segment == b'/':
239
content_kind = names[0]
243
revision_id = names[1]
246
return content_kind.decode('ascii'), revision_id, file_id
248
def iter_records(self):
249
"""Iterate through bundle records
251
:return: a generator of (bytes, metadata, content_kind, revision_id,
254
iterator = pack.iter_records_from_file(self._container_file)
255
for names, bytes in iterator:
257
raise errors.BadBundle('Record has %d names instead of 1'
259
metadata = bencode.bdecode(bytes)
260
if metadata[b'storage_kind'] == b'header':
263
_unused, bytes = next(iterator)
264
yield (bytes, metadata) + self.decode_name(names[0][0])
267
class BundleSerializerV4(bundle_serializer.BundleSerializer):
268
"""Implement the high-level bundle interface"""
270
def write_bundle(self, repository, target, base, fileobj):
    """Write a bundle to a file object.

    :param repository: The repository to retrieve revision data from
    :param target: The head revision to include ancestors of
    :param base: The ancestor of the target to stop including ancestors at
    :param fileobj: The file-like object to write to
    :return: the result of BundleWriteOperation.do_write (the bundled
        revision ids)
    """
    operation = BundleWriteOperation(base, target, repository, fileobj)
    return operation.do_write()
282
def read(self, file):
283
"""return a reader object for a given file"""
284
bundle = BundleInfoV4(file, self)
# NOTE(review): the original 'return bundle' line appears to be missing
# from this capture — the docstring implies the reader is returned.
288
def get_source_serializer(info):
289
"""Retrieve the serializer for a given info object"""
290
# Looks up by the b'serializer' value written by write_info.  There is no
# 'self' parameter, so this was presumably decorated @staticmethod on an
# elided line — confirm.
return serializer.format_registry.get(info[b'serializer'].decode('ascii'))
293
class BundleWriteOperation(object):
294
"""Perform the operation of writing revisions to a bundle"""
296
def __init__(self, base, target, repository, fileobj, revision_ids=None):
299
self.repository = repository
300
bundle = BundleWriter(fileobj)
302
if revision_ids is not None:
303
self.revision_ids = revision_ids
305
graph = repository.get_graph()
306
revision_ids = graph.find_unique_ancestors(target, [base])
308
parents = graph.get_parent_map(revision_ids)
309
self.revision_ids = [r for r in revision_ids if r in parents]
310
self.revision_keys = {(revid,) for revid in self.revision_ids}
313
"""Write all data to the bundle"""
314
trace.note(ngettext('Bundling %d revision.', 'Bundling %d revisions.',
315
len(self.revision_ids)), len(self.revision_ids))
316
with self.repository.lock_read():
320
self.write_revisions()
322
return self.revision_ids
324
def write_info(self):
    """Emit the bundle's format-info record (serializer + rich-root flag)."""
    rich_root_flag = 1 if self.repository.supports_rich_root() else 0
    self.bundle.add_info_record({
        b'serializer': self.repository.get_serializer_format(),
        b'supports_rich_root': rich_root_flag,
        })
332
def write_files(self):
333
"""Write bundle records for all revisions of all files"""
335
altered_fileids = self.repository.fileids_altered_by_revision_ids(
337
for file_id, revision_ids in viewitems(altered_fileids):
338
for revision_id in revision_ids:
339
text_keys.append((file_id, revision_id))
340
self._add_mp_records_keys('file', self.repository.texts, text_keys)
342
def write_revisions(self):
343
"""Write bundle records for all revisions and signatures"""
344
inv_vf = self.repository.inventories
345
topological_order = [key[-1] for key in multiparent.topo_iter_keys(
346
inv_vf, self.revision_keys)]
347
revision_order = topological_order
348
if self.target is not None and self.target in self.revision_ids:
349
# Make sure the target revision is always the last entry
350
revision_order = list(topological_order)
351
revision_order.remove(self.target)
352
revision_order.append(self.target)
353
if self.repository._serializer.support_altered_by_hack:
354
# Repositories that support_altered_by_hack means that
355
# inventories.make_mpdiffs() contains all the data about the tree
356
# shape. Formats without support_altered_by_hack require
357
# chk_bytes/etc, so we use a different code path.
358
self._add_mp_records_keys('inventory', inv_vf,
359
[(revid,) for revid in topological_order])
361
# Inventories should always be added in pure-topological order, so
362
# that we can apply the mpdiff for the child to the parent texts.
363
self._add_inventory_mpdiffs_from_serializer(topological_order)
364
self._add_revision_texts(revision_order)
366
def _add_inventory_mpdiffs_from_serializer(self, revision_order):
367
"""Generate mpdiffs by serializing inventories.
369
The current repository only has part of the tree shape information in
370
the 'inventories' vf. So we use serializer.write_inventory_to_string to
371
get a 'full' representation of the tree shape, and then generate
372
mpdiffs on that data stream. This stream can then be reconstructed on
375
inventory_key_order = [(r,) for r in revision_order]
376
generator = _MPDiffInventoryGenerator(self.repository,
378
for revision_id, parent_ids, sha1, diff in generator.iter_diffs():
379
text = b''.join(diff.to_patch())
380
self.bundle.add_multiparent_record(text, sha1, parent_ids,
381
'inventory', revision_id, None)
383
def _add_revision_texts(self, revision_order):
384
parent_map = self.repository.get_parent_map(revision_order)
385
revision_to_bytes = self.repository._serializer.write_revision_to_string
386
revisions = self.repository.get_revisions(revision_order)
387
for revision in revisions:
388
revision_id = revision.revision_id
389
parents = parent_map.get(revision_id, None)
390
revision_text = revision_to_bytes(revision)
391
self.bundle.add_fulltext_record(revision_text, parents,
392
'revision', revision_id)
394
self.bundle.add_fulltext_record(
395
self.repository.get_signature_text(
396
revision_id), parents, 'signature', revision_id)
397
except errors.NoSuchRevision:
401
def get_base_target(revision_ids, forced_bases, repository):
402
"""Determine the base and target from old-style revision ids"""
403
if len(revision_ids) == 0:
405
target = revision_ids[0]
406
base = forced_bases.get(target)
408
parents = repository.get_revision(target).parent_ids
409
if len(parents) == 0:
410
base = _mod_revision.NULL_REVISION
415
def _add_mp_records_keys(self, repo_kind, vf, keys):
416
"""Add multi-parent diff records to a bundle"""
417
ordered_keys = list(multiparent.topo_iter_keys(vf, keys))
418
mpdiffs = vf.make_mpdiffs(ordered_keys)
419
sha1s = vf.get_sha1s(ordered_keys)
420
parent_map = vf.get_parent_map(ordered_keys)
421
for mpdiff, item_key, in zip(mpdiffs, ordered_keys):
422
sha1 = sha1s[item_key]
423
parents = [key[-1] for key in parent_map[item_key]]
424
text = b''.join(mpdiff.to_patch())
425
# Infer file id records as appropriate.
426
if len(item_key) == 2:
427
file_id = item_key[0]
430
self.bundle.add_multiparent_record(text, sha1, parents, repo_kind,
431
item_key[-1], file_id)
434
class BundleInfoV4(object):
436
"""Provide (most of) the BundleInfo interface"""
438
def __init__(self, fileobj, serializer):
439
self._fileobj = fileobj
440
self._serializer = serializer
441
self.__real_revisions = None
442
self.__revisions = None
444
def install(self, repository):
    """Install this bundle into *repository* (delegates to install_revisions)."""
    return self.install_revisions(repository)
447
def install_revisions(self, repository, stream_input=True):
448
"""Install this bundle's revisions into the specified repository
450
:param target_repo: The repository to install into
451
:param stream_input: If True, will stream input rather than reading it
452
all into memory at once. Reading it into memory all at once is
455
with repository.lock_write():
456
ri = RevisionInstaller(self.get_bundle_reader(stream_input),
457
self._serializer, repository)
460
def get_merge_request(self, target_repo):
    """Provide data for performing a merge.

    :return: suggested base (unknown, so None), suggested target, and the
        patch verification status ('inapplicable' for this format).
    """
    return None, self.target, 'inapplicable'
467
def get_bundle_reader(self, stream_input=True):
    """Return a fresh BundleReader positioned at the start of the bundle.

    :param stream_input: If True, the BundleReader streams its input
        rather than reading it all into memory at once.  Reading it into
        memory all at once is (currently) faster.
    """
    self._fileobj.seek(0)
    return BundleReader(self._fileobj, stream_input)
477
def _get_real_revisions(self):
478
if self.__real_revisions is None:
479
self.__real_revisions = []
480
bundle_reader = self.get_bundle_reader()
481
for bytes, metadata, repo_kind, revision_id, file_id in \
482
bundle_reader.iter_records():
483
if repo_kind == 'info':
485
self._serializer.get_source_serializer(metadata)
486
if repo_kind == 'revision':
487
rev = serializer.read_revision_from_string(bytes)
488
self.__real_revisions.append(rev)
489
return self.__real_revisions
490
real_revisions = property(_get_real_revisions)
492
def _get_revisions(self):
    """Return cached RevisionInfo objects, building them on first access."""
    if self.__revisions is None:
        self.__revisions = [
            bundle_data.RevisionInfo.from_revision(rev)
            for rev in self.real_revisions]
    return self.__revisions

revisions = property(_get_revisions)
502
def _get_target(self):
    """Return the revision id of the last revision in the bundle."""
    last_revision = self.revisions[-1]
    return last_revision.revision_id

target = property(_get_target)
508
class RevisionInstaller(object):
509
"""Installs revisions into a repository"""
511
def __init__(self, container, serializer, repository):
512
self._container = container
513
self._serializer = serializer
514
self._repository = repository
518
"""Perform the installation.
520
Must be called with the Repository locked.
522
self._repository.start_write_group()
524
result = self._install_in_write_group()
526
self._repository.abort_write_group()
528
self._repository.commit_write_group()
531
def _install_in_write_group(self):
533
current_versionedfile = None
534
pending_file_records = []
536
pending_inventory_records = []
538
target_revision = None
539
for bytes, metadata, repo_kind, revision_id, file_id in\
540
self._container.iter_records():
541
if repo_kind == 'info':
542
if self._info is not None:
543
raise AssertionError()
544
self._handle_info(metadata)
545
if (pending_file_records and
546
(repo_kind, file_id) != ('file', current_file)):
547
# Flush the data for a single file - prevents memory
548
# spiking due to buffering all files in memory.
549
self._install_mp_records_keys(self._repository.texts,
550
pending_file_records)
552
del pending_file_records[:]
553
if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
554
self._install_inventory_records(pending_inventory_records)
555
pending_inventory_records = []
556
if repo_kind == 'inventory':
557
pending_inventory_records.append(
558
((revision_id,), metadata, bytes))
559
if repo_kind == 'revision':
560
target_revision = revision_id
561
self._install_revision(revision_id, metadata, bytes)
562
if repo_kind == 'signature':
563
self._install_signature(revision_id, metadata, bytes)
564
if repo_kind == 'file':
565
current_file = file_id
566
pending_file_records.append(
567
((file_id, revision_id), metadata, bytes))
568
self._install_mp_records_keys(
569
self._repository.texts, pending_file_records)
570
return target_revision
572
def _handle_info(self, info):
573
"""Extract data from an info record"""
575
self._source_serializer = self._serializer.get_source_serializer(info)
576
# Set when the source bundle lacks rich roots but the target repository
# supports them — presumably roots must then be synthesized on install
# (confirm against _handle_root).
if (info[b'supports_rich_root'] == 0 and
577
self._repository.supports_rich_root()):
578
self.update_root = True
580
# NOTE(review): an 'else:' line is apparently elided above this assignment
# in this capture.
self.update_root = False
582
def _install_mp_records(self, versionedfile, records):
# Bulk-install multiparent-diff records, skipping texts already present
# in the versionedfile.
583
if len(records) == 0:
585
# NOTE(review): the early-return body for the empty case is elided in this
# capture (likely a bare 'return').  Also note the str keys ('parents',
# 'sha1') here versus the bytes keys (b'parents', b'sha1') used by
# _install_mp_records_keys — verify which is correct.
d_func = multiparent.MultiParent.from_patch
586
vf_records = [(r, m['parents'], m['sha1'], d_func(t)) for r, m, t in
587
records if r not in versionedfile]
588
versionedfile.add_mpdiffs(vf_records)
590
def _install_mp_records_keys(self, versionedfile, records):
591
d_func = multiparent.MultiParent.from_patch
593
for key, meta, text in records:
594
# Adapt to tuple interface: A length two key is a file_id,
595
# revision_id pair, a length 1 key is a
596
# revision/signature/inventory. We need to do this because
597
# the metadata extraction from the bundle has not yet been updated
598
# to use the consistent tuple interface itself.
603
parents = [prefix + (parent,) for parent in meta[b'parents']]
604
vf_records.append((key, parents, meta[b'sha1'], d_func(text)))
605
versionedfile.add_mpdiffs(vf_records)
607
def _get_parent_inventory_texts(self, inventory_text_cache,
608
inventory_cache, parent_ids):
609
cached_parent_texts = {}
610
remaining_parent_ids = []
611
for parent_id in parent_ids:
612
p_text = inventory_text_cache.get(parent_id, None)
614
remaining_parent_ids.append(parent_id)
616
cached_parent_texts[parent_id] = p_text
618
# TODO: Use inventory_cache to grab inventories we already have in
620
if remaining_parent_ids:
621
# first determine what keys are actually present in the local
622
# inventories object (don't use revisions as they haven't been
624
parent_keys = [(r,) for r in remaining_parent_ids]
625
present_parent_map = self._repository.inventories.get_parent_map(
627
present_parent_ids = []
629
for p_id in remaining_parent_ids:
630
if (p_id,) in present_parent_map:
631
present_parent_ids.append(p_id)
634
to_string = self._source_serializer.write_inventory_to_string
635
for parent_inv in self._repository.iter_inventories(
637
p_text = to_string(parent_inv)
638
inventory_cache[parent_inv.revision_id] = parent_inv
639
cached_parent_texts[parent_inv.revision_id] = p_text
640
inventory_text_cache[parent_inv.revision_id] = p_text
642
parent_texts = [cached_parent_texts[parent_id]
643
for parent_id in parent_ids
644
if parent_id not in ghosts]
647
def _install_inventory_records(self, records):
648
if (self._info[b'serializer'] == self._repository._serializer.format_num
649
and self._repository._serializer.support_altered_by_hack):
650
return self._install_mp_records_keys(self._repository.inventories,
652
# Use a 10MB text cache, since these are string xml inventories. Note
653
# that 10MB is fairly small for large projects (a single inventory can
654
# be >5MB). Another possibility is to cache 10-20 inventory texts
656
inventory_text_cache = lru_cache.LRUSizeCache(10 * 1024 * 1024)
657
# Also cache the in-memory representation. This allows us to create
658
# inventory deltas to apply rather than calling add_inventory from
660
inventory_cache = lru_cache.LRUCache(10)
661
with ui.ui_factory.nested_progress_bar() as pb:
662
num_records = len(records)
663
for idx, (key, metadata, bytes) in enumerate(records):
664
pb.update('installing inventory', idx, num_records)
665
revision_id = key[-1]
666
parent_ids = metadata[b'parents']
667
# Note: This assumes the local ghosts are identical to the
668
# ghosts in the source, as the Bundle serialization
669
# format doesn't record ghosts.
670
p_texts = self._get_parent_inventory_texts(inventory_text_cache,
673
# Why does to_lines() take strings as the source, it seems that
674
# it would have to cast to a list of lines, which we get back
675
# as lines and then cast back to a string.
676
target_lines = multiparent.MultiParent.from_patch(bytes
678
inv_text = b''.join(target_lines)
680
sha1 = osutils.sha_string(inv_text)
681
if sha1 != metadata[b'sha1']:
682
raise errors.BadBundle("Can't convert to target format")
683
# Add this to the cache so we don't have to extract it again.
684
inventory_text_cache[revision_id] = inv_text
685
target_inv = self._source_serializer.read_inventory_from_string(
687
self._handle_root(target_inv, parent_ids)
690
parent_inv = inventory_cache.get(parent_ids[0], None)
692
if parent_inv is None:
693
self._repository.add_inventory(revision_id, target_inv,
696
delta = target_inv._make_delta(parent_inv)
697
self._repository.add_inventory_by_delta(parent_ids[0],
698
delta, revision_id, parent_ids)
699
except errors.UnsupportedInventoryKind:
700
raise errors.IncompatibleRevision(repr(self._repository))
701
inventory_cache[revision_id] = target_inv
703
def _handle_root(self, target_inv, parent_ids):
704
revision_id = target_inv.revision_id
706
text_key = (target_inv.root.file_id, revision_id)
707
parent_keys = [(target_inv.root.file_id, parent) for
708
parent in parent_ids]
709
self._repository.texts.add_lines(text_key, parent_keys, [])
710
elif not self._repository.supports_rich_root():
711
if target_inv.root.revision != revision_id:
712
raise errors.IncompatibleRevision(repr(self._repository))
714
def _install_revision(self, revision_id, metadata, text):
715
# Skip revisions the repository already has; the early-return body is
# elided in this capture (likely a bare 'return').
if self._repository.has_revision(revision_id):
717
revision = self._source_serializer.read_revision_from_string(text)
718
self._repository.add_revision(revision.revision_id, revision)
720
def _install_signature(self, revision_id, metadata, text):
721
# NOTE(review): 'transaction' appears unused in the visible lines.
transaction = self._repository.get_transaction()
722
# Skip signatures already present; the early-return body is elided in
# this capture (likely a bare 'return').
if self._repository.has_signature_for_revision_id(revision_id):
724
self._repository.add_signature_text(revision_id, text)