/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to breezy/bundle/serializer/v4.py

  • Committer: Jelmer Vernooij
  • Date: 2018-02-18 21:42:57 UTC
  • mto: This revision was merged to the branch mainline in revision 6859.
  • Revision ID: jelmer@jelmer.uk-20180218214257-jpevutp1wa30tz3v
Update TODO to reference Breezy, not Bazaar.

Show diffs side-by-side

added

removed

Lines of Context:
14
14
# along with this program; if not, write to the Free Software
15
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
 
17
from __future__ import absolute_import
 
18
 
17
19
import bz2
18
 
from io import (
19
 
    BytesIO,
20
 
    )
21
20
import re
22
21
 
23
 
from .... import (
 
22
from ... import (
24
23
    bencode,
25
24
    errors,
26
25
    iterablefile,
27
26
    lru_cache,
28
27
    multiparent,
29
28
    osutils,
30
 
    repository as _mod_repository,
31
29
    revision as _mod_revision,
32
30
    trace,
33
31
    ui,
34
32
    )
35
 
from ... import (
 
33
from ...bzr import (
36
34
    pack,
37
35
    serializer,
38
36
    versionedfile as _mod_versionedfile,
39
37
    )
40
 
from .. import bundle_data, serializer as bundle_serializer
41
 
from ....i18n import ngettext
 
38
from ...bundle import bundle_data, serializer as bundle_serializer
 
39
from ...i18n import ngettext
 
40
from ...sixish import (
 
41
    BytesIO,
 
42
    viewitems,
 
43
    )
42
44
 
43
45
 
44
46
class _MPDiffInventoryGenerator(_mod_versionedfile._MPDiffGenerator):
46
48
 
47
49
    def __init__(self, repo, inventory_keys):
48
50
        super(_MPDiffInventoryGenerator, self).__init__(repo.inventories,
49
 
                                                        inventory_keys)
 
51
            inventory_keys)
50
52
        self.repo = repo
51
53
        self.sha1s = {}
52
54
 
59
61
        # parents first, and then grab the ordered requests.
60
62
        needed_ids = [k[-1] for k in self.present_parents]
61
63
        needed_ids.extend([k[-1] for k in self.ordered_keys])
62
 
        inv_to_lines = self.repo._serializer.write_inventory_to_chunks
 
64
        inv_to_str = self.repo._serializer.write_inventory_to_string
63
65
        for inv in self.repo.iter_inventories(needed_ids):
64
66
            revision_id = inv.revision_id
65
67
            key = (revision_id,)
69
71
                parent_ids = None
70
72
            else:
71
73
                parent_ids = [k[-1] for k in self.parent_map[key]]
72
 
            as_chunks = inv_to_lines(inv)
73
 
            self._process_one_record(key, as_chunks)
 
74
            as_bytes = inv_to_str(inv)
 
75
            self._process_one_record(key, (as_bytes,))
74
76
            if parent_ids is None:
75
77
                continue
76
78
            diff = self.diffs.pop(key)
77
 
            sha1 = osutils.sha_strings(as_chunks)
 
79
            sha1 = osutils.sha_string(as_bytes)
78
80
            yield revision_id, parent_ids, sha1, diff
79
81
 
80
82
 
99
101
 
100
102
    def begin(self):
101
103
        """Start writing the bundle"""
102
 
        self._fileobj.write(bundle_serializer._get_bundle_header('4'))
103
 
        self._fileobj.write(b'#\n')
 
104
        self._fileobj.write(bundle_serializer._get_bundle_header(
 
105
            bundle_serializer.v4_string))
 
106
        self._fileobj.write('#\n')
104
107
        self._container.begin()
105
108
 
106
109
    def end(self):
120
123
        :revision_id: The revision id of the mpdiff being added.
121
124
        :file_id: The file-id of the file, or None for inventories.
122
125
        """
123
 
        metadata = {b'parents': parents,
124
 
                    b'storage_kind': b'mpdiff',
125
 
                    b'sha1': sha1}
 
126
        metadata = {'parents': parents,
 
127
                    'storage_kind': 'mpdiff',
 
128
                    'sha1': sha1}
126
129
        self._add_record(mp_bytes, metadata, repo_kind, revision_id, file_id)
127
130
 
128
131
    def add_fulltext_record(self, bytes, parents, repo_kind, revision_id):
134
137
            'signature'
135
138
        :revision_id: The revision id of the fulltext being added.
136
139
        """
137
 
        metadata = {b'parents': parents,
138
 
                    b'storage_kind': b'mpdiff'}
139
 
        self._add_record(bytes, {b'parents': parents,
140
 
                                 b'storage_kind': b'fulltext'}, repo_kind, revision_id, None)
 
140
        metadata = {'parents': parents,
 
141
                    'storage_kind': 'mpdiff'}
 
142
        self._add_record(bytes, {'parents': parents,
 
143
            'storage_kind': 'fulltext'}, repo_kind, revision_id, None)
141
144
 
142
 
    def add_info_record(self, kwargs):
 
145
    def add_info_record(self, **kwargs):
143
146
        """Add an info record to the bundle
144
147
 
145
148
        Any parameters may be supplied, except 'self' and 'storage_kind'.
146
149
        Values must be lists, strings, integers, dicts, or a combination.
147
150
        """
148
 
        kwargs[b'storage_kind'] = b'header'
 
151
        kwargs['storage_kind'] = 'header'
149
152
        self._add_record(None, kwargs, 'info', None, None)
150
153
 
151
154
    @staticmethod
152
155
    def encode_name(content_kind, revision_id, file_id=None):
153
156
        """Encode semantic ids as a container name"""
154
157
        if content_kind not in ('revision', 'file', 'inventory', 'signature',
155
 
                                'info'):
 
158
                'info'):
156
159
            raise ValueError(content_kind)
157
160
        if content_kind == 'file':
158
161
            if file_id is None:
165
168
                raise AssertionError()
166
169
        elif revision_id is None:
167
170
            raise AssertionError()
168
 
        names = [n.replace(b'/', b'//') for n in
169
 
                 (content_kind.encode('ascii'), revision_id, file_id) if n is not None]
170
 
        return b'/'.join(names)
 
171
        names = [n.replace('/', '//') for n in
 
172
                 (content_kind, revision_id, file_id) if n is not None]
 
173
        return '/'.join(names)
171
174
 
172
175
    def _add_record(self, bytes, metadata, repo_kind, revision_id, file_id):
173
176
        """Add a bundle record to the container.
178
181
        """
179
182
        name = self.encode_name(repo_kind, revision_id, file_id)
180
183
        encoded_metadata = bencode.bencode(metadata)
181
 
        self._container.add_bytes_record([encoded_metadata], len(encoded_metadata), [(name, )])
182
 
        if metadata[b'storage_kind'] != b'header':
183
 
            self._container.add_bytes_record([bytes], len(bytes), [])
 
184
        self._container.add_bytes_record(encoded_metadata, [(name, )])
 
185
        if metadata['storage_kind'] != 'header':
 
186
            self._container.add_bytes_record(bytes, [])
184
187
 
185
188
 
186
189
class BundleReader(object):
225
228
 
226
229
        :retval: content_kind, revision_id, file_id
227
230
        """
228
 
        segments = re.split(b'(//?)', name)
229
 
        names = [b'']
 
231
        segments = re.split('(//?)', name)
 
232
        names = ['']
230
233
        for segment in segments:
231
 
            if segment == b'//':
232
 
                names[-1] += b'/'
233
 
            elif segment == b'/':
234
 
                names.append(b'')
 
234
            if segment == '//':
 
235
                names[-1] += '/'
 
236
            elif segment == '/':
 
237
                names.append('')
235
238
            else:
236
239
                names[-1] += segment
237
240
        content_kind = names[0]
241
244
            revision_id = names[1]
242
245
        if len(names) > 2:
243
246
            file_id = names[2]
244
 
        return content_kind.decode('ascii'), revision_id, file_id
 
247
        return content_kind, revision_id, file_id
245
248
 
246
249
    def iter_records(self):
247
250
        """Iterate through bundle records
255
258
                raise errors.BadBundle('Record has %d names instead of 1'
256
259
                                       % len(names))
257
260
            metadata = bencode.bdecode(bytes)
258
 
            if metadata[b'storage_kind'] == b'header':
 
261
            if metadata['storage_kind'] == 'header':
259
262
                bytes = None
260
263
            else:
261
264
                _unused, bytes = next(iterator)
265
268
class BundleSerializerV4(bundle_serializer.BundleSerializer):
266
269
    """Implement the high-level bundle interface"""
267
270
 
 
271
    def write(self, repository, revision_ids, forced_bases, fileobj):
 
272
        """Write a bundle to a file-like object
 
273
 
 
274
        For backwards-compatibility only
 
275
        """
 
276
        write_op = BundleWriteOperation.from_old_args(repository, revision_ids,
 
277
                                                      forced_bases, fileobj)
 
278
        return write_op.do_write()
 
279
 
268
280
    def write_bundle(self, repository, target, base, fileobj):
269
281
        """Write a bundle to a file object
270
282
 
274
286
            at.
275
287
        :param fileobj: The file-like object to write to
276
288
        """
277
 
        write_op = BundleWriteOperation(base, target, repository, fileobj)
 
289
        write_op =  BundleWriteOperation(base, target, repository, fileobj)
278
290
        return write_op.do_write()
279
291
 
280
292
    def read(self, file):
285
297
    @staticmethod
286
298
    def get_source_serializer(info):
287
299
        """Retrieve the serializer for a given info object"""
288
 
        return serializer.format_registry.get(info[b'serializer'].decode('ascii'))
 
300
        return serializer.format_registry.get(info['serializer'])
289
301
 
290
302
 
291
303
class BundleWriteOperation(object):
292
304
    """Perform the operation of writing revisions to a bundle"""
293
305
 
 
306
    @classmethod
 
307
    def from_old_args(cls, repository, revision_ids, forced_bases, fileobj):
 
308
        """Create a BundleWriteOperation from old-style arguments"""
 
309
        base, target = cls.get_base_target(revision_ids, forced_bases,
 
310
                                           repository)
 
311
        return BundleWriteOperation(base, target, repository, fileobj,
 
312
                                    revision_ids)
 
313
 
294
314
    def __init__(self, base, target, repository, fileobj, revision_ids=None):
295
315
        self.base = base
296
316
        self.target = target
324
344
        serializer_format = self.repository.get_serializer_format()
325
345
        supports_rich_root = {True: 1, False: 0}[
326
346
            self.repository.supports_rich_root()]
327
 
        self.bundle.add_info_record({b'serializer': serializer_format,
328
 
                                     b'supports_rich_root': supports_rich_root})
 
347
        self.bundle.add_info_record(serializer=serializer_format,
 
348
                                    supports_rich_root=supports_rich_root)
329
349
 
330
350
    def write_files(self):
331
351
        """Write bundle records for all revisions of all files"""
332
352
        text_keys = []
333
353
        altered_fileids = self.repository.fileids_altered_by_revision_ids(
334
 
            self.revision_ids)
335
 
        for file_id, revision_ids in altered_fileids.items():
 
354
                self.revision_ids)
 
355
        for file_id, revision_ids in viewitems(altered_fileids):
336
356
            for revision_id in revision_ids:
337
357
                text_keys.append((file_id, revision_id))
338
358
        self._add_mp_records_keys('file', self.repository.texts, text_keys)
341
361
        """Write bundle records for all revisions and signatures"""
342
362
        inv_vf = self.repository.inventories
343
363
        topological_order = [key[-1] for key in multiparent.topo_iter_keys(
344
 
            inv_vf, self.revision_keys)]
 
364
                                inv_vf, self.revision_keys)]
345
365
        revision_order = topological_order
346
366
        if self.target is not None and self.target in self.revision_ids:
347
367
            # Make sure the target revision is always the last entry
365
385
        """Generate mpdiffs by serializing inventories.
366
386
 
367
387
        The current repository only has part of the tree shape information in
368
 
        the 'inventories' vf. So we use serializer.write_inventory_to_lines to
 
388
        the 'inventories' vf. So we use serializer.write_inventory_to_string to
369
389
        get a 'full' representation of the tree shape, and then generate
370
390
        mpdiffs on that data stream. This stream can then be reconstructed on
371
391
        the other side.
374
394
        generator = _MPDiffInventoryGenerator(self.repository,
375
395
                                              inventory_key_order)
376
396
        for revision_id, parent_ids, sha1, diff in generator.iter_diffs():
377
 
            text = b''.join(diff.to_patch())
 
397
            text = ''.join(diff.to_patch())
378
398
            self.bundle.add_multiparent_record(text, sha1, parent_ids,
379
399
                                               'inventory', revision_id, None)
380
400
 
381
401
    def _add_revision_texts(self, revision_order):
382
402
        parent_map = self.repository.get_parent_map(revision_order)
383
 
        revision_to_bytes = self.repository._serializer.write_revision_to_string
 
403
        revision_to_str = self.repository._serializer.write_revision_to_string
384
404
        revisions = self.repository.get_revisions(revision_order)
385
405
        for revision in revisions:
386
406
            revision_id = revision.revision_id
387
407
            parents = parent_map.get(revision_id, None)
388
 
            revision_text = revision_to_bytes(revision)
 
408
            revision_text = revision_to_str(revision)
389
409
            self.bundle.add_fulltext_record(revision_text, parents,
390
 
                                            'revision', revision_id)
 
410
                                       'revision', revision_id)
391
411
            try:
392
412
                self.bundle.add_fulltext_record(
393
413
                    self.repository.get_signature_text(
394
 
                        revision_id), parents, 'signature', revision_id)
 
414
                    revision_id), parents, 'signature', revision_id)
395
415
            except errors.NoSuchRevision:
396
416
                pass
397
417
 
419
439
        for mpdiff, item_key, in zip(mpdiffs, ordered_keys):
420
440
            sha1 = sha1s[item_key]
421
441
            parents = [key[-1] for key in parent_map[item_key]]
422
 
            text = b''.join(mpdiff.to_patch())
 
442
            text = ''.join(mpdiff.to_patch())
423
443
            # Infer file id records as appropriate.
424
444
            if len(item_key) == 2:
425
445
                file_id = item_key[0]
432
452
class BundleInfoV4(object):
433
453
 
434
454
    """Provide (most of) the BundleInfo interface"""
435
 
 
436
455
    def __init__(self, fileobj, serializer):
437
456
        self._fileobj = fileobj
438
457
        self._serializer = serializer
450
469
            all into memory at once.  Reading it into memory all at once is
451
470
            (currently) faster.
452
471
        """
453
 
        with repository.lock_write():
 
472
        repository.lock_write()
 
473
        try:
454
474
            ri = RevisionInstaller(self.get_bundle_reader(stream_input),
455
475
                                   self._serializer, repository)
456
476
            return ri.install()
 
477
        finally:
 
478
            repository.unlock()
457
479
 
458
480
    def get_merge_request(self, target_repo):
459
481
        """Provide data for performing a merge
477
499
            self.__real_revisions = []
478
500
            bundle_reader = self.get_bundle_reader()
479
501
            for bytes, metadata, repo_kind, revision_id, file_id in \
480
 
                    bundle_reader.iter_records():
 
502
                bundle_reader.iter_records():
481
503
                if repo_kind == 'info':
482
504
                    serializer =\
483
505
                        self._serializer.get_source_serializer(metadata)
517
539
 
518
540
        Must be called with the Repository locked.
519
541
        """
520
 
        with _mod_repository.WriteGroup(self._repository):
521
 
            return self._install_in_write_group()
 
542
        self._repository.start_write_group()
 
543
        try:
 
544
            result = self._install_in_write_group()
 
545
        except:
 
546
            self._repository.abort_write_group()
 
547
            raise
 
548
        self._repository.commit_write_group()
 
549
        return result
522
550
 
523
551
    def _install_in_write_group(self):
524
552
        current_file = None
529
557
        added_inv = set()
530
558
        target_revision = None
531
559
        for bytes, metadata, repo_kind, revision_id, file_id in\
532
 
                self._container.iter_records():
 
560
            self._container.iter_records():
533
561
            if repo_kind == 'info':
534
562
                if self._info is not None:
535
563
                    raise AssertionError()
536
564
                self._handle_info(metadata)
537
565
            if (pending_file_records and
538
 
                    (repo_kind, file_id) != ('file', current_file)):
 
566
                (repo_kind, file_id) != ('file', current_file)):
539
567
                # Flush the data for a single file - prevents memory
540
568
                # spiking due to buffering all files in memory.
541
569
                self._install_mp_records_keys(self._repository.texts,
542
 
                                              pending_file_records)
 
570
                    pending_file_records)
543
571
                current_file = None
544
572
                del pending_file_records[:]
545
573
            if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
546
574
                self._install_inventory_records(pending_inventory_records)
547
575
                pending_inventory_records = []
548
576
            if repo_kind == 'inventory':
549
 
                pending_inventory_records.append(
550
 
                    ((revision_id,), metadata, bytes))
 
577
                pending_inventory_records.append(((revision_id,), metadata, bytes))
551
578
            if repo_kind == 'revision':
552
579
                target_revision = revision_id
553
580
                self._install_revision(revision_id, metadata, bytes)
555
582
                self._install_signature(revision_id, metadata, bytes)
556
583
            if repo_kind == 'file':
557
584
                current_file = file_id
558
 
                pending_file_records.append(
559
 
                    ((file_id, revision_id), metadata, bytes))
560
 
        self._install_mp_records_keys(
561
 
            self._repository.texts, pending_file_records)
 
585
                pending_file_records.append(((file_id, revision_id), metadata, bytes))
 
586
        self._install_mp_records_keys(self._repository.texts, pending_file_records)
562
587
        return target_revision
563
588
 
564
589
    def _handle_info(self, info):
565
590
        """Extract data from an info record"""
566
591
        self._info = info
567
592
        self._source_serializer = self._serializer.get_source_serializer(info)
568
 
        if (info[b'supports_rich_root'] == 0 and
569
 
                self._repository.supports_rich_root()):
 
593
        if (info['supports_rich_root'] == 0 and
 
594
            self._repository.supports_rich_root()):
570
595
            self.update_root = True
571
596
        else:
572
597
            self.update_root = False
592
617
                prefix = key[:1]
593
618
            else:
594
619
                prefix = ()
595
 
            parents = [prefix + (parent,) for parent in meta[b'parents']]
596
 
            vf_records.append((key, parents, meta[b'sha1'], d_func(text)))
 
620
            parents = [prefix + (parent,) for parent in meta['parents']]
 
621
            vf_records.append((key, parents, meta['sha1'], d_func(text)))
597
622
        versionedfile.add_mpdiffs(vf_records)
598
623
 
599
624
    def _get_parent_inventory_texts(self, inventory_text_cache,
615
640
            # installed yet.)
616
641
            parent_keys = [(r,) for r in remaining_parent_ids]
617
642
            present_parent_map = self._repository.inventories.get_parent_map(
618
 
                parent_keys)
 
643
                                        parent_keys)
619
644
            present_parent_ids = []
620
645
            ghosts = set()
621
646
            for p_id in remaining_parent_ids:
623
648
                    present_parent_ids.append(p_id)
624
649
                else:
625
650
                    ghosts.add(p_id)
626
 
            to_lines = self._source_serializer.write_inventory_to_chunks
 
651
            to_string = self._source_serializer.write_inventory_to_string
627
652
            for parent_inv in self._repository.iter_inventories(
628
 
                    present_parent_ids):
629
 
                p_text = b''.join(to_lines(parent_inv))
 
653
                                    present_parent_ids):
 
654
                p_text = to_string(parent_inv)
630
655
                inventory_cache[parent_inv.revision_id] = parent_inv
631
656
                cached_parent_texts[parent_inv.revision_id] = p_text
632
657
                inventory_text_cache[parent_inv.revision_id] = p_text
633
658
 
634
659
        parent_texts = [cached_parent_texts[parent_id]
635
660
                        for parent_id in parent_ids
636
 
                        if parent_id not in ghosts]
 
661
                         if parent_id not in ghosts]
637
662
        return parent_texts
638
663
 
639
664
    def _install_inventory_records(self, records):
640
 
        if (self._info[b'serializer'] == self._repository._serializer.format_num
641
 
                and self._repository._serializer.support_altered_by_hack):
 
665
        if (self._info['serializer'] == self._repository._serializer.format_num
 
666
            and self._repository._serializer.support_altered_by_hack):
642
667
            return self._install_mp_records_keys(self._repository.inventories,
643
 
                                                 records)
 
668
                records)
644
669
        # Use a 10MB text cache, since these are string xml inventories. Note
645
670
        # that 10MB is fairly small for large projects (a single inventory can
646
671
        # be >5MB). Another possibility is to cache 10-20 inventory texts
647
672
        # instead
648
 
        inventory_text_cache = lru_cache.LRUSizeCache(10 * 1024 * 1024)
 
673
        inventory_text_cache = lru_cache.LRUSizeCache(10*1024*1024)
649
674
        # Also cache the in-memory representation. This allows us to create
650
675
        # inventory deltas to apply rather than calling add_inventory from
651
676
        # scratch each time.
652
677
        inventory_cache = lru_cache.LRUCache(10)
653
 
        with ui.ui_factory.nested_progress_bar() as pb:
 
678
        pb = ui.ui_factory.nested_progress_bar()
 
679
        try:
654
680
            num_records = len(records)
655
681
            for idx, (key, metadata, bytes) in enumerate(records):
656
682
                pb.update('installing inventory', idx, num_records)
657
683
                revision_id = key[-1]
658
 
                parent_ids = metadata[b'parents']
 
684
                parent_ids = metadata['parents']
659
685
                # Note: This assumes the local ghosts are identical to the
660
686
                #       ghosts in the source, as the Bundle serialization
661
687
                #       format doesn't record ghosts.
666
692
                # it would have to cast to a list of lines, which we get back
667
693
                # as lines and then cast back to a string.
668
694
                target_lines = multiparent.MultiParent.from_patch(bytes
669
 
                                                                  ).to_lines(p_texts)
670
 
                sha1 = osutils.sha_strings(target_lines)
671
 
                if sha1 != metadata[b'sha1']:
 
695
                            ).to_lines(p_texts)
 
696
                inv_text = ''.join(target_lines)
 
697
                del target_lines
 
698
                sha1 = osutils.sha_string(inv_text)
 
699
                if sha1 != metadata['sha1']:
672
700
                    raise errors.BadBundle("Can't convert to target format")
673
701
                # Add this to the cache so we don't have to extract it again.
674
 
                inventory_text_cache[revision_id] = b''.join(target_lines)
675
 
                target_inv = self._source_serializer.read_inventory_from_lines(
676
 
                    target_lines)
677
 
                del target_lines
 
702
                inventory_text_cache[revision_id] = inv_text
 
703
                target_inv = self._source_serializer.read_inventory_from_string(
 
704
                    inv_text)
678
705
                self._handle_root(target_inv, parent_ids)
679
706
                parent_inv = None
680
707
                if parent_ids:
686
713
                    else:
687
714
                        delta = target_inv._make_delta(parent_inv)
688
715
                        self._repository.add_inventory_by_delta(parent_ids[0],
689
 
                                                                delta, revision_id, parent_ids)
690
 
                except serializer.UnsupportedInventoryKind:
 
716
                            delta, revision_id, parent_ids)
 
717
                except errors.UnsupportedInventoryKind:
691
718
                    raise errors.IncompatibleRevision(repr(self._repository))
692
719
                inventory_cache[revision_id] = target_inv
 
720
        finally:
 
721
            pb.finished()
693
722
 
694
723
    def _handle_root(self, target_inv, parent_ids):
695
724
        revision_id = target_inv.revision_id
696
725
        if self.update_root:
697
726
            text_key = (target_inv.root.file_id, revision_id)
698
727
            parent_keys = [(target_inv.root.file_id, parent) for
699
 
                           parent in parent_ids]
 
728
                parent in parent_ids]
700
729
            self._repository.texts.add_lines(text_key, parent_keys, [])
701
730
        elif not self._repository.supports_rich_root():
702
731
            if target_inv.root.revision != revision_id: