/brz/remove-bazaar

To get this branch, use:

    bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

Viewing changes to breezy/bzr/vf_repository.py

  • Committer: Jelmer Vernooij
  • Date: 2018-05-06 11:48:54 UTC
  • mto: This revision was merged to the branch mainline in revision 6960.
  • Revision ID: jelmer@jelmer.uk-20180506114854-h4qd9ojaqy8wxjsd
Move .mailmap to root.

--- breezy/bzr/vf_repository.py (old)
+++ breezy/bzr/vf_repository.py (new)
@@ -26 +26 @@
 from breezy import (
     config as _mod_config,
     debug,
+    fetch as _mod_fetch,
     fifo_cache,
     gpg,
     graph,
     lru_cache,
     osutils,
     revision as _mod_revision,
+    serializer as _mod_serializer,
     static_tuple,
     tsort,
     ui,
     )
 from breezy.bzr import (
-    fetch as _mod_fetch,
     check,
-    generate_ids,
     inventory_delta,
     inventorytree,
     versionedfile,
     vf_search,
     )
-from breezy.bzr.bundle import serializer
 
 from breezy.recordcounter import RecordCounter
+from breezy.testament import Testament
 from breezy.i18n import gettext
-from breezy.bzr.testament import Testament
 """)
 
 from .. import (
@@ -60 +59 @@
     )
 from .inventory import (
     Inventory,
+    InventoryDirectory,
     ROOT_ID,
     entry_factory,
     )
 
 from ..repository import (
     CommitBuilder,
-    FetchResult,
     InterRepository,
     Repository,
     RepositoryFormat,
-    WriteGroup,
     )
 from .repository import (
     MetaDirRepository,
@@ -87 +85 @@
 from ..trace import (
     mutter
     )
-from ..tree import TreeChange
 
 
 class VersionedFileRepositoryFormat(RepositoryFormat):
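
Note on the imports above: the stray `""")` closing this hunk suggests the whole import list sits inside Breezy's `lazy_import` block, so these modules are bound lazily and only imported on first attribute access. A minimal sketch of that pattern (illustrative only, not the real module contents):

    from breezy.lazy_import import lazy_import
    lazy_import(globals(), """
    from breezy import (
        osutils,
        ui,
        )
    """)

    # Nothing has been imported yet: `osutils` is a placeholder object that
    # performs the real import the first time an attribute is looked up on it.
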
@@ -114 +111 @@
     """Commit builder implementation for versioned files based repositories.
     """
 
+    # the default CommitBuilder does not manage trees whose root is versioned.
+    _versioned_root = False
+
     def __init__(self, repository, parents, config_stack, timestamp=None,
                  timezone=None, committer=None, revprops=None,
                  revision_id=None, lossy=False):
         super(VersionedFileCommitBuilder, self).__init__(repository,
-                                                         parents, config_stack, timestamp, timezone, committer, revprops,
-                                                         revision_id, lossy)
+            parents, config_stack, timestamp, timezone, committer, revprops,
+            revision_id, lossy)
         try:
             basis_id = self.parents[0]
         except IndexError:
@@ -153 +153 @@
             return
         if not self.repository._format.supports_chks:
             raise errors.BzrError("Cannot commit directly to a stacked branch"
-                                  " in pre-2a formats. See "
-                                  "https://bugs.launchpad.net/bzr/+bug/375013 for details.")
+                " in pre-2a formats. See "
+                "https://bugs.launchpad.net/bzr/+bug/375013 for details.")
         # This is a stacked repo, we need to make sure we have the parent
         # inventories for the parents.
         parent_keys = [(p,) for p in self.parents]
-        parent_map = self.repository.inventories._index.get_parent_map(
-            parent_keys)
+        parent_map = self.repository.inventories._index.get_parent_map(parent_keys)
         missing_parent_keys = {pk for pk in parent_keys
-                               if pk not in parent_map}
+                                       if pk not in parent_map}
         fallback_repos = list(reversed(self.repository._fallback_repositories))
         missing_keys = [('inventories', pk[0])
                         for pk in missing_parent_keys]
@@ -170 +169 @@
             fallback_repo = fallback_repos.pop()
             source = fallback_repo._get_source(self.repository._format)
             sink = self.repository._get_sink()
-            missing_keys = sink.insert_missing_keys(source, missing_keys)
+            stream = source.get_stream_for_missing_keys(missing_keys)
+            missing_keys = sink.insert_stream_without_locking(stream,
+                self.repository._format)
         if missing_keys:
             raise errors.BzrError('Unable to fill in parent inventories for a'
                                   ' stacked branch')
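
Note on the hunk above: the loop pops stacked fallback repositories one at a time and keeps narrowing the set of missing inventory keys until it is empty or the fallbacks run out. The drain pattern in isolation (a rough sketch with hypothetical stand-ins, not the Breezy API):

    def fill_missing(fallback_stores, missing_keys):
        # Each store is modelled as a set of the keys it can supply; stores
        # are popped nearest-first, mirroring the reversed() in the real code.
        stores = list(fallback_stores)
        while missing_keys and stores:
            store = stores.pop()
            missing_keys = missing_keys - store  # keys this store cannot supply
        return missing_keys

    # Example: the nearest fallback holds 'b', the furthest holds 'a'.
    assert fill_missing([{'a'}, {'b'}], {'a', 'b'}) == set()
    assert fill_missing([{'a'}], {'a', 'b'}) == {'b'}
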
@@ -182 +183 @@
         """
         self._validate_unicode_text(message, 'commit message')
         rev = _mod_revision.Revision(
-            timestamp=self._timestamp,
-            timezone=self._timezone,
-            committer=self._committer,
-            message=message,
-            inventory_sha1=self.inv_sha1,
-            revision_id=self._new_revision_id,
-            properties=self._revprops)
+                       timestamp=self._timestamp,
+                       timezone=self._timezone,
+                       committer=self._committer,
+                       message=message,
+                       inventory_sha1=self.inv_sha1,
+                       revision_id=self._new_revision_id,
+                       properties=self._revprops)
         rev.parent_ids = self.parents
         if self._config_stack.get('create_signatures') == _mod_config.SIGN_ALWAYS:
             testament = Testament(rev, self.revision_tree())
@@ -219 +220 @@
             self._new_inventory = self.repository.get_inventory(
                 self._new_revision_id)
         return inventorytree.InventoryRevisionTree(self.repository,
-                                                   self._new_inventory, self._new_revision_id)
+            self._new_inventory, self._new_revision_id)
 
     def finish_inventory(self):
         """Tell the builder that the inventory is finished.
@@ -235 +236 @@
             self.parents)
         return self._new_revision_id
 
-    def _gen_revision_id(self):
-        """Return new revision-id."""
-        return generate_ids.gen_revision_id(self._committer, self._timestamp)
-
     def _require_root_change(self, tree):
         """Enforce an appropriate root object change.
 
@@ -247 +244 @@
 
         :param tree: The tree which is being committed.
         """
-        if self.repository.supports_rich_root():
-            return
         if len(self.parents) == 0:
             raise errors.RootMissing()
         entry = entry_factory['directory'](tree.path2id(''), '',
-                                           None)
+            None)
         entry.revision = self._new_revision_id
         self._basis_delta.append(('', '', entry.file_id, entry))
 
@@ -290 +285 @@
         return self._basis_delta
 
     def record_iter_changes(self, tree, basis_revision_id, iter_changes,
-                            _entry_factory=entry_factory):
+        _entry_factory=entry_factory):
         """Record a new tree via iter_changes.
 
         :param tree: The tree to obtain text contents from for changed objects.
@@ -303 +298 @@
             or errored-on before record_iter_changes sees the item.
         :param _entry_factory: Private method to bind entry_factory locally for
             performance.
-        :return: A generator of (relpath, fs_hash) tuples for use with
+        :return: A generator of (file_id, relpath, fs_hash) tuples for use with
             tree._observed_sha1.
         """
         # Create an inventory delta based on deltas between all the parents and
-        # deltas between all the parent inventories. We use inventory delta's
+        # deltas between all the parent inventories. We use inventory delta's 
         # between the inventory objects because iter_changes masks
         # last-changed-field only changes.
         # Working data:
@@ -332 +327 @@
                         ghost_basis = True
                     revtrees.append(self.repository.revision_tree(
                         _mod_revision.NULL_REVISION))
-        # The basis inventory from a repository
+        # The basis inventory from a repository 
         if revtrees:
             basis_tree = revtrees[0]
         else:
@@ -359 +354 @@
                             parent_entries[change[2]] = {
                                 # basis parent
                                 basis_entry.revision: basis_entry,
-                                # this parent
+                                # this parent 
                                 change[3].revision: change[3],
                                 }
                         else:
                             merged_ids[change[2]] = [change[3].revision]
-                            parent_entries[change[2]] = {
-                                change[3].revision: change[3]}
+                            parent_entries[change[2]] = {change[3].revision:change[3]}
                     else:
                         merged_ids[change[2]].append(change[3].revision)
-                        parent_entries[change[2]
-                                       ][change[3].revision] = change[3]
+                        parent_entries[change[2]][change[3].revision] = change[3]
         else:
             merged_ids = {}
         # Setup the changes from the tree:
         # changes maps file_id -> (change, [parent revision_ids])
-        changes = {}
+        changes= {}
         for change in iter_changes:
             # This probably looks up in basis_inv way to much.
-            if change.path[0] is not None:
-                head_candidate = [basis_inv.get_entry(change.file_id).revision]
+            if change[1][0] is not None:
+                head_candidate = [basis_inv.get_entry(change[0]).revision]
             else:
                 head_candidate = []
-            changes[change.file_id] = change, merged_ids.get(
-                change.file_id, head_candidate)
+            changes[change[0]] = change, merged_ids.get(change[0],
+                head_candidate)
         unchanged_merged = set(merged_ids) - set(changes)
         # Extend the changes dict with synthetic changes to record merges of
         # texts.
@@ -406 +399 @@
                 # by the user. So we discard this change.
                 pass
             else:
-                change = TreeChange(
-                    file_id,
+                change = (file_id,
                     (basis_inv.id2path(file_id), tree.id2path(file_id)),
                     False, (True, True),
                     (basis_entry.parent_id, basis_entry.parent_id),
@@ -419 +411 @@
         # candidates for the file.
         # inv delta is:
         # old_path, new_path, file_id, new_inventory_entry
-        seen_root = False  # Is the root in the basis delta?
+        seen_root = False # Is the root in the basis delta?
         inv_delta = self._basis_delta
         modified_rev = self._new_revision_id
         for change, head_candidates in viewvalues(changes):
-            if change.versioned[1]:  # versioned in target.
+            if change[3][1]: # versioned in target.
                 # Several things may be happening here:
                 # We may have a fork in the per-file graph
                 #  - record a change with the content from tree
@@ -431 +423 @@
                 #  - carry over the tree that hasn't changed
                 # We may have a change against all trees
                 #  - record the change with the content from tree
-                kind = change.kind[1]
-                file_id = change.file_id
-                entry = _entry_factory[kind](file_id, change.name[1],
-                                             change.parent_id[1])
-                head_set = self._heads(change.file_id, set(head_candidates))
+                kind = change[6][1]
+                file_id = change[0]
+                entry = _entry_factory[kind](file_id, change[5][1],
+                    change[4][1])
+                head_set = self._heads(change[0], set(head_candidates))
                 heads = []
                 # Preserve ordering.
                 for head_candidate in head_candidates:
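
Note on the hunk above: one side addresses iter_changes records by attribute (`change.kind[1]`, `change.path[1]`), the other by position (`change[6][1]`, `change[1][1]`). The positional layout implied by this diff, written out as a namedtuple for readability (the field order is inferred from the hunks, so treat it as an assumption):

    from collections import namedtuple

    # change[0] file_id, change[1] (old_path, new_path), change[2] changed_content,
    # change[3] (old_versioned, new_versioned), change[4] (old_parent_id, new_parent_id),
    # change[5] (old_name, new_name), change[6] (old_kind, new_kind),
    # change[7] (old_executable, new_executable)
    Change = namedtuple('Change', [
        'file_id', 'path', 'changed_content', 'versioned',
        'parent_id', 'name', 'kind', 'executable'])

    c = Change(b'file-id', ('a.txt', 'a.txt'), True, (True, True),
               (b'root-id', b'root-id'), ('a.txt', 'a.txt'),
               ('file', 'file'), (False, False))
    assert c.kind[1] == c[6][1] == 'file'  # attribute and index address the same slot
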
@@ -461 +453 @@
                         # we need to check the content against the source of the
                         # merge to determine if it was changed after the merge
                         # or carried over.
-                        if (parent_entry.kind != entry.kind
-                            or parent_entry.parent_id != entry.parent_id
-                                or parent_entry.name != entry.name):
+                        if (parent_entry.kind != entry.kind or
+                            parent_entry.parent_id != entry.parent_id or
+                            parent_entry.name != entry.name):
                             # Metadata common to all entries has changed
                             # against per-file parent
                             carry_over_possible = False
@@ -476 +468 @@
                     carry_over_possible = False
                 # Populate the entry in the delta
                 if kind == 'file':
-                    # XXX: There is still a small race here: If someone reverts
-                    # the content of a file after iter_changes examines and
-                    # decides it has changed, we will unconditionally record a
-                    # new version even if some other process reverts it while
-                    # commit is running (with the revert happening after
-                    # iter_changes did its examination).
-                    if change.executable[1]:
+                    # XXX: There is still a small race here: If someone reverts the content of a file
+                    # after iter_changes examines and decides it has changed,
+                    # we will unconditionally record a new version even if some
+                    # other process reverts it while commit is running (with
+                    # the revert happening after iter_changes did its
+                    # examination).
+                    if change[7][1]:
                         entry.executable = True
                     else:
                         entry.executable = False
-                    if (carry_over_possible
-                            and parent_entry.executable == entry.executable):
-                        # Check the file length, content hash after reading
-                        # the file.
-                        nostore_sha = parent_entry.text_sha1
+                    if (carry_over_possible and
+                        parent_entry.executable == entry.executable):
+                            # Check the file length, content hash after reading
+                            # the file.
+                            nostore_sha = parent_entry.text_sha1
                     else:
                         nostore_sha = None
-                    file_obj, stat_value = tree.get_file_with_stat(change.path[1])
+                    file_obj, stat_value = tree.get_file_with_stat(change[1][1], file_id)
                     try:
-                        entry.text_sha1, entry.text_size = self._add_file_to_weave(
-                            file_id, file_obj, heads, nostore_sha,
-                            size=(stat_value.st_size if stat_value else None))
-                        yield change.path[1], (entry.text_sha1, stat_value)
+                        entry.text_sha1, entry.text_size = self._add_lines_to_weave(
+                            file_id, file_obj.readlines(), heads, nostore_sha)
+                        yield file_id, change[1][1], (entry.text_sha1, stat_value)
                     except errors.ExistingContent:
                         # No content change against a carry_over parent
                         # Perhaps this should also yield a fs hash update?
@@ -509 +500 @@
                        file_obj.close()
                 elif kind == 'symlink':
                     # Wants a path hint?
-                    entry.symlink_target = tree.get_symlink_target(
-                        change.path[1])
+                    entry.symlink_target = tree.get_symlink_target(change[1][1], file_id)
                     if (carry_over_possible and
-                            parent_entry.symlink_target ==
-                            entry.symlink_target):
+                        parent_entry.symlink_target == entry.symlink_target):
                         carried_over = True
                     else:
-                        self._add_file_to_weave(
-                            change.file_id, BytesIO(), heads, None, size=0)
+                        self._add_lines_to_weave(change[0], [], heads, None)
                 elif kind == 'directory':
                     if carry_over_possible:
                         carried_over = True
                     else:
                         # Nothing to set on the entry.
                         # XXX: split into the Root and nonRoot versions.
-                        if change.path[1] != '' or self.repository.supports_rich_root():
-                            self._add_file_to_weave(
-                                change.file_id, BytesIO(), heads, None, size=0)
+                        if change[1][1] != '' or self.repository.supports_rich_root():
+                            self._add_lines_to_weave(change[0], [], heads, None)
                 elif kind == 'tree-reference':
                     if not self.repository._format.supports_tree_reference:
                         # This isn't quite sane as an error, but we shouldn't
                         # ever see this code path in practice: tree's don't
                         # permit references when the repo doesn't support tree
                         # references.
-                        raise errors.UnsupportedOperation(
-                            tree.add_reference, self.repository)
-                    reference_revision = tree.get_reference_revision(
-                        change.path[1])
+                        raise errors.UnsupportedOperation(tree.add_reference,
+                            self.repository)
+                    reference_revision = tree.get_reference_revision(change[1][1], change[0])
                     entry.reference_revision = reference_revision
-                    if (carry_over_possible
-                            and parent_entry.reference_revision ==
-                            reference_revision):
+                    if (carry_over_possible and
+                        parent_entry.reference_revision == reference_revision):
                         carried_over = True
                     else:
-                        self._add_file_to_weave(
-                            change.file_id, BytesIO(), heads, None, size=0)
+                        self._add_lines_to_weave(change[0], [], heads, None)
                 else:
                     raise AssertionError('unknown kind %r' % kind)
                 if not carried_over:
@@ -553 +537 @@
                     entry.revision = parent_entry.revision
             else:
                 entry = None
-            new_path = change.path[1]
-            inv_delta.append((change.path[0], new_path, change.file_id, entry))
+            new_path = change[1][1]
+            inv_delta.append((change[1][0], new_path, change[0], entry))
             if new_path == '':
                 seen_root = True
         # The initial commit adds a root directory, but this in itself is not
         # a worthwhile commit.
-        if ((len(inv_delta) > 0 and basis_revision_id != _mod_revision.NULL_REVISION)
-                or (len(inv_delta) > 1 and basis_revision_id == _mod_revision.NULL_REVISION)):
+        if ((len(inv_delta) > 0 and basis_revision_id != _mod_revision.NULL_REVISION) or
+            (len(inv_delta) > 1 and basis_revision_id == _mod_revision.NULL_REVISION)):
             # This should perhaps be guarded by a check that the basis we
             # commit against is the basis for the commit and if not do a delta
             # against the basis.
@@ -570 +554 @@
             self._require_root_change(tree)
         self.basis_delta_revision = basis_revision_id
 
-    def _add_file_to_weave(self, file_id, fileobj, parents, nostore_sha, size):
+    def _add_lines_to_weave(self, file_id, lines, parents, nostore_sha):
         parent_keys = tuple([(file_id, parent) for parent in parents])
-        return self.repository.texts.add_content(
-            versionedfile.FileContentFactory(
-                (file_id, self._new_revision_id), parent_keys, fileobj, size=size),
+        return self.repository.texts.add_lines(
+            (file_id, self._new_revision_id), parent_keys, lines,
             nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
 
 
+class VersionedFileRootCommitBuilder(VersionedFileCommitBuilder):
+    """This commitbuilder actually records the root id"""
+
+    # the root entry gets versioned properly by this builder.
+    _versioned_root = True
+
+    def _require_root_change(self, tree):
+        """Enforce an appropriate root object change.
+
+        This is called once when record_iter_changes is called, if and only if
+        the root was not in the delta calculated by record_iter_changes.
+
+        :param tree: The tree which is being committed.
+        """
+        # versioned roots do not change unless the tree found a change.
+
+
 class VersionedFileRepository(Repository):
     """Repository holding history for one or more branches.
 
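
Note on `_add_lines_to_weave` above: each file text goes into the repository's `texts` store under a two-element key, and its parents are keys in the same per-file space, which is what keeps the per-file graph independent of the revision graph. A toy model of that keying scheme (hypothetical in-memory store, not the Breezy API):

    class ToyTextStore:
        """Stand-in for repository.texts: keys are (file_id, revision_id)."""

        def __init__(self):
            self._texts = {}     # key -> list of byte lines
            self._parents = {}   # key -> tuple of parent keys

        def add_lines(self, key, parent_keys, lines):
            self._texts[key] = list(lines)
            self._parents[key] = tuple(parent_keys)

    store = ToyTextStore()
    key = (b'file-id', b'rev-2')
    parents = ((b'file-id', b'rev-1'),)  # per-file parents, not revision parents
    store.add_lines(key, parents, [b'hello\n'])
    assert store._parents[key] == parents
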
@@ -658 +658 @@
         if self.chk_bytes is not None:
             self.chk_bytes.add_fallback_versioned_files(repository.chk_bytes)
 
-    def create_bundle(self, target, base, fileobj, format=None):
-        return serializer.write_bundle(self, target, base, fileobj, format)
-
     @only_raises(errors.LockNotHeld, errors.LockBroken)
     def unlock(self):
         super(VersionedFileRepository, self).unlock()
@@ -697 +694 @@
         """
         inv_lines = self._serializer.write_inventory_to_lines(inv)
         return self._inventory_add_lines(revision_id, parents,
-                                         inv_lines, check_content=False)
+            inv_lines, check_content=False)
 
     def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
                                parents, basis_inv=None, propagate_caches=False):
@@ -744 +741 @@
                     basis_inv)
 
     def _inventory_add_lines(self, revision_id, parents, lines,
-                             check_content=True):
+        check_content=True):
         """Store lines in inv_vf and return the sha1 of the inventory."""
         parents = [(parent,) for parent in parents]
         result = self.inventories.add_lines((revision_id,), parents, lines,
-                                            check_content=check_content)[0]
+            check_content=check_content)[0]
         self.inventories._access.flush()
         return result
 
@@ -778 +775 @@
         self._add_revision(rev)
 
     def _add_revision(self, revision):
-        lines = self._serializer.write_revision_to_lines(revision)
+        text = self._serializer.write_revision_to_string(revision)
         key = (revision.revision_id,)
         parents = tuple((parent,) for parent in revision.parent_ids)
-        self.revisions.add_lines(key, parents, lines)
+        self.revisions.add_lines(key, parents, osutils.split_lines(text))
 
     def _check_inventories(self, checker):
         """Check the inventories found from the revision scan.
-
+        
         This is responsible for verifying the sha1 of inventories and
         creating a pending_keys set that covers data referenced by inventories.
         """
@@ -795 +792 @@
     def _do_check_inventories(self, checker, bar):
         """Helper for _check_inventories."""
         revno = 0
-        keys = {'chk_bytes': set(), 'inventories': set(), 'texts': set()}
+        keys = {'chk_bytes':set(), 'inventories':set(), 'texts':set()}
         kinds = ['chk_bytes', 'texts']
         count = len(checker.pending_keys)
         bar.update(gettext("inventories"), 0, 2)
@@ -819 +816 @@
                         'Missing inventory {%s}' % (record.key,))
                 else:
                     last_object = self._check_record('inventories', record,
-                                                     checker, last_object,
-                                                     current_keys[('inventories',) + record.key])
+                        checker, last_object,
+                        current_keys[('inventories',) + record.key])
             del keys['inventories']
         else:
             return
         bar.update(gettext("texts"), 1)
-        while (checker.pending_keys or keys['chk_bytes'] or
-               keys['texts']):
+        while (checker.pending_keys or keys['chk_bytes']
+            or keys['texts']):
             # Something to check.
             current_keys = checker.pending_keys
             checker.pending_keys = {}
             # Accumulate current checks.
             for key in current_keys:
                 if key[0] not in kinds:
-                    checker._report_items.append(
-                        'unknown key type %r' % (key,))
+                    checker._report_items.append('unknown key type %r' % (key,))
                 keys[key[0]].add(key[1:])
             # Check the outermost kind only - inventories || chk_bytes || texts
             for kind in kinds:
@@ -846 +842 @@
                                 'Missing %s {%s}' % (kind, record.key,))
                         else:
                             last_object = self._check_record(kind, record,
-                                                             checker, last_object, current_keys[(kind,) + record.key])
+                                checker, last_object, current_keys[(kind,) + record.key])
                     keys[kind] = set()
                     break
 
@@ -854 +850 @@
         """Check a single text from this repository."""
         if kind == 'inventories':
             rev_id = record.key[0]
-            inv = self._deserialise_inventory(
-                rev_id, record.get_bytes_as('lines'))
+            inv = self._deserialise_inventory(rev_id,
+                record.get_bytes_as('fulltext'))
             if last_object is not None:
                 delta = inv._make_delta(last_object)
                 for old_path, path, file_id, ie in delta:
@@ -881 +877 @@
         """Check a single text."""
         # Check it is extractable.
         # TODO: check length.
-        chunks = record.get_bytes_as('chunked')
-        sha1 = osutils.sha_strings(chunks)
-        length = sum(map(len, chunks))
+        if record.storage_kind == 'chunked':
+            chunks = record.get_bytes_as(record.storage_kind)
+            sha1 = osutils.sha_strings(chunks)
+            length = sum(map(len, chunks))
+        else:
+            content = record.get_bytes_as('fulltext')
+            sha1 = osutils.sha_string(content)
+            length = len(content)
         if item_data and sha1 != item_data[1]:
             checker._report_items.append(
                 'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' %
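
Note on the `_check_text` hunk above: whether a record is exposed as a list of chunks or as a single fulltext, the computed sha1 and length must agree. A self-contained illustration using hashlib in place of the osutils helpers:

    import hashlib

    def sha_strings(chunks):
        # analogue of osutils.sha_strings: hash a sequence of byte chunks
        s = hashlib.sha1()
        for chunk in chunks:
            s.update(chunk)
        return s.hexdigest()

    chunks = [b'line one\n', b'line two\n']
    fulltext = b''.join(chunks)
    assert sha_strings(chunks) == hashlib.sha1(fulltext).hexdigest()
    assert sum(map(len, chunks)) == len(fulltext)
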
@@ -912 +913 @@
         # getting file texts, inventories and revisions, then
         # this construct will accept instances of those things.
         super(VersionedFileRepository, self).__init__(_format, a_controldir,
-                                                      control_files)
+            control_files)
         self._transport = control_files._transport
         self.base = self._transport.base
         # for tests
@@ -920 +921 @@
         self._reconcile_fixes_text_parents = False
         self._reconcile_backsup_inventory = True
         # An InventoryEntry cache, used during deserialization
-        self._inventory_entry_cache = fifo_cache.FIFOCache(10 * 1024)
+        self._inventory_entry_cache = fifo_cache.FIFOCache(10*1024)
         # Is it safe to return inventory entries directly from the entry cache,
         # rather copying them?
         self._safe_to_return_from_cache = False
 
     def fetch(self, source, revision_id=None, find_ghosts=False,
-              fetch_spec=None, lossy=False):
+            fetch_spec=None):
         """Fetch the content required to construct revision_id from source.
 
         If revision_id is None and fetch_spec is None, then all content is
@@ -956 +957 @@
         # fast path same-url fetch operations
         # TODO: lift out to somewhere common with RemoteRepository
         # <https://bugs.launchpad.net/bzr/+bug/401646>
-        if (self.has_same_location(source) and
-            fetch_spec is None and
-                self._has_same_fallbacks(source)):
+        if (self.has_same_location(source)
+            and fetch_spec is None
+            and self._has_same_fallbacks(source)):
             # check that last_revision is in 'from' and then return a
             # no-operation.
-            if (revision_id is not None
-                    and not _mod_revision.is_null(revision_id)):
+            if (revision_id is not None and
+                not _mod_revision.is_null(revision_id)):
                 self.get_revision(revision_id)
-            return FetchResult(0)
+            return 0, []
         inter = InterRepository.get(source, self)
-        if (fetch_spec is not None
-                and not getattr(inter, "supports_fetch_spec", False)):
+        if (fetch_spec is not None and
+            not getattr(inter, "supports_fetch_spec", False)):
             raise errors.UnsupportedOperation(
                 "fetch_spec not supported for %r" % inter)
         return inter.fetch(revision_id=revision_id,
-                           find_ghosts=find_ghosts, fetch_spec=fetch_spec,
-                           lossy=lossy)
+            find_ghosts=find_ghosts, fetch_spec=fetch_spec)
 
     def gather_stats(self, revid=None, committers=None):
         """See Repository.gather_stats()."""
         with self.lock_read():
-            result = super(VersionedFileRepository,
-                           self).gather_stats(revid, committers)
+            result = super(VersionedFileRepository, self).gather_stats(revid, committers)
             # now gather global repository information
             # XXX: This is available for many repos regardless of listability.
             if self.user_transport.listable():
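
Note on the fetch hunk above: the same-location fast path returns different shapes on the two sides, a `FetchResult` object versus a bare `(count, failed_revisions)` pair. A sketch of how a caller would consume each convention (the `FetchResult` shape shown here is an assumption based only on the `FetchResult(0)` call in the hunk):

    class FetchResult:
        # assumed minimal shape; the real class lives in breezy's repository module
        def __init__(self, total_fetched):
            self.total_fetched = total_fetched

    # object convention
    result = FetchResult(0)
    assert result.total_fetched == 0

    # tuple convention
    count, failures = 0, []
    assert (count, failures) == (0, [])
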
@@ -1007 +1006 @@
         """
         if self._fallback_repositories and not self._format.supports_chks:
             raise errors.BzrError("Cannot commit directly to a stacked branch"
-                                  " in pre-2a formats. See "
-                                  "https://bugs.launchpad.net/bzr/+bug/375013 for details.")
+                " in pre-2a formats. See "
+                "https://bugs.launchpad.net/bzr/+bug/375013 for details.")
         result = self._commit_builder_class(self, parents, config_stack,
-                                            timestamp, timezone, committer, revprops, revision_id,
-                                            lossy)
+            timestamp, timezone, committer, revprops, revision_id,
+            lossy)
         self.start_write_group()
         return result
 
@@ -1109 +1108 @@
         with self.lock_read():
             for rev_id in revision_ids:
                 if not rev_id or not isinstance(rev_id, bytes):
-                    raise errors.InvalidRevisionId(
-                        revision_id=rev_id, branch=self)
+                    raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
             keys = [(key,) for key in revision_ids]
             stream = self.revisions.get_record_stream(keys, 'unordered', True)
             for record in stream:
@@ -1130 +1128 @@
         """
         with self.lock_write():
             self.signatures.add_lines((revision_id,), (),
-                                      osutils.split_lines(signature))
-
-    def sign_revision(self, revision_id, gpg_strategy):
-        with self.lock_write():
-            testament = Testament.from_revision(
-                self, revision_id)
-            plaintext = testament.as_short_text()
-            self.store_revision_signature(gpg_strategy, plaintext, revision_id)
-
-    def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
-        with self.lock_write():
-            signature = gpg_strategy.sign(plaintext, gpg.MODE_CLEAR)
-            self.add_signature_text(revision_id, signature)
-
-    def verify_revision_signature(self, revision_id, gpg_strategy):
-        """Verify the signature on a revision.
-
-        :param revision_id: the revision to verify
-        :gpg_strategy: the GPGStrategy object to used
-
-        :return: gpg.SIGNATURE_VALID or a failed SIGNATURE_ value
-        """
-        with self.lock_read():
-            if not self.has_signature_for_revision_id(revision_id):
-                return gpg.SIGNATURE_NOT_SIGNED, None
-            signature = self.get_signature_text(revision_id)
-
-            testament = Testament.from_revision(
-                self, revision_id)
-
-            (status, key, signed_plaintext) = gpg_strategy.verify(signature)
-            if testament.as_short_text() != signed_plaintext:
-                return gpg.SIGNATURE_NOT_VALID, None
-            return (status, key)
+                osutils.split_lines(signature))
 
     def find_text_key_references(self):
         """Find the text key references within the repository.
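
Note on the removed signature methods above: signing and verification both operate on the revision's testament (a canonical short text derived from the revision), so verification re-derives the testament and compares it with the signed plaintext. The round trip in isolation, with a fake strategy standing in for GPG:

    class FakeGPGStrategy:
        # Hypothetical stand-in: "signs" by tagging, "verifies" by untagging.
        def sign(self, plaintext):
            return b'SIGNED:' + plaintext

        def verify(self, signature):
            assert signature.startswith(b'SIGNED:')
            return 'valid', 'key-id', signature[len(b'SIGNED:'):]

    def testament_text(revision_id):
        # stand-in for Testament.from_revision(repo, revision_id).as_short_text()
        return b'testament-of-' + revision_id

    strategy = FakeGPGStrategy()
    signature = strategy.sign(testament_text(b'rev-1'))   # cf. sign_revision
    status, key, plaintext = strategy.verify(signature)   # cf. verify_revision_signature
    assert status == 'valid'
    assert plaintext == testament_text(b'rev-1')          # else SIGNATURE_NOT_VALID
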
@@ -1193 +1158 @@
         stream = self.inventories.get_record_stream(keys, 'unordered', True)
         for record in stream:
             if record.storage_kind != 'absent':
+                chunks = record.get_bytes_as('chunked')
                 revid = record.key[-1]
-                for line in record.get_bytes_as('lines'):
+                lines = osutils.chunks_to_lines(chunks)
+                for line in lines:
                     yield line, revid
 
     def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
-                                                revision_keys):
+        revision_keys):
         """Helper routine for fileids_altered_by_revision_ids.
 
         This performs the translation of xml lines to revision ids.
@@ -1282 +1249 @@
             yield text_keys[record.key], record.get_bytes_as('chunked')
 
     def _generate_text_key_index(self, text_key_references=None,
-                                 ancestors=None):
+        ancestors=None):
         """Generate a new text key index for the repository.
 
         This is an expensive function that will take considerable time to run.
@@ -1299 +1266 @@
             text_key_references = self.find_text_key_references()
         with ui.ui_factory.nested_progress_bar() as pb:
             return self._do_generate_text_key_index(ancestors,
-                                                    text_key_references, pb)
+                text_key_references, pb)
 
     def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
         """Helper for _generate_text_key_index to avoid deep nesting."""
@@ -1328 +1295 @@
         # could gauge this by looking at available real memory etc, but this is
         # always a tricky proposition.
         inventory_cache = lru_cache.LRUCache(10)
-        batch_size = 10  # should be ~150MB on a 55K path tree
-        batch_count = len(revision_order) // batch_size + 1
+        batch_size = 10 # should be ~150MB on a 55K path tree
+        batch_count = len(revision_order) / batch_size + 1
         processed_texts = 0
-        pb.update(gettext("Calculating text parents"),
-                  processed_texts, text_count)
+        pb.update(gettext("Calculating text parents"), processed_texts, text_count)
         for offset in range(batch_count):
-            to_query = revision_order[offset * batch_size:(offset + 1)
-                                      * batch_size]
+            to_query = revision_order[offset * batch_size:(offset + 1) *
+                batch_size]
             if not to_query:
                 break
             for revision_id in to_query:
                 parent_ids = ancestors[revision_id]
                 for text_key in revision_keys[revision_id]:
-                    pb.update(gettext("Calculating text parents"),
-                              processed_texts)
+                    pb.update(gettext("Calculating text parents"), processed_texts)
                     processed_texts += 1
                     candidate_parents = []
                     for parent_id in parent_ids:
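
Note on the batch arithmetic above: `len(revision_order) // batch_size + 1` uses explicit floor division, while the `/` spelling is only an integer under Python 2; on Python 3 it produces a float, and the subsequent `range(batch_count)` would raise TypeError. For example:

    revision_order = list(range(25))
    batch_size = 10
    assert len(revision_order) // batch_size + 1 == 3   # int, valid for range()
    assert len(revision_order) / batch_size + 1 == 3.5  # float on Python 3
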
@@ -1362 +1327 @@
                             try:
                                 inv = inventory_cache[parent_id]
                             except KeyError:
-                                inv = self.revision_tree(
-                                    parent_id).root_inventory
+                                inv = self.revision_tree(parent_id).root_inventory
                                 inventory_cache[parent_id] = inv
                             try:
                                 parent_entry = inv.get_entry(text_key[0])
@@ -1379 +1343 @@
                                 text_key_cache[parent_text_key])
                     parent_heads = text_graph.heads(candidate_parents)
                     new_parents = list(parent_heads)
-                    new_parents.sort(key=lambda x: candidate_parents.index(x))
+                    new_parents.sort(key=lambda x:candidate_parents.index(x))
                     if new_parents == []:
                         new_parents = [NULL_REVISION]
                     text_index[text_key] = new_parents
@@ -1458 +1422 @@
             buffering if necessary).
         :return: An iterator of inventories.
         """
-        if ((None in revision_ids) or
-                (_mod_revision.NULL_REVISION in revision_ids)):
+        if ((None in revision_ids)
+            or (_mod_revision.NULL_REVISION in revision_ids)):
             raise ValueError('cannot get null revision inventory')
         for inv, revid in self._iter_inventories(revision_ids, ordering):
             if inv is None:
@@ -1469 +1433 @@
     def _iter_inventories(self, revision_ids, ordering):
         """single-document based inventory iteration."""
         inv_xmls = self._iter_inventory_xmls(revision_ids, ordering)
-        for lines, revision_id in inv_xmls:
-            if lines is None:
+        for text, revision_id in inv_xmls:
+            if text is None:
                 yield None, revision_id
             else:
-                yield self._deserialise_inventory(revision_id, lines), revision_id
+                yield self._deserialise_inventory(revision_id, text), revision_id
 
     def _iter_inventory_xmls(self, revision_ids, ordering):
         if ordering is None:
@@ -1488 +1452 @@
             key_iter = iter(keys)
             next_key = next(key_iter)
         stream = self.inventories.get_record_stream(keys, ordering, True)
-        text_lines = {}
+        text_chunks = {}
         for record in stream:
             if record.storage_kind != 'absent':
-                lines = record.get_bytes_as('lines')
+                chunks = record.get_bytes_as('chunked')
                 if order_as_requested:
-                    text_lines[record.key] = lines
+                    text_chunks[record.key] = chunks
                 else:
-                    yield lines, record.key[-1]
+                    yield ''.join(chunks), record.key[-1]
             else:
                 yield None, record.key[-1]
             if order_as_requested:
                 # Yield as many results as we can while preserving order.
-                while next_key in text_lines:
-                    lines = text_lines.pop(next_key)
-                    yield lines, next_key[-1]
+                while next_key in text_chunks:
+                    chunks = text_chunks.pop(next_key)
+                    yield ''.join(chunks), next_key[-1]
                     try:
                         next_key = next(key_iter)
                     except StopIteration:
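
Note on `_iter_inventory_xmls` above: when a specific ordering is requested, records may still arrive out of order, so early arrivals are parked in a dict and drained whenever the next expected key becomes available. The same buffering pattern in isolation:

    def in_requested_order(requested, arrivals):
        """Yield (key, value) pairs in `requested` order from out-of-order arrivals."""
        pending = {}
        want = iter(requested)
        next_key = next(want)
        for key, value in arrivals:
            pending[key] = value
            # Yield as many results as we can while preserving order.
            while next_key in pending:
                yield next_key, pending.pop(next_key)
                try:
                    next_key = next(want)
                except StopIteration:
                    return

    arrivals = [('b', 2), ('c', 3), ('a', 1)]
    assert list(in_requested_order(['a', 'b', 'c'], arrivals)) == [
        ('a', 1), ('b', 2), ('c', 3)]
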
@@ -1517 +1481 @@
         :param revision_id: The expected revision id of the inventory.
         :param xml: A serialised inventory.
         """
-        result = self._serializer.read_inventory_from_lines(
-            xml, revision_id, entry_cache=self._inventory_entry_cache,
-            return_from_cache=self._safe_to_return_from_cache)
+        result = self._serializer.read_inventory_from_string(xml, revision_id,
+                    entry_cache=self._inventory_entry_cache,
+                    return_from_cache=self._safe_to_return_from_cache)
         if result.revision_id != revision_id:
             raise AssertionError('revision id mismatch %s != %s' % (
                 result.revision_id, revision_id))
@@ -1532 +1496 @@
         """Get serialized inventory as a string."""
         with self.lock_read():
             texts = self._iter_inventory_xmls([revision_id], 'unordered')
-            lines, revision_id = next(texts)
-            if lines is None:
+            text, revision_id = next(texts)
+            if text is None:
                 raise errors.NoSuchRevision(self, revision_id)
-            return lines
+            return text
 
     def revision_tree(self, revision_id):
         """Return Tree for a revision on this branch.
@@ -1547 +1511 @@
         # so we don't need to read it in twice.
         if revision_id == _mod_revision.NULL_REVISION:
             return inventorytree.InventoryRevisionTree(self,
-                                                       Inventory(root_id=None), _mod_revision.NULL_REVISION)
+                Inventory(root_id=None), _mod_revision.NULL_REVISION)
         else:
             with self.lock_read():
                 inv = self.get_inventory(revision_id)
@@ -1557 +1521 @@
         """Return Trees for revisions in this repository.
 
         :param revision_ids: a sequence of revision-ids;
-          a revision-id may not be None or b'null:'
+          a revision-id may not be None or 'null:'
         """
         inventories = self.iter_inventories(revision_ids)
         for inv in inventories:
@@ -1588 +1552 @@
         # currently easier.
         if specific_fileids is None:
             trees = dict((t.get_revision_id(), t) for
-                         t in self.revision_trees(required_trees))
+                t in self.revision_trees(required_trees))
         else:
             trees = dict((t.get_revision_id(), t) for
-                         t in self._filtered_revision_trees(required_trees,
-                                                            specific_fileids))
+                t in self._filtered_revision_trees(required_trees,
+                specific_fileids))
 
         # Calculate the deltas
         for revision in revisions:
@@ -1606 +1570 @@
         """Return Tree for a revision on this branch with only some files.
 
         :param revision_ids: a sequence of revision-ids;
-          a revision-id may not be None or b'null:'
+          a revision-id may not be None or 'null:'
         :param file_ids: if not None, the result is filtered
           so that only those file-ids, their parents and their
           children are included.
@@ -1635 +1599 @@
                 self.revisions.get_parent_map(query_keys)):
             if parent_keys:
                 result[revision_id] = tuple([parent_revid
-                                             for (parent_revid,) in parent_keys])
+                    for (parent_revid,) in parent_keys])
             else:
                 result[revision_id] = (_mod_revision.NULL_REVISION,)
         return result
@@ -1646 +1610 @@
         st = static_tuple.StaticTuple
         revision_keys = [st(r_id).intern() for r_id in revision_ids]
         with self.lock_read():
-            known_graph = self.revisions.get_known_graph_ancestry(
-                revision_keys)
+            known_graph = self.revisions.get_known_graph_ancestry(revision_keys)
            return graph.GraphThunkIdsToKeys(known_graph)
 
     def get_file_graph(self):
@@ -1663 +1626 @@
         start_keys = result_set.difference(included_keys)
         exclude_keys = result_parents.difference(result_set)
         result = vf_search.SearchResult(start_keys, exclude_keys,
-                                        len(result_set), result_set)
+            len(result_set), result_set)
         return result
 
     def _get_versioned_file_checker(self, text_key_references=None,
-                                    ancestors=None):
+        ancestors=None):
         """Return an object suitable for checking versioned files.
-
+        
         :param text_key_references: if non-None, an already built
             dictionary mapping text keys ((fileid, revision_id) tuples)
             to whether they were referred to by the inventory of the
@@ -1680 +1643 @@
             available.
         """
         return _VersionedFileChecker(self,
-                                     text_key_references=text_key_references, ancestors=ancestors)
+            text_key_references=text_key_references, ancestors=ancestors)
 
     def has_signature_for_revision_id(self, revision_id):
         """Query for a revision signature for revision_id in the repository."""
@@ -1695 +1658 @@
         """Return the text for a signature."""
         with self.lock_read():
             stream = self.signatures.get_record_stream([(revision_id,)],
-                                                       'unordered', True)
+                'unordered', True)
             record = next(stream)
             if record.storage_kind == 'absent':
                 raise errors.NoSuchRevision(self, revision_id)
@@ -1726 +1689 @@
                 pass
             parent_map = vf.get_parent_map([(revid,)])
             parents_according_to_index = tuple(parent[-1] for parent in
-                                               parent_map[(revid,)])
+                parent_map[(revid,)])
             parents_according_to_revision = tuple(revision.parent_ids)
             if parents_according_to_index != parents_according_to_revision:
                 yield (revid, parents_according_to_index,
-                       parents_according_to_revision)
+                    parents_according_to_revision)
 
     def _check_for_inconsistent_revision_parents(self):
         inconsistencies = list(self._find_inconsistent_revision_parents())
@@ -1746 +1709 @@
         """Return a source for streaming from this repository."""
         return StreamSource(self, to_format)
 
-    def reconcile(self, other=None, thorough=False):
-        """Reconcile this repository."""
-        from .reconcile import VersionedFileRepoReconciler
-        with self.lock_write():
-            reconciler = VersionedFileRepoReconciler(self, thorough=thorough)
-            return reconciler.reconcile()
-
 
 class MetaDirVersionedFileRepository(MetaDirRepository,
                                      VersionedFileRepository):
@@ -1760 +1716 @@
 
     def __init__(self, _format, a_controldir, control_files):
         super(MetaDirVersionedFileRepository, self).__init__(_format, a_controldir,
-                                                             control_files)
+            control_files)
 
 
 class MetaDirVersionedFileRepositoryFormat(RepositoryFormatMetaDir,
-                                           VersionedFileRepositoryFormat):
+        VersionedFileRepositoryFormat):
     """Base class for repository formats using versioned files in metadirs."""
 
 
@@ -1780 +1736 @@
     def __init__(self, target_repo):
         self.target_repo = target_repo
 
-    def insert_missing_keys(self, source, missing_keys):
-        """Insert missing keys from another source.
-
-        :param source: StreamSource to stream from
-        :param missing_keys: Keys to insert
-        :return: keys still missing
-        """
-        stream = source.get_stream_for_missing_keys(missing_keys)
-        return self.insert_stream_without_locking(stream,
-                                                  self.target_repo._format)
-
     def insert_stream(self, stream, src_format, resume_tokens):
         """Insert a stream's content into the target repository.
 
@@ -1799 +1744 @@
         :return: a list of resume tokens and an  iterable of keys additional
             items required before the insertion can be completed.
         """
-        with self.target_repo.lock_write():
+        self.target_repo.lock_write()
+        try:
             if resume_tokens:
                 self.target_repo.resume_write_group(resume_tokens)
                 is_resume = True
@@ -1809 +1755 @@
             try:
                 # locked_insert_stream performs a commit|suspend.
                 missing_keys = self.insert_stream_without_locking(stream,
-                                                                  src_format, is_resume)
+                                    src_format, is_resume)
                 if missing_keys:
                     # suspend the write group and tell the caller what we is
                     # missing. We know we can suspend or else we would not have
@@ -1820 +1766 @@
                 hint = self.target_repo.commit_write_group()
                 to_serializer = self.target_repo._format._serializer
                 src_serializer = src_format._serializer
-                if (to_serializer != src_serializer
-                        and self.target_repo._format.pack_compresses):
+                if (to_serializer != src_serializer and
+                    self.target_repo._format.pack_compresses):
                     self.target_repo.pack(hint=hint)
                 return [], set()
             except:
                 self.target_repo.abort_write_group(suppress_errors=True)
                 raise
+        finally:
+            self.target_repo.unlock()
 
     def insert_stream_without_locking(self, stream, src_format,
                                       is_resume=False):
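
Note on the locking hunk above: one side wraps the whole insertion in `with self.target_repo.lock_write():`, the other takes the lock explicitly and releases it in a `finally:` block; both guarantee the lock is dropped even when stream insertion raises. A minimal sketch of the equivalence (dummy object, not the Breezy repository API):

    class DummyRepo:
        # Hypothetical lock bookkeeping, just enough for both shapes.
        def __init__(self):
            self.locked = 0

        def lock_write(self):
            self.locked += 1
            return self          # usable as a context manager below

        def unlock(self):
            self.locked -= 1

        def __enter__(self):
            return self

        def __exit__(self, *exc):
            self.unlock()

    repo = DummyRepo()
    with repo.lock_write():      # context-manager shape
        assert repo.locked == 1
    repo.lock_write()            # explicit shape
    try:
        assert repo.locked == 1
    finally:
        repo.unlock()
    assert repo.locked == 0
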
@@ -1866 +1814 @@
                 # Not a pack repository
                 pass
             else:
-                new_pack.set_write_cache_size(1024 * 1024)
+                new_pack.set_write_cache_size(1024*1024)
         for substream_type, substream in stream:
             if 'stream' in debug.debug_flags:
                 mutter('inserting substream: %s', substream_type)
@@ -1894 +1842 @@
                     self.target_repo.revisions.insert_record_stream(substream)
                 else:
                     self._extract_and_insert_revisions(substream,
-                                                       src_serializer)
+                        src_serializer)
             elif substream_type == 'signatures':
                 self.target_repo.signatures.insert_record_stream(substream)
             else:
@@ -1909 +1857 @@
             check_for_missing_texts=is_resume)
         try:
             for prefix, versioned_file in (
-                    ('texts', self.target_repo.texts),
-                    ('inventories', self.target_repo.inventories),
-                    ('revisions', self.target_repo.revisions),
-                    ('signatures', self.target_repo.signatures),
-                    ('chk_bytes', self.target_repo.chk_bytes),
-                    ):
+                ('texts', self.target_repo.texts),
+                ('inventories', self.target_repo.inventories),
+                ('revisions', self.target_repo.revisions),
+                ('signatures', self.target_repo.signatures),
+                ('chk_bytes', self.target_repo.chk_bytes),
+                ):
                 if versioned_file is None:
                     continue
                 # TODO: key is often going to be a StaticTuple object
@@ -1925 +1873 @@
                 #       object, so instead we could have:
                 #       StaticTuple(prefix) + key here...
                 missing_keys.update((prefix,) + key for key in
-                                    versioned_file.get_missing_compression_parent_keys())
+                    versioned_file.get_missing_compression_parent_keys())
         except NotImplementedError:
             # cannot even attempt suspending, and missing would have failed
             # during stream insertion.
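
The key namespacing above in isolation: each store reports its missing
compression parents as plain tuples, and the substream name is prepended
so one flat set can describe all of them (values invented for
illustration):

    missing_keys = set()
    reported = {'texts': {('file-id', 'rev-1')}, 'inventories': {('rev-1',)}}
    for prefix, keys in reported.items():
        missing_keys.update((prefix,) + key for key in keys)
    # missing_keys == {('texts', 'file-id', 'rev-1'), ('inventories', 'rev-1')}
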
@@ -1937 +1885 @@
         target_tree_refs = self.target_repo._format.supports_tree_reference
         for record in substream:
             # Insert the delta directly
-            inventory_delta_bytes = record.get_bytes_as('lines')
+            inventory_delta_bytes = record.get_bytes_as('fulltext')
             deserialiser = inventory_delta.InventoryDeltaDeserializer()
             try:
                 parse_result = deserialiser.parse_text_bytes(
@@ -1952 +1900 @@
                 basis_id, inv_delta, revision_id, parents)
 
     def _extract_and_insert_inventories(self, substream, serializer,
-                                        parse_delta=None):
+            parse_delta=None):
         """Generate a new inventory versionedfile in target, converting data.
 
         The inventory is retrieved from the source (deserializing it), and
@@ -1963 +1911 @@
         for record in substream:
             # It's not a delta, so it must be a fulltext in the source
             # serializer's format.
-            lines = record.get_bytes_as('lines')
+            bytes = record.get_bytes_as('fulltext')
             revision_id = record.key[0]
-            inv = serializer.read_inventory_from_lines(lines, revision_id)
+            inv = serializer.read_inventory_from_string(bytes, revision_id)
             parents = [key[0] for key in record.parents]
             self.target_repo.add_inventory(revision_id, inv, parents)
             # No need to keep holding this full inv in memory when the rest of
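
The conversion path above, reduced to its essentials (a sketch, assuming
a serializer with the string-based reader shown on the added side and a
target repository already inside a write group):

    def reserialize_inventories(records, src_serializer, target_repo):
        # Parse each fulltext with the *source* serializer; add_inventory()
        # then re-serialises it in whatever format the target uses.
        for record in records:
            text = record.get_bytes_as('fulltext')
            revision_id = record.key[0]
            inv = src_serializer.read_inventory_from_string(text, revision_id)
            parents = [key[0] for key in record.parents]
            target_repo.add_inventory(revision_id, inv, parents)
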
@@ -2002 +1950 @@
         """
         src_serializer = self.from_repository._format._serializer
         target_serializer = self.to_format._serializer
-        return (self.to_format._fetch_uses_deltas
-                and src_serializer == target_serializer)
+        return (self.to_format._fetch_uses_deltas and
+            src_serializer == target_serializer)
 
     def _fetch_revision_texts(self, revs):
         # fetch signatures first and then the revision texts
@@ -2049 +1997 @@
             if knit_kind == "file":
                 # Accumulate file texts
                 text_keys.extend([(file_id, revision) for revision in
-                                  revisions])
+                    revisions])
             elif knit_kind == "inventory":
                 # Now copy the file texts.
                 from_texts = self.from_repository.texts
@@ -2102 +2050 @@
         for substream_kind, keys in viewitems(keys):
             vf = getattr(self.from_repository, substream_kind)
             if vf is None and keys:
-                raise AssertionError(
-                    "cannot fill in keys for a versioned file we don't"
-                    " have: %s needs %s" % (substream_kind, keys))
+                    raise AssertionError(
+                        "cannot fill in keys for a versioned file we don't"
+                        " have: %s needs %s" % (substream_kind, keys))
             if not keys:
                 # No need to stream something we don't have
                 continue
@@ -2127 +2075 @@
             # ensure that ghosts don't introduce missing data for future
             # fetches.
             stream = versionedfile.filter_absent(vf.get_record_stream(keys,
-                                                                      self.to_format._fetch_order, True))
+                self.to_format._fetch_order, True))
             yield substream_kind, stream
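
The fan-out above, compressed into a standalone sketch (hypothetical
helper; filter_absent is the real guard used in the loop, approximated
here as a plain filter on each record's storage kind):

    def fill_in(from_repo, wanted, fetch_order):
        # `wanted` maps substream names to key sets, mirroring the loop above.
        for substream_kind, keys in wanted.items():
            store = getattr(from_repo, substream_kind)
            records = store.get_record_stream(keys, fetch_order, True)
            # Drop absent records so ghosts cannot smuggle gaps downstream.
            yield substream_kind, (
                r for r in records if r.storage_kind != 'absent')
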
 
     def inventory_fetch_order(self):
@@ -2137 +2085 @@
             return self.to_format._fetch_order
 
     def _rich_root_upgrade(self):
-        return (not self.from_repository._format.rich_root_data
-                and self.to_format.rich_root_data)
+        return (not self.from_repository._format.rich_root_data and
+            self.to_format.rich_root_data)
 
     def _get_inventory_stream(self, revision_ids, missing=False):
         from_format = self.from_repository._format
-        if (from_format.supports_chks and self.to_format.supports_chks
-                and from_format.network_name() == self.to_format.network_name()):
+        if (from_format.supports_chks and self.to_format.supports_chks and
+            from_format.network_name() == self.to_format.network_name()):
             raise AssertionError(
                 "this case should be handled by GroupCHKStreamSource")
         elif 'forceinvdeltas' in debug.debug_flags:
             return self._get_convertable_inventory_stream(revision_ids,
-                                                          delta_versus_null=missing)
+                    delta_versus_null=missing)
         elif from_format.network_name() == self.to_format.network_name():
             # Same format.
             return self._get_simple_inventory_stream(revision_ids,
-                                                     missing=missing)
-        elif (not from_format.supports_chks and not self.to_format.supports_chks and
-                from_format._serializer == self.to_format._serializer):
+                    missing=missing)
+        elif (not from_format.supports_chks and not self.to_format.supports_chks
+                and from_format._serializer == self.to_format._serializer):
             # Essentially the same format.
             return self._get_simple_inventory_stream(revision_ids,
-                                                     missing=missing)
+                    missing=missing)
         else:
             # Any time we switch serializations, we want to use an
             # inventory-delta based approach.
             return self._get_convertable_inventory_stream(revision_ids,
-                                                          delta_versus_null=missing)
+                    delta_versus_null=missing)
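
The dispatch above reduces to a small decision function; a sketch of the
same logic with invented names, ignoring the debug-flag override (the
CHK-to-CHK same-format case is handled by GroupCHKStreamSource, as the
assertion says):

    def choose_inventory_stream(from_format, to_format):
        if from_format.network_name() == to_format.network_name():
            return 'simple'          # identical formats: stream verbatim
        if (not from_format.supports_chks and not to_format.supports_chks
                and from_format._serializer == to_format._serializer):
            return 'simple'          # essentially the same format
        # Any serializer switch goes through inventory-deltas so the
        # target can re-serialise each inventory itself.
        return 'inventory-deltas'
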
 
     def _get_simple_inventory_stream(self, revision_ids, missing=False):
         # NB: This currently reopens the inventory weave in source;
@@ -2186 +2134 @@
         # be converted.  That means inventory deltas (if the remote is <1.19,
         # RemoteStreamSink will fall back to VFS to insert the deltas).
         yield ('inventory-deltas',
-               self._stream_invs_as_deltas(revision_ids,
-                                           delta_versus_null=delta_versus_null))
+           self._stream_invs_as_deltas(revision_ids,
+                                       delta_versus_null=delta_versus_null))
 
     def _stream_invs_as_deltas(self, revision_ids, delta_versus_null=False):
         """Return a stream of inventory-deltas for the given rev ids.
@@ -2237 +2185 @@
                         if parent_inv is None:
                             parent_inv = from_repo.get_inventory(parent_id)
                     candidate_delta = inv._make_delta(parent_inv)
-                    if (delta is None
-                            or len(delta) > len(candidate_delta)):
+                    if (delta is None or
+                        len(delta) > len(candidate_delta)):
                         delta = candidate_delta
                         basis_id = parent_id
             if delta is None:
@@ -2248 +2196 @@
                 delta = inv._make_delta(null_inventory)
             invs_sent_so_far.add(inv.revision_id)
             inventory_cache[inv.revision_id] = inv
-            delta_serialized = serializer.delta_to_lines(basis_id, key[-1], delta)
-            yield versionedfile.ChunkedContentFactory(
-                key, parent_keys, None, delta_serialized, chunks_are_lines=True)
+            delta_serialized = ''.join(
+                serializer.delta_to_lines(basis_id, key[-1], delta))
+            yield versionedfile.FulltextContentFactory(
+                key, parent_keys, None, delta_serialized)
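
The basis selection above tries each already-sent parent and keeps the
shortest delta; in isolation (a sketch, with make_delta standing in for
Inventory._make_delta):

    def best_basis(inv, parent_invs, make_delta):
        # parent_invs: iterable of (parent_id, parent_inventory) pairs.
        best_delta, basis_id = None, None
        for parent_id, parent_inv in parent_invs:
            candidate = make_delta(inv, parent_inv)
            if best_delta is None or len(candidate) < len(best_delta):
                best_delta, basis_id = candidate, parent_id
        return basis_id, best_delta

Fewer delta items generally means fewer bytes on the wire, which is why
the smallest candidate wins even when several parents would serve.
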
 
 
 class _VersionedFileChecker(object):
@@ -2304 +2253 @@
         text_keys = self.repository.texts.keys()
         unused_keys = frozenset(text_keys) - set(self.text_index)
         for num, key in enumerate(self.text_index):
-            progress_bar.update(
-                gettext('checking text graph'), num, n_versions)
+            progress_bar.update(gettext('checking text graph'), num, n_versions)
             correct_parents = self.calculate_file_version_parents(key)
             try:
                 knit_parents = parent_map[key]
@@ -2324 +2272 @@
     supports_fetch_spec = True
 
     def fetch(self, revision_id=None, find_ghosts=False,
-              fetch_spec=None, lossy=False):
+            fetch_spec=None):
         """Fetch the content required to construct revision_id.
 
         The content is copied from self.source to self.target.
@@ -2333 +2281 @@
                             content is copied.
         :return: None.
         """
-        if lossy:
-            raise errors.LossyPushToSameVCS(self.source, self.target)
         if self.target._format.experimental:
-            ui.ui_factory.show_user_warning(
-                'experimental_format_fetch',
+            ui.ui_factory.show_user_warning('experimental_format_fetch',
                 from_format=self.source._format,
                 to_format=self.target._format)
-        from breezy.bzr.fetch import RepoFetcher
+        from breezy.fetch import RepoFetcher
         # See <https://launchpad.net/bugs/456077> asking for a warning here
         if self.source._format.network_name() != self.target._format.network_name():
-            ui.ui_factory.show_user_warning(
-                'cross_format_fetch', from_format=self.source._format,
+            ui.ui_factory.show_user_warning('cross_format_fetch',
+                from_format=self.source._format,
                 to_format=self.target._format)
         with self.lock_write():
             f = RepoFetcher(to_repository=self.target,
-                            from_repository=self.source,
-                            last_revision=revision_id,
-                            fetch_spec=fetch_spec,
-                            find_ghosts=find_ghosts)
-            return FetchResult()
+                                   from_repository=self.source,
+                                   last_revision=revision_id,
+                                   fetch_spec=fetch_spec,
+                                   find_ghosts=find_ghosts)
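
A hypothetical caller, assuming source_repo and target_repo are
already-open repository objects and rev_id names a revision present in
the source (the optimiser lookup is the standard InterRepository
pattern; the repository names are invented):

    from breezy.repository import InterRepository

    inter = InterRepository.get(source_repo, target_repo)
    # Copies everything needed to reconstruct rev_id into the target.
    inter.fetch(revision_id=rev_id, find_ghosts=False)
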
 
     def _walk_to_common_revisions(self, revision_ids, if_present_ids=None):
         """Walk out from revision_ids in source to revisions target has.
@@ -2410 +2354 @@
                 break
         (started_keys, excludes, included_keys) = searcher.get_state()
         return vf_search.SearchResult(started_keys, excludes,
-                                      len(included_keys), included_keys)
+            len(included_keys), included_keys)
 
     def search_missing_revision_ids(self,
-                                    find_ghosts=True, revision_ids=None, if_present_ids=None,
-                                    limit=None):
+            find_ghosts=True, revision_ids=None, if_present_ids=None,
+            limit=None):
         """Return the revision ids that source has that target does not.
 
         :param revision_ids: return revision ids included by these
@@ -2431 +2375 @@
         with self.lock_read():
             # stop searching at found target revisions.
             if not find_ghosts and (revision_ids is not None or if_present_ids is
-                                    not None):
+                    not None):
                 result = self._walk_to_common_revisions(revision_ids,
-                                                        if_present_ids=if_present_ids)
+                        if_present_ids=if_present_ids)
                 if limit is None:
                     return result
                 result_set = result.get_keys()
@@ -2474 +2418 @@
             found_ids = all_wanted_ids.intersection(present_revs)
             source_ids = [rev_id for (rev_id, parents) in
                           graph.iter_ancestry(found_ids)
-                          if rev_id != _mod_revision.NULL_REVISION and
-                          parents is not None]
+                          if rev_id != _mod_revision.NULL_REVISION
+                          and parents is not None]
         else:
             source_ids = self.source.all_revision_ids()
         return set(source_ids)
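
Ghost filtering and limits aside, the search reduces to a set difference
over ancestries; a minimal sketch assuming both repositories can
enumerate their revision ids:

    def missing_revisions(source_repo, target_repo):
        # Ghosts (parents referenced but absent) show up with parents=None
        # in iter_ancestry() above; here they simply never enter the sets.
        source_ids = set(source_repo.all_revision_ids())
        target_ids = set(target_repo.all_revision_ids())
        return source_ids - target_ids
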
@@ -2487 +2431 @@
     @classmethod
     def is_compatible(cls, source, target):
         # The default implementation is compatible with everything
-        return (source._format.supports_full_versioned_files
-                and target._format.supports_full_versioned_files)
+        return (source._format.supports_full_versioned_files and
+                target._format.supports_full_versioned_files)
 
 
 class InterDifferingSerializer(InterVersionedFileRepository):
@@ -2510 +2454 @@
             return False
         if source.supports_rich_root() and not target.supports_rich_root():
             return False
-        if (source._format.supports_tree_reference and
-                not target._format.supports_tree_reference):
+        if (source._format.supports_tree_reference
+            and not target._format.supports_tree_reference):
             return False
         if target._fallback_repositories and target._format.supports_chks:
             # IDS doesn't know how to copy CHKs for the parent inventories it
@@ -2637 +2581 @@
                                                            possible_trees)
             revision = self.source.get_revision(current_revision_id)
             pending_deltas.append((basis_id, delta,
-                                   current_revision_id, revision.parent_ids))
+                current_revision_id, revision.parent_ids))
             if self._converting_to_rich_root:
                 self._revision_id_to_root_id[current_revision_id] = \
-                    tree.path2id('')
+                    tree.get_root_id()
             # Determine which texts are present in this revision but not in
             # any of the available parents.
             texts_possibly_new_in_tree = set()
@@ -2716 +2660 @@
                     # ghosts, so just use the last converted tree.
                     possible_trees.append((basis_id, cache[basis_id]))
                 basis_id, delta = self._get_delta_for_revision(parent_tree,
-                                                               parents_parents, possible_trees)
+                    parents_parents, possible_trees)
                 self.target.add_inventory_by_delta(
                     basis_id, delta, current_revision_id, parents_parents)
         # insert signatures and revisions
@@ -2725 +2669 @@
                 signature = self.source.get_signature_text(
                     revision.revision_id)
                 self.target.add_signature_text(revision.revision_id,
-                    signature)
+                    signature)
             except errors.NoSuchRevision:
                 pass
             self.target.add_revision(revision.revision_id, revision)
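
Insertion order in the batch above is deliberate; a condensed sketch
(pending_deltas, pending_revisions and the source/target repositories
are the surrounding method's locals, abbreviated here):

    # Inventory deltas first, so every revision's tree shape exists...
    for basis_id, delta, rev_id, parent_ids in pending_deltas:
        target.add_inventory_by_delta(basis_id, delta, rev_id, parent_ids)
    # ...then signatures (optional) and the revision objects last, so a
    # revision is never visible before the data needed to read it.
    for revision in pending_revisions:
        try:
            target.add_signature_text(
                revision.revision_id,
                source.get_signature_text(revision.revision_id))
        except errors.NoSuchRevision:
            pass  # unsigned revision
        target.add_revision(revision.revision_id, revision)
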
@@ -2743 +2687 @@
         batch_size = 100
         cache = lru_cache.LRUCache(100)
         cache[basis_id] = basis_tree
-        del basis_tree  # We don't want to hang on to it here
+        del basis_tree # We don't want to hang on to it here
         hints = []
         a_graph = None
 
@@ -2752 +2696 @@
             try:
                 pb.update(gettext('Transferring revisions'), offset,
                           len(revision_ids))
-                batch = revision_ids[offset:offset + batch_size]
+                batch = revision_ids[offset:offset+batch_size]
                 basis_id = self._fetch_batch(batch, basis_id, cache)
             except:
                 self.source._safe_to_return_from_cache = False
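
The slicing above, shown as a free-standing loop (sketch; fetch_batch
stands in for the bound _fetch_batch method):

    batch_size = 100
    for offset in range(0, len(revision_ids), batch_size):
        batch = revision_ids[offset:offset + batch_size]
        # Thread the basis forward: each batch deltas against trees the
        # target received in earlier batches.
        basis_id = fetch_batch(batch, basis_id, cache)
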
@@ -2768 +2712 @@
                   len(revision_ids))
 
     def fetch(self, revision_id=None, find_ghosts=False,
-              fetch_spec=None, lossy=False):
+            fetch_spec=None):
         """See InterRepository.fetch()."""
-        if lossy:
-            raise errors.LossyPushToSameVCS(self.source, self.target)
         if fetch_spec is not None:
             revision_ids = fetch_spec.get_keys()
         else:
             revision_ids = None
         if self.source._format.experimental:
             ui.ui_factory.show_user_warning('experimental_format_fetch',
-                                            from_format=self.source._format,
-                                            to_format=self.target._format)
-        if (not self.source.supports_rich_root() and
-                self.target.supports_rich_root()):
+                from_format=self.source._format,
+                to_format=self.target._format)
+        if (not self.source.supports_rich_root()
+            and self.target.supports_rich_root()):
             self._converting_to_rich_root = True
             self._revision_id_to_root_id = {}
         else:
@@ -2789 +2731 @@
         # See <https://launchpad.net/bugs/456077> asking for a warning here
         if self.source._format.network_name() != self.target._format.network_name():
             ui.ui_factory.show_user_warning('cross_format_fetch',
-                                            from_format=self.source._format,
-                                            to_format=self.target._format)
+                from_format=self.source._format,
+                to_format=self.target._format)
         with self.lock_write():
             if revision_ids is None:
                 if revision_id:
                     search_revision_ids = [revision_id]
                 else:
                     search_revision_ids = None
-                revision_ids = self.target.search_missing_revision_ids(
-                    self.source, revision_ids=search_revision_ids,
+                revision_ids = self.target.search_missing_revision_ids(self.source,
+                    revision_ids=search_revision_ids,
                     find_ghosts=find_ghosts).get_keys()
             if not revision_ids:
-                return FetchResult(0)
+                return 0, 0
             revision_ids = tsort.topo_sort(
                 self.source.get_graph().get_parent_map(revision_ids))
             if not revision_ids:
-                return FetchResult(0)
+                return 0, 0
             # Walk through all revisions; get inventory deltas, copy the
             # texts each delta references, insert the delta, revision and
             # signature.
             with ui.ui_factory.nested_progress_bar() as pb:
                 self._fetch_all_revisions(revision_ids, pb)
-            return FetchResult(len(revision_ids))
+            return len(revision_ids), 0
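
Why the topological sort matters here (illustrative revision ids;
topo_sort is the same breezy.tsort helper used above): parents must be
converted before their children so each child can delta against an
inventory the target already holds.

    from breezy import tsort

    parent_map = {'rev-a': (), 'rev-b': ('rev-a',), 'rev-c': ('rev-b',)}
    order = tsort.topo_sort(parent_map)
    # order == ['rev-a', 'rev-b', 'rev-c'] -- parents before children
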
 
     def _get_basis(self, first_revision_id):
         """Get a revision and tree which exists in the target.
@@ -2855 +2797 @@
     @staticmethod
     def is_compatible(source, target):
         return (
-            InterRepository._same_model(source, target)
-            and source._format.supports_full_versioned_files
-            and target._format.supports_full_versioned_files)
+            InterRepository._same_model(source, target) and
+            source._format.supports_full_versioned_files and
+            target._format.supports_full_versioned_files)
 
 
 InterRepository.register_optimiser(InterVersionedFileRepository)
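
The registration at module import time is what makes the optimiser
lookup work; in miniature (a hypothetical subclass, following the same
pattern as the classes in this file):

    class InterExampleRepository(InterVersionedFileRepository):
        """Optimised paths for one specific source/target pairing."""

        @staticmethod
        def is_compatible(source, target):
            # Inspect the two formats and claim the pair, or decline.
            return False

    InterRepository.register_optimiser(InterExampleRepository)
    # InterRepository.get(source, target) now considers this class too.
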
@@ -2871 +2813 @@
     Accepts an iterable of revision, tree, signature tuples.  The signature
     may be None.
     """
-    with WriteGroup(repository):
+    repository.start_write_group()
+    try:
         inventory_cache = lru_cache.LRUCache(10)
         for n, (revision, revision_tree, signature) in enumerate(iterable):
             _install_revision(repository, revision, revision_tree, signature,
-                              inventory_cache)
+                inventory_cache)
             if pb is not None:
-                pb.update(gettext('Transferring revisions'),
-                          n + 1, num_revisions)
+                pb.update(gettext('Transferring revisions'), n + 1, num_revisions)
+    except:
+        repository.abort_write_group()
+        raise
+    else:
+        repository.commit_write_group()
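
Both sides of that hunk implement the same write-group protocol; the
context-manager form simply packages the expanded try/except/else. A
minimal stand-in with the same behaviour (a sketch; the real WriteGroup
class may accept extra options):

    class WriteGroup(object):
        """Commit a repository write group on success, abort on error."""

        def __init__(self, repository):
            self.repository = repository

        def __enter__(self):
            self.repository.start_write_group()
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            if exc_type is None:
                self.repository.commit_write_group()
            else:
                self.repository.abort_write_group()
            return False  # never swallow the exception
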
 
 
 def _install_revision(repository, rev, revision_tree, signature,
-                      inventory_cache):
+    inventory_cache):
     """Install all revision data into a repository."""
     present_parents = []
     parent_trees = {}
@@ -2892 +2839 @@
             parent_trees[p_id] = repository.revision_tree(p_id)
         else:
             parent_trees[p_id] = repository.revision_tree(
-                _mod_revision.NULL_REVISION)
+                                     _mod_revision.NULL_REVISION)
 
     # FIXME: Support nested trees
     inv = revision_tree.root_inventory
@@ -2916 +2863 @@
         # the parents inserted are not those commit would do - in particular
         # they are not filtered by heads(). RBC, AB
         for revision, tree in viewitems(parent_trees):
-            try:
-                path = tree.id2path(ie.file_id)
-            except errors.NoSuchId:
+            if not tree.has_id(ie.file_id):
                 continue
-            parent_id = tree.get_file_revision(path)
+            path = tree.id2path(ie.file_id)
+            parent_id = tree.get_file_revision(path, ie.file_id)
             if parent_id in text_parents:
                 continue
             text_parents.append((ie.file_id, parent_id))
         revision_tree_path = revision_tree.id2path(ie.file_id)
-        with revision_tree.get_file(revision_tree_path) as f:
-            lines = f.readlines()
+        lines = revision_tree.get_file(revision_tree_path, ie.file_id).readlines()
         repository.texts.add_lines(text_key, text_parents, lines)
     try:
         # install the inventory
@@ -2940 +2885 @@
             else:
                 delta = inv._make_delta(basis_inv)
                 repository.add_inventory_by_delta(rev.parent_ids[0], delta,
-                                                  rev.revision_id, present_parents)
+                    rev.revision_id, present_parents)
         else:
             repository.add_inventory(rev.revision_id, inv, present_parents)
     except errors.RevisionAlreadyPresent: