# Copyright (C) 2005-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Repository formats built around versioned files."""

from io import BytesIO

from ..lazy_import import lazy_import
lazy_import(globals(), """
import itertools

from breezy import (
    config as _mod_config,
    fifo_cache,
    gpg,
    graph,
    lru_cache,
    osutils,
    revision as _mod_revision,
    static_tuple,
    tsort,
    ui,
    )
from breezy.bzr import (
    generate_ids,
    inventorytree,
    versionedfile,
    )
from breezy.bzr.bundle import serializer
from breezy.recordcounter import RecordCounter
from breezy.i18n import gettext
from breezy.bzr.testament import Testament
""")

from .. import (
    errors,
    )
from ..decorators import (
    only_raises,
    )
from .inventory import (
    Inventory,
    entry_factory,
    )
from ..repository import (
    CommitBuilder,
    FetchResult,
    InterRepository,
    Repository,
    RepositoryFormat,
    )
from .repository import (
    MetaDirRepository,
    RepositoryFormatMetaDir,
    )
from ..tree import TreeChange


class VersionedFileRepositoryFormat(RepositoryFormat):
    """Base class for all repository formats that are VersionedFiles-based."""

    supports_full_versioned_files = True
    supports_versioned_directories = True
    supports_unreferenced_revisions = True

    # Should commit add an inventory, or an inventory delta to the repository.
    _commit_inv_deltas = True
    # What order should fetch operations request streams in?
    # The default is unordered as that is the cheapest for an origin to
    # provide.
    _fetch_order = 'unordered'
    # Does this repository format use deltas that can be fetched as-deltas?
    # (E.g. knits, where the knit deltas can be transplanted intact.)
    # We default to False, which will ensure that enough data to get
    # a full text out of any fetch stream will be grabbed.
    _fetch_uses_deltas = False


class VersionedFileCommitBuilder(CommitBuilder):
    """Commit builder implementation for versioned files based repositories.
    """

    def __init__(self, repository, parents, config_stack, timestamp=None,
                 timezone=None, committer=None, revprops=None,
                 revision_id=None, lossy=False):
        super(VersionedFileCommitBuilder, self).__init__(repository,
            parents, config_stack, timestamp, timezone, committer, revprops,
            revision_id, lossy)
        try:
            basis_id = self.parents[0]
        except IndexError:
            basis_id = _mod_revision.NULL_REVISION
        self.basis_delta_revision = basis_id
        self._new_inventory = None
        self._basis_delta = []
        self.__heads = graph.HeadsCache(repository.get_graph()).heads
        # memo'd check for no-op commits.
        self._any_changes = False

    def any_changes(self):
        """Return True if any entries were changed.

        This includes merge-only changes. It is the core for the --unchanged
        detection in commit.

        :return: True if any changes have occurred.
        """
        return self._any_changes

    def _ensure_fallback_inventories(self):
        """Ensure that appropriate inventories are available.

        This only applies to repositories that are stacked, and is about
        ensuring the stacking invariants. Namely, that for any revision that is
        present, we either have all of the file content, or we have the parent
        inventory and the delta file content.
        """
        if not self.repository._fallback_repositories:
            return
        if not self.repository._format.supports_chks:
            raise errors.BzrError("Cannot commit directly to a stacked branch"
                " in pre-2a formats. See "
                "https://bugs.launchpad.net/bzr/+bug/375013 for details.")
        # This is a stacked repo, we need to make sure we have the parent
        # inventories for the parents.
        parent_keys = [(p,) for p in self.parents]
        parent_map = self.repository.inventories._index.get_parent_map(
            parent_keys)
        missing_parent_keys = {pk for pk in parent_keys
                               if pk not in parent_map}
        fallback_repos = list(reversed(self.repository._fallback_repositories))
        missing_keys = [('inventories', pk[0])
                        for pk in missing_parent_keys]
        while missing_keys and fallback_repos:
            fallback_repo = fallback_repos.pop()
            source = fallback_repo._get_source(self.repository._format)
            sink = self.repository._get_sink()
            missing_keys = sink.insert_missing_keys(source, missing_keys)
        if missing_keys:
            raise errors.BzrError('Unable to fill in parent inventories for a'
                                  ' stacked branch')

    def commit(self, message):
        """Make the actual commit.

        :return: The revision id of the recorded revision.
        """
        self._validate_unicode_text(message, 'commit message')
        rev = _mod_revision.Revision(
            timestamp=self._timestamp,
            timezone=self._timezone,
            committer=self._committer,
            message=message,
            inventory_sha1=self.inv_sha1,
            revision_id=self._new_revision_id,
            properties=self._revprops)
        rev.parent_ids = self.parents
        if self._config_stack.get('create_signatures') == _mod_config.SIGN_ALWAYS:
            testament = Testament(rev, self.revision_tree())
            plaintext = testament.as_short_text()
            self.repository.store_revision_signature(
                gpg.GPGStrategy(self._config_stack), plaintext,
                self._new_revision_id)
        self.repository._add_revision(rev)
        self._ensure_fallback_inventories()
        self.repository.commit_write_group()
        return self._new_revision_id

    def abort(self):
        """Abort the commit that is being built."""
        self.repository.abort_write_group()

    def revision_tree(self):
        """Return the tree that was just committed.

        After calling commit() this can be called to get a
        RevisionTree representing the newly committed tree. This is
        preferred to calling Repository.revision_tree() because that may
        require deserializing the inventory, while we already have a copy in
        memory.
        """
        if self._new_inventory is None:
            self._new_inventory = self.repository.get_inventory(
                self._new_revision_id)
        return inventorytree.InventoryRevisionTree(self.repository,
            self._new_inventory, self._new_revision_id)

    def finish_inventory(self):
        """Tell the builder that the inventory is finished.

        :return: The inventory id in the repository, which can be used with
            repository.get_inventory.
        """
        # an inventory delta was accumulated without creating a new
        # inventory.
        basis_id = self.basis_delta_revision
        self.inv_sha1, self._new_inventory = self.repository.add_inventory_by_delta(
            basis_id, self._basis_delta, self._new_revision_id,
            self.parents)
        return self._new_revision_id

    def _gen_revision_id(self):
        """Return new revision-id."""
        return generate_ids.gen_revision_id(self._committer, self._timestamp)

    def _require_root_change(self, tree):
        """Enforce an appropriate root object change.

        This is called once when record_iter_changes is called, if and only if
        the root was not in the delta calculated by record_iter_changes.

        :param tree: The tree which is being committed.
        """
        if self.repository.supports_rich_root():
            return
        if len(self.parents) == 0:
            raise errors.RootMissing()
        entry = entry_factory['directory'](tree.path2id(''), '',
                                           None)
        entry.revision = self._new_revision_id
        self._basis_delta.append(('', '', entry.file_id, entry))

    def _get_delta(self, ie, basis_inv, path):
        """Get a delta against the basis inventory for ie."""
        if not basis_inv.has_id(ie.file_id):
            # add
            result = (None, path, ie.file_id, ie)
            self._basis_delta.append(result)
            return result
        elif ie != basis_inv.get_entry(ie.file_id):
            # common but altered
            # TODO: avoid this id2path call.
            result = (basis_inv.id2path(ie.file_id), path, ie.file_id, ie)
            self._basis_delta.append(result)
            return result
        else:
            # common, unaltered
            return None

    def _heads(self, file_id, revision_ids):
        """Calculate the graph heads for revision_ids in the graph of file_id.

        This can use either a per-file graph or a global revision graph as we
        have an identity relationship between the two graphs.
        """
        return self.__heads(revision_ids)

    def get_basis_delta(self):
        """Return the complete inventory delta versus the basis inventory.

        :return: An inventory delta, suitable for use with apply_delta, or
            Repository.add_inventory_by_delta, etc.
        """
        return self._basis_delta

    def record_iter_changes(self, tree, basis_revision_id, iter_changes,
                            _entry_factory=entry_factory):
        """Record a new tree via iter_changes.

        :param tree: The tree to obtain text contents from for changed objects.
        :param basis_revision_id: The revision id of the tree the iter_changes
            has been generated against. Currently assumed to be the same
            as self.parents[0] - if it is not, errors may occur.
        :param iter_changes: An iter_changes iterator with the changes to apply
            to basis_revision_id. The iterator must not include any items with
            a current kind of None - missing items must be either filtered out
            or errored-on before record_iter_changes sees the item.
        :param _entry_factory: Private method to bind entry_factory locally for
            performance.
        :return: A generator of (relpath, fs_hash) tuples for use with
            tree._observed_sha1.
        """
        # Create an inventory delta based on deltas between all the parents and
        # deltas between all the parent inventories. We use inventory deltas
        # between the inventory objects because iter_changes masks
        # last-changed-field only changes.
        # Working data:
        # file_id -> change map, change is fileid, paths, changed, versioneds,
        # parents, names, kinds, executables
        merged_ids = {}
        # {file_id -> revision_id -> inventory entry, for entries in parent
        # trees that are not parents[0]
        parent_entries = {}
        ghost_basis = False
        try:
            revtrees = list(self.repository.revision_trees(self.parents))
        except errors.NoSuchRevision:
            # one or more ghosts, slow path.
            revtrees = []
            for revision_id in self.parents:
                try:
                    revtrees.append(self.repository.revision_tree(revision_id))
                except errors.NoSuchRevision:
                    if not revtrees:
                        basis_revision_id = _mod_revision.NULL_REVISION
                        ghost_basis = True
                    revtrees.append(self.repository.revision_tree(
                        _mod_revision.NULL_REVISION))
        # The basis inventory from a repository
        if revtrees:
            basis_tree = revtrees[0]
        else:
            basis_tree = self.repository.revision_tree(
                _mod_revision.NULL_REVISION)
        basis_inv = basis_tree.root_inventory
        if len(self.parents) > 0:
            if basis_revision_id != self.parents[0] and not ghost_basis:
                raise Exception(
                    "arbitrary basis parents not yet supported with merges")
            for revtree in revtrees[1:]:
                for change in revtree.root_inventory._make_delta(basis_inv):
                    if change[1] is None:
                        # Not present in this parent.
                        continue
                    if change[2] not in merged_ids:
                        if change[0] is not None:
                            basis_entry = basis_inv.get_entry(change[2])
                            merged_ids[change[2]] = [
                                # basis revid
                                basis_entry.revision,
                                # new tree revid
                                change[3].revision]
                            parent_entries[change[2]] = {
                                # basis parent
                                basis_entry.revision: basis_entry,
                                # this parent
                                change[3].revision: change[3],
                                }
                        else:
                            merged_ids[change[2]] = [change[3].revision]
                            parent_entries[change[2]] = {
                                change[3].revision: change[3]}
                    else:
                        merged_ids[change[2]].append(change[3].revision)
                        parent_entries[change[2]
                                       ][change[3].revision] = change[3]
        # Setup the changes from the tree:
        # changes maps file_id -> (change, [parent revision_ids])
        changes = {}
        for change in iter_changes:
            # This probably looks up in basis_inv way too much.
            if change.path[0] is not None:
                head_candidate = [basis_inv.get_entry(change.file_id).revision]
            else:
                head_candidate = []
            changes[change.file_id] = change, merged_ids.get(
                change.file_id, head_candidate)
        unchanged_merged = set(merged_ids) - set(changes)
        # Extend the changes dict with synthetic changes to record merges of
        # texts.
        for file_id in unchanged_merged:
            # Record a merged version of these items that did not change vs the
            # basis. This can be either identical parallel changes, or a revert
            # of a specific file after a merge. The recorded content will be
            # that of the current tree (which is the same as the basis), but
            # the per-file graph will reflect a merge.
            # NB:XXX: We are reconstructing path information we had, this
            # should be preserved instead.
            # inv delta change: (file_id, (path_in_source, path_in_target),
            #                   changed_content, versioned, parent, name, kind,
            #                   executable)
            try:
                basis_entry = basis_inv.get_entry(file_id)
            except errors.NoSuchId:
                # a change from basis->some_parents but file_id isn't in basis
                # so was new in the merge, which means it must have changed
                # from basis -> current, and as it hasn't the add was reverted
                # by the user. So we discard this change.
                pass
            else:
                change = TreeChange(
                    file_id,
                    (basis_inv.id2path(file_id), tree.id2path(file_id)),
                    False, (True, True),
                    (basis_entry.parent_id, basis_entry.parent_id),
                    (basis_entry.name, basis_entry.name),
                    (basis_entry.kind, basis_entry.kind),
                    (basis_entry.executable, basis_entry.executable))
                changes[file_id] = (change, merged_ids[file_id])
        # changes contains tuples with the change and a set of inventory
        # candidates for the file.
        # inv delta is:
        # old_path, new_path, file_id, new_inventory_entry
        seen_root = False  # Is the root in the basis delta?
        inv_delta = self._basis_delta
        modified_rev = self._new_revision_id
        for change, head_candidates in changes.values():
            if change.versioned[1]:  # versioned in target.
                # Several things may be happening here:
                # We may have a fork in the per-file graph
                #  - record a change with the content from tree
                # We may have a change against < all trees
                #  - carry over the tree that hasn't changed
                # We may have a change against all trees
                #  - record the change with the content from tree
                kind = change.kind[1]
                file_id = change.file_id
                entry = _entry_factory[kind](file_id, change.name[1],
                                             change.parent_id[1])
                head_set = self._heads(change.file_id, set(head_candidates))
                heads = []
                # Preserve ordering.
                for head_candidate in head_candidates:
                    if head_candidate in head_set:
                        heads.append(head_candidate)
                        head_set.remove(head_candidate)
                carried_over = False
                if len(heads) == 1:
                    # Could be a carry-over situation:
                    parent_entry_revs = parent_entries.get(file_id, None)
                    if parent_entry_revs:
                        parent_entry = parent_entry_revs.get(heads[0], None)
                    else:
                        parent_entry = None
                    if parent_entry is None:
                        # The parent iter_changes was called against is the one
                        # that is the per-file head, so any change is relevant
                        # iter_changes is valid.
                        carry_over_possible = False
                    else:
                        # could be a carry over situation
                        # A change against the basis may just indicate a merge,
                        # we need to check the content against the source of the
                        # merge to determine if it was changed after the merge
                        # or carried over.
                        if (parent_entry.kind != entry.kind
                                or parent_entry.parent_id != entry.parent_id
                                or parent_entry.name != entry.name):
                            # Metadata common to all entries has changed
                            # against per-file parent
                            carry_over_possible = False
                        else:
                            carry_over_possible = True
                        # per-type checks for changes against the parent_entry
                        # are done below.
                else:
                    # Cannot be a carry-over situation
                    carry_over_possible = False
                # Populate the entry in the delta
                if kind == 'file':
                    # XXX: There is still a small race here: If someone reverts
                    # the content of a file after iter_changes examines and
                    # decides it has changed, we will unconditionally record a
                    # new version even if some other process reverts it while
                    # commit is running (with the revert happening after
                    # iter_changes did its examination).
                    if change.executable[1]:
                        entry.executable = True
                    else:
                        entry.executable = False
                    if (carry_over_possible
                            and parent_entry.executable == entry.executable):
                        # Check the file length, content hash after reading
                        # the file.
                        nostore_sha = parent_entry.text_sha1
                    else:
                        nostore_sha = None
                    file_obj, stat_value = tree.get_file_with_stat(change.path[1])
                    try:
                        entry.text_sha1, entry.text_size = self._add_file_to_weave(
                            file_id, file_obj, heads, nostore_sha,
                            size=(stat_value.st_size if stat_value else None))
                        yield change.path[1], (entry.text_sha1, stat_value)
                    except errors.ExistingContent:
                        # No content change against a carry_over parent
                        # Perhaps this should also yield a fs hash update?
                        carried_over = True
                        entry.text_size = parent_entry.text_size
                        entry.text_sha1 = parent_entry.text_sha1
                    finally:
                        file_obj.close()
                elif kind == 'symlink':
                    entry.symlink_target = tree.get_symlink_target(
                        change.path[1])
                    if (carry_over_possible and
                            parent_entry.symlink_target ==
                            entry.symlink_target):
                        carried_over = True
                    else:
                        self._add_file_to_weave(
                            change.file_id, BytesIO(), heads, None, size=0)
                elif kind == 'directory':
                    if carry_over_possible:
                        carried_over = True
                    else:
                        # Nothing to set on the entry.
                        # XXX: split into the Root and nonRoot versions.
                        if change.path[1] != '' or self.repository.supports_rich_root():
                            self._add_file_to_weave(
                                change.file_id, BytesIO(), heads, None, size=0)
                elif kind == 'tree-reference':
                    if not self.repository._format.supports_tree_reference:
                        # This isn't quite sane as an error, but we shouldn't
                        # ever see this code path in practice: trees don't
                        # permit references when the repo doesn't support tree
                        # references.
                        raise errors.UnsupportedOperation(
                            tree.add_reference, self.repository)
                    reference_revision = tree.get_reference_revision(
                        change.path[1])
                    entry.reference_revision = reference_revision
                    if (carry_over_possible
                            and parent_entry.reference_revision ==
                            reference_revision):
                        carried_over = True
                    else:
                        self._add_file_to_weave(
                            change.file_id, BytesIO(), heads, None, size=0)
                else:
                    raise AssertionError('unknown kind %r' % kind)
                if not carried_over:
                    entry.revision = modified_rev
                else:
                    entry.revision = parent_entry.revision
            else:
                entry = None
            new_path = change.path[1]
            inv_delta.append((change.path[0], new_path, change.file_id, entry))
            if new_path == '':
                seen_root = True
        # The initial commit adds a root directory, but this in itself is not
        # a worthwhile commit.
        if ((len(inv_delta) > 0 and basis_revision_id != _mod_revision.NULL_REVISION)
                or (len(inv_delta) > 1 and basis_revision_id == _mod_revision.NULL_REVISION)):
            # This should perhaps be guarded by a check that the basis we
            # commit against is the basis for the commit and if not do a delta
            # against the basis.
            self._any_changes = True
        if not seen_root:
            # housekeeping root entry changes do not affect no-change commits.
            self._require_root_change(tree)
        self.basis_delta_revision = basis_revision_id

    def _add_file_to_weave(self, file_id, fileobj, parents, nostore_sha, size):
        parent_keys = tuple([(file_id, parent) for parent in parents])
        return self.repository.texts.add_content(
            versionedfile.FileContentFactory(
                (file_id, self._new_revision_id), parent_keys, fileobj, size=size),
            nostore_sha=nostore_sha, random_id=self.random_revid)[0:2]
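

# A rough sketch of the commit-builder lifecycle implemented above
# (illustrative only; `repo`, `branch`, `tree`, `basis_tree`, `parents` and
# `config_stack` are placeholders, and the repository must already be
# write-locked by the caller):
#
#   builder = repo.get_commit_builder(branch, parents, config_stack)
#   for path, fs_hash in builder.record_iter_changes(
#           tree, parents[0], tree.iter_changes(basis_tree)):
#       pass  # the generator must be consumed to accumulate the delta
#   builder.finish_inventory()
#   rev_id = builder.commit("commit message")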


class VersionedFileRepository(Repository):
    """Repository holding history for one or more branches.

    The repository holds and retrieves historical information including
    revisions and file history. It's normally accessed only by the Branch,
    which views a particular line of development through that history.

    The Repository builds on top of some byte storage facilities (the
    revisions, signatures, inventories, texts and chk_bytes attributes) and a
    Transport, which respectively provide byte storage and a means to access
    the (possibly remote) disk.

    The byte storage facilities are addressed via tuples, which we refer to
    as 'keys' throughout the code base. Revision_keys, inventory_keys and
    signature_keys are all 1-tuples: (revision_id,). text_keys are two-tuples:
    (file_id, revision_id). chk_bytes uses CHK keys - a 1-tuple with a single
    byte string made up of a hash identifier and a hash value.

    We use this interface because it allows low friction with the underlying
    code that implements disk indices, network encoding and other parts of
    breezy.

    :ivar revisions: A breezy.versionedfile.VersionedFiles instance containing
        the serialised revisions for the repository. This can be used to obtain
        revision graph information or to access raw serialised revisions.
        The result of trying to insert data into the repository via this store
        is undefined: it should be considered read-only except for implementors
        of repositories.
    :ivar signatures: A breezy.versionedfile.VersionedFiles instance containing
        the serialised signatures for the repository. This can be used to
        obtain access to raw serialised signatures. The result of trying to
        insert data into the repository via this store is undefined: it should
        be considered read-only except for implementors of repositories.
    :ivar inventories: A breezy.versionedfile.VersionedFiles instance containing
        the serialised inventories for the repository. This can be used to
        obtain unserialised inventories. The result of trying to insert data
        into the repository via this store is undefined: it should be
        considered read-only except for implementors of repositories.
    :ivar texts: A breezy.versionedfile.VersionedFiles instance containing the
        texts of files and directories for the repository. This can be used to
        obtain file texts or file graphs. Note that Repository.iter_file_bytes
        is usually a better interface for accessing file texts.
        The result of trying to insert data into the repository via this store
        is undefined: it should be considered read-only except for implementors
        of repositories.
    :ivar chk_bytes: A breezy.versionedfile.VersionedFiles instance containing
        any data the repository chooses to store or have indexed by its hash.
        The result of trying to insert data into the repository via this store
        is undefined: it should be considered read-only except for implementors
        of repositories.
    :ivar _transport: Transport for file access to repository, typically
        pointing to .bzr/repository.
    """

    # What class to use for a CommitBuilder. Often it's simpler to change this
    # in a Repository class subclass rather than to override
    # get_commit_builder.
    _commit_builder_class = VersionedFileCommitBuilder

    def add_fallback_repository(self, repository):
        """Add a repository to use for looking up data not held locally.

        :param repository: A repository.
        """
        if not self._format.supports_external_lookups:
            raise errors.UnstackableRepositoryFormat(self._format, self.base)
        # This can raise an exception, so should be done before we lock the
        # fallback repository.
        self._check_fallback_repository(repository)
        if self.is_locked():
            # This repository will call fallback.unlock() when we transition to
            # the unlocked state, so we make sure to increment the lock count
            repository.lock_read()
        self._fallback_repositories.append(repository)
        self.texts.add_fallback_versioned_files(repository.texts)
        self.inventories.add_fallback_versioned_files(repository.inventories)
        self.revisions.add_fallback_versioned_files(repository.revisions)
        self.signatures.add_fallback_versioned_files(repository.signatures)
        if self.chk_bytes is not None:
            self.chk_bytes.add_fallback_versioned_files(repository.chk_bytes)

    def create_bundle(self, target, base, fileobj, format=None):
        return serializer.write_bundle(self, target, base, fileobj, format)

    @only_raises(errors.LockNotHeld, errors.LockBroken)
    def unlock(self):
        super(VersionedFileRepository, self).unlock()
        if self.control_files._lock_count == 0:
            self._inventory_entry_cache.clear()

    def add_inventory(self, revision_id, inv, parents):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
                        is known to have and are in the repository already.

        :returns: The validator(which is a sha1 digest, though what is sha'd is
            repository format specific) of the serialized inventory.
        """
        if not self.is_in_write_group():
            raise AssertionError("%r not in write group" % (self,))
        _mod_revision.check_not_reserved_id(revision_id)
        if not (inv.revision_id is None or inv.revision_id == revision_id):
            raise AssertionError(
                "Mismatch between inventory revision"
                " id and insertion revid (%r, %r)"
                % (inv.revision_id, revision_id))
        if inv.root is None:
            raise errors.RootMissing()
        return self._add_inventory_checked(revision_id, inv, parents)

    def _add_inventory_checked(self, revision_id, inv, parents):
        """Add inv to the repository after checking the inputs.

        This function can be overridden to allow different inventory styles.

        :seealso: add_inventory, for the contract.
        """
        inv_lines = self._serializer.write_inventory_to_lines(inv)
        return self._inventory_add_lines(revision_id, parents,
                                         inv_lines, check_content=False)

    def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
                               parents, basis_inv=None, propagate_caches=False):
        """Add a new inventory expressed as a delta against another revision.

        See the inventory developers documentation for the theory behind
        inventory deltas.

        :param basis_revision_id: The inventory id the delta was created
            against. (This does not have to be a direct parent.)
        :param delta: The inventory delta (see Inventory.apply_delta for
            details).
        :param new_revision_id: The revision id that the inventory is being
            added for.
        :param parents: The revision ids of the parents that revision_id is
            known to have and are in the repository already. These are supplied
            for repositories that depend on the inventory graph for revision
            graph access, as well as for those that pun ancestry with delta
            compression.
        :param basis_inv: The basis inventory if it is already known,
            otherwise None.
        :param propagate_caches: If True, the caches for this inventory are
            copied to and updated for the result if possible.

        :returns: (validator, new_inv)
            The validator(which is a sha1 digest, though what is sha'd is
            repository format specific) of the serialized inventory, and the
            resulting inventory.
        """
        if not self.is_in_write_group():
            raise AssertionError("%r not in write group" % (self,))
        _mod_revision.check_not_reserved_id(new_revision_id)
        basis_tree = self.revision_tree(basis_revision_id)
        with basis_tree.lock_read():
            # Note that this mutates the inventory of basis_tree, which not all
            # inventory implementations may support: A better idiom would be to
            # return a new inventory, but as there is no revision tree cache in
            # repository this is safe for now - RBC 20081013
            if basis_inv is None:
                basis_inv = basis_tree.root_inventory
            basis_inv.apply_delta(delta)
            basis_inv.revision_id = new_revision_id
            return (self.add_inventory(new_revision_id, basis_inv, parents),
                    basis_inv)
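
    # Inventory delta entries (as consumed above) are 4-tuples of
    # (old_path, new_path, file_id, new_entry); the entry values below are
    # illustrative shapes only:
    #
    #   (None, 'README', b'readme-id', <new InventoryFile>)   # add
    #   ('a.txt', 'b.txt', b'file-id', <updated entry>)       # rename
    #   ('gone.txt', None, b'gone-id', None)                  # delete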

    def _inventory_add_lines(self, revision_id, parents, lines,
                             check_content=True):
        """Store lines in inv_vf and return the sha1 of the inventory."""
        parents = [(parent,) for parent in parents]
        result = self.inventories.add_lines((revision_id,), parents, lines,
                                            check_content=check_content)[0]
        self.inventories._access.flush()
        return result

    def add_revision(self, revision_id, rev, inv=None):
        """Add rev to the revision store as revision_id.

        :param revision_id: the revision id to use.
        :param rev: The revision object.
        :param inv: The inventory for the revision. if None, it will be looked
                    up in the inventory store
        """
        # TODO: jam 20070210 Shouldn't we check rev.revision_id and
        #       rev.parent_ids?
        _mod_revision.check_not_reserved_id(revision_id)
        # check inventory present
        if not self.inventories.get_parent_map([(revision_id,)]):
            if inv is None:
                raise errors.WeaveRevisionNotPresent(revision_id,
                                                     self.inventories)
            else:
                # yes, this is not suitable for adding with ghosts.
                rev.inventory_sha1 = self.add_inventory(revision_id, inv,
                                                        rev.parent_ids)
        else:
            key = (revision_id,)
            rev.inventory_sha1 = self.inventories.get_sha1s([key])[key]
        self._add_revision(rev)

    def _add_revision(self, revision):
        lines = self._serializer.write_revision_to_lines(revision)
        key = (revision.revision_id,)
        parents = tuple((parent,) for parent in revision.parent_ids)
        self.revisions.add_lines(key, parents, lines)

    def _check_inventories(self, checker):
        """Check the inventories found from the revision scan.

        This is responsible for verifying the sha1 of inventories and
        creating a pending_keys set that covers data referenced by inventories.
        """
        with ui.ui_factory.nested_progress_bar() as bar:
            self._do_check_inventories(checker, bar)

    def _do_check_inventories(self, checker, bar):
        """Helper for _check_inventories."""
        keys = {'chk_bytes': set(), 'inventories': set(), 'texts': set()}
        kinds = ['chk_bytes', 'texts']
        count = len(checker.pending_keys)
        bar.update(gettext("inventories"), 0, 2)
        current_keys = checker.pending_keys
        checker.pending_keys = {}
        # Accumulate current checks.
        for key in current_keys:
            if key[0] != 'inventories' and key[0] not in kinds:
                checker._report_items.append('unknown key type %r' % (key,))
            keys[key[0]].add(key[1:])
        if keys['inventories']:
            # NB: output order *should* be roughly sorted - topo or
            # inverse topo depending on repository - either way decent
            # to just delta against. However, pre-CHK formats didn't
            # try to optimise inventory layout on disk. As such the
            # pre-CHK code path does not use inventory deltas.
            last_object = None
            for record in self.inventories.check(keys=keys['inventories']):
                if record.storage_kind == 'absent':
                    checker._report_items.append(
                        'Missing inventory {%s}' % (record.key,))
                else:
                    last_object = self._check_record('inventories', record,
                        checker, last_object,
                        current_keys[('inventories',) + record.key])
            del keys['inventories']
        else:
            return
        bar.update(gettext("texts"), 1)
        while (checker.pending_keys or keys['chk_bytes'] or
               keys['texts']):
            # Something to check.
            current_keys = checker.pending_keys
            checker.pending_keys = {}
            # Accumulate current checks.
            for key in current_keys:
                if key[0] not in kinds:
                    checker._report_items.append(
                        'unknown key type %r' % (key,))
                keys[key[0]].add(key[1:])
            # Check the outermost kind only - inventories || chk_bytes || texts
            for kind in kinds:
                if keys[kind]:
                    last_object = None
                    for record in getattr(self, kind).check(keys=keys[kind]):
                        if record.storage_kind == 'absent':
                            checker._report_items.append(
                                'Missing %s {%s}' % (kind, record.key,))
                        else:
                            last_object = self._check_record(kind, record,
                                checker, last_object, current_keys[(kind,) + record.key])
                    keys[kind] = set()
                    break

    def _check_record(self, kind, record, checker, last_object, item_data):
        """Check a single text from this repository."""
        if kind == 'inventories':
            rev_id = record.key[0]
            inv = self._deserialise_inventory(
                rev_id, record.get_bytes_as('lines'))
            if last_object is not None:
                delta = inv._make_delta(last_object)
                for old_path, path, file_id, ie in delta:
                    if ie is None:
                        continue
                    ie.check(checker, rev_id, inv)
            else:
                for path, ie in inv.iter_entries():
                    ie.check(checker, rev_id, inv)
            if self._format.fast_deltas:
                return inv
        elif kind == 'chk_bytes':
            # No code written to check chk_bytes for this repo format.
            checker._report_items.append(
                'unsupported key type chk_bytes for %s' % (record.key,))
        elif kind == 'texts':
            self._check_text(record, checker, item_data)
        else:
            checker._report_items.append(
                'unknown key type %s for %s' % (kind, record.key))

    def _check_text(self, record, checker, item_data):
        """Check a single text."""
        # Check it is extractable.
        # TODO: check length.
        chunks = record.get_bytes_as('chunked')
        sha1 = osutils.sha_strings(chunks)
        length = sum(map(len, chunks))
        if item_data and sha1 != item_data[1]:
            checker._report_items.append(
                'sha1 mismatch: %s has sha1 %s expected %s referenced by %s' %
                (record.key, sha1, item_data[1], item_data[2]))

    def _eliminate_revisions_not_present(self, revision_ids):
        """Check every revision id in revision_ids to see if we have it.

        Returns a set of the present revisions.
        """
        with self.lock_read():
            graph = self.get_graph()
            parent_map = graph.get_parent_map(revision_ids)
            # The old API returned a list, should this actually be a set?
            return list(parent_map)

    def __init__(self, _format, a_controldir, control_files):
        """Instantiate a VersionedFileRepository.

        :param _format: The format of the repository on disk.
        :param controldir: The ControlDir of the repository.
        :param control_files: Control files to use for locking, etc.
        """
        # In the future we will have a single api for all stores for
        # getting file texts, inventories and revisions, then
        # this construct will accept instances of those things.
        super(VersionedFileRepository, self).__init__(_format, a_controldir,
                                                      control_files)
        self._transport = control_files._transport
        self.base = self._transport.base
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = False
        self._reconcile_backsup_inventory = True
        # An InventoryEntry cache, used during deserialization
        self._inventory_entry_cache = fifo_cache.FIFOCache(10 * 1024)
        # Is it safe to return inventory entries directly from the entry cache,
        # rather copying them?
        self._safe_to_return_from_cache = False

    def fetch(self, source, revision_id=None, find_ghosts=False,
              fetch_spec=None, lossy=False):
        """Fetch the content required to construct revision_id from source.

        If revision_id is None and fetch_spec is None, then all content is
        copied.

        fetch() may not be used when the repository is in a write group -
        either finish the current write group before using fetch, or use
        fetch before starting the write group.

        :param find_ghosts: Find and copy revisions in the source that are
            ghosts in the target (and not reachable directly by walking out to
            the first-present revision in target from revision_id).
        :param revision_id: If specified, all the content needed for this
            revision ID will be copied to the target. Fetch will determine for
            itself which content needs to be copied.
        :param fetch_spec: If specified, a SearchResult or
            PendingAncestryResult that describes which revisions to copy. This
            allows copying multiple heads at once. Mutually exclusive with
            revision_id.
        """
        if fetch_spec is not None and revision_id is not None:
            raise AssertionError(
                "fetch_spec and revision_id are mutually exclusive.")
        if self.is_in_write_group():
            raise errors.InternalBzrError(
                "May not fetch while in a write group.")
        # fast path same-url fetch operations
        # TODO: lift out to somewhere common with RemoteRepository
        # <https://bugs.launchpad.net/bzr/+bug/401646>
        if (self.has_same_location(source) and
                fetch_spec is None and
                self._has_same_fallbacks(source)):
            # check that last_revision is in 'from' and then return a
            # no-operation.
            if (revision_id is not None
                    and not _mod_revision.is_null(revision_id)):
                self.get_revision(revision_id)
            return FetchResult(0)
        inter = InterRepository.get(source, self)
        if (fetch_spec is not None
                and not getattr(inter, "supports_fetch_spec", False)):
            raise errors.UnsupportedOperation(
                "fetch_spec not supported for %r" % inter)
        return inter.fetch(revision_id=revision_id,
                           find_ghosts=find_ghosts, fetch_spec=fetch_spec,
                           lossy=lossy)
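
    # Hypothetical usage sketch (the repository objects are placeholders; a
    # write group must not be active on the target):
    #
    #   result = target_repo.fetch(source_repo, revision_id=b'some-rev-id')
    #   # or, to mirror everything the source has:
    #   result = target_repo.fetch(source_repo)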

    def gather_stats(self, revid=None, committers=None):
        """See Repository.gather_stats()."""
        with self.lock_read():
            result = super(VersionedFileRepository,
                           self).gather_stats(revid, committers)
            # now gather global repository information
            # XXX: This is available for many repos regardless of listability.
            if self.user_transport.listable():
                # XXX: do we want to __define len__() ?
                # Maybe the versionedfiles object should provide a different
                # method to get the number of keys.
                result['revisions'] = len(self.revisions.keys())
            return result

    def get_commit_builder(self, branch, parents, config_stack, timestamp=None,
                           timezone=None, committer=None, revprops=None,
                           revision_id=None, lossy=False):
        """Obtain a CommitBuilder for this repository.

        :param branch: Branch to commit to.
        :param parents: Revision ids of the parents of the new revision.
        :param config_stack: Configuration stack to use.
        :param timestamp: Optional timestamp recorded for commit.
        :param timezone: Optional timezone for timestamp.
        :param committer: Optional committer to set for commit.
        :param revprops: Optional dictionary of revision properties.
        :param revision_id: Optional revision id.
        :param lossy: Whether to discard data that can not be natively
            represented, when pushing to a foreign VCS
        """
        if self._fallback_repositories and not self._format.supports_chks:
            raise errors.BzrError("Cannot commit directly to a stacked branch"
                " in pre-2a formats. See "
                "https://bugs.launchpad.net/bzr/+bug/375013 for details.")
        result = self._commit_builder_class(self, parents, config_stack,
            timestamp, timezone, committer, revprops, revision_id,
            lossy)
        self.start_write_group()
        return result

    def get_missing_parent_inventories(self, check_for_missing_texts=True):
        """Return the keys of missing inventory parents for revisions added in
        this write group.

        A revision is not complete if the inventory delta for that revision
        cannot be calculated. Therefore if the parent inventories of a
        revision are not present, the revision is incomplete, and e.g. cannot
        be streamed by a smart server. This method finds missing inventory
        parents for revisions added in this write group.
        """
        if not self._format.supports_external_lookups:
            # This is only an issue for stacked repositories
            return set()
        if not self.is_in_write_group():
            raise AssertionError('not in a write group')

        # XXX: We assume that every added revision already has its
        # corresponding inventory, so we only check for parent inventories that
        # might be missing, rather than all inventories.
        parents = set(self.revisions._index.get_missing_parents())
        parents.discard(_mod_revision.NULL_REVISION)
        unstacked_inventories = self.inventories._index
        present_inventories = unstacked_inventories.get_parent_map(
            key[-1:] for key in parents)
        parents.difference_update(present_inventories)
        if len(parents) == 0:
            # No missing parent inventories.
            return set()
        if not check_for_missing_texts:
            return set(('inventories', rev_id) for (rev_id,) in parents)
        # Ok, now we have a list of missing inventories. But these only matter
        # if the inventories that reference them are missing some texts they
        # appear to introduce.
        # XXX: Texts referenced by all added inventories need to be present,
        # but at the moment we're only checking for texts referenced by
        # inventories at the graph's edge.
        key_deps = self.revisions._index._key_dependencies
        key_deps.satisfy_refs_for_keys(present_inventories)
        referrers = frozenset(r[0] for r in key_deps.get_referrers())
        file_ids = self.fileids_altered_by_revision_ids(referrers)
        missing_texts = set()
        for file_id, version_ids in file_ids.items():
            missing_texts.update(
                (file_id, version_id) for version_id in version_ids)
        present_texts = self.texts.get_parent_map(missing_texts)
        missing_texts.difference_update(present_texts)
        if not missing_texts:
            # No texts are missing, so all revisions and their deltas are
            # reconstructable.
            return set()
        # Alternatively the text versions could be returned as the missing
        # keys, but this is likely to be less data.
        missing_keys = set(('inventories', rev_id) for (rev_id,) in parents)
        return missing_keys

    def has_revisions(self, revision_ids):
        """Probe to find out the presence of multiple revisions.

        :param revision_ids: An iterable of revision_ids.
        :return: A set of the revision_ids that were present.
        """
        with self.lock_read():
            parent_map = self.revisions.get_parent_map(
                [(rev_id,) for rev_id in revision_ids])
            result = set()
            if _mod_revision.NULL_REVISION in revision_ids:
                result.add(_mod_revision.NULL_REVISION)
            result.update([key[0] for key in parent_map])
            return result

    def get_revision_reconcile(self, revision_id):
        """'reconcile' helper routine that allows access to a revision always.

        This variant of get_revision does not cross check the weave graph
        against the revision one as get_revision does: but it should only
        be used by reconcile, or reconcile-alike commands that are correcting
        or testing the revision graph.
        """
        with self.lock_read():
            return self.get_revisions([revision_id])[0]

    def iter_revisions(self, revision_ids):
        """Iterate over revision objects.

        :param revision_ids: An iterable of revisions to examine. None may be
            passed to request all revisions known to the repository. Note that
            not all repositories can find unreferenced revisions; for those
            repositories only referenced ones will be returned.
        :return: An iterator of (revid, revision) tuples. Absent revisions (
            those asked for but not available) are returned as (revid, None).
        """
        with self.lock_read():
            for rev_id in revision_ids:
                if not rev_id or not isinstance(rev_id, bytes):
                    raise errors.InvalidRevisionId(
                        revision_id=rev_id, branch=self)
            keys = [(key,) for key in revision_ids]
            stream = self.revisions.get_record_stream(keys, 'unordered', True)
            for record in stream:
                revid = record.key[0]
                if record.storage_kind == 'absent':
                    yield (revid, None)
                else:
                    text = record.get_bytes_as('fulltext')
                    rev = self._serializer.read_revision_from_string(text)
                    yield (revid, rev)

    def add_signature_text(self, revision_id, signature):
        """Store a signature text for a revision.

        :param revision_id: Revision id of the revision
        :param signature: Signature text.
        """
        with self.lock_write():
            self.signatures.add_lines((revision_id,), (),
                                      osutils.split_lines(signature))

    def sign_revision(self, revision_id, gpg_strategy):
        with self.lock_write():
            testament = Testament.from_revision(
                self, revision_id)
            plaintext = testament.as_short_text()
            self.store_revision_signature(gpg_strategy, plaintext, revision_id)

    def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
        with self.lock_write():
            signature = gpg_strategy.sign(plaintext, gpg.MODE_CLEAR)
            self.add_signature_text(revision_id, signature)

    def verify_revision_signature(self, revision_id, gpg_strategy):
        """Verify the signature on a revision.

        :param revision_id: the revision to verify
        :param gpg_strategy: the GPGStrategy object to use

        :return: gpg.SIGNATURE_VALID or a failed SIGNATURE_ value
        """
        with self.lock_read():
            if not self.has_signature_for_revision_id(revision_id):
                return gpg.SIGNATURE_NOT_SIGNED, None
            signature = self.get_signature_text(revision_id)

            testament = Testament.from_revision(
                self, revision_id)

            (status, key, signed_plaintext) = gpg_strategy.verify(signature)
            if testament.as_short_text() != signed_plaintext:
                return gpg.SIGNATURE_NOT_VALID, None
            return (status, key)
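
    # Hypothetical sign-then-verify round trip (`strategy` is a configured
    # gpg.GPGStrategy; signing requires a write lock):
    #
    #   repo.sign_revision(rev_id, strategy)
    #   status, key = repo.verify_revision_signature(rev_id, strategy)
    #   # status is gpg.SIGNATURE_VALID on success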

    def find_text_key_references(self):
        """Find the text key references within the repository.

        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. The inventory texts from all present
            revision ids are assessed to generate this report.
        """
        revision_keys = self.revisions.keys()
        w = self.inventories
        with ui.ui_factory.nested_progress_bar() as pb:
            return self._serializer._find_text_key_references(
                w.iter_lines_added_or_present_in_keys(revision_keys, pb=pb))

    def _inventory_xml_lines_for_keys(self, keys):
        """Get a line iterator of the sort needed for finding references.

        Not relevant for non-xml inventory repositories.

        Ghosts in revision_keys are ignored.

        :param revision_keys: The revision keys for the inventories to inspect.
        :return: An iterator over (inventory line, revid) for the fulltexts of
            all of the xml inventories specified by revision_keys.
        """
        stream = self.inventories.get_record_stream(keys, 'unordered', True)
        for record in stream:
            if record.storage_kind != 'absent':
                revid = record.key[-1]
                for line in record.get_bytes_as('lines'):
                    yield line, revid

    def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
                                                revision_keys):
        """Helper routine for fileids_altered_by_revision_ids.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :param revision_keys: The revision ids to filter for. This should be a
            set or other type which supports efficient __contains__ lookups, as
            the revision key from each parsed line will be looked up in the
            revision_keys filter.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids that
            altered it listed explicitly.
        """
        seen = set(self._serializer._find_text_key_references(line_iterator))
        parent_keys = self._find_parent_keys_of_revisions(revision_keys)
        parent_seen = set(self._serializer._find_text_key_references(
            self._inventory_xml_lines_for_keys(parent_keys)))
        new_keys = seen - parent_seen
        result = {}
        setdefault = result.setdefault
        for key in new_keys:
            setdefault(key[0], set()).add(key[-1])
        return result

    def _find_parent_keys_of_revisions(self, revision_keys):
        """Similar to _find_parent_ids_of_revisions, but used with keys.

        :param revision_keys: An iterable of revision_keys.
        :return: The parents of all revision_keys that are not already in
            revision_keys
        """
        parent_map = self.revisions.get_parent_map(revision_keys)
        parent_keys = set(itertools.chain.from_iterable(
            parent_map.values()))
        parent_keys.difference_update(revision_keys)
        parent_keys.discard(_mod_revision.NULL_REVISION)
        return parent_keys

    def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
        """Find the file ids and versions affected by revisions.

        :param revisions: an iterable containing revision ids.
        :param _inv_weave: The inventory weave from this repository or None.
            If None, the inventory weave will be opened automatically.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids that
            altered it listed explicitly.
        """
        selected_keys = set((revid,) for revid in revision_ids)
        w = _inv_weave or self.inventories
        return self._find_file_ids_from_xml_inventory_lines(
            w.iter_lines_added_or_present_in_keys(
                selected_keys, pb=None),
            selected_keys)

    def iter_files_bytes(self, desired_files):
        """Iterate through file versions.

        Files will not necessarily be returned in the order they occur in
        desired_files. No specific order is guaranteed.

        Yields pairs of identifier, bytes_iterator. identifier is an opaque
        value supplied by the caller as part of desired_files. It should
        uniquely identify the file version in the caller's context. (Examples:
        an index number or a TreeTransform trans_id.)

        bytes_iterator is an iterable of bytestrings for the file. The
        kind of iterable and length of the bytestrings are unspecified, but for
        this implementation, it is a list of bytes produced by
        VersionedFile.get_record_stream().

        :param desired_files: a list of (file_id, revision_id, identifier)
            triples
        """
        text_keys = {}
        for file_id, revision_id, callable_data in desired_files:
            text_keys[(file_id, revision_id)] = callable_data
        for record in self.texts.get_record_stream(text_keys, 'unordered', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key[1], record.key[0])
            yield text_keys[record.key], record.get_bytes_as('chunked')
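
    # Hypothetical usage sketch: fetch one file text and join its chunks
    # (the third element of each triple is a caller-chosen identifier):
    #
    #   wanted = [(b'file-id', b'rev-id', 'token-0')]
    #   for identifier, chunks in repo.iter_files_bytes(wanted):
    #       text = b''.join(chunks)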

    def _generate_text_key_index(self, text_key_references=None,
                                 ancestors=None):
        """Generate a new text key index for the repository.

        This is an expensive function that will take considerable time to run.

        :return: A dict mapping text keys ((file_id, revision_id) tuples) to a
            list of parents, also text keys. When a given key has no parents,
            the parents list will be [NULL_REVISION].
        """
        # All revisions, to find inventory parents.
        if ancestors is None:
            graph = self.get_graph()
            ancestors = graph.get_parent_map(self.all_revision_ids())
        if text_key_references is None:
            text_key_references = self.find_text_key_references()
        with ui.ui_factory.nested_progress_bar() as pb:
            return self._do_generate_text_key_index(ancestors,
                text_key_references, pb)

    def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
        """Helper for _generate_text_key_index to avoid deep nesting."""
        revision_order = tsort.topo_sort(ancestors)
        invalid_keys = set()
        revision_keys = {}
        for revision_id in revision_order:
            revision_keys[revision_id] = set()
        text_count = len(text_key_references)
        # a cache of the text keys to allow reuse; costs a dict of all the
        # keys, but saves a 2-tuple for every child of a given key.
        text_key_cache = {}
        for text_key, valid in text_key_references.items():
            if not valid:
                invalid_keys.add(text_key)
            else:
                revision_keys[text_key[1]].add(text_key)
            text_key_cache[text_key] = text_key
        del text_key_references
        text_index = {}
        text_graph = graph.Graph(graph.DictParentsProvider(text_index))
        NULL_REVISION = _mod_revision.NULL_REVISION
        # Set a cache with a size of 10 - this suffices for bzr.dev but may be
        # too small for large or very branchy trees. However, for 55K path
        # trees, it would be easy to use too much memory trivially. Ideally we
        # could gauge this by looking at available real memory etc, but this is
        # always a tricky proposition.
        inventory_cache = lru_cache.LRUCache(10)
        batch_size = 10  # should be ~150MB on a 55K path tree
        batch_count = len(revision_order) // batch_size + 1
        processed_texts = 0
        pb.update(gettext("Calculating text parents"),
                  processed_texts, text_count)
        for offset in range(batch_count):
            to_query = revision_order[offset * batch_size:(offset + 1)
                                      * batch_size]
            if not to_query:
                break
            for revision_id in to_query:
                parent_ids = ancestors[revision_id]
                for text_key in revision_keys[revision_id]:
                    pb.update(gettext("Calculating text parents"),
                              processed_texts)
                    processed_texts += 1
                    candidate_parents = []
                    for parent_id in parent_ids:
                        parent_text_key = (text_key[0], parent_id)
                        try:
                            check_parent = parent_text_key not in \
                                revision_keys[parent_id]
                        except KeyError:
                            # the parent parent_id is a ghost:
                            check_parent = False
                            # truncate the derived graph against this ghost.
                            parent_text_key = None
                        if check_parent:
                            # look at the parent commit details inventories to
                            # determine possible candidates in the per file graph.
                            try:
                                inv = inventory_cache[parent_id]
                            except KeyError:
                                inv = self.revision_tree(
                                    parent_id).root_inventory
                                inventory_cache[parent_id] = inv
                            try:
                                parent_entry = inv.get_entry(text_key[0])
                            except (KeyError, errors.NoSuchId):
                                parent_entry = None
                            if parent_entry is not None:
                                parent_text_key = (
                                    text_key[0], parent_entry.revision)
                            else:
                                parent_text_key = None
                        if parent_text_key is not None:
                            candidate_parents.append(
                                text_key_cache[parent_text_key])
                    parent_heads = text_graph.heads(candidate_parents)
                    new_parents = list(parent_heads)
                    new_parents.sort(key=lambda x: candidate_parents.index(x))
                    if new_parents == []:
                        new_parents = [NULL_REVISION]
                    text_index[text_key] = new_parents
        for text_key in invalid_keys:
            text_index[text_key] = [NULL_REVISION]
        return text_index

    def item_keys_introduced_by(self, revision_ids, _files_pb=None):
        """Get an iterable listing the keys of all the data introduced by a set
        of revision IDs.

        The keys will be ordered so that the corresponding items can be safely
        fetched and inserted in that order.

        :returns: An iterable producing tuples of (knit-kind, file-id,
            versions). knit-kind is one of 'file', 'inventory', 'signatures',
            'revisions'. file-id is None unless knit-kind is 'file'.
        """
        for result in self._find_file_keys_to_fetch(revision_ids, _files_pb):
            yield result
        del _files_pb
        for result in self._find_non_file_keys_to_fetch(revision_ids):
            yield result

    def _find_file_keys_to_fetch(self, revision_ids, pb):
        # XXX: it's a bit weird to control the inventory weave caching in this
        # generator. Ideally the caching would be done in fetch.py I think. Or
        # maybe this generator should explicitly have the contract that it
        # should not be iterated until the previously yielded item has been
        # processed?
        inv_w = self.inventories

        # file ids that changed
        file_ids = self.fileids_altered_by_revision_ids(revision_ids, inv_w)
        count = 0
        num_file_ids = len(file_ids)
        for file_id, altered_versions in file_ids.items():
            if pb is not None:
                pb.update(gettext("Fetch texts"), count, num_file_ids)
            count += 1
            yield ("file", file_id, altered_versions)

    def _find_non_file_keys_to_fetch(self, revision_ids):
        # inventory
        yield ("inventory", None, revision_ids)

        # signatures
        # XXX: Note ATM no callers actually pay attention to this return
        #      instead they just use the list of revision ids and ignore
        #      missing sigs. Consider removing this work entirely
        revisions_with_signatures = set(self.signatures.get_parent_map(
            [(r,) for r in revision_ids]))
        revisions_with_signatures = {r for (r,) in revisions_with_signatures}
        revisions_with_signatures.intersection_update(revision_ids)
        yield ("signatures", None, revisions_with_signatures)

        # revisions
        yield ("revisions", None, revision_ids)

    def get_inventory(self, revision_id):
        """Get Inventory object by revision id."""
        with self.lock_read():
            return next(self.iter_inventories([revision_id]))

    def iter_inventories(self, revision_ids, ordering=None):
        """Get many inventories by revision_ids.

        This will buffer some or all of the texts used in constructing the
        inventories in memory, but will only parse a single inventory at a
        time.

        :param revision_ids: The expected revision ids of the inventories.
        :param ordering: optional ordering, e.g. 'topological'. If not
            specified, the order of revision_ids will be preserved (by
            buffering if necessary).
        :return: An iterator of inventories.
        """
        if ((None in revision_ids) or
                (_mod_revision.NULL_REVISION in revision_ids)):
            raise ValueError('cannot get null revision inventory')
        for inv, revid in self._iter_inventories(revision_ids, ordering):
            if inv is None:
                raise errors.NoSuchRevision(self, revid)
            yield inv
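
    # Hypothetical usage sketch: stream inventories in topological order
    # instead of the order of the supplied ids:
    #
    #   for inv in repo.iter_inventories(rev_ids, ordering='topological'):
    #       print(inv.revision_id)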

    def _iter_inventories(self, revision_ids, ordering):
        """single-document based inventory iteration."""
        inv_xmls = self._iter_inventory_xmls(revision_ids, ordering)
        for lines, revision_id in inv_xmls:
            if lines is None:
                yield None, revision_id
            else:
                yield self._deserialise_inventory(revision_id, lines), revision_id

    def _iter_inventory_xmls(self, revision_ids, ordering):
        if ordering is None:
            order_as_requested = True
            ordering = 'unordered'
        else:
            order_as_requested = False
        keys = [(revision_id,) for revision_id in revision_ids]
        if not keys:
            return
        if order_as_requested:
            key_iter = iter(keys)
            next_key = next(key_iter)
        stream = self.inventories.get_record_stream(keys, ordering, True)
        text_lines = {}
        for record in stream:
            if record.storage_kind != 'absent':
                lines = record.get_bytes_as('lines')
                if order_as_requested:
                    text_lines[record.key] = lines
                else:
                    yield lines, record.key[-1]
            else:
                yield None, record.key[-1]
            if order_as_requested:
                # Yield as many results as we can while preserving order.
                while next_key in text_lines:
                    lines = text_lines.pop(next_key)
                    yield lines, next_key[-1]
                    try:
                        next_key = next(key_iter)
                    except StopIteration:
                        # We still want to fully consume the get_record_stream,
                        # just in case it is not actually finished at this point
                        next_key = None
                        break

    def _deserialise_inventory(self, revision_id, xml):
        """Transform the xml into an inventory object.

        :param revision_id: The expected revision id of the inventory.
        :param xml: A serialised inventory.
        """
        result = self._serializer.read_inventory_from_lines(
            xml, revision_id, entry_cache=self._inventory_entry_cache,
            return_from_cache=self._safe_to_return_from_cache)
        if result.revision_id != revision_id:
            raise AssertionError('revision id mismatch %s != %s' % (
                result.revision_id, revision_id))
        return result

    def get_serializer_format(self):
        return self._serializer.format_num

    def _get_inventory_xml(self, revision_id):
        """Get serialized inventory as a string."""
        with self.lock_read():
            texts = self._iter_inventory_xmls([revision_id], 'unordered')
            lines, revision_id = next(texts)
            if lines is None:
                raise errors.NoSuchRevision(self, revision_id)
            return lines

    def revision_tree(self, revision_id):
        """Return Tree for a revision on this branch.

        `revision_id` may be NULL_REVISION for the empty tree revision.
        """
        revision_id = _mod_revision.ensure_null(revision_id)
        # TODO: refactor this to use an existing revision object
        # so we don't need to read it in twice.
        if revision_id == _mod_revision.NULL_REVISION:
            return inventorytree.InventoryRevisionTree(self,
                Inventory(root_id=None), _mod_revision.NULL_REVISION)
        else:
            with self.lock_read():
                inv = self.get_inventory(revision_id)
                return inventorytree.InventoryRevisionTree(self, inv, revision_id)

    def revision_trees(self, revision_ids):
        """Return Trees for revisions in this repository.

        :param revision_ids: a sequence of revision-ids;
            a revision-id may not be None or b'null:'
        """
        inventories = self.iter_inventories(revision_ids)
        for inv in inventories:
            yield inventorytree.InventoryRevisionTree(self, inv, inv.revision_id)

    def get_deltas_for_revisions(self, revisions, specific_fileids=None):
        """Produce a generator of revision deltas.

        Note that the input is a sequence of REVISIONS, not revision_ids.
        Trees will be held in memory until the generator exits.
        Each delta is relative to the revision's lefthand predecessor.

        :param specific_fileids: if not None, the result is filtered
            so that only those file-ids, their parents and their
            children are included.
        """
        # Get the revision-ids of interest
        required_trees = set()
        for revision in revisions:
            required_trees.add(revision.revision_id)
            required_trees.update(revision.parent_ids[:1])

        # Get the matching filtered trees. Note that it's more
        # efficient to pass filtered trees to changes_from() rather
        # than doing the filtering afterwards. changes_from() could
        # arguably do the filtering itself but it's path-based, not
        # file-id based, so filtering before or afterwards is
        # currently easier.
        if specific_fileids is None:
            trees = dict((t.get_revision_id(), t) for
                         t in self.revision_trees(required_trees))
        else:
            trees = dict((t.get_revision_id(), t) for
                         t in self._filtered_revision_trees(required_trees,
                                                            specific_fileids))

        # Calculate the deltas
        for revision in revisions:
            if not revision.parent_ids:
                old_tree = self.revision_tree(_mod_revision.NULL_REVISION)
            else:
                old_tree = trees[revision.parent_ids[0]]
            yield trees[revision.revision_id].changes_from(old_tree)
1598
def _filtered_revision_trees(self, revision_ids, file_ids):
1599
"""Return Tree for a revision on this branch with only some files.
1601
:param revision_ids: a sequence of revision-ids;
1602
a revision-id may not be None or b'null:'
1603
:param file_ids: if not None, the result is filtered
1604
so that only those file-ids, their parents and their
1605
children are included.
1607
inventories = self.iter_inventories(revision_ids)
1608
for inv in inventories:
1609
# Should we introduce a FilteredRevisionTree class rather
1610
# than pre-filter the inventory here?
1611
filtered_inv = inv.filter(file_ids)
1612
yield inventorytree.InventoryRevisionTree(self, filtered_inv, filtered_inv.revision_id)
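
    # Illustrative sketch (assumed usage, not from the original source): each
    # delta yielded by get_deltas_for_revisions() compares a revision's tree
    # with its lefthand parent, so for a list `revs` of Revision objects
    # already read from this repository:
    #
    #     for delta in repo.get_deltas_for_revisions(revs):
    #         print(delta.added, delta.removed, delta.modified)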
1614

    def get_parent_map(self, revision_ids):
        """See graph.StackedParentsProvider.get_parent_map"""
        # revisions index works in keys; this just works in revisions
        # therefore wrap and unwrap
        query_keys = []
        result = {}
        for revision_id in revision_ids:
            if revision_id == _mod_revision.NULL_REVISION:
                result[revision_id] = ()
            elif revision_id is None:
                raise ValueError('get_parent_map(None) is not valid')
            else:
                query_keys.append((revision_id,))
        for (revision_id,), parent_keys in (
                self.revisions.get_parent_map(query_keys)).items():
            if parent_keys:
                result[revision_id] = tuple([parent_revid
                    for (parent_revid,) in parent_keys])
            else:
                result[revision_id] = (_mod_revision.NULL_REVISION,)
        return result
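
    # Hedged example of the mapping built above (revision ids hypothetical):
    # a ghost is simply absent from the result, a revision with no parents
    # maps to (NULL_REVISION,), and NULL_REVISION itself maps to ():
    #
    #     repo.get_parent_map([b'first-rev', b'ghost-rev'])
    #     # => {b'first-rev': (b'null:',)}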
1636

    def get_known_graph_ancestry(self, revision_ids):
        """Return the known graph for a set of revision ids and their ancestors.
        """
        st = static_tuple.StaticTuple
        revision_keys = [st(r_id).intern() for r_id in revision_ids]
        with self.lock_read():
            known_graph = self.revisions.get_known_graph_ancestry(
                revision_keys)
            return graph.GraphThunkIdsToKeys(known_graph)

    def get_file_graph(self):
        """Return the graph walker for text revisions."""
        with self.lock_read():
            return graph.Graph(self.texts)

    def revision_ids_to_search_result(self, result_set):
        """Convert a set of revision ids to a graph SearchResult."""
        result_parents = set(itertools.chain.from_iterable(
            self.get_graph().get_parent_map(result_set).values()))
        included_keys = result_set.intersection(result_parents)
        start_keys = result_set.difference(included_keys)
        exclude_keys = result_parents.difference(result_set)
        result = vf_search.SearchResult(start_keys, exclude_keys,
            len(result_set), result_set)
        return result
1662

    def _get_versioned_file_checker(self, text_key_references=None,
            ancestors=None):
        """Return an object suitable for checking versioned files.

        :param text_key_references: if non-None, an already built
            dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. If None, this will be
            calculated.
        :param ancestors: Optional result from
            self.get_graph().get_parent_map(self.all_revision_ids()) if already
            available.
        """
        return _VersionedFileChecker(self,
            text_key_references=text_key_references, ancestors=ancestors)

    def has_signature_for_revision_id(self, revision_id):
        """Query for a revision signature for revision_id in the repository."""
        with self.lock_read():
            if not self.has_revision(revision_id):
                raise errors.NoSuchRevision(self, revision_id)
            sig_present = (1 == len(
                self.signatures.get_parent_map([(revision_id,)])))
            return sig_present

    def get_signature_text(self, revision_id):
        """Return the text for a signature."""
        with self.lock_read():
            stream = self.signatures.get_record_stream([(revision_id,)],
                'unordered', True)
            record = next(stream)
            if record.storage_kind == 'absent':
                raise errors.NoSuchRevision(self, revision_id)
            return record.get_bytes_as('fulltext')

    def _check(self, revision_ids, callback_refs, check_repo):
        with self.lock_read():
            result = check.VersionedFileCheck(self, check_repo=check_repo)
            result.check(callback_refs)
            return result
1703

    def _find_inconsistent_revision_parents(self, revisions_iterator=None):
        """Find revisions with different parent lists in the revision object
        and in the index graph.

        :param revisions_iterator: None, or an iterator of (revid,
            Revision-or-None). This iterator controls the revisions checked.
        :returns: an iterator yielding tuples of (revision-id, parents-in-index,
            parents-in-revision).
        """
        if not self.is_locked():
            raise AssertionError()
        vf = self.revisions
        if revisions_iterator is None:
            revisions_iterator = self.iter_revisions(self.all_revision_ids())
        for revid, revision in revisions_iterator:
            if revision is None:
                continue
            parent_map = vf.get_parent_map([(revid,)])
            parents_according_to_index = tuple(parent[-1] for parent in
                parent_map[(revid,)])
            parents_according_to_revision = tuple(revision.parent_ids)
            if parents_according_to_index != parents_according_to_revision:
                yield (revid, parents_according_to_index,
                    parents_according_to_revision)

    def _check_for_inconsistent_revision_parents(self):
        inconsistencies = list(self._find_inconsistent_revision_parents())
        if inconsistencies:
            raise errors.BzrCheckError(
                "Revision knit has inconsistent parents.")

    def _get_sink(self):
        """Return a sink for streaming into this repository."""
        return StreamSink(self)

    def _get_source(self, to_format):
        """Return a source for streaming from this repository."""
        return StreamSource(self, to_format)

    def reconcile(self, other=None, thorough=False):
        """Reconcile this repository."""
        from .reconcile import VersionedFileRepoReconciler
        with self.lock_write():
            reconciler = VersionedFileRepoReconciler(self, thorough=thorough)
            return reconciler.reconcile()
1750


class MetaDirVersionedFileRepository(MetaDirRepository,
                                     VersionedFileRepository):
    """Repositories in a meta-dir, that work via versioned file objects."""

    def __init__(self, _format, a_controldir, control_files):
        super(MetaDirVersionedFileRepository, self).__init__(_format, a_controldir,
            control_files)


class MetaDirVersionedFileRepositoryFormat(RepositoryFormatMetaDir,
                                           VersionedFileRepositoryFormat):
    """Base class for repository formats using versioned files in metadirs."""


class StreamSink(object):
    """An object that can insert a stream into a repository.

    This interface handles the complexity of reserialising inventories and
    revisions from different formats, and allows unidirectional insertion into
    stacked repositories without looking for the missing basis parents
    beforehand.
    """

    def __init__(self, target_repo):
        self.target_repo = target_repo

    def insert_missing_keys(self, source, missing_keys):
        """Insert missing keys from another source.

        :param source: StreamSource to stream from
        :param missing_keys: Keys to insert
        :return: keys still missing
        """
        stream = source.get_stream_for_missing_keys(missing_keys)
        return self.insert_stream_without_locking(stream,
            self.target_repo._format)
1787

    def insert_stream(self, stream, src_format, resume_tokens):
        """Insert a stream's content into the target repository.

        :param src_format: a bzr repository format.

        :return: a list of resume tokens and an iterable of keys additional
            items required before the insertion can be completed.
        """
        with self.target_repo.lock_write():
            if resume_tokens:
                self.target_repo.resume_write_group(resume_tokens)
                is_resume = True
            else:
                self.target_repo.start_write_group()
                is_resume = False
            try:
                # locked_insert_stream performs a commit|suspend.
                missing_keys = self.insert_stream_without_locking(stream,
                                    src_format, is_resume)
                if missing_keys:
                    # suspend the write group and tell the caller what is
                    # missing. We know we can suspend or else we would not have
                    # entered this code path. (All repositories that can handle
                    # missing keys can handle suspending a write group).
                    write_group_tokens = self.target_repo.suspend_write_group()
                    return write_group_tokens, missing_keys
                hint = self.target_repo.commit_write_group()
                to_serializer = self.target_repo._format._serializer
                src_serializer = src_format._serializer
                if (to_serializer != src_serializer
                        and self.target_repo._format.pack_compresses):
                    self.target_repo.pack(hint=hint)
                return [], set()
            except:
                self.target_repo.abort_write_group(suppress_errors=True)
                raise
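
    # Hedged sketch of the resume protocol (names are hypothetical): a caller
    # that receives missing_keys fetches them from the source and can then
    # resume with the returned tokens:
    #
    #     tokens, missing = sink.insert_stream(stream, src_format, [])
    #     if missing:
    #         missing = sink.insert_missing_keys(source, missing)
    #         # ...then resume via insert_stream(next_stream, src_format, tokens)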
1824

    def insert_stream_without_locking(self, stream, src_format,
            is_resume=False):
        """Insert a stream's content into the target repository.

        This assumes that you already have a locked repository and an active
        write group.

        :param src_format: a bzr repository format.
        :param is_resume: Passed down to get_missing_parent_inventories to
            indicate if we should be checking for missing texts at the same
            time.

        :return: A set of keys that are missing.
        """
        if not self.target_repo.is_write_locked():
            raise errors.ObjectNotLocked(self)
        if not self.target_repo.is_in_write_group():
            raise errors.BzrError('you must already be in a write group')
        to_serializer = self.target_repo._format._serializer
        src_serializer = src_format._serializer
        new_pack = None
        if to_serializer == src_serializer:
            # If serializers match and the target is a pack repository, set the
            # write cache size on the new pack. This avoids poor performance
            # on transports where append is unbuffered (such as
            # RemoteTransport). This is safe to do because nothing should read
            # back from the target repository while a stream with matching
            # serialization is being inserted.
            # The exception is that a delta record from the source that should
            # be a fulltext may need to be expanded by the target (see
            # test_fetch_revisions_with_deltas_into_pack); but we take care to
            # explicitly flush any buffered writes first in that rare case.
            try:
                new_pack = self.target_repo._pack_collection._new_pack
            except AttributeError:
                # Not a pack repository
                pass
            else:
                new_pack.set_write_cache_size(1024 * 1024)
        for substream_type, substream in stream:
            if 'stream' in debug.debug_flags:
                mutter('inserting substream: %s', substream_type)
            if substream_type == 'texts':
                self.target_repo.texts.insert_record_stream(substream)
            elif substream_type == 'inventories':
                if src_serializer == to_serializer:
                    self.target_repo.inventories.insert_record_stream(
                        substream)
                else:
                    self._extract_and_insert_inventories(
                        substream, src_serializer)
            elif substream_type == 'inventory-deltas':
                self._extract_and_insert_inventory_deltas(
                    substream, src_serializer)
            elif substream_type == 'chk_bytes':
                # XXX: This doesn't support conversions, as it assumes the
                #      conversion was done in the fetch code.
                self.target_repo.chk_bytes.insert_record_stream(substream)
            elif substream_type == 'revisions':
                # This may fallback to extract-and-insert more often than
                # required if the serializers are different only in terms of
                # the inventory.
                if src_serializer == to_serializer:
                    self.target_repo.revisions.insert_record_stream(substream)
                else:
                    self._extract_and_insert_revisions(substream,
                        src_serializer)
            elif substream_type == 'signatures':
                self.target_repo.signatures.insert_record_stream(substream)
            else:
                raise AssertionError('kaboom! %s' % (substream_type,))
        # Done inserting data, and the missing_keys calculations will try to
        # read back from the inserted data, so flush the writes to the new pack
        # (if this is pack format).
        if new_pack is not None:
            new_pack._write_data(b'', flush=True)
        # Find all the new revisions (including ones from resume_tokens)
        missing_keys = self.target_repo.get_missing_parent_inventories(
            check_for_missing_texts=is_resume)
        try:
            for prefix, versioned_file in (
                    ('texts', self.target_repo.texts),
                    ('inventories', self.target_repo.inventories),
                    ('revisions', self.target_repo.revisions),
                    ('signatures', self.target_repo.signatures),
                    ('chk_bytes', self.target_repo.chk_bytes),
                    ):
                if versioned_file is None:
                    continue
                # TODO: key is often going to be a StaticTuple object
                #       I don't believe we can define a method by which
                #       (prefix,) + StaticTuple will work, though we could
                #       define a StaticTuple.sq_concat that would allow you to
                #       pass in either a tuple or a StaticTuple as the second
                #       object, so instead we could have:
                #       StaticTuple(prefix) + key here...
                missing_keys.update((prefix,) + key for key in
                    versioned_file.get_missing_compression_parent_keys())
        except NotImplementedError:
            # cannot even attempt suspending, and missing would have failed
            # during stream insertion.
            missing_keys = set()
        return missing_keys
1928

    def _extract_and_insert_inventory_deltas(self, substream, serializer):
        target_rich_root = self.target_repo._format.rich_root_data
        target_tree_refs = self.target_repo._format.supports_tree_reference
        for record in substream:
            # Insert the delta directly
            inventory_delta_bytes = record.get_bytes_as('lines')
            deserialiser = inventory_delta.InventoryDeltaDeserializer()
            try:
                parse_result = deserialiser.parse_text_bytes(
                    inventory_delta_bytes)
            except inventory_delta.IncompatibleInventoryDelta as err:
                mutter("Incompatible delta: %s", err.msg)
                raise errors.IncompatibleRevision(self.target_repo._format)
            basis_id, new_id, rich_root, tree_refs, inv_delta = parse_result
            revision_id = new_id
            parents = [key[0] for key in record.parents]
            self.target_repo.add_inventory_by_delta(
                basis_id, inv_delta, revision_id, parents)

    def _extract_and_insert_inventories(self, substream, serializer,
            parse_delta=None):
        """Generate a new inventory versionedfile in target, converting data.

        The inventory is retrieved from the source, (deserializing it), and
        stored in the target (reserializing it in a different format).
        """
        target_rich_root = self.target_repo._format.rich_root_data
        target_tree_refs = self.target_repo._format.supports_tree_reference
        for record in substream:
            # It's not a delta, so it must be a fulltext in the source
            # serializer's format.
            lines = record.get_bytes_as('lines')
            revision_id = record.key[0]
            inv = serializer.read_inventory_from_lines(lines, revision_id)
            parents = [key[0] for key in record.parents]
            self.target_repo.add_inventory(revision_id, inv, parents)
            # No need to keep holding this full inv in memory when the rest of
            # the substream is likely to be all deltas.
            del inv

    def _extract_and_insert_revisions(self, substream, serializer):
        for record in substream:
            bytes = record.get_bytes_as('fulltext')
            revision_id = record.key[0]
            rev = serializer.read_revision_from_string(bytes)
            if rev.revision_id != revision_id:
                raise AssertionError('wtf: %s != %s' % (rev, revision_id))
            self.target_repo.add_revision(revision_id, rev)

    def finished(self):
        if self.target_repo._format._fetch_reconcile:
            self.target_repo.reconcile()
1982


class StreamSource(object):
    """A source of a stream for fetching between repositories."""

    def __init__(self, from_repository, to_format):
        """Create a StreamSource streaming from from_repository."""
        self.from_repository = from_repository
        self.to_format = to_format
        self._record_counter = RecordCounter()

    def delta_on_metadata(self):
        """Return True if deltas are permitted on metadata streams.

        That is on revisions and signatures.
        """
        src_serializer = self.from_repository._format._serializer
        target_serializer = self.to_format._serializer
        return (self.to_format._fetch_uses_deltas
                and src_serializer == target_serializer)
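
    # A summary of the predicate above (informational, not new behaviour):
    # metadata deltas are only safe when no reserialisation will happen,
    # i.e. the serializers match on both ends *and* the target format asks
    # for deltas on fetch; every other combination returns False.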
2001

    def _fetch_revision_texts(self, revs):
        # fetch signatures first and then the revision texts
        # may need to be a InterRevisionStore call here.
        from_sf = self.from_repository.signatures
        # A missing signature is just skipped.
        keys = [(rev_id,) for rev_id in revs]
        signatures = versionedfile.filter_absent(from_sf.get_record_stream(
            keys,
            self.to_format._fetch_order,
            not self.to_format._fetch_uses_deltas))
        # If a revision has a delta, this is actually expanded inside the
        # insert_record_stream code now, which is an alternate fix for
        # bug #261339
        from_rf = self.from_repository.revisions
        revisions = from_rf.get_record_stream(
            keys,
            self.to_format._fetch_order,
            not self.delta_on_metadata())
        return [('signatures', signatures), ('revisions', revisions)]

    def _generate_root_texts(self, revs):
        """This will be called by get_stream between fetching weave texts and
        fetching the inventory weave.
        """
        if self._rich_root_upgrade():
            return _mod_fetch.Inter1and2Helper(
                self.from_repository).generate_root_texts(revs)
        else:
            return []
2031

    def get_stream(self, search):
        phase = 'file'
        revs = search.get_keys()
        graph = self.from_repository.get_graph()
        revs = tsort.topo_sort(graph.get_parent_map(revs))
        data_to_fetch = self.from_repository.item_keys_introduced_by(revs)
        text_keys = []
        for knit_kind, file_id, revisions in data_to_fetch:
            if knit_kind != phase:
                phase = knit_kind
                # Make a new progress bar for this phase
            if knit_kind == "file":
                # Accumulate file texts
                text_keys.extend([(file_id, revision) for revision in
                    revisions])
            elif knit_kind == "inventory":
                # Now copy the file texts.
                from_texts = self.from_repository.texts
                yield ('texts', from_texts.get_record_stream(
                    text_keys, self.to_format._fetch_order,
                    not self.to_format._fetch_uses_deltas))
                # Cause an error if a text occurs after we have done the
                # copy.
                text_keys = None
                # Before we process the inventory we generate the root
                # texts (if necessary) so that the inventories references
                # them.
                for _ in self._generate_root_texts(revs):
                    yield _
                # we fetch only the referenced inventories because we do not
                # know for unselected inventories whether all their required
                # texts are present in the other repository - it could be
                # corrupt.
                for info in self._get_inventory_stream(revs):
                    yield info
            elif knit_kind == "signatures":
                # Nothing to do here; this will be taken care of when
                # _fetch_revision_texts happens.
                pass
            elif knit_kind == "revisions":
                for record in self._fetch_revision_texts(revs):
                    yield record
            else:
                raise AssertionError("Unknown knit kind %r" % knit_kind)
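
    # Hedged sketch of the substream ordering produced above for a typical
    # fetch (the kinds come from item_keys_introduced_by): 'texts' first,
    # then any generated root texts and the inventory stream, and finally
    # 'signatures' and 'revisions', which matches the order StreamSink
    # expects to insert them in.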
2076

    def get_stream_for_missing_keys(self, missing_keys):
        # missing keys can only occur when we are byte copying and not
        # translating (because translation means we don't send
        # unreconstructable deltas ever).
        keys = {}
        keys['texts'] = set()
        keys['revisions'] = set()
        keys['inventories'] = set()
        keys['chk_bytes'] = set()
        keys['signatures'] = set()
        for key in missing_keys:
            keys[key[0]].add(key[1:])
        if len(keys['revisions']):
            # If we allowed copying revisions at this point, we could end up
            # copying a revision without copying its required texts: a
            # violation of the requirements for repository integrity.
            raise AssertionError(
                'cannot copy revisions to fill in missing deltas %s' % (
                    keys['revisions'],))
        for substream_kind, keys in keys.items():
            vf = getattr(self.from_repository, substream_kind)
            if vf is None and keys:
                raise AssertionError(
                    "cannot fill in keys for a versioned file we don't"
                    " have: %s needs %s" % (substream_kind, keys))
            if not keys:
                # No need to stream something we don't have
                continue
            if substream_kind == 'inventories':
                # Some missing keys are genuinely ghosts, filter those out.
                present = self.from_repository.inventories.get_parent_map(keys)
                revs = [key[0] for key in present]
                # Get the inventory stream more-or-less as we do for the
                # original stream; there's no reason to assume that records
                # direct from the source will be suitable for the sink. (Think
                # e.g. 2a -> 1.9-rich-root).
                for info in self._get_inventory_stream(revs, missing=True):
                    yield info
                continue
            # Ask for full texts always so that we don't need more round trips
            # after this stream.
            # Some of the missing keys are genuinely ghosts, so filter absent
            # records. The Sink is responsible for doing another check to
            # ensure that ghosts don't introduce missing data for future
            # fetches.
            stream = versionedfile.filter_absent(vf.get_record_stream(keys,
                self.to_format._fetch_order, True))
            yield substream_kind, stream
2126

    def inventory_fetch_order(self):
        if self._rich_root_upgrade():
            return 'topological'
        else:
            return self.to_format._fetch_order

    def _rich_root_upgrade(self):
        return (not self.from_repository._format.rich_root_data
                and self.to_format.rich_root_data)

    def _get_inventory_stream(self, revision_ids, missing=False):
        from_format = self.from_repository._format
        if (from_format.supports_chks and self.to_format.supports_chks
                and from_format.network_name() == self.to_format.network_name()):
            raise AssertionError(
                "this case should be handled by GroupCHKStreamSource")
        elif 'forceinvdeltas' in debug.debug_flags:
            return self._get_convertable_inventory_stream(revision_ids,
                delta_versus_null=missing)
        elif from_format.network_name() == self.to_format.network_name():
            # Same format.
            return self._get_simple_inventory_stream(revision_ids,
                missing=missing)
        elif (not from_format.supports_chks and not self.to_format.supports_chks and
              from_format._serializer == self.to_format._serializer):
            # Essentially the same format.
            return self._get_simple_inventory_stream(revision_ids,
                missing=missing)
        else:
            # Any time we switch serializations, we want to use an
            # inventory-delta based approach.
            return self._get_convertable_inventory_stream(revision_ids,
                delta_versus_null=missing)

    def _get_simple_inventory_stream(self, revision_ids, missing=False):
        # NB: This currently reopens the inventory weave in source;
        # using a single stream interface instead would avoid this.
        from_weave = self.from_repository.inventories
        if missing:
            delta_closure = True
        else:
            delta_closure = not self.delta_on_metadata()
        yield ('inventories', from_weave.get_record_stream(
            [(rev_id,) for rev_id in revision_ids],
            self.inventory_fetch_order(), delta_closure))

    def _get_convertable_inventory_stream(self, revision_ids,
            delta_versus_null=False):
        # The two formats are sufficiently different that there is no fast
        # path, so we need to send just inventorydeltas, which any
        # sufficiently modern client can insert into any repository.
        # The StreamSink code expects to be able to
        # convert on the target, so we need to put bytes-on-the-wire that can
        # be converted. That means inventory deltas (if the remote is <1.19,
        # RemoteStreamSink will fallback to VFS to insert the deltas).
        yield ('inventory-deltas',
               self._stream_invs_as_deltas(revision_ids,
                   delta_versus_null=delta_versus_null))
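
    # Decision summary for _get_inventory_stream above (descriptive, not new
    # behaviour): matching network names, or matching non-chk serializers,
    # select the simple byte-copy stream; anything else - or the
    # 'forceinvdeltas' debug flag - goes through the convertable
    # inventory-delta stream.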
2185

    def _stream_invs_as_deltas(self, revision_ids, delta_versus_null=False):
        """Return a stream of inventory-deltas for the given rev ids.

        :param revision_ids: The list of inventories to transmit
        :param delta_versus_null: Don't try to find a minimal delta for this
            entry, instead compute the delta versus the NULL_REVISION. This
            effectively streams a complete inventory. Used for stuff like
            filling in missing parents, etc.
        """
        from_repo = self.from_repository
        revision_keys = [(rev_id,) for rev_id in revision_ids]
        parent_map = from_repo.inventories.get_parent_map(revision_keys)
        # XXX: possibly repos could implement a more efficient iter_inv_deltas
        # method...
        inventories = self.from_repository.iter_inventories(
            revision_ids, 'topological')
        format = from_repo._format
        invs_sent_so_far = {_mod_revision.NULL_REVISION}
        inventory_cache = lru_cache.LRUCache(50)
        null_inventory = from_repo.revision_tree(
            _mod_revision.NULL_REVISION).root_inventory
        # XXX: ideally the rich-root/tree-refs flags would be per-revision, not
        # per-repo (e.g. streaming a non-rich-root revision out of a rich-root
        # repo back into a non-rich-root repo ought to be allowed)
        serializer = inventory_delta.InventoryDeltaSerializer(
            versioned_root=format.rich_root_data,
            tree_references=format.supports_tree_reference)
        for inv in inventories:
            key = (inv.revision_id,)
            parent_keys = parent_map.get(key, ())
            delta = None
            if not delta_versus_null and parent_keys:
                # The caller did not ask for complete inventories and we have
                # some parents that we can delta against. Make a delta against
                # each parent so that we can find the smallest.
                parent_ids = [parent_key[0] for parent_key in parent_keys]
                for parent_id in parent_ids:
                    if parent_id not in invs_sent_so_far:
                        # We don't know that the remote side has this basis, so
                        # we can't use it.
                        continue
                    if parent_id == _mod_revision.NULL_REVISION:
                        parent_inv = null_inventory
                    else:
                        parent_inv = inventory_cache.get(parent_id, None)
                        if parent_inv is None:
                            parent_inv = from_repo.get_inventory(parent_id)
                    candidate_delta = inv._make_delta(parent_inv)
                    if (delta is None
                            or len(delta) > len(candidate_delta)):
                        delta = candidate_delta
                        basis_id = parent_id
            if delta is None:
                # Either none of the parents ended up being suitable, or we
                # were asked to delta against NULL
                basis_id = _mod_revision.NULL_REVISION
                delta = inv._make_delta(null_inventory)
            invs_sent_so_far.add(inv.revision_id)
            inventory_cache[inv.revision_id] = inv
            delta_serialized = serializer.delta_to_lines(basis_id, key[-1], delta)
            yield versionedfile.ChunkedContentFactory(
                key, parent_keys, None, delta_serialized, chunks_are_lines=True)
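
    # Worked example of the basis choice above (revision ids hypothetical):
    # if inv has parents A and B, where A was already sent and yields a
    # 3-item delta while B was never sent, the loop picks basis_id = A;
    # if no parent qualifies, the delta is taken against NULL_REVISION,
    # which streams the complete inventory.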
2249


class _VersionedFileChecker(object):

    def __init__(self, repository, text_key_references=None, ancestors=None):
        self.repository = repository
        self.text_index = self.repository._generate_text_key_index(
            text_key_references=text_key_references, ancestors=ancestors)

    def calculate_file_version_parents(self, text_key):
        """Calculate the correct parents for a file version according to
        the inventories.
        """
        parent_keys = self.text_index[text_key]
        if parent_keys == [_mod_revision.NULL_REVISION]:
            return ()
        return tuple(parent_keys)

    def check_file_version_parents(self, texts, progress_bar=None):
        """Check the parents stored in a versioned file are correct.

        It also detects file versions that are not referenced by their
        corresponding revision's inventory.

        :returns: A tuple of (wrong_parents, dangling_file_versions).
            wrong_parents is a dict mapping {revision_id: (stored_parents,
            correct_parents)} for each revision_id where the stored parents
            are not correct. dangling_file_versions is a set of (file_id,
            revision_id) tuples for versions that are present in this versioned
            file, but not used by the corresponding inventory.
        """
        local_progress = None
        if progress_bar is None:
            local_progress = ui.ui_factory.nested_progress_bar()
            progress_bar = local_progress
        try:
            return self._check_file_version_parents(texts, progress_bar)
        finally:
            if local_progress:
                local_progress.finished()

    def _check_file_version_parents(self, texts, progress_bar):
        """See check_file_version_parents."""
        wrong_parents = {}
        self.file_ids = {file_id for file_id, _ in self.text_index}
        # text keys is now grouped by file_id
        n_versions = len(self.text_index)
        progress_bar.update(gettext('loading text store'), 0, n_versions)
        parent_map = self.repository.texts.get_parent_map(self.text_index)
        # On unlistable transports this could well be empty/error...
        text_keys = self.repository.texts.keys()
        unused_keys = frozenset(text_keys) - set(self.text_index)
        for num, key in enumerate(self.text_index):
            progress_bar.update(
                gettext('checking text graph'), num, n_versions)
            correct_parents = self.calculate_file_version_parents(key)
            try:
                knit_parents = parent_map[key]
            except errors.RevisionNotPresent:
                # Missing text!
                knit_parents = None
            if correct_parents != knit_parents:
                wrong_parents[key] = (knit_parents, correct_parents)
        return wrong_parents, unused_keys
2313


class InterVersionedFileRepository(InterRepository):

    _walk_to_common_revisions_batch_size = 50

    supports_fetch_spec = True

    def fetch(self, revision_id=None, find_ghosts=False,
            fetch_spec=None, lossy=False):
        """Fetch the content required to construct revision_id.

        The content is copied from self.source to self.target.

        :param revision_id: if None all content is copied, if NULL_REVISION no
            content is copied.
        :return: FetchResult
        """
        if lossy:
            raise errors.LossyPushToSameVCS(self.source, self.target)
        if self.target._format.experimental:
            ui.ui_factory.show_user_warning(
                'experimental_format_fetch',
                from_format=self.source._format,
                to_format=self.target._format)
        from breezy.bzr.fetch import RepoFetcher
        # See <https://launchpad.net/bugs/456077> asking for a warning here
        if self.source._format.network_name() != self.target._format.network_name():
            ui.ui_factory.show_user_warning(
                'cross_format_fetch', from_format=self.source._format,
                to_format=self.target._format)
        with self.lock_write():
            f = RepoFetcher(to_repository=self.target,
                            from_repository=self.source,
                            last_revision=revision_id,
                            fetch_spec=fetch_spec,
                            find_ghosts=find_ghosts)
            return FetchResult()
2350

    def _walk_to_common_revisions(self, revision_ids, if_present_ids=None):
        """Walk out from revision_ids in source to revisions target has.

        :param revision_ids: The start point for the search.
        :return: A set of revision ids.
        """
        target_graph = self.target.get_graph()
        revision_ids = frozenset(revision_ids)
        if if_present_ids:
            all_wanted_revs = revision_ids.union(if_present_ids)
        else:
            all_wanted_revs = revision_ids
        missing_revs = set()
        source_graph = self.source.get_graph()
        # ensure we don't pay silly lookup costs.
        searcher = source_graph._make_breadth_first_searcher(all_wanted_revs)
        null_set = frozenset([_mod_revision.NULL_REVISION])
        searcher_exhausted = False
        while True:
            next_revs = set()
            ghosts = set()
            # Iterate the searcher until we have enough next_revs
            while len(next_revs) < self._walk_to_common_revisions_batch_size:
                try:
                    next_revs_part, ghosts_part = searcher.next_with_ghosts()
                    next_revs.update(next_revs_part)
                    ghosts.update(ghosts_part)
                except StopIteration:
                    searcher_exhausted = True
                    break
            # If there are ghosts in the source graph, and the caller asked for
            # them, make sure that they are present in the target.
            # We don't care about other ghosts as we can't fetch them and
            # haven't been asked to.
            ghosts_to_check = set(revision_ids.intersection(ghosts))
            revs_to_get = set(next_revs).union(ghosts_to_check)
            if revs_to_get:
                have_revs = set(target_graph.get_parent_map(revs_to_get))
                # we always have NULL_REVISION present.
                have_revs = have_revs.union(null_set)
                # Check if the target is missing any ghosts we need.
                ghosts_to_check.difference_update(have_revs)
                if ghosts_to_check:
                    # One of the caller's revision_ids is a ghost in both the
                    # source and the target.
                    raise errors.NoSuchRevision(
                        self.source, ghosts_to_check.pop())
                missing_revs.update(next_revs - have_revs)
                # Because we may have walked past the original stop point, make
                # sure everything is stopped
                stop_revs = searcher.find_seen_ancestors(have_revs)
                searcher.stop_searching_any(stop_revs)
            if searcher_exhausted:
                break
        (started_keys, excludes, included_keys) = searcher.get_state()
        return vf_search.SearchResult(started_keys, excludes,
            len(included_keys), included_keys)
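
    # Hedged sketch (the revision id is hypothetical): the batched walk above
    # stops at revisions the target already has, so the returned SearchResult
    # describes exactly the revisions still to fetch:
    #
    #     result = inter._walk_to_common_revisions([b'tip-rev'])
    #     missing = result.get_keys()  # ancestors of tip-rev absent in target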
2408

    def search_missing_revision_ids(self,
            find_ghosts=True, revision_ids=None, if_present_ids=None,
            limit=None):
        """Return the revision ids that source has that target does not.

        :param revision_ids: return revision ids included by these
            revision_ids. NoSuchRevision will be raised if any of these
            revisions are not present.
        :param if_present_ids: like revision_ids, but will not cause
            NoSuchRevision if any of these are absent, instead they will simply
            not be in the result. This is useful for e.g. finding revisions
            to fetch for tags, which may reference absent revisions.
        :param find_ghosts: If True find missing revisions in deep history
            rather than just finding the surface difference.
        :return: A breezy.graph.SearchResult.
        """
        with self.lock_read():
            # stop searching at found target revisions.
            if not find_ghosts and (revision_ids is not None or
                    if_present_ids is not None):
                result = self._walk_to_common_revisions(revision_ids,
                    if_present_ids=if_present_ids)
                if limit is None:
                    return result
                result_set = result.get_keys()
            else:
                # generic, possibly worst case, slow code path.
                target_ids = set(self.target.all_revision_ids())
                source_ids = self._present_source_revisions_for(
                    revision_ids, if_present_ids)
                result_set = set(source_ids).difference(target_ids)
            if limit is not None:
                topo_ordered = self.source.get_graph().iter_topo_order(result_set)
                result_set = set(itertools.islice(topo_ordered, limit))
            return self.source.revision_ids_to_search_result(result_set)
2444

    def _present_source_revisions_for(self, revision_ids, if_present_ids=None):
        """Returns set of all revisions in ancestry of revision_ids present in
        the source repo.

        :param revision_ids: if None, all revisions in source are returned.
        :param if_present_ids: like revision_ids, but if any/all of these are
            absent no error is raised.
        """
        if revision_ids is not None or if_present_ids is not None:
            # First, ensure all specified revisions exist. Callers expect
            # NoSuchRevision when they pass absent revision_ids here.
            if revision_ids is None:
                revision_ids = set()
            if if_present_ids is None:
                if_present_ids = set()
            revision_ids = set(revision_ids)
            if_present_ids = set(if_present_ids)
            all_wanted_ids = revision_ids.union(if_present_ids)
            graph = self.source.get_graph()
            present_revs = set(graph.get_parent_map(all_wanted_ids))
            missing = revision_ids.difference(present_revs)
            if missing:
                raise errors.NoSuchRevision(self.source, missing.pop())
            found_ids = all_wanted_ids.intersection(present_revs)
            source_ids = [rev_id for (rev_id, parents) in
                          graph.iter_ancestry(found_ids)
                          if rev_id != _mod_revision.NULL_REVISION and
                          parents is not None]
        else:
            source_ids = self.source.all_revision_ids()
        return set(source_ids)

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @classmethod
    def is_compatible(cls, source, target):
        # The default implementation is compatible with everything
        return (source._format.supports_full_versioned_files
                and target._format.supports_full_versioned_files)
2487


class InterDifferingSerializer(InterVersionedFileRepository):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        if not source._format.supports_full_versioned_files:
            return False
        if not target._format.supports_full_versioned_files:
            return False
        # This is redundant with format.check_conversion_target(), however that
        # raises an exception, and we just want to say "False" as in we won't
        # support converting between these formats.
        if 'IDS_never' in debug.debug_flags:
            return False
        if source.supports_rich_root() and not target.supports_rich_root():
            return False
        if (source._format.supports_tree_reference and
                not target._format.supports_tree_reference):
            return False
        if target._fallback_repositories and target._format.supports_chks:
            # IDS doesn't know how to copy CHKs for the parent inventories it
            # adds to stacked repos.
            return False
        if 'IDS_always' in debug.debug_flags:
            return True
        # Only use this code path for local source and target. IDS does far
        # too much IO (both bandwidth and roundtrips) over a network.
        if not source.controldir.transport.base.startswith('file:///'):
            return False
        if not target.controldir.transport.base.startswith('file:///'):
            return False
        return True
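
    # Summary sketch of the policy above (descriptive, not new behaviour):
    # InterDifferingSerializer is only selected for local file:///
    # repositories with compatible models; the IDS_never / IDS_always debug
    # flags force the decision either way for testing.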
2523

    def _get_trees(self, revision_ids, cache):
        possible_trees = []
        for rev_id in revision_ids:
            if rev_id in cache:
                possible_trees.append((rev_id, cache[rev_id]))
            else:
                # Not cached, but inventory might be present anyway.
                try:
                    tree = self.source.revision_tree(rev_id)
                except errors.NoSuchRevision:
                    # Nope, parent is ghost.
                    pass
                else:
                    cache[rev_id] = tree
                    possible_trees.append((rev_id, tree))
        return possible_trees
2540

    def _get_delta_for_revision(self, tree, parent_ids, possible_trees):
        """Get the best delta and base for this revision.

        :return: (basis_id, delta)
        """
        deltas = []
        # Generate deltas against each tree, to find the shortest.
        # FIXME: Support nested trees
        texts_possibly_new_in_tree = set()
        for basis_id, basis_tree in possible_trees:
            delta = tree.root_inventory._make_delta(basis_tree.root_inventory)
            for old_path, new_path, file_id, new_entry in delta:
                if new_path is None:
                    # This file_id isn't present in the new rev, so we don't
                    # care about it.
                    continue
                if not new_path:
                    # Rich roots are handled elsewhere...
                    continue
                kind = new_entry.kind
                if kind != 'directory' and kind != 'file':
                    # No text record associated with this inventory entry.
                    continue
                # This is a directory or file that has changed somehow.
                texts_possibly_new_in_tree.add((file_id, new_entry.revision))
            deltas.append((len(delta), basis_id, delta))
        deltas.sort()
        return deltas[0][1:]

    def _fetch_parent_invs_for_stacking(self, parent_map, cache):
        """Find all parent revisions that are absent, but for which the
        inventory is present, and copy those inventories.

        This is necessary to preserve correctness when the source is stacked
        without fallbacks configured. (Note that in cases like upgrade the
        source may not have _fallback_repositories even though it is
        stacked.)
        """
        parent_revs = set(itertools.chain.from_iterable(
            parent_map.values()))
        present_parents = self.source.get_parent_map(parent_revs)
        absent_parents = parent_revs.difference(present_parents)
        parent_invs_keys_for_stacking = self.source.inventories.get_parent_map(
            (rev_id,) for rev_id in absent_parents)
        parent_inv_ids = [key[-1] for key in parent_invs_keys_for_stacking]
        for parent_tree in self.source.revision_trees(parent_inv_ids):
            current_revision_id = parent_tree.get_revision_id()
            parents_parents_keys = parent_invs_keys_for_stacking[
                (current_revision_id,)]
            parents_parents = [key[-1] for key in parents_parents_keys]
            basis_id = _mod_revision.NULL_REVISION
            basis_tree = self.source.revision_tree(basis_id)
            delta = parent_tree.root_inventory._make_delta(
                basis_tree.root_inventory)
            self.target.add_inventory_by_delta(
                basis_id, delta, current_revision_id, parents_parents)
            cache[current_revision_id] = parent_tree
2598

    def _fetch_batch(self, revision_ids, basis_id, cache):
        """Fetch across a few revisions.

        :param revision_ids: The revisions to copy
        :param basis_id: The revision_id of a tree that must be in cache, used
            as a basis for delta when no other base is available
        :param cache: A cache of RevisionTrees that we can use.
        :return: The revision_id of the last converted tree. The RevisionTree
            for it will be in cache
        """
        # Walk through all revisions; get inventory deltas, copy referenced
        # texts that delta references, insert the delta, revision and
        # signature.
        root_keys_to_create = set()
        text_keys = set()
        pending_deltas = []
        pending_revisions = []
        parent_map = self.source.get_parent_map(revision_ids)
        self._fetch_parent_invs_for_stacking(parent_map, cache)
        self.source._safe_to_return_from_cache = True
        for tree in self.source.revision_trees(revision_ids):
            # Find an inventory delta for this revision.
            # Find text entries that need to be copied, too.
            current_revision_id = tree.get_revision_id()
            parent_ids = parent_map.get(current_revision_id, ())
            parent_trees = self._get_trees(parent_ids, cache)
            possible_trees = list(parent_trees)
            if len(possible_trees) == 0:
                # There either aren't any parents, or the parents are ghosts,
                # so just use the last converted tree.
                possible_trees.append((basis_id, cache[basis_id]))
            basis_id, delta = self._get_delta_for_revision(tree, parent_ids,
                possible_trees)
            revision = self.source.get_revision(current_revision_id)
            pending_deltas.append((basis_id, delta,
                current_revision_id, revision.parent_ids))
            if self._converting_to_rich_root:
                self._revision_id_to_root_id[current_revision_id] = \
                    tree.path2id('')
            # Determine which texts are present in this revision but not in
            # any of the available parents.
            texts_possibly_new_in_tree = set()
            for old_path, new_path, file_id, entry in delta:
                if new_path is None:
                    # This file_id isn't present in the new rev
                    continue
                if not new_path:
                    # This is the root
                    if not self.target.supports_rich_root():
                        # The target doesn't support rich root, so we don't
                        # copy
                        continue
                    if self._converting_to_rich_root:
                        # This can't be copied normally, we have to insert
                        # it specially
                        root_keys_to_create.add((file_id, entry.revision))
                        continue
                kind = entry.kind
                texts_possibly_new_in_tree.add((file_id, entry.revision))
            for basis_id, basis_tree in possible_trees:
                basis_inv = basis_tree.root_inventory
                for file_key in list(texts_possibly_new_in_tree):
                    file_id, file_revision = file_key
                    try:
                        entry = basis_inv.get_entry(file_id)
                    except errors.NoSuchId:
                        continue
                    if entry.revision == file_revision:
                        texts_possibly_new_in_tree.remove(file_key)
            text_keys.update(texts_possibly_new_in_tree)
            pending_revisions.append(revision)
            cache[current_revision_id] = tree
            basis_id = current_revision_id
        self.source._safe_to_return_from_cache = False
        # Copy file texts
        from_texts = self.source.texts
        to_texts = self.target.texts
        if root_keys_to_create:
            root_stream = _mod_fetch._new_root_data_stream(
                root_keys_to_create, self._revision_id_to_root_id, parent_map,
                self.source)
            to_texts.insert_record_stream(root_stream)
        to_texts.insert_record_stream(from_texts.get_record_stream(
            text_keys, self.target._format._fetch_order,
            not self.target._format._fetch_uses_deltas))
        # insert inventory deltas
        for delta in pending_deltas:
            self.target.add_inventory_by_delta(*delta)
        if self.target._fallback_repositories:
            # Make sure this stacked repository has all the parent inventories
            # for the new revisions that we are about to insert. We do this
            # before adding the revisions so that no revision is added until
            # all the inventories it may depend on are added.
            # Note that this is overzealous, as we may have fetched these in an
            # earlier batch.
            parent_ids = set()
            revision_ids = set()
            for revision in pending_revisions:
                revision_ids.add(revision.revision_id)
                parent_ids.update(revision.parent_ids)
            parent_ids.difference_update(revision_ids)
            parent_ids.discard(_mod_revision.NULL_REVISION)
            parent_map = self.source.get_parent_map(parent_ids)
            # we iterate over parent_map and not parent_ids because we don't
            # want to try copying any revision which is a ghost
            for parent_tree in self.source.revision_trees(parent_map):
                current_revision_id = parent_tree.get_revision_id()
                parents_parents = parent_map[current_revision_id]
                possible_trees = self._get_trees(parents_parents, cache)
                if len(possible_trees) == 0:
                    # There either aren't any parents, or the parents are
                    # ghosts, so just use the last converted tree.
                    possible_trees.append((basis_id, cache[basis_id]))
                basis_id, delta = self._get_delta_for_revision(parent_tree,
                    parents_parents, possible_trees)
                self.target.add_inventory_by_delta(
                    basis_id, delta, current_revision_id, parents_parents)
        # insert signatures and revisions
        for revision in pending_revisions:
            try:
                signature = self.source.get_signature_text(
                    revision.revision_id)
                self.target.add_signature_text(revision.revision_id,
                    signature)
            except errors.NoSuchRevision:
                pass
            self.target.add_revision(revision.revision_id, revision)
        return basis_id
2727

    def _fetch_all_revisions(self, revision_ids, pb):
        """Fetch everything for the list of revisions.

        :param revision_ids: The list of revisions to fetch. Must be in
            topological order.
        :param pb: A ProgressTask
        :return: None
        """
        basis_id, basis_tree = self._get_basis(revision_ids[0])
        batch_size = 100
        cache = lru_cache.LRUCache(100)
        cache[basis_id] = basis_tree
        del basis_tree  # We don't want to hang on to it here
        hints = []
        for offset in range(0, len(revision_ids), batch_size):
            self.target.start_write_group()
            try:
                pb.update(gettext('Transferring revisions'), offset,
                    len(revision_ids))
                batch = revision_ids[offset:offset + batch_size]
                basis_id = self._fetch_batch(batch, basis_id, cache)
            except:
                self.source._safe_to_return_from_cache = False
                self.target.abort_write_group()
                raise
            else:
                hint = self.target.commit_write_group()
                if hint:
                    hints.extend(hint)
        if hints and self.target._format.pack_compresses:
            self.target.pack(hint=hints)
        pb.update(gettext('Transferring revisions'), len(revision_ids),
            len(revision_ids))
2763

    def fetch(self, revision_id=None, find_ghosts=False,
            fetch_spec=None, lossy=False):
        """See InterRepository.fetch()."""
        if lossy:
            raise errors.LossyPushToSameVCS(self.source, self.target)
        if fetch_spec is not None:
            revision_ids = fetch_spec.get_keys()
        else:
            revision_ids = None
        if self.source._format.experimental:
            ui.ui_factory.show_user_warning('experimental_format_fetch',
                from_format=self.source._format,
                to_format=self.target._format)
        if (not self.source.supports_rich_root() and
                self.target.supports_rich_root()):
            self._converting_to_rich_root = True
            self._revision_id_to_root_id = {}
        else:
            self._converting_to_rich_root = False
        # See <https://launchpad.net/bugs/456077> asking for a warning here
        if self.source._format.network_name() != self.target._format.network_name():
            ui.ui_factory.show_user_warning('cross_format_fetch',
                from_format=self.source._format,
                to_format=self.target._format)
        with self.lock_write():
            if revision_ids is None:
                if revision_id:
                    search_revision_ids = [revision_id]
                else:
                    search_revision_ids = None
                revision_ids = self.target.search_missing_revision_ids(
                    self.source, revision_ids=search_revision_ids,
                    find_ghosts=find_ghosts).get_keys()
            if not revision_ids:
                return FetchResult(0)
            revision_ids = tsort.topo_sort(
                self.source.get_graph().get_parent_map(revision_ids))
            if not revision_ids:
                return FetchResult(0)
            # Walk through all revisions; get inventory deltas, copy referenced
            # texts that delta references, insert the delta, revision and
            # signature.
            with ui.ui_factory.nested_progress_bar() as pb:
                self._fetch_all_revisions(revision_ids, pb)
            return FetchResult(len(revision_ids))
2809

    def _get_basis(self, first_revision_id):
        """Get a revision and tree which exists in the target.

        This assumes that first_revision_id is selected for transmission
        because all other ancestors are already present. If we can't find an
        ancestor we fall back to NULL_REVISION since we know that is safe.

        :return: (basis_id, basis_tree)
        """
        first_rev = self.source.get_revision(first_revision_id)
        try:
            basis_id = first_rev.parent_ids[0]
            # only valid as a basis if the target has it
            self.target.get_revision(basis_id)
            # Try to get a basis tree - if it's a ghost it will hit the
            # NoSuchRevision case.
            basis_tree = self.source.revision_tree(basis_id)
        except (IndexError, errors.NoSuchRevision):
            basis_id = _mod_revision.NULL_REVISION
            basis_tree = self.source.revision_tree(basis_id)
        return basis_id, basis_tree


class InterSameDataRepository(InterVersionedFileRepository):
    """Code for converting between repositories that represent the same data.

    Data format and model must match for this to work.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        """Repository format for testing with.

        InterSameData can pull from subtree to subtree and from non-subtree to
        non-subtree, so we test this with the richest repository format.
        """
        from breezy.bzr import knitrepo
        return knitrepo.RepositoryFormatKnit3()

    @staticmethod
    def is_compatible(source, target):
        return (
            InterRepository._same_model(source, target)
            and source._format.supports_full_versioned_files
            and target._format.supports_full_versioned_files)


InterRepository.register_optimiser(InterVersionedFileRepository)
InterRepository.register_optimiser(InterDifferingSerializer)
InterRepository.register_optimiser(InterSameDataRepository)
2861


def install_revisions(repository, iterable, num_revisions=None, pb=None):
    """Install all revision data into a repository.

    Accepts an iterable of revision, tree, signature tuples. The signature
    may be None.
    """
    with WriteGroup(repository):
        inventory_cache = lru_cache.LRUCache(10)
        for n, (revision, revision_tree, signature) in enumerate(iterable):
            _install_revision(repository, revision, revision_tree, signature,
                inventory_cache)
            if pb is not None:
                pb.update(gettext('Transferring revisions'),
                    n + 1, num_revisions)
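
# Hedged usage sketch (the objects are hypothetical): install_revisions() is
# the bulk form of install_revision() below, used e.g. when applying bundles:
#
#     triples = [(rev, rev_tree, None)]  # the signature may be None
#     install_revisions(repo, triples, num_revisions=len(triples))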
2877


def _install_revision(repository, rev, revision_tree, signature,
        inventory_cache):
    """Install all revision data into a repository."""
    present_parents = []
    parent_trees = {}
    for p_id in rev.parent_ids:
        if repository.has_revision(p_id):
            present_parents.append(p_id)
            parent_trees[p_id] = repository.revision_tree(p_id)
        else:
            parent_trees[p_id] = repository.revision_tree(
                _mod_revision.NULL_REVISION)

    # FIXME: Support nested trees
    inv = revision_tree.root_inventory
    entries = inv.iter_entries()
    # backwards compatibility hack: skip the root id.
    if not repository.supports_rich_root():
        path, root = next(entries)
        if root.revision != rev.revision_id:
            raise errors.IncompatibleRevision(repr(repository))
    text_keys = {}
    for path, ie in entries:
        text_keys[(ie.file_id, ie.revision)] = ie
    text_parent_map = repository.texts.get_parent_map(text_keys)
    missing_texts = set(text_keys) - set(text_parent_map)
    # Add the texts that are not already present
    for text_key in missing_texts:
        ie = text_keys[text_key]
        text_parents = []
        # FIXME: TODO: The following loop overlaps/duplicates that done by
        # commit to determine parents. There is a latent/real bug here where
        # the parents inserted are not those commit would do - in particular
        # they are not filtered by heads(). RBC, AB
        for revision, tree in parent_trees.items():
            try:
                path = tree.id2path(ie.file_id)
            except errors.NoSuchId:
                continue
            parent_id = tree.get_file_revision(path)
            if parent_id in text_parents:
                continue
            text_parents.append((ie.file_id, parent_id))
        revision_tree_path = revision_tree.id2path(ie.file_id)
        with revision_tree.get_file(revision_tree_path) as f:
            lines = f.readlines()
        repository.texts.add_lines(text_key, text_parents, lines)
    try:
        # install the inventory
        if repository._format._commit_inv_deltas and len(rev.parent_ids):
            # Cache this inventory
            inventory_cache[rev.revision_id] = inv
            try:
                basis_inv = inventory_cache[rev.parent_ids[0]]
            except KeyError:
                repository.add_inventory(rev.revision_id, inv, present_parents)
            else:
                delta = inv._make_delta(basis_inv)
                repository.add_inventory_by_delta(rev.parent_ids[0], delta,
                    rev.revision_id, present_parents)
        else:
            repository.add_inventory(rev.revision_id, inv, present_parents)
    except errors.RevisionAlreadyPresent:
        pass
    if signature is not None:
        repository.add_signature_text(rev.revision_id, signature)
    repository.add_revision(rev.revision_id, rev, inv)


def install_revision(repository, rev, revision_tree):
    """Install all revision data into a repository."""
    install_revisions(repository, [(rev, revision_tree, None)])