1
# Copyright (C) 2005, 2006, 2007, 2008 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from bzrlib.lazy_import import lazy_import
18
lazy_import(globals(), """
37
revision as _mod_revision,
42
from bzrlib.bundle import serializer
43
from bzrlib.revisiontree import RevisionTree
44
from bzrlib.store.versioned import VersionedFileStore
45
from bzrlib.testament import Testament
48
from bzrlib import registry
49
from bzrlib.decorators import needs_read_lock, needs_write_lock
50
from bzrlib.inter import InterObject
51
from bzrlib.inventory import Inventory, InventoryDirectory, ROOT_ID
52
from bzrlib.symbol_versioning import (
58
from bzrlib.trace import mutter, mutter_callsite, warning
61
# Old formats display a warning, but only once
62
_deprecation_warning_done = False
65
class CommitBuilder(object):
66
"""Provides an interface to build up a commit.
68
This allows describing a tree to be committed without needing to
69
know the internals of the format of the repository.
72
# all clients should supply tree roots.
73
record_root_entry = True
74
# the default CommitBuilder does not manage trees whose root is versioned.
75
_versioned_root = False
77
def __init__(self, repository, parents, config, timestamp=None,
78
timezone=None, committer=None, revprops=None,
80
"""Initiate a CommitBuilder.
82
:param repository: Repository to commit to.
83
:param parents: Revision ids of the parents of the new revision.
84
:param config: Configuration to use.
85
:param timestamp: Optional timestamp recorded for commit.
86
:param timezone: Optional timezone for timestamp.
87
:param committer: Optional committer to set for commit.
88
:param revprops: Optional dictionary of revision properties.
89
:param revision_id: Optional revision id.
94
self._committer = self._config.username()
96
self._committer = committer
98
self.new_inventory = Inventory(None)
99
self._new_revision_id = revision_id
100
self.parents = parents
101
self.repository = repository
104
if revprops is not None:
105
self._revprops.update(revprops)
107
if timestamp is None:
108
timestamp = time.time()
109
# Restrict resolution to 1ms
110
self._timestamp = round(timestamp, 3)
113
self._timezone = osutils.local_time_offset()
115
self._timezone = int(timezone)
117
self._generate_revision_if_needed()
118
self.__heads = graph.HeadsCache(repository.get_graph()).heads
120
def commit(self, message):
121
"""Make the actual commit.
123
:return: The revision id of the recorded revision.
125
rev = _mod_revision.Revision(
126
timestamp=self._timestamp,
127
timezone=self._timezone,
128
committer=self._committer,
130
inventory_sha1=self.inv_sha1,
131
revision_id=self._new_revision_id,
132
properties=self._revprops)
133
rev.parent_ids = self.parents
134
self.repository.add_revision(self._new_revision_id, rev,
135
self.new_inventory, self._config)
136
self.repository.commit_write_group()
137
return self._new_revision_id
140
"""Abort the commit that is being built.
142
self.repository.abort_write_group()
144
def revision_tree(self):
145
"""Return the tree that was just committed.
147
After calling commit() this can be called to get a RevisionTree
148
representing the newly committed tree. This is preferred to
149
calling Repository.revision_tree() because that may require
150
deserializing the inventory, while we already have a copy in
153
return RevisionTree(self.repository, self.new_inventory,
154
self._new_revision_id)
156
def finish_inventory(self):
157
"""Tell the builder that the inventory is finished."""
158
if self.new_inventory.root is None:
159
raise AssertionError('Root entry should be supplied to'
160
' record_entry_contents, as of bzr 0.10.')
161
self.new_inventory.add(InventoryDirectory(ROOT_ID, '', None))
162
self.new_inventory.revision_id = self._new_revision_id
163
self.inv_sha1 = self.repository.add_inventory(
164
self._new_revision_id,
169
def _gen_revision_id(self):
170
"""Return new revision-id."""
171
return generate_ids.gen_revision_id(self._config.username(),
174
def _generate_revision_if_needed(self):
175
"""Create a revision id if None was supplied.
177
If the repository can not support user-specified revision ids
178
they should override this function and raise CannotSetRevisionId
179
if _new_revision_id is not None.
181
:raises: CannotSetRevisionId
183
if self._new_revision_id is None:
184
self._new_revision_id = self._gen_revision_id()
185
self.random_revid = True
187
self.random_revid = False
189
def _heads(self, file_id, revision_ids):
190
"""Calculate the graph heads for revision_ids in the graph of file_id.
192
This can use either a per-file graph or a global revision graph as we
193
have an identity relationship between the two graphs.
195
return self.__heads(revision_ids)
197
def _check_root(self, ie, parent_invs, tree):
198
"""Helper for record_entry_contents.
200
:param ie: An entry being added.
201
:param parent_invs: The inventories of the parent revisions of the
203
:param tree: The tree that is being committed.
205
# In this revision format, root entries have no knit or weave When
206
# serializing out to disk and back in root.revision is always
208
ie.revision = self._new_revision_id
210
def _get_delta(self, ie, basis_inv, path):
211
"""Get a delta against the basis inventory for ie."""
212
if ie.file_id not in basis_inv:
214
return (None, path, ie.file_id, ie)
215
elif ie != basis_inv[ie.file_id]:
217
# TODO: avoid tis id2path call.
218
return (basis_inv.id2path(ie.file_id), path, ie.file_id, ie)
223
def record_entry_contents(self, ie, parent_invs, path, tree,
225
"""Record the content of ie from tree into the commit if needed.
227
Side effect: sets ie.revision when unchanged
229
:param ie: An inventory entry present in the commit.
230
:param parent_invs: The inventories of the parent revisions of the
232
:param path: The path the entry is at in the tree.
233
:param tree: The tree which contains this entry and should be used to
235
:param content_summary: Summary data from the tree about the paths
236
content - stat, length, exec, sha/link target. This is only
237
accessed when the entry has a revision of None - that is when it is
238
a candidate to commit.
239
:return: A tuple (change_delta, version_recorded). change_delta is
240
an inventory_delta change for this entry against the basis tree of
241
the commit, or None if no change occured against the basis tree.
242
version_recorded is True if a new version of the entry has been
243
recorded. For instance, committing a merge where a file was only
244
changed on the other side will return (delta, False).
246
if self.new_inventory.root is None:
247
if ie.parent_id is not None:
248
raise errors.RootMissing()
249
self._check_root(ie, parent_invs, tree)
250
if ie.revision is None:
251
kind = content_summary[0]
253
# ie is carried over from a prior commit
255
# XXX: repository specific check for nested tree support goes here - if
256
# the repo doesn't want nested trees we skip it ?
257
if (kind == 'tree-reference' and
258
not self.repository._format.supports_tree_reference):
259
# mismatch between commit builder logic and repository:
260
# this needs the entry creation pushed down into the builder.
261
raise NotImplementedError('Missing repository subtree support.')
262
self.new_inventory.add(ie)
264
# TODO: slow, take it out of the inner loop.
266
basis_inv = parent_invs[0]
268
basis_inv = Inventory(root_id=None)
270
# ie.revision is always None if the InventoryEntry is considered
271
# for committing. We may record the previous parents revision if the
272
# content is actually unchanged against a sole head.
273
if ie.revision is not None:
274
if not self._versioned_root and path == '':
275
# repositories that do not version the root set the root's
276
# revision to the new commit even when no change occurs, and
277
# this masks when a change may have occurred against the basis,
278
# so calculate if one happened.
279
if ie.file_id in basis_inv:
280
delta = (basis_inv.id2path(ie.file_id), path,
284
delta = (None, path, ie.file_id, ie)
287
# we don't need to commit this, because the caller already
288
# determined that an existing revision of this file is
289
# appropriate. If its not being considered for committing then
290
# it and all its parents to the root must be unaltered so
291
# no-change against the basis.
292
if ie.revision == self._new_revision_id:
293
raise AssertionError("Impossible situation, a skipped "
294
"inventory entry (%r) claims to be modified in this "
295
"commit (%r).", (ie, self._new_revision_id))
297
# XXX: Friction: parent_candidates should return a list not a dict
298
# so that we don't have to walk the inventories again.
299
parent_candiate_entries = ie.parent_candidates(parent_invs)
300
head_set = self._heads(ie.file_id, parent_candiate_entries.keys())
302
for inv in parent_invs:
303
if ie.file_id in inv:
304
old_rev = inv[ie.file_id].revision
305
if old_rev in head_set:
306
heads.append(inv[ie.file_id].revision)
307
head_set.remove(inv[ie.file_id].revision)
310
# now we check to see if we need to write a new record to the
312
# We write a new entry unless there is one head to the ancestors, and
313
# the kind-derived content is unchanged.
315
# Cheapest check first: no ancestors, or more the one head in the
316
# ancestors, we write a new node.
320
# There is a single head, look it up for comparison
321
parent_entry = parent_candiate_entries[heads[0]]
322
# if the non-content specific data has changed, we'll be writing a
324
if (parent_entry.parent_id != ie.parent_id or
325
parent_entry.name != ie.name):
327
# now we need to do content specific checks:
329
# if the kind changed the content obviously has
330
if kind != parent_entry.kind:
333
if content_summary[2] is None:
334
raise ValueError("Files must not have executable = None")
336
if (# if the file length changed we have to store:
337
parent_entry.text_size != content_summary[1] or
338
# if the exec bit has changed we have to store:
339
parent_entry.executable != content_summary[2]):
341
elif parent_entry.text_sha1 == content_summary[3]:
342
# all meta and content is unchanged (using a hash cache
343
# hit to check the sha)
344
ie.revision = parent_entry.revision
345
ie.text_size = parent_entry.text_size
346
ie.text_sha1 = parent_entry.text_sha1
347
ie.executable = parent_entry.executable
348
return self._get_delta(ie, basis_inv, path), False
350
# Either there is only a hash change(no hash cache entry,
351
# or same size content change), or there is no change on
353
# Provide the parent's hash to the store layer, so that the
354
# content is unchanged we will not store a new node.
355
nostore_sha = parent_entry.text_sha1
357
# We want to record a new node regardless of the presence or
358
# absence of a content change in the file.
360
ie.executable = content_summary[2]
361
lines = tree.get_file(ie.file_id, path).readlines()
363
ie.text_sha1, ie.text_size = self._add_text_to_weave(
364
ie.file_id, lines, heads, nostore_sha)
365
except errors.ExistingContent:
366
# Turns out that the file content was unchanged, and we were
367
# only going to store a new node if it was changed. Carry over
369
ie.revision = parent_entry.revision
370
ie.text_size = parent_entry.text_size
371
ie.text_sha1 = parent_entry.text_sha1
372
ie.executable = parent_entry.executable
373
return self._get_delta(ie, basis_inv, path), False
374
elif kind == 'directory':
376
# all data is meta here, nothing specific to directory, so
378
ie.revision = parent_entry.revision
379
return self._get_delta(ie, basis_inv, path), False
381
self._add_text_to_weave(ie.file_id, lines, heads, None)
382
elif kind == 'symlink':
383
current_link_target = content_summary[3]
385
# symlink target is not generic metadata, check if it has
387
if current_link_target != parent_entry.symlink_target:
390
# unchanged, carry over.
391
ie.revision = parent_entry.revision
392
ie.symlink_target = parent_entry.symlink_target
393
return self._get_delta(ie, basis_inv, path), False
394
ie.symlink_target = current_link_target
396
self._add_text_to_weave(ie.file_id, lines, heads, None)
397
elif kind == 'tree-reference':
399
if content_summary[3] != parent_entry.reference_revision:
402
# unchanged, carry over.
403
ie.reference_revision = parent_entry.reference_revision
404
ie.revision = parent_entry.revision
405
return self._get_delta(ie, basis_inv, path), False
406
ie.reference_revision = content_summary[3]
408
self._add_text_to_weave(ie.file_id, lines, heads, None)
410
raise NotImplementedError('unknown kind')
411
ie.revision = self._new_revision_id
412
return self._get_delta(ie, basis_inv, path), True
414
def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
415
# Note: as we read the content directly from the tree, we know its not
416
# been turned into unicode or badly split - but a broken tree
417
# implementation could give us bad output from readlines() so this is
418
# not a guarantee of safety. What would be better is always checking
419
# the content during test suite execution. RBC 20070912
420
parent_keys = tuple((file_id, parent) for parent in parents)
421
return self.repository.texts.add_lines(
422
(file_id, self._new_revision_id), parent_keys, new_lines,
423
nostore_sha=nostore_sha, random_id=self.random_revid,
424
check_content=False)[0:2]
427
class RootCommitBuilder(CommitBuilder):
    """This commitbuilder actually records the root id"""

    # the root entry gets versioned properly by this builder.
    _versioned_root = True

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents.

        The root entry is versioned like any other entry by this builder,
        so no special fixup of ie.revision is performed here.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """
443
######################################################################
446
class Repository(object):
447
"""Repository holding history for one or more branches.
449
The repository holds and retrieves historical information including
450
revisions and file history. It's normally accessed only by the Branch,
451
which views a particular line of development through that history.
453
The Repository builds on top of some byte storage facilies (the revisions,
454
signatures, inventories and texts attributes) and a Transport, which
455
respectively provide byte storage and a means to access the (possibly
458
The byte storage facilities are addressed via tuples, which we refer to
459
as 'keys' throughout the code base. Revision_keys, inventory_keys and
460
signature_keys are all 1-tuples: (revision_id,). text_keys are two-tuples:
461
(file_id, revision_id). We use this interface because it allows low
462
friction with the underlying code that implements disk indices, network
463
encoding and other parts of bzrlib.
465
:ivar revisions: A bzrlib.versionedfile.VersionedFiles instance containing
466
the serialised revisions for the repository. This can be used to obtain
467
revision graph information or to access raw serialised revisions.
468
The result of trying to insert data into the repository via this store
469
is undefined: it should be considered read-only except for implementors
471
:ivar signatures: A bzrlib.versionedfile.VersionedFiles instance containing
472
the serialised signatures for the repository. This can be used to
473
obtain access to raw serialised signatures. The result of trying to
474
insert data into the repository via this store is undefined: it should
475
be considered read-only except for implementors of repositories.
476
:ivar inventories: A bzrlib.versionedfile.VersionedFiles instance containing
477
the serialised inventories for the repository. This can be used to
478
obtain unserialised inventories. The result of trying to insert data
479
into the repository via this store is undefined: it should be
480
considered read-only except for implementors of repositories.
481
:ivar texts: A bzrlib.versionedfile.VersionedFiles instance containing the
482
texts of files and directories for the repository. This can be used to
483
obtain file texts or file graphs. Note that Repository.iter_file_bytes
484
is usually a better interface for accessing file texts.
485
The result of trying to insert data into the repository via this store
486
is undefined: it should be considered read-only except for implementors
488
:ivar _transport: Transport for file access to repository, typically
489
pointing to .bzr/repository.
492
# What class to use for a CommitBuilder. Often its simpler to change this
493
# in a Repository class subclass rather than to override
494
# get_commit_builder.
495
_commit_builder_class = CommitBuilder
496
# The search regex used by xml based repositories to determine what things
497
# where changed in a single commit.
498
_file_ids_altered_regex = lazy_regex.lazy_compile(
499
r'file_id="(?P<file_id>[^"]+)"'
500
r'.* revision="(?P<revision_id>[^"]+)"'
503
def abort_write_group(self):
504
"""Commit the contents accrued within the current write group.
506
:seealso: start_write_group.
508
if self._write_group is not self.get_transaction():
509
# has an unlock or relock occured ?
510
raise errors.BzrError('mismatched lock context and write group.')
511
self._abort_write_group()
512
self._write_group = None
514
def _abort_write_group(self):
515
"""Template method for per-repository write group cleanup.
517
This is called during abort before the write group is considered to be
518
finished and should cleanup any internal state accrued during the write
519
group. There is no requirement that data handed to the repository be
520
*not* made available - this is not a rollback - but neither should any
521
attempt be made to ensure that data added is fully commited. Abort is
522
invoked when an error has occured so futher disk or network operations
523
may not be possible or may error and if possible should not be
527
def add_fallback_repository(self, repository):
528
"""Add a repository to use for looking up data not held locally.
530
:param repository: A repository.
532
if not self._format.supports_external_lookups:
533
raise errors.UnstackableRepositoryFormat(self._format, self.base)
534
self._check_fallback_repository(repository)
535
self._fallback_repositories.append(repository)
536
self.texts.add_fallback_versioned_files(repository.texts)
537
self.inventories.add_fallback_versioned_files(repository.inventories)
538
self.revisions.add_fallback_versioned_files(repository.revisions)
539
self.signatures.add_fallback_versioned_files(repository.signatures)
541
def _check_fallback_repository(self, repository):
542
"""Check that this repository can fallback to repository safely.
544
Raise an error if not.
546
:param repository: A repository to fallback to.
548
return InterRepository._assert_same_model(self, repository)
550
def add_inventory(self, revision_id, inv, parents):
551
"""Add the inventory inv to the repository as revision_id.
553
:param parents: The revision ids of the parents that revision_id
554
is known to have and are in the repository already.
556
:returns: The validator(which is a sha1 digest, though what is sha'd is
557
repository format specific) of the serialized inventory.
559
if not self.is_in_write_group():
560
raise AssertionError("%r not in write group" % (self,))
561
_mod_revision.check_not_reserved_id(revision_id)
562
if not (inv.revision_id is None or inv.revision_id == revision_id):
563
raise AssertionError(
564
"Mismatch between inventory revision"
565
" id and insertion revid (%r, %r)"
566
% (inv.revision_id, revision_id))
568
raise AssertionError()
569
inv_lines = self._serialise_inventory_to_lines(inv)
570
return self._inventory_add_lines(revision_id, parents,
571
inv_lines, check_content=False)
573
def _inventory_add_lines(self, revision_id, parents, lines,
575
"""Store lines in inv_vf and return the sha1 of the inventory."""
576
parents = [(parent,) for parent in parents]
577
return self.inventories.add_lines((revision_id,), parents, lines,
578
check_content=check_content)[0]
580
def add_revision(self, revision_id, rev, inv=None, config=None):
581
"""Add rev to the revision store as revision_id.
583
:param revision_id: the revision id to use.
584
:param rev: The revision object.
585
:param inv: The inventory for the revision. if None, it will be looked
586
up in the inventory storer
587
:param config: If None no digital signature will be created.
588
If supplied its signature_needed method will be used
589
to determine if a signature should be made.
591
# TODO: jam 20070210 Shouldn't we check rev.revision_id and
593
_mod_revision.check_not_reserved_id(revision_id)
594
if config is not None and config.signature_needed():
596
inv = self.get_inventory(revision_id)
597
plaintext = Testament(rev, inv).as_short_text()
598
self.store_revision_signature(
599
gpg.GPGStrategy(config), plaintext, revision_id)
600
# check inventory present
601
if not self.inventories.get_parent_map([(revision_id,)]):
603
raise errors.WeaveRevisionNotPresent(revision_id,
606
# yes, this is not suitable for adding with ghosts.
607
rev.inventory_sha1 = self.add_inventory(revision_id, inv,
611
rev.inventory_sha1 = self.inventories.get_sha1s([key])[key]
612
self._add_revision(rev)
614
def _add_revision(self, revision):
615
text = self._serializer.write_revision_to_string(revision)
616
key = (revision.revision_id,)
617
parents = tuple((parent,) for parent in revision.parent_ids)
618
self.revisions.add_lines(key, parents, osutils.split_lines(text))
620
def all_revision_ids(self):
621
"""Returns a list of all the revision ids in the repository.
623
This is conceptually deprecated because code should generally work on
624
the graph reachable from a particular revision, and ignore any other
625
revisions that might be present. There is no direct replacement
628
if 'evil' in debug.debug_flags:
629
mutter_callsite(2, "all_revision_ids is linear with history.")
630
return self._all_revision_ids()
632
def _all_revision_ids(self):
633
"""Returns a list of all the revision ids in the repository.
635
These are in as much topological order as the underlying store can
638
raise NotImplementedError(self._all_revision_ids)
640
def break_lock(self):
641
"""Break a lock if one is present from another instance.
643
Uses the ui factory to ask for confirmation if the lock may be from
646
self.control_files.break_lock()
649
def _eliminate_revisions_not_present(self, revision_ids):
650
"""Check every revision id in revision_ids to see if we have it.
652
Returns a set of the present revisions.
655
graph = self.get_graph()
656
parent_map = graph.get_parent_map(revision_ids)
657
# The old API returned a list, should this actually be a set?
658
return parent_map.keys()
661
def create(a_bzrdir):
662
"""Construct the current default format repository in a_bzrdir."""
663
return RepositoryFormat.get_default_format().initialize(a_bzrdir)
665
def __init__(self, _format, a_bzrdir, control_files):
666
"""instantiate a Repository.
668
:param _format: The format of the repository on disk.
669
:param a_bzrdir: The BzrDir of the repository.
671
In the future we will have a single api for all stores for
672
getting file texts, inventories and revisions, then
673
this construct will accept instances of those things.
675
super(Repository, self).__init__()
676
self._format = _format
677
# the following are part of the public API for Repository:
678
self.bzrdir = a_bzrdir
679
self.control_files = control_files
680
self._transport = control_files._transport
681
self.base = self._transport.base
683
self._reconcile_does_inventory_gc = True
684
self._reconcile_fixes_text_parents = False
685
self._reconcile_backsup_inventory = True
686
# not right yet - should be more semantically clear ?
688
# TODO: make sure to construct the right store classes, etc, depending
689
# on whether escaping is required.
690
self._warn_if_deprecated()
691
self._write_group = None
692
# Additional places to query for data.
693
self._fallback_repositories = []
694
# What order should fetch operations request streams in?
695
# The default is unordered as that is the cheapest for an origin to
697
self._fetch_order = 'unordered'
698
# Does this repository use deltas that can be fetched as-deltas ?
699
# (E.g. knits, where the knit deltas can be transplanted intact.
700
# We default to False, which will ensure that enough data to get
701
# a full text out of any fetch stream will be grabbed.
702
self._fetch_uses_deltas = False
703
# Should fetch trigger a reconcile after the fetch? Only needed for
704
# some repository formats that can suffer internal inconsistencies.
705
self._fetch_reconcile = False
708
return '%s(%r)' % (self.__class__.__name__,
711
def has_same_location(self, other):
712
"""Returns a boolean indicating if this repository is at the same
713
location as another repository.
715
This might return False even when two repository objects are accessing
716
the same physical repository via different URLs.
718
if self.__class__ is not other.__class__:
720
return (self._transport.base == other._transport.base)
722
def is_in_write_group(self):
723
"""Return True if there is an open write group.
725
:seealso: start_write_group.
727
return self._write_group is not None
730
return self.control_files.is_locked()
732
def is_write_locked(self):
733
"""Return True if this object is write locked."""
734
return self.is_locked() and self.control_files._lock_mode == 'w'
736
def lock_write(self, token=None):
737
"""Lock this repository for writing.
739
This causes caching within the repository obejct to start accumlating
740
data during reads, and allows a 'write_group' to be obtained. Write
741
groups must be used for actual data insertion.
743
:param token: if this is already locked, then lock_write will fail
744
unless the token matches the existing lock.
745
:returns: a token if this instance supports tokens, otherwise None.
746
:raises TokenLockingNotSupported: when a token is given but this
747
instance doesn't support using token locks.
748
:raises MismatchedToken: if the specified token doesn't match the token
749
of the existing lock.
750
:seealso: start_write_group.
752
A token should be passed in if you know that you have locked the object
753
some other way, and need to synchronise this object's state with that
756
XXX: this docstring is duplicated in many places, e.g. lockable_files.py
758
result = self.control_files.lock_write(token=token)
759
for repo in self._fallback_repositories:
760
# Writes don't affect fallback repos
766
self.control_files.lock_read()
767
for repo in self._fallback_repositories:
771
def get_physical_lock_status(self):
772
return self.control_files.get_physical_lock_status()
774
def leave_lock_in_place(self):
775
"""Tell this repository not to release the physical lock when this
778
If lock_write doesn't return a token, then this method is not supported.
780
self.control_files.leave_in_place()
782
def dont_leave_lock_in_place(self):
783
"""Tell this repository to release the physical lock when this
784
object is unlocked, even if it didn't originally acquire it.
786
If lock_write doesn't return a token, then this method is not supported.
788
self.control_files.dont_leave_in_place()
791
def gather_stats(self, revid=None, committers=None):
792
"""Gather statistics from a revision id.
794
:param revid: The revision id to gather statistics from, if None, then
795
no revision specific statistics are gathered.
796
:param committers: Optional parameter controlling whether to grab
797
a count of committers from the revision specific statistics.
798
:return: A dictionary of statistics. Currently this contains:
799
committers: The number of committers if requested.
800
firstrev: A tuple with timestamp, timezone for the penultimate left
801
most ancestor of revid, if revid is not the NULL_REVISION.
802
latestrev: A tuple with timestamp, timezone for revid, if revid is
803
not the NULL_REVISION.
804
revisions: The total revision count in the repository.
805
size: An estimate disk size of the repository in bytes.
808
if revid and committers:
809
result['committers'] = 0
810
if revid and revid != _mod_revision.NULL_REVISION:
812
all_committers = set()
813
revisions = self.get_ancestry(revid)
814
# pop the leading None
816
first_revision = None
818
# ignore the revisions in the middle - just grab first and last
819
revisions = revisions[0], revisions[-1]
820
for revision in self.get_revisions(revisions):
821
if not first_revision:
822
first_revision = revision
824
all_committers.add(revision.committer)
825
last_revision = revision
827
result['committers'] = len(all_committers)
828
result['firstrev'] = (first_revision.timestamp,
829
first_revision.timezone)
830
result['latestrev'] = (last_revision.timestamp,
831
last_revision.timezone)
833
# now gather global repository information
834
# XXX: This is available for many repos regardless of listability.
835
if self.bzrdir.root_transport.listable():
836
# XXX: do we want to __define len__() ?
837
# Maybe the versionedfiles object should provide a different
838
# method to get the number of keys.
839
result['revisions'] = len(self.revisions.keys())
843
def find_branches(self, using=False):
844
"""Find branches underneath this repository.
846
This will include branches inside other branches.
848
:param using: If True, list only branches using this repository.
850
if using and not self.is_shared():
852
return [self.bzrdir.open_branch()]
853
except errors.NotBranchError:
855
class Evaluator(object):
858
self.first_call = True
860
def __call__(self, bzrdir):
861
# On the first call, the parameter is always the bzrdir
862
# containing the current repo.
863
if not self.first_call:
865
repository = bzrdir.open_repository()
866
except errors.NoRepositoryPresent:
869
return False, (None, repository)
870
self.first_call = False
872
value = (bzrdir.open_branch(), None)
873
except errors.NotBranchError:
878
for branch, repository in bzrdir.BzrDir.find_bzrdirs(
879
self.bzrdir.root_transport, evaluate=Evaluator()):
880
if branch is not None:
881
branches.append(branch)
882
if not using and repository is not None:
883
branches.extend(repository.find_branches())
887
def search_missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
888
"""Return the revision ids that other has that this does not.
890
These are returned in topological order.
892
revision_id: only return revision ids included by revision_id.
894
return InterRepository.get(other, self).search_missing_revision_ids(
895
revision_id, find_ghosts)
897
@deprecated_method(one_two)
899
def missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
900
"""Return the revision ids that other has that this does not.
902
These are returned in topological order.
904
revision_id: only return revision ids included by revision_id.
906
keys = self.search_missing_revision_ids(
907
other, revision_id, find_ghosts).get_keys()
910
parents = other.get_graph().get_parent_map(keys)
913
return tsort.topo_sort(parents)
917
"""Open the repository rooted at base.
919
For instance, if the repository is at URL/.bzr/repository,
920
Repository.open(URL) -> a Repository instance.
922
control = bzrdir.BzrDir.open(base)
923
return control.open_repository()
925
def copy_content_into(self, destination, revision_id=None):
926
"""Make a complete copy of the content in self into destination.
928
This is a destructive operation! Do not use it on existing
931
return InterRepository.get(self, destination).copy_content(revision_id)
933
def commit_write_group(self):
934
"""Commit the contents accrued within the current write group.
936
:seealso: start_write_group.
938
if self._write_group is not self.get_transaction():
939
# has an unlock or relock occured ?
940
raise errors.BzrError('mismatched lock context %r and '
942
(self.get_transaction(), self._write_group))
943
self._commit_write_group()
944
self._write_group = None
946
def _commit_write_group(self):
947
"""Template method for per-repository write group cleanup.
949
This is called before the write group is considered to be
950
finished and should ensure that all data handed to the repository
951
for writing during the write group is safely committed (to the
952
extent possible considering file system caching etc).
955
def fetch(self, source, revision_id=None, pb=None, find_ghosts=False):
956
"""Fetch the content required to construct revision_id from source.
958
If revision_id is None all content is copied.
959
:param find_ghosts: Find and copy revisions in the source that are
960
ghosts in the target (and not reachable directly by walking out to
961
the first-present revision in target from revision_id).
963
# fast path same-url fetch operations
964
if self.has_same_location(source):
965
# check that last_revision is in 'from' and then return a
967
if (revision_id is not None and
968
not _mod_revision.is_null(revision_id)):
969
self.get_revision(revision_id)
971
# if there is no specific appropriate InterRepository, this will get
972
# the InterRepository base class, which raises an
973
# IncompatibleRepositories when asked to fetch.
974
inter = InterRepository.get(source, self)
975
return inter.fetch(revision_id=revision_id, pb=pb,
976
find_ghosts=find_ghosts)
978
def create_bundle(self, target, base, fileobj, format=None):
979
return serializer.write_bundle(self, target, base, fileobj, format)
981
def get_commit_builder(self, branch, parents, config, timestamp=None,
982
timezone=None, committer=None, revprops=None,
984
"""Obtain a CommitBuilder for this repository.
986
:param branch: Branch to commit to.
987
:param parents: Revision ids of the parents of the new revision.
988
:param config: Configuration to use.
989
:param timestamp: Optional timestamp recorded for commit.
990
:param timezone: Optional timezone for timestamp.
991
:param committer: Optional committer to set for commit.
992
:param revprops: Optional dictionary of revision properties.
993
:param revision_id: Optional revision id.
995
result = self._commit_builder_class(self, parents, config,
996
timestamp, timezone, committer, revprops, revision_id)
997
self.start_write_group()
1001
if (self.control_files._lock_count == 1 and
1002
self.control_files._lock_mode == 'w'):
1003
if self._write_group is not None:
1004
self.abort_write_group()
1005
self.control_files.unlock()
1006
raise errors.BzrError(
1007
'Must end write groups before releasing write locks.')
1008
self.control_files.unlock()
1009
for repo in self._fallback_repositories:
1013
def clone(self, a_bzrdir, revision_id=None):
1014
"""Clone this repository into a_bzrdir using the current format.
1016
Currently no check is made that the format of this repository and
1017
the bzrdir format are compatible. FIXME RBC 20060201.
1019
:return: The newly created destination repository.
1021
# TODO: deprecate after 0.16; cloning this with all its settings is
1022
# probably not very useful -- mbp 20070423
1023
dest_repo = self._create_sprouting_repo(a_bzrdir, shared=self.is_shared())
1024
self.copy_content_into(dest_repo, revision_id)
1027
def start_write_group(self):
1028
"""Start a write group in the repository.
1030
Write groups are used by repositories which do not have a 1:1 mapping
1031
between file ids and backend store to manage the insertion of data from
1032
both fetch and commit operations.
1034
A write lock is required around the start_write_group/commit_write_group
1035
for the support of lock-requiring repository formats.
1037
One can only insert data into a repository inside a write group.
1041
if not self.is_write_locked():
1042
raise errors.NotWriteLocked(self)
1043
if self._write_group:
1044
raise errors.BzrError('already in a write group')
1045
self._start_write_group()
1046
# so we can detect unlock/relock - the write group is now entered.
1047
self._write_group = self.get_transaction()
1049
def _start_write_group(self):
1050
"""Template method for per-repository write group startup.
1052
This is called before the write group is considered to be
1057
def sprout(self, to_bzrdir, revision_id=None):
1058
"""Create a descendent repository for new development.
1060
Unlike clone, this does not copy the settings of the repository.
1062
dest_repo = self._create_sprouting_repo(to_bzrdir, shared=False)
1063
dest_repo.fetch(self, revision_id=revision_id)
1066
def _create_sprouting_repo(self, a_bzrdir, shared):
1067
if not isinstance(a_bzrdir._format, self.bzrdir._format.__class__):
1068
# use target default format.
1069
dest_repo = a_bzrdir.create_repository()
1071
# Most control formats need the repository to be specifically
1072
# created, but on some old all-in-one formats it's not needed
1074
dest_repo = self._format.initialize(a_bzrdir, shared=shared)
1075
except errors.UninitializableFormat:
1076
dest_repo = a_bzrdir.open_repository()
1080
def has_revision(self, revision_id):
1081
"""True if this repository has a copy of the revision."""
1082
return revision_id in self.has_revisions((revision_id,))
1085
def has_revisions(self, revision_ids):
1086
"""Probe to find out the presence of multiple revisions.
1088
:param revision_ids: An iterable of revision_ids.
1089
:return: A set of the revision_ids that were present.
1091
parent_map = self.revisions.get_parent_map(
1092
[(rev_id,) for rev_id in revision_ids])
1094
if _mod_revision.NULL_REVISION in revision_ids:
1095
result.add(_mod_revision.NULL_REVISION)
1096
result.update([key[0] for key in parent_map])
1100
def get_revision(self, revision_id):
1101
"""Return the Revision object for a named revision."""
1102
return self.get_revisions([revision_id])[0]
1105
def get_revision_reconcile(self, revision_id):
1106
"""'reconcile' helper routine that allows access to a revision always.
1108
This variant of get_revision does not cross check the weave graph
1109
against the revision one as get_revision does: but it should only
1110
be used by reconcile, or reconcile-alike commands that are correcting
1111
or testing the revision graph.
1113
return self._get_revisions([revision_id])[0]
1116
def get_revisions(self, revision_ids):
1117
"""Get many revisions at once."""
1118
return self._get_revisions(revision_ids)
1121
def _get_revisions(self, revision_ids):
1122
"""Core work logic to get many revisions without sanity checks."""
1123
for rev_id in revision_ids:
1124
if not rev_id or not isinstance(rev_id, basestring):
1125
raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
1126
keys = [(key,) for key in revision_ids]
1127
stream = self.revisions.get_record_stream(keys, 'unordered', True)
1129
for record in stream:
1130
if record.storage_kind == 'absent':
1131
raise errors.NoSuchRevision(self, record.key[0])
1132
text = record.get_bytes_as('fulltext')
1133
rev = self._serializer.read_revision_from_string(text)
1134
revs[record.key[0]] = rev
1135
return [revs[revid] for revid in revision_ids]
1138
def get_revision_xml(self, revision_id):
1139
# TODO: jam 20070210 This shouldn't be necessary since get_revision
1140
# would have already do it.
1141
# TODO: jam 20070210 Just use _serializer.write_revision_to_string()
1142
rev = self.get_revision(revision_id)
1143
rev_tmp = cStringIO.StringIO()
1144
# the current serializer..
1145
self._serializer.write_revision(rev, rev_tmp)
1147
return rev_tmp.getvalue()
1149
def get_deltas_for_revisions(self, revisions):
1150
"""Produce a generator of revision deltas.
1152
Note that the input is a sequence of REVISIONS, not revision_ids.
1153
Trees will be held in memory until the generator exits.
1154
Each delta is relative to the revision's lefthand predecessor.
1156
required_trees = set()
1157
for revision in revisions:
1158
required_trees.add(revision.revision_id)
1159
required_trees.update(revision.parent_ids[:1])
1160
trees = dict((t.get_revision_id(), t) for
1161
t in self.revision_trees(required_trees))
1162
for revision in revisions:
1163
if not revision.parent_ids:
1164
old_tree = self.revision_tree(_mod_revision.NULL_REVISION)
1166
old_tree = trees[revision.parent_ids[0]]
1167
yield trees[revision.revision_id].changes_from(old_tree)
1170
def get_revision_delta(self, revision_id):
1171
"""Return the delta for one revision.
1173
The delta is relative to the left-hand predecessor of the
1176
r = self.get_revision(revision_id)
1177
return list(self.get_deltas_for_revisions([r]))[0]
1180
def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
1181
signature = gpg_strategy.sign(plaintext)
1182
self.add_signature_text(revision_id, signature)
1185
def add_signature_text(self, revision_id, signature):
1186
self.signatures.add_lines((revision_id,), (),
1187
osutils.split_lines(signature))
1189
def find_text_key_references(self):
1190
"""Find the text key references within the repository.
1192
:return: a dictionary mapping (file_id, revision_id) tuples to altered file-ids to an iterable of
1193
revision_ids. Each altered file-ids has the exact revision_ids that
1194
altered it listed explicitly.
1195
:return: A dictionary mapping text keys ((fileid, revision_id) tuples)
1196
to whether they were referred to by the inventory of the
1197
revision_id that they contain. The inventory texts from all present
1198
revision ids are assessed to generate this report.
1200
revision_keys = self.revisions.keys()
1201
w = self.inventories
1202
pb = ui.ui_factory.nested_progress_bar()
1204
return self._find_text_key_references_from_xml_inventory_lines(
1205
w.iter_lines_added_or_present_in_keys(revision_keys, pb=pb))
1209
def _find_text_key_references_from_xml_inventory_lines(self,
1211
"""Core routine for extracting references to texts from inventories.
1213
This performs the translation of xml lines to revision ids.
1215
:param line_iterator: An iterator of lines, origin_version_id
1216
:return: A dictionary mapping text keys ((fileid, revision_id) tuples)
1217
to whether they were referred to by the inventory of the
1218
revision_id that they contain. Note that if that revision_id was
1219
not part of the line_iterator's output then False will be given -
1220
even though it may actually refer to that key.
1222
if not self._serializer.support_altered_by_hack:
1223
raise AssertionError(
1224
"_find_text_key_references_from_xml_inventory_lines only "
1225
"supported for branches which store inventory as unnested xml"
1226
", not on %r" % self)
1229
# this code needs to read every new line in every inventory for the
1230
# inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
1231
# not present in one of those inventories is unnecessary but not
1232
# harmful because we are filtering by the revision id marker in the
1233
# inventory lines : we only select file ids altered in one of those
1234
# revisions. We don't need to see all lines in the inventory because
1235
# only those added in an inventory in rev X can contain a revision=X
1237
unescape_revid_cache = {}
1238
unescape_fileid_cache = {}
1240
# jam 20061218 In a big fetch, this handles hundreds of thousands
1241
# of lines, so it has had a lot of inlining and optimizing done.
1242
# Sorry that it is a little bit messy.
1243
# Move several functions to be local variables, since this is a long
1245
search = self._file_ids_altered_regex.search
1246
unescape = _unescape_xml
1247
setdefault = result.setdefault
1248
for line, line_key in line_iterator:
1249
match = search(line)
1252
# One call to match.group() returning multiple items is quite a
1253
# bit faster than 2 calls to match.group() each returning 1
1254
file_id, revision_id = match.group('file_id', 'revision_id')
1256
# Inlining the cache lookups helps a lot when you make 170,000
1257
# lines and 350k ids, versus 8.4 unique ids.
1258
# Using a cache helps in 2 ways:
1259
# 1) Avoids unnecessary decoding calls
1260
# 2) Re-uses cached strings, which helps in future set and
1262
# (2) is enough that removing encoding entirely along with
1263
# the cache (so we are using plain strings) results in no
1264
# performance improvement.
1266
revision_id = unescape_revid_cache[revision_id]
1268
unescaped = unescape(revision_id)
1269
unescape_revid_cache[revision_id] = unescaped
1270
revision_id = unescaped
1272
# Note that unconditionally unescaping means that we deserialise
1273
# every fileid, which for general 'pull' is not great, but we don't
1274
# really want to have some many fulltexts that this matters anyway.
1277
file_id = unescape_fileid_cache[file_id]
1279
unescaped = unescape(file_id)
1280
unescape_fileid_cache[file_id] = unescaped
1283
key = (file_id, revision_id)
1284
setdefault(key, False)
1285
if revision_id == line_key[-1]:
1289
def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
1291
"""Helper routine for fileids_altered_by_revision_ids.
1293
This performs the translation of xml lines to revision ids.
1295
:param line_iterator: An iterator of lines, origin_version_id
1296
:param revision_ids: The revision ids to filter for. This should be a
1297
set or other type which supports efficient __contains__ lookups, as
1298
the revision id from each parsed line will be looked up in the
1299
revision_ids filter.
1300
:return: a dictionary mapping altered file-ids to an iterable of
1301
revision_ids. Each altered file-ids has the exact revision_ids that
1302
altered it listed explicitly.
1305
setdefault = result.setdefault
1307
self._find_text_key_references_from_xml_inventory_lines(
1308
line_iterator).iterkeys():
1309
# once data is all ensured-consistent; then this is
1310
# if revision_id == version_id
1311
if key[-1:] in revision_ids:
1312
setdefault(key[0], set()).add(key[-1])
1315
def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
1316
"""Find the file ids and versions affected by revisions.
1318
:param revisions: an iterable containing revision ids.
1319
:param _inv_weave: The inventory weave from this repository or None.
1320
If None, the inventory weave will be opened automatically.
1321
:return: a dictionary mapping altered file-ids to an iterable of
1322
revision_ids. Each altered file-ids has the exact revision_ids that
1323
altered it listed explicitly.
1325
selected_keys = set((revid,) for revid in revision_ids)
1326
w = _inv_weave or self.inventories
1327
pb = ui.ui_factory.nested_progress_bar()
1329
return self._find_file_ids_from_xml_inventory_lines(
1330
w.iter_lines_added_or_present_in_keys(
1331
selected_keys, pb=pb),
1336
def iter_files_bytes(self, desired_files):
1337
"""Iterate through file versions.
1339
Files will not necessarily be returned in the order they occur in
1340
desired_files. No specific order is guaranteed.
1342
Yields pairs of identifier, bytes_iterator. identifier is an opaque
1343
value supplied by the caller as part of desired_files. It should
1344
uniquely identify the file version in the caller's context. (Examples:
1345
an index number or a TreeTransform trans_id.)
1347
bytes_iterator is an iterable of bytestrings for the file. The
1348
kind of iterable and length of the bytestrings are unspecified, but for
1349
this implementation, it is a list of bytes produced by
1350
VersionedFile.get_record_stream().
1352
:param desired_files: a list of (file_id, revision_id, identifier)
1355
transaction = self.get_transaction()
1357
for file_id, revision_id, callable_data in desired_files:
1358
text_keys[(file_id, revision_id)] = callable_data
1359
for record in self.texts.get_record_stream(text_keys, 'unordered', True):
1360
if record.storage_kind == 'absent':
1361
raise errors.RevisionNotPresent(record.key, self)
1362
yield text_keys[record.key], record.get_bytes_as('fulltext')
1364
def _generate_text_key_index(self, text_key_references=None,
1366
"""Generate a new text key index for the repository.
1368
This is an expensive function that will take considerable time to run.
1370
:return: A dict mapping text keys ((file_id, revision_id) tuples) to a
1371
list of parents, also text keys. When a given key has no parents,
1372
the parents list will be [NULL_REVISION].
1374
# All revisions, to find inventory parents.
1375
if ancestors is None:
1376
graph = self.get_graph()
1377
ancestors = graph.get_parent_map(self.all_revision_ids())
1378
if text_key_references is None:
1379
text_key_references = self.find_text_key_references()
1380
pb = ui.ui_factory.nested_progress_bar()
1382
return self._do_generate_text_key_index(ancestors,
1383
text_key_references, pb)
1387
def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
1388
"""Helper for _generate_text_key_index to avoid deep nesting."""
1389
revision_order = tsort.topo_sort(ancestors)
1390
invalid_keys = set()
1392
for revision_id in revision_order:
1393
revision_keys[revision_id] = set()
1394
text_count = len(text_key_references)
1395
# a cache of the text keys to allow reuse; costs a dict of all the
1396
# keys, but saves a 2-tuple for every child of a given key.
1398
for text_key, valid in text_key_references.iteritems():
1400
invalid_keys.add(text_key)
1402
revision_keys[text_key[1]].add(text_key)
1403
text_key_cache[text_key] = text_key
1404
del text_key_references
1406
text_graph = graph.Graph(graph.DictParentsProvider(text_index))
1407
NULL_REVISION = _mod_revision.NULL_REVISION
1408
# Set a cache with a size of 10 - this suffices for bzr.dev but may be
1409
# too small for large or very branchy trees. However, for 55K path
1410
# trees, it would be easy to use too much memory trivially. Ideally we
1411
# could gauge this by looking at available real memory etc, but this is
1412
# always a tricky proposition.
1413
inventory_cache = lru_cache.LRUCache(10)
1414
batch_size = 10 # should be ~150MB on a 55K path tree
1415
batch_count = len(revision_order) / batch_size + 1
1417
pb.update("Calculating text parents.", processed_texts, text_count)
1418
for offset in xrange(batch_count):
1419
to_query = revision_order[offset * batch_size:(offset + 1) *
1423
for rev_tree in self.revision_trees(to_query):
1424
revision_id = rev_tree.get_revision_id()
1425
parent_ids = ancestors[revision_id]
1426
for text_key in revision_keys[revision_id]:
1427
pb.update("Calculating text parents.", processed_texts)
1428
processed_texts += 1
1429
candidate_parents = []
1430
for parent_id in parent_ids:
1431
parent_text_key = (text_key[0], parent_id)
1433
check_parent = parent_text_key not in \
1434
revision_keys[parent_id]
1436
# the parent parent_id is a ghost:
1437
check_parent = False
1438
# truncate the derived graph against this ghost.
1439
parent_text_key = None
1441
# look at the parent commit details inventories to
1442
# determine possible candidates in the per file graph.
1445
inv = inventory_cache[parent_id]
1447
inv = self.revision_tree(parent_id).inventory
1448
inventory_cache[parent_id] = inv
1449
parent_entry = inv._byid.get(text_key[0], None)
1450
if parent_entry is not None:
1452
text_key[0], parent_entry.revision)
1454
parent_text_key = None
1455
if parent_text_key is not None:
1456
candidate_parents.append(
1457
text_key_cache[parent_text_key])
1458
parent_heads = text_graph.heads(candidate_parents)
1459
new_parents = list(parent_heads)
1460
new_parents.sort(key=lambda x:candidate_parents.index(x))
1461
if new_parents == []:
1462
new_parents = [NULL_REVISION]
1463
text_index[text_key] = new_parents
1465
for text_key in invalid_keys:
1466
text_index[text_key] = [NULL_REVISION]
1469
def item_keys_introduced_by(self, revision_ids, _files_pb=None):
1470
"""Get an iterable listing the keys of all the data introduced by a set
1473
The keys will be ordered so that the corresponding items can be safely
1474
fetched and inserted in that order.
1476
:returns: An iterable producing tuples of (knit-kind, file-id,
1477
versions). knit-kind is one of 'file', 'inventory', 'signatures',
1478
'revisions'. file-id is None unless knit-kind is 'file'.
1480
# XXX: it's a bit weird to control the inventory weave caching in this
1481
# generator. Ideally the caching would be done in fetch.py I think. Or
1482
# maybe this generator should explicitly have the contract that it
1483
# should not be iterated until the previously yielded item has been
1485
inv_w = self.inventories
1487
# file ids that changed
1488
file_ids = self.fileids_altered_by_revision_ids(revision_ids, inv_w)
1490
num_file_ids = len(file_ids)
1491
for file_id, altered_versions in file_ids.iteritems():
1492
if _files_pb is not None:
1493
_files_pb.update("fetch texts", count, num_file_ids)
1495
yield ("file", file_id, altered_versions)
1496
# We're done with the files_pb. Note that it finished by the caller,
1497
# just as it was created by the caller.
1501
yield ("inventory", None, revision_ids)
1504
revisions_with_signatures = set()
1505
for rev_id in revision_ids:
1507
self.get_signature_text(rev_id)
1508
except errors.NoSuchRevision:
1512
revisions_with_signatures.add(rev_id)
1513
yield ("signatures", None, revisions_with_signatures)
1516
yield ("revisions", None, revision_ids)
1519
def get_inventory(self, revision_id):
1520
"""Get Inventory object by revision id."""
1521
return self.iter_inventories([revision_id]).next()
1523
def iter_inventories(self, revision_ids):
1524
"""Get many inventories by revision_ids.
1526
This will buffer some or all of the texts used in constructing the
1527
inventories in memory, but will only parse a single inventory at a
1530
:return: An iterator of inventories.
1532
if ((None in revision_ids)
1533
or (_mod_revision.NULL_REVISION in revision_ids)):
1534
raise ValueError('cannot get null revision inventory')
1535
return self._iter_inventories(revision_ids)
1537
def _iter_inventories(self, revision_ids):
1538
"""single-document based inventory iteration."""
1539
for text, revision_id in self._iter_inventory_xmls(revision_ids):
1540
yield self.deserialise_inventory(revision_id, text)
1542
def _iter_inventory_xmls(self, revision_ids):
1543
keys = [(revision_id,) for revision_id in revision_ids]
1544
stream = self.inventories.get_record_stream(keys, 'unordered', True)
1546
for record in stream:
1547
if record.storage_kind != 'absent':
1548
texts[record.key] = record.get_bytes_as('fulltext')
1550
raise errors.NoSuchRevision(self, record.key)
1552
yield texts[key], key[-1]
1554
def deserialise_inventory(self, revision_id, xml):
1555
"""Transform the xml into an inventory object.
1557
:param revision_id: The expected revision id of the inventory.
1558
:param xml: A serialised inventory.
1560
result = self._serializer.read_inventory_from_string(xml, revision_id)
1561
if result.revision_id != revision_id:
1562
raise AssertionError('revision id mismatch %s != %s' % (
1563
result.revision_id, revision_id))
1566
def serialise_inventory(self, inv):
1567
return self._serializer.write_inventory_to_string(inv)
1569
def _serialise_inventory_to_lines(self, inv):
1570
return self._serializer.write_inventory_to_lines(inv)
1572
def get_serializer_format(self):
1573
return self._serializer.format_num
1576
def get_inventory_xml(self, revision_id):
1577
"""Get inventory XML as a file object."""
1578
texts = self._iter_inventory_xmls([revision_id])
1580
text, revision_id = texts.next()
1581
except StopIteration:
1582
raise errors.HistoryMissing(self, 'inventory', revision_id)
1586
def get_inventory_sha1(self, revision_id):
1587
"""Return the sha1 hash of the inventory entry
1589
return self.get_revision(revision_id).inventory_sha1
1591
def iter_reverse_revision_history(self, revision_id):
1592
"""Iterate backwards through revision ids in the lefthand history
1594
:param revision_id: The revision id to start with. All its lefthand
1595
ancestors will be traversed.
1597
graph = self.get_graph()
1598
next_id = revision_id
1600
if next_id in (None, _mod_revision.NULL_REVISION):
1603
# Note: The following line may raise KeyError in the event of
1604
# truncated history. We decided not to have a try:except:raise
1605
# RevisionNotPresent here until we see a use for it, because of the
1606
# cost in an inner loop that is by its very nature O(history).
1607
# Robert Collins 20080326
1608
parents = graph.get_parent_map([next_id])[next_id]
1609
if len(parents) == 0:
1612
next_id = parents[0]
1615
def get_revision_inventory(self, revision_id):
1616
"""Return inventory of a past revision."""
1617
# TODO: Unify this with get_inventory()
1618
# bzr 0.0.6 and later imposes the constraint that the inventory_id
1619
# must be the same as its revision, so this is trivial.
1620
if revision_id is None:
1621
# This does not make sense: if there is no revision,
1622
# then it is the current tree inventory surely ?!
1623
# and thus get_root_id() is something that looks at the last
1624
# commit on the branch, and the get_root_id is an inventory check.
1625
raise NotImplementedError
1626
# return Inventory(self.get_root_id())
1628
return self.get_inventory(revision_id)
1630
def is_shared(self):
1631
"""Return True if this repository is flagged as a shared repository."""
1632
raise NotImplementedError(self.is_shared)
1635
def reconcile(self, other=None, thorough=False):
1636
"""Reconcile this repository."""
1637
from bzrlib.reconcile import RepoReconciler
1638
reconciler = RepoReconciler(self, thorough=thorough)
1639
reconciler.reconcile()
1642
def _refresh_data(self):
1643
"""Helper called from lock_* to ensure coherency with disk.
1645
The default implementation does nothing; it is however possible
1646
for repositories to maintain loaded indices across multiple locks
1647
by checking inside their implementation of this method to see
1648
whether their indices are still valid. This depends of course on
1649
the disk format being validatable in this manner.
1653
def revision_tree(self, revision_id):
1654
"""Return Tree for a revision on this branch.
1656
`revision_id` may be NULL_REVISION for the empty tree revision.
1658
revision_id = _mod_revision.ensure_null(revision_id)
1659
# TODO: refactor this to use an existing revision object
1660
# so we don't need to read it in twice.
1661
if revision_id == _mod_revision.NULL_REVISION:
1662
return RevisionTree(self, Inventory(root_id=None),
1663
_mod_revision.NULL_REVISION)
1665
inv = self.get_revision_inventory(revision_id)
1666
return RevisionTree(self, inv, revision_id)
1668
def revision_trees(self, revision_ids):
1669
"""Return Tree for a revision on this branch.
1671
`revision_id` may not be None or 'null:'"""
1672
inventories = self.iter_inventories(revision_ids)
1673
for inv in inventories:
1674
yield RevisionTree(self, inv, inv.revision_id)
1677
def get_ancestry(self, revision_id, topo_sorted=True):
1678
"""Return a list of revision-ids integrated by a revision.
1680
The first element of the list is always None, indicating the origin
1681
revision. This might change when we have history horizons, or
1682
perhaps we should have a new API.
1684
This is topologically sorted.
1686
if _mod_revision.is_null(revision_id):
1688
if not self.has_revision(revision_id):
1689
raise errors.NoSuchRevision(self, revision_id)
1690
graph = self.get_graph()
1692
search = graph._make_breadth_first_searcher([revision_id])
1695
found, ghosts = search.next_with_ghosts()
1696
except StopIteration:
1699
if _mod_revision.NULL_REVISION in keys:
1700
keys.remove(_mod_revision.NULL_REVISION)
1702
parent_map = graph.get_parent_map(keys)
1703
keys = tsort.topo_sort(parent_map)
1704
return [None] + list(keys)
1707
"""Compress the data within the repository.
1709
This operation only makes sense for some repository types. For other
1710
types it should be a no-op that just returns.
1712
This stub method does not require a lock, but subclasses should use
1713
@needs_write_lock as this is a long running call its reasonable to
1714
implicitly lock for the user.
1718
@deprecated_method(one_six)
1719
def print_file(self, file, revision_id):
1720
"""Print `file` to stdout.
1722
FIXME RBC 20060125 as John Meinel points out this is a bad api
1723
- it writes to stdout, it assumes that that is valid etc. Fix
1724
by creating a new more flexible convenience function.
1726
tree = self.revision_tree(revision_id)
1727
# use inventory as it was in that revision
1728
file_id = tree.inventory.path2id(file)
1730
# TODO: jam 20060427 Write a test for this code path
1731
# it had a bug in it, and was raising the wrong
1733
raise errors.BzrError("%r is not present in revision %s" % (file, revision_id))
1734
tree.print_file(file_id)
1736
def get_transaction(self):
1737
return self.control_files.get_transaction()
1739
@deprecated_method(one_one)
1740
def get_parents(self, revision_ids):
1741
"""See StackedParentsProvider.get_parents"""
1742
parent_map = self.get_parent_map(revision_ids)
1743
return [parent_map.get(r, None) for r in revision_ids]
1745
def get_parent_map(self, revision_ids):
1746
"""See graph._StackedParentsProvider.get_parent_map"""
1747
# revisions index works in keys; this just works in revisions
1748
# therefore wrap and unwrap
1751
for revision_id in revision_ids:
1752
if revision_id == _mod_revision.NULL_REVISION:
1753
result[revision_id] = ()
1754
elif revision_id is None:
1755
raise ValueError('get_parent_map(None) is not valid')
1757
query_keys.append((revision_id ,))
1758
for ((revision_id,), parent_keys) in \
1759
self.revisions.get_parent_map(query_keys).iteritems():
1761
result[revision_id] = tuple(parent_revid
1762
for (parent_revid,) in parent_keys)
1764
result[revision_id] = (_mod_revision.NULL_REVISION,)
1767
def _make_parents_provider(self):
1770
def get_graph(self, other_repository=None):
1771
"""Return the graph walker for this repository format"""
1772
parents_provider = self._make_parents_provider()
1773
if (other_repository is not None and
1774
not self.has_same_location(other_repository)):
1775
parents_provider = graph._StackedParentsProvider(
1776
[parents_provider, other_repository._make_parents_provider()])
1777
return graph.Graph(parents_provider)
1779
def _get_versioned_file_checker(self):
1780
"""Return an object suitable for checking versioned files."""
1781
return _VersionedFileChecker(self)
1783
def revision_ids_to_search_result(self, result_set):
1784
"""Convert a set of revision ids to a graph SearchResult."""
1785
result_parents = set()
1786
for parents in self.get_graph().get_parent_map(
1787
result_set).itervalues():
1788
result_parents.update(parents)
1789
included_keys = result_set.intersection(result_parents)
1790
start_keys = result_set.difference(included_keys)
1791
exclude_keys = result_parents.difference(result_set)
1792
result = graph.SearchResult(start_keys, exclude_keys,
1793
len(result_set), result_set)
1797
def set_make_working_trees(self, new_value):
1798
"""Set the policy flag for making working trees when creating branches.
1800
This only applies to branches that use this repository.
1802
The default is 'True'.
1803
:param new_value: True to restore the default, False to disable making
1806
raise NotImplementedError(self.set_make_working_trees)
1808
def make_working_trees(self):
1809
"""Returns the policy for making working trees on new branches."""
1810
raise NotImplementedError(self.make_working_trees)
1813
def sign_revision(self, revision_id, gpg_strategy):
1814
plaintext = Testament.from_revision(self, revision_id).as_short_text()
1815
self.store_revision_signature(gpg_strategy, plaintext, revision_id)
1818
def has_signature_for_revision_id(self, revision_id):
1819
"""Query for a revision signature for revision_id in the repository."""
1820
if not self.has_revision(revision_id):
1821
raise errors.NoSuchRevision(self, revision_id)
1822
sig_present = (1 == len(
1823
self.signatures.get_parent_map([(revision_id,)])))
1827
def get_signature_text(self, revision_id):
    """Return the text for a signature.

    :raises errors.NoSuchRevision: if no signature is stored for
        revision_id.
    """
    # Restored: the "'unordered', True" arguments were lost in the
    # mangled source.
    stream = self.signatures.get_record_stream([(revision_id,)],
        'unordered', True)
    record = stream.next()
    if record.storage_kind == 'absent':
        raise errors.NoSuchRevision(self, revision_id)
    return record.get_bytes_as('fulltext')
def check(self, revision_ids=None):
    """Check consistency of all history of given revision_ids.

    Different repository implementations should override _check().

    :param revision_ids: A non-empty list of revision_ids whose ancestry
         will be checked.  Typically the last revision_id of a branch.
    """
    return self._check(revision_ids)
def _check(self, revision_ids):
    """Default consistency check: run a full check.Check over self."""
    result = check.Check(self)
    # Restored: the check() call and return were lost in the mangled
    # source.
    result.check()
    return result
def _warn_if_deprecated(self):
    """Emit the once-per-process deprecation warning for old formats."""
    global _deprecation_warning_done
    # Only warn once per process, however many old repositories are
    # opened.  Restored: the early return was lost in the mangled source.
    if _deprecation_warning_done:
        return
    _deprecation_warning_done = True
    warning("Format %s for %s is deprecated - please use 'bzr upgrade' to get better performance"
            % (self._format, self.bzrdir.transport.base))
def supports_rich_root(self):
    """Return True if this repository's format versions the tree root."""
    return self._format.rich_root_data
def _check_ascii_revisionid(self, revision_id, method):
1864
"""Private helper for ascii-only repositories."""
1865
# weave repositories refuse to store revisionids that are non-ascii.
1866
if revision_id is not None:
1867
# weaves require ascii revision ids.
1868
if isinstance(revision_id, unicode):
1870
revision_id.encode('ascii')
1871
except UnicodeEncodeError:
1872
raise errors.NonAsciiRevisionId(method, self)
1875
revision_id.decode('ascii')
1876
except UnicodeDecodeError:
1877
raise errors.NonAsciiRevisionId(method, self)
1879
def revision_graph_can_have_wrong_parents(self):
1880
"""Is it possible for this repository to have a revision graph with
1883
If True, then this repository must also implement
1884
_find_inconsistent_revision_parents so that check and reconcile can
1885
check for inconsistencies before proceeding with other checks that may
1886
depend on the revision index being consistent.
1888
raise NotImplementedError(self.revision_graph_can_have_wrong_parents)
1891
# remove these delegates a while after bzr 0.15
def __make_delegated(name, from_module):
    """Install a module-level forwarder for a name that moved elsewhere.

    Accessing the forwarder emits a DeprecationWarning and returns the
    attribute from its new home module.
    """
    def _deprecated_repository_forwarder():
        symbol_versioning.warn('%s moved to %s in bzr 0.15'
            % (name, from_module),
            DeprecationWarning,
            stacklevel=2)
        m = __import__(from_module, globals(), locals(), [name])
        try:
            return getattr(m, name)
        except AttributeError:
            raise AttributeError('module %s has no name %s'
                % (m, name))
    globals()[name] = _deprecated_repository_forwarder

# NOTE(review): the for-loop headers were lost in the mangled source; the
# name lists below are restored from the visible entries (plus
# 'KnitRepository', present upstream) -- confirm against history.
for _name in [
        'AllInOneRepository',
        'WeaveMetaDirRepository',
        'PreSplitOutRepositoryFormat',
        'RepositoryFormat4',
        'RepositoryFormat5',
        'RepositoryFormat6',
        'RepositoryFormat7',
        ]:
    __make_delegated(_name, 'bzrlib.repofmt.weaverepo')

for _name in [
        'KnitRepository',
        'RepositoryFormatKnit',
        'RepositoryFormatKnit1',
        ]:
    __make_delegated(_name, 'bzrlib.repofmt.knitrepo')
def install_revision(repository, rev, revision_tree):
    """Install all revision data into a repository.

    Convenience wrapper around install_revisions for a single revision
    with no signature.
    """
    install_revisions(repository, [(rev, revision_tree, None)])
def install_revisions(repository, iterable, num_revisions=None, pb=None):
    """Install all revision data into a repository.

    Accepts an iterable of revision, tree, signature tuples.  The signature
    may be None.

    :param num_revisions: Total count used only for progress reporting.
    :param pb: Optional progress bar.
    """
    repository.start_write_group()
    try:
        for n, (revision, revision_tree, signature) in enumerate(iterable):
            _install_revision(repository, revision, revision_tree, signature)
            if pb is not None:
                pb.update('Transferring revisions', n + 1, num_revisions)
    except:
        # Deliberately bare: abort the write group on any failure, then
        # re-raise so the caller sees the original error.
        repository.abort_write_group()
        raise
    else:
        repository.commit_write_group()
def _install_revision(repository, rev, revision_tree, signature):
    """Install all revision data into a repository.

    Adds any missing file texts, the inventory, an optional signature,
    and finally the revision itself.
    """
    present_parents = []
    parent_trees = {}
    for p_id in rev.parent_ids:
        if repository.has_revision(p_id):
            present_parents.append(p_id)
            parent_trees[p_id] = repository.revision_tree(p_id)
        else:
            # Ghost parent: substitute the empty tree.
            parent_trees[p_id] = repository.revision_tree(
                                     _mod_revision.NULL_REVISION)

    inv = revision_tree.inventory
    entries = inv.iter_entries()
    # backwards compatibility hack: skip the root id.
    if not repository.supports_rich_root():
        path, root = entries.next()
        if root.revision != rev.revision_id:
            raise errors.IncompatibleRevision(repr(repository))
    text_keys = {}
    for path, ie in entries:
        text_keys[(ie.file_id, ie.revision)] = ie
    text_parent_map = repository.texts.get_parent_map(text_keys)
    missing_texts = set(text_keys) - set(text_parent_map)
    # Add the texts that are not already present
    for text_key in missing_texts:
        ie = text_keys[text_key]
        text_parents = []
        # FIXME: TODO: The following loop overlaps/duplicates that done by
        # commit to determine parents. There is a latent/real bug here where
        # the parents inserted are not those commit would do - in particular
        # they are not filtered by heads(). RBC, AB
        for revision, tree in parent_trees.iteritems():
            if ie.file_id not in tree:
                continue
            parent_id = tree.inventory[ie.file_id].revision
            if parent_id in text_parents:
                continue
            text_parents.append((ie.file_id, parent_id))
        lines = revision_tree.get_file(ie.file_id).readlines()
        repository.texts.add_lines(text_key, text_parents, lines)
    try:
        # install the inventory
        repository.add_inventory(rev.revision_id, inv, present_parents)
    except errors.RevisionAlreadyPresent:
        # Best-effort: the inventory being present already is fine.
        pass
    if signature is not None:
        repository.add_signature_text(rev.revision_id, signature)
    repository.add_revision(rev.revision_id, rev, inv)
class MetaDirRepository(Repository):
    """Repositories in the new meta-dir layout.

    :ivar _transport: Transport for access to repository control files,
        typically pointing to .bzr/repository.
    """

    def __init__(self, _format, a_bzrdir, control_files):
        super(MetaDirRepository, self).__init__(_format, a_bzrdir, control_files)
        self._transport = control_files._transport

    def is_shared(self):
        """Return True if this repository is flagged as a shared repository."""
        # The presence of the 'shared-storage' control file is the flag.
        return self._transport.has('shared-storage')

    def set_make_working_trees(self, new_value):
        """Set the policy flag for making working trees when creating branches.

        This only applies to branches that use this repository.

        The default is 'True'.
        :param new_value: True to restore the default, False to disable making
                          working trees.
        """
        # The policy is stored as the presence/absence of the
        # 'no-working-trees' control file.
        if new_value:
            try:
                self._transport.delete('no-working-trees')
            except errors.NoSuchFile:
                # Already at the default policy.
                pass
        else:
            self._transport.put_bytes('no-working-trees', '',
                mode=self.bzrdir._get_file_mode())

    def make_working_trees(self):
        """Returns the policy for making working trees on new branches."""
        return not self._transport.has('no-working-trees')
class MetaDirVersionedFileRepository(MetaDirRepository):
    """Repositories in a meta-dir, that work via versioned file objects."""

    def __init__(self, _format, a_bzrdir, control_files):
        super(MetaDirVersionedFileRepository, self).__init__(_format, a_bzrdir,
            control_files)
class RepositoryFormatRegistry(registry.Registry):
    """Registry of RepositoryFormats."""

    def get(self, format_string):
        """Return the format for format_string, calling factories if needed."""
        r = registry.Registry.get(self, format_string)
        # Entries may be classes/factories rather than instances; call
        # them to obtain the actual format object.
        if callable(r):
            r = r()
        return r
format_registry = RepositoryFormatRegistry()
"""Registry of formats, indexed by their identifying format string.

This can contain either format instances themselves, or classes/factories that
can be called to obtain one.
"""
#####################################################################
# Repository Formats

class RepositoryFormat(object):
    """A repository format.

    Formats provide three things:
     * An initialization routine to construct repository data on disk.
     * a format string which is used when the BzrDir supports versioned
       files.
     * an open routine which returns a Repository instance.

    There is one and only one Format subclass for each on-disk format. But
    there can be one Repository subclass that is used for several different
    formats. The _format attribute on a Repository instance can be used to
    determine the disk format.

    Formats are placed in an dict by their format string for reference
    during opening. These should be subclasses of RepositoryFormat
    for consistency.

    Once a format is deprecated, just deprecate the initialize and open
    methods on the format class. Do not deprecate the object, as the
    object will be created every system load.

    Common instance attributes:
    _matchingbzrdir - the bzrdir format that the repository format was
    originally written to work with. This can be used if manually
    constructing a bzrdir and repository, or more commonly for test suite
    parameterization.
    """

    # Set to True or False in derived classes. True indicates that the format
    # supports ghosts gracefully.
    supports_ghosts = None
    # Can this repository be given external locations to lookup additional
    # data. Set to True or False in derived classes.
    supports_external_lookups = None

    def __str__(self):
        return "<%s>" % self.__class__.__name__

    def __eq__(self, other):
        # format objects are generally stateless
        return isinstance(other, self.__class__)

    def __ne__(self, other):
        return not self == other

    @classmethod
    def find_format(klass, a_bzrdir):
        """Return the format for the repository object in a_bzrdir.

        This is used by bzr native formats that have a "format" file in
        the repository.  Other methods may be used by different types of
        control directory.

        :raises errors.NoRepositoryPresent: if no format file is found.
        :raises errors.UnknownFormatError: if the format string is not
            registered.
        """
        try:
            transport = a_bzrdir.get_repository_transport(None)
            format_string = transport.get("format").read()
            return format_registry.get(format_string)
        except errors.NoSuchFile:
            raise errors.NoRepositoryPresent(a_bzrdir)
        except KeyError:
            raise errors.UnknownFormatError(format=format_string,
                                            kind='repository')

    @classmethod
    def register_format(klass, format):
        format_registry.register(format.get_format_string(), format)

    @classmethod
    def unregister_format(klass, format):
        format_registry.remove(format.get_format_string())

    @classmethod
    def get_default_format(klass):
        """Return the current default format."""
        from bzrlib import bzrdir
        return bzrdir.format_registry.make_bzrdir('default').repository_format

    def get_format_string(self):
        """Return the ASCII format string that identifies this format.

        Note that in pre format ?? repositories the format string is
        not permitted nor written to disk.
        """
        raise NotImplementedError(self.get_format_string)

    def get_format_description(self):
        """Return the short description for this format."""
        raise NotImplementedError(self.get_format_description)

    # TODO: this shouldn't be in the base class, it's specific to things that
    # use weaves or knits -- mbp 20070207
    # NOTE(review): several parameter lines of this signature were lost in
    # the mangled source and are restored from upstream -- confirm.
    def _get_versioned_file_store(self,
                                  name,
                                  transport,
                                  control_files,
                                  prefixed=True,
                                  versionedfile_class=None,
                                  versionedfile_kwargs={},
                                  escaped=False):
        # versionedfile_kwargs is never mutated here; the shared-default
        # dict is safe as a read-only pass-through.
        if versionedfile_class is None:
            versionedfile_class = self._versionedfile_class
        weave_transport = control_files._transport.clone(name)
        dir_mode = control_files._dir_mode
        file_mode = control_files._file_mode
        return VersionedFileStore(weave_transport, prefixed=prefixed,
                                  dir_mode=dir_mode,
                                  file_mode=file_mode,
                                  versionedfile_class=versionedfile_class,
                                  versionedfile_kwargs=versionedfile_kwargs,
                                  escaped=escaped)

    def initialize(self, a_bzrdir, shared=False):
        """Initialize a repository of this format in a_bzrdir.

        :param a_bzrdir: The bzrdir to put the new repository in it.
        :param shared: The repository should be initialized as a sharable one.
        :returns: The new repository object.

        This may raise UninitializableFormat if shared repository are not
        compatible the a_bzrdir.
        """
        raise NotImplementedError(self.initialize)

    def is_supported(self):
        """Is this format supported?

        Supported formats must be initializable and openable.
        Unsupported formats may not support initialization or committing or
        some other features depending on the reason for not being supported.
        """
        return True

    def check_conversion_target(self, target_format):
        raise NotImplementedError(self.check_conversion_target)

    def open(self, a_bzrdir, _found=False):
        """Return an instance of this format for the bzrdir a_bzrdir.

        _found is a private parameter, do not use it.
        """
        raise NotImplementedError(self.open)
class MetaDirRepositoryFormat(RepositoryFormat):
    """Common base class for the new repositories using the metadir layout."""

    rich_root_data = False
    supports_tree_reference = False
    supports_external_lookups = False
    _matchingbzrdir = bzrdir.BzrDirMetaFormat1()

    def __init__(self):
        super(MetaDirRepositoryFormat, self).__init__()

    def _create_control_files(self, a_bzrdir):
        """Create the required files and the initial control_files object."""
        # FIXME: RBC 20060125 don't peek under the covers
        # NB: no need to escape relative paths that are url safe.
        repository_transport = a_bzrdir.get_repository_transport(self)
        control_files = lockable_files.LockableFiles(repository_transport,
                                'lock', lockdir.LockDir)
        control_files.create_lock()
        return control_files

    def _upload_blank_content(self, a_bzrdir, dirs, files, utf8_files, shared):
        """Upload the initial blank content.

        :param dirs: Directory names to create.
        :param files: (name, content_stream) pairs to upload.
        :param utf8_files: (name, bytes) pairs to upload non-atomically.
        :param shared: If true, also create the 'shared-storage' flag file.
        """
        control_files = self._create_control_files(a_bzrdir)
        control_files.lock_write()
        transport = control_files._transport
        if shared:
            utf8_files += [('shared-storage', '')]
        try:
            transport.mkdir_multi(dirs, mode=a_bzrdir._get_dir_mode())
            for (filename, content_stream) in files:
                transport.put_file(filename, content_stream,
                    mode=a_bzrdir._get_file_mode())
            for (filename, content_bytes) in utf8_files:
                transport.put_bytes_non_atomic(filename, content_bytes,
                    mode=a_bzrdir._get_file_mode())
        finally:
            # Always release the lock, even if an upload failed.
            control_files.unlock()
# formats which have no format string are not discoverable
# and not independently creatable, so are not registered.  They're
# all in bzrlib.repofmt.weaverepo now.  When an instance of one of these is
# needed, it's constructed directly by the BzrDir.  Non-native formats where
# the repository is not separately opened are similar.

format_registry.register_lazy(
    'Bazaar-NG Repository format 7',
    'bzrlib.repofmt.weaverepo',
    'RepositoryFormat7'
    )

format_registry.register_lazy(
    'Bazaar-NG Knit Repository Format 1',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit1',
    )

format_registry.register_lazy(
    'Bazaar Knit Repository Format 3 (bzr 0.15)\n',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit3',
    )

format_registry.register_lazy(
    'Bazaar Knit Repository Format 4 (bzr 1.0)\n',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit4',
    )

# Pack-based formats. There is one format for pre-subtrees, and one for
# post-subtrees to allow ease of testing.
# NOTE: These are experimental in 0.92. Stable in 1.0 and above
format_registry.register_lazy(
    'Bazaar pack repository format 1 (needs bzr 0.92)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack1',
    )
format_registry.register_lazy(
    'Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack3',
    )
format_registry.register_lazy(
    'Bazaar pack repository format 1 with rich root (needs bzr 1.0)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack4',
    )
format_registry.register_lazy(
    'Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack5',
    )
format_registry.register_lazy(
    'Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack5RichRoot',
    )
format_registry.register_lazy(
    'Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack5RichRootBroken',
    )

# Development formats.
format_registry.register_lazy(
    "Bazaar development format 1 (needs bzr.dev from before 1.6)\n",
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatPackDevelopment1',
    )
format_registry.register_lazy(
    ("Bazaar development format 1 with subtree support "
        "(needs bzr.dev from before 1.6)\n"),
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatPackDevelopment1Subtree',
    )
# 1.6->1.7 go below here
class InterRepository(InterObject):
    """This class represents operations taking place between two repositories.

    Its instances have methods like copy_content and fetch, and contain
    references to the source and target repositories these operations can be
    carried out on.

    Often we will provide convenience methods on 'repository' which carry out
    operations with another repository - they will always forward to
    InterRepository.get(other).method_name(parameters).
    """

    _optimisers = []
    """The available optimised InterRepository types."""

    def copy_content(self, revision_id=None):
        raise NotImplementedError(self.copy_content)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """Fetch the content required to construct revision_id.

        The content is copied from self.source to self.target.

        :param revision_id: if None all content is copied, if NULL_REVISION no
                            content is copied.
        :param pb: optional progress bar to use for progress reports. If not
                   provided a default one will be created.

        :returns: (copied_revision_count, failures).
        """
        # Normally we should find a specific InterRepository subclass to do
        # the fetch; if nothing else then at least InterSameDataRepository.
        # If none of them is suitable it looks like fetching is not possible;
        # we try to give a good message why.  _assert_same_model will probably
        # give a helpful message; otherwise a generic one.
        self._assert_same_model(self.source, self.target)
        # Fixed: error message previously read "no suitableInterRepository"
        # (missing space).
        raise errors.IncompatibleRepositories(self.source, self.target,
            "no suitable InterRepository found")

    def _walk_to_common_revisions(self, revision_ids):
        """Walk out from revision_ids in source to revisions target has.

        :param revision_ids: The start point for the search.
        :return: A set of revision ids.
        """
        target_graph = self.target.get_graph()
        revision_ids = frozenset(revision_ids)
        # Fast path: everything requested is already in the target.
        if set(target_graph.get_parent_map(revision_ids)) == revision_ids:
            return graph.SearchResult(revision_ids, set(), 0, set())
        missing_revs = set()
        source_graph = self.source.get_graph()
        # ensure we don't pay silly lookup costs.
        searcher = source_graph._make_breadth_first_searcher(revision_ids)
        null_set = frozenset([_mod_revision.NULL_REVISION])
        while True:
            try:
                next_revs, ghosts = searcher.next_with_ghosts()
            except StopIteration:
                break
            if revision_ids.intersection(ghosts):
                absent_ids = set(revision_ids.intersection(ghosts))
                # If all absent_ids are present in target, no error is needed.
                absent_ids.difference_update(
                    set(target_graph.get_parent_map(absent_ids)))
                if absent_ids:
                    raise errors.NoSuchRevision(self.source, absent_ids.pop())
            # we don't care about other ghosts as we can't fetch them and
            # haven't been asked to.
            next_revs = set(next_revs)
            # we always have NULL_REVISION present.
            have_revs = set(target_graph.get_parent_map(next_revs)).union(null_set)
            missing_revs.update(next_revs - have_revs)
            searcher.stop_searching_any(have_revs)
        return searcher.get_result()

    @deprecated_method(one_two)
    def missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """Return the revision ids that source has that target does not.

        These are returned in topological order.

        :param revision_id: only return revision ids included by this
                            revision_id.
        :param find_ghosts: If True find missing revisions in deep history
            rather than just finding the surface difference.
        """
        return list(self.search_missing_revision_ids(
            revision_id, find_ghosts).get_keys())

    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """Return the revision ids that source has that target does not.

        :param revision_id: only return revision ids included by this
                            revision_id.
        :param find_ghosts: If True find missing revisions in deep history
            rather than just finding the surface difference.
        :return: A bzrlib.graph.SearchResult.
        """
        # stop searching at found target revisions.
        if not find_ghosts and revision_id is not None:
            return self._walk_to_common_revisions([revision_id])
        # generic, possibly worst case, slow code path.
        target_ids = set(self.target.all_revision_ids())
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        result_set = set(source_ids).difference(target_ids)
        return self.source.revision_ids_to_search_result(result_set)

    @staticmethod
    def _same_model(source, target):
        """True if source and target have the same data representation.

        Note: this is always called on the base class; overriding it in a
        subclass will have no effect.
        """
        try:
            InterRepository._assert_same_model(source, target)
            return True
        except errors.IncompatibleRepositories:
            # Unused exception binding removed (was Py2-only ", e" form).
            return False

    @staticmethod
    def _assert_same_model(source, target):
        """Raise an exception if two repositories do not use the same model.
        """
        if source.supports_rich_root() != target.supports_rich_root():
            raise errors.IncompatibleRepositories(source, target,
                "different rich-root support")
        if source._serializer != target._serializer:
            raise errors.IncompatibleRepositories(source, target,
                "different serializers")
class InterSameDataRepository(InterRepository):
    """Code for converting between repositories that represent the same data.

    Data format and model must match for this to work.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        """Repository format for testing with.

        InterSameData can pull from subtree to subtree and from non-subtree to
        non-subtree, so we test this with the richest repository format.
        """
        from bzrlib.repofmt import knitrepo
        return knitrepo.RepositoryFormatKnit3()

    @staticmethod
    def is_compatible(source, target):
        return InterRepository._same_model(source, target)

    def copy_content(self, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This copies both the repository's revision data, and configuration
        information such as the make_working_trees setting.

        This is a destructive operation! Do not use it on existing
        repositories.

        :param revision_id: Only copy the content needed to construct
                            revision_id and its parents.
        """
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except NotImplementedError:
            # Best-effort: not all repositories track this policy.
            pass
        # but don't bother fetching if we have the needed data now.
        if (revision_id not in (None, _mod_revision.NULL_REVISION) and
            self.target.has_revision(revision_id)):
            return
        self.target.fetch(self.source, revision_id=revision_id)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import RepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target,
               self.target._format)
        f = RepoFetcher(to_repository=self.target,
                        from_repository=self.source,
                        last_revision=revision_id,
                        pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions
class InterWeaveRepo(InterSameDataRepository):
    """Optimised code paths between Weave based repositories.

    This should be in bzrlib/repofmt/weaverepo.py but we have not yet
    implemented lazy inter-object optimisation.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import weaverepo
        return weaverepo.RepositoryFormat7()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Weave formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.weaverepo import (
                RepositoryFormat5,
                RepositoryFormat6,
                RepositoryFormat7,
                )
        try:
            return (isinstance(source._format, (RepositoryFormat5,
                                                RepositoryFormat6,
                                                RepositoryFormat7)) and
                    isinstance(target._format, (RepositoryFormat5,
                                                RepositoryFormat6,
                                                RepositoryFormat7)))
        except AttributeError:
            return False

    def copy_content(self, revision_id=None):
        """See InterRepository.copy_content()."""
        # weave specific optimised path:
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        # Fixed: previously caught "NotImplemented" (the constant, not an
        # exception class), which could never match; NotImplementedError is
        # what make_working_trees raises.
        except (errors.RepositoryUpgradeRequired, NotImplementedError):
            pass
        # FIXME do not peek!
        if self.source._transport.listable():
            pb = ui.ui_factory.nested_progress_bar()
            try:
                self.target.texts.insert_record_stream(
                    self.source.texts.get_record_stream(
                        self.source.texts.keys(), 'topological', False))
                pb.update('copying inventory', 0, 1)
                self.target.inventories.insert_record_stream(
                    self.source.inventories.get_record_stream(
                        self.source.inventories.keys(), 'topological', False))
                self.target.signatures.insert_record_stream(
                    self.source.signatures.get_record_stream(
                        self.source.signatures.keys(),
                        'unordered', True))
                self.target.revisions.insert_record_stream(
                    self.source.revisions.get_record_stream(
                        self.source.revisions.keys(),
                        'topological', True))
            finally:
                pb.finished()
        else:
            self.target.fetch(self.source, revision_id=revision_id)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import RepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        f = RepoFetcher(to_repository=self.target,
                        from_repository=self.source,
                        last_revision=revision_id,
                        pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids()."""
        # we want all revisions to satisfy revision_id in source.
        # but we don't want to stat every file here and there.
        # we want then, all revisions other needs to satisfy revision_id
        # checked, but not those that we have locally.
        # so the first thing is to get a subset of the revisions to
        # satisfy revision_id in source, and then eliminate those that
        # we do already have.
        # this is slow on high latency connection to self, but as as this
        # disk format scales terribly for push anyway due to rewriting
        # inventory.weave, this is considered acceptable.
        # - RBC 20060209
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source._all_possible_ids()
        source_ids_set = set(source_ids)
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target._all_possible_ids())
        possibly_present_revisions = target_ids.intersection(source_ids_set)
        actually_present_revisions = set(
            self.target._eliminate_revisions_not_present(possibly_present_revisions))
        required_revisions = source_ids_set.difference(actually_present_revisions)
        if revision_id is not None:
            # we used get_ancestry to determine source_ids then we are assured all
            # revisions referenced are present as they are installed in topological order.
            # and the tip revision was validated by get_ancestry.
            result_set = required_revisions
        else:
            # if we just grabbed the possibly available ids, then
            # we only have an estimate of whats available and need to validate
            # that against the revision records.
            result_set = set(
                self.source._eliminate_revisions_not_present(required_revisions))
        return self.source.revision_ids_to_search_result(result_set)
class InterKnitRepo(InterSameDataRepository):
    """Optimised code paths between Knit based repositories."""

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import knitrepo
        return knitrepo.RepositoryFormatKnit1()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Knit formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.knitrepo import RepositoryFormatKnit
        try:
            are_knits = (isinstance(source._format, RepositoryFormatKnit) and
                isinstance(target._format, RepositoryFormatKnit))
        except AttributeError:
            return False
        return are_knits and InterRepository._same_model(source, target)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import RepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        f = RepoFetcher(to_repository=self.target,
                        from_repository=self.source,
                        last_revision=revision_id,
                        pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids()."""
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        source_ids_set = set(source_ids)
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target.all_revision_ids())
        possibly_present_revisions = target_ids.intersection(source_ids_set)
        actually_present_revisions = set(
            self.target._eliminate_revisions_not_present(possibly_present_revisions))
        required_revisions = source_ids_set.difference(actually_present_revisions)
        if revision_id is not None:
            # we used get_ancestry to determine source_ids then we are assured all
            # revisions referenced are present as they are installed in topological order.
            # and the tip revision was validated by get_ancestry.
            result_set = required_revisions
        else:
            # if we just grabbed the possibly available ids, then
            # we only have an estimate of whats available and need to validate
            # that against the revision records.
            result_set = set(
                self.source._eliminate_revisions_not_present(required_revisions))
        return self.source.revision_ids_to_search_result(result_set)
class InterPackRepo(InterSameDataRepository):
2724
"""Optimised code paths between Pack based repositories."""
2727
def _get_repo_format_to_test(self):
    """Repository format for testing with (pack format 1)."""
    from bzrlib.repofmt import pack_repo
    return pack_repo.RepositoryFormatKnitPack1()
def is_compatible(source, target):
    """Be compatible with known Pack formats.

    We don't test for the stores being of specific types because that
    could lead to confusing results, and there is no need to be
    overly general.
    """
    from bzrlib.repofmt.pack_repo import RepositoryFormatPack
    try:
        are_packs = (isinstance(source._format, RepositoryFormatPack) and
            isinstance(target._format, RepositoryFormatPack))
    except AttributeError:
        # One side has no _format attribute: not a pack repository.
        return False
    return are_packs and InterRepository._same_model(source, target)
def fetch(self, revision_id=None, pb=None, find_ghosts=False):
2749
"""See InterRepository.fetch()."""
2750
if (len(self.source._fallback_repositories) > 0 or
2751
len(self.target._fallback_repositories) > 0):
2752
# The pack layer is not aware of fallback repositories, so when
2753
# fetching from a stacked repository or into a stacked repository
2754
# we use the generic fetch logic which uses the VersionedFiles
2755
# attributes on repository.
2756
from bzrlib.fetch import RepoFetcher
2757
fetcher = RepoFetcher(self.target, self.source, revision_id,
2759
return fetcher.count_copied, fetcher.failed_revisions
2760
from bzrlib.repofmt.pack_repo import Packer
2761
mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
2762
self.source, self.source._format, self.target, self.target._format)
2763
self.count_copied = 0
2764
if revision_id is None:
2766
# everything to do - use pack logic
2767
# to fetch from all packs to one without
2768
# inventory parsing etc, IFF nothing to be copied is in the target.
2770
source_revision_ids = frozenset(self.source.all_revision_ids())
2771
revision_ids = source_revision_ids - \
2772
frozenset(self.target.get_parent_map(source_revision_ids))
2773
revision_keys = [(revid,) for revid in revision_ids]
2774
index = self.target._pack_collection.revision_index.combined_index
2775
present_revision_ids = set(item[1][0] for item in
2776
index.iter_entries(revision_keys))
2777
revision_ids = set(revision_ids) - present_revision_ids
2778
# implementing the TODO will involve:
2779
# - detecting when all of a pack is selected
2780
# - avoiding as much as possible pre-selection, so the
2781
# more-core routines such as create_pack_from_packs can filter in
2782
# a just-in-time fashion. (though having a HEADS list on a
2783
# repository might make this a lot easier, because we could
2784
# sensibly detect 'new revisions' without doing a full index scan.
2785
elif _mod_revision.is_null(revision_id):
2790
revision_ids = self.search_missing_revision_ids(revision_id,
2791
find_ghosts=find_ghosts).get_keys()
2792
except errors.NoSuchRevision:
2793
raise errors.InstallFailed([revision_id])
2794
if len(revision_ids) == 0:
2796
packs = self.source._pack_collection.all_packs()
2797
pack = Packer(self.target._pack_collection, packs, '.fetch',
2798
revision_ids).pack()
2799
if pack is not None:
2800
self.target._pack_collection._save_pack_names()
2801
# Trigger an autopack. This may duplicate effort as we've just done
2802
# a pack creation, but for now it is simpler to think about as
2803
# 'upload data, then repack if needed'.
2804
self.target._pack_collection.autopack()
2805
return (pack.get_revision_count(), [])
2810
def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
2811
"""See InterRepository.missing_revision_ids().
2813
:param find_ghosts: Find ghosts throughout the ancestry of
2816
if not find_ghosts and revision_id is not None:
2817
return self._walk_to_common_revisions([revision_id])
2818
elif revision_id is not None:
2819
# Find ghosts: search for revisions pointing from one repository to
2820
# the other, and vice versa, anywhere in the history of revision_id.
2821
graph = self.target.get_graph(other_repository=self.source)
2822
searcher = graph._make_breadth_first_searcher([revision_id])
2826
next_revs, ghosts = searcher.next_with_ghosts()
2827
except StopIteration:
2829
if revision_id in ghosts:
2830
raise errors.NoSuchRevision(self.source, revision_id)
2831
found_ids.update(next_revs)
2832
found_ids.update(ghosts)
2833
found_ids = frozenset(found_ids)
2834
# Double query here: should be able to avoid this by changing the
2835
# graph api further.
2836
result_set = found_ids - frozenset(
2837
self.target.get_parent_map(found_ids))
2839
source_ids = self.source.all_revision_ids()
2840
# source_ids is the worst possible case we may need to pull.
2841
# now we want to filter source_ids against what we actually
2842
# have in target, but don't try to check for existence where we know
2843
# we do not have a revision as that would be pointless.
2844
target_ids = set(self.target.all_revision_ids())
2845
result_set = set(source_ids).difference(target_ids)
2846
return self.source.revision_ids_to_search_result(result_set)
2849
class InterModel1and2(InterRepository):
    """Optimised code paths for pulling model-1 data into a rich-root repo."""

    @classmethod
    def _get_repo_format_to_test(self):
        # There is no single canonical format pair for this converter.
        return None

    @staticmethod
    def is_compatible(source, target):
        # Only useful when crossing from a non-rich-root source to a
        # rich-root target; same-model cases are handled elsewhere.
        if not source.supports_rich_root() and target.supports_rich_root():
            return True
        else:
            return False

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import Model1toKnit2Fetcher
        f = Model1toKnit2Fetcher(to_repository=self.target,
                                 from_repository=self.source,
                                 last_revision=revision_id,
                                 pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This is a destructive operation! Do not use it on existing
        repositories.

        :param revision_id: Only copy the content needed to construct
                            revision_id and its parents.
        """
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except NotImplementedError:
            # Some targets cannot configure working-tree creation; best effort.
            pass
        # but don't bother fetching if we have the needed data now.
        if (revision_id not in (None, _mod_revision.NULL_REVISION) and
            self.target.has_revision(revision_id)):
            return
        self.target.fetch(self.source, revision_id=revision_id)
2893
class InterKnit1and2(InterKnitRepo):
    """Optimised code paths from non-rich-root knit/pack to rich-root formats."""

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with Knit1 source and Knit3 target"""
        try:
            from bzrlib.repofmt.knitrepo import (
                RepositoryFormatKnit1,
                RepositoryFormatKnit3,
                )
            from bzrlib.repofmt.pack_repo import (
                RepositoryFormatKnitPack1,
                RepositoryFormatKnitPack3,
                RepositoryFormatKnitPack4,
                RepositoryFormatKnitPack5,
                RepositoryFormatKnitPack5RichRoot,
                RepositoryFormatPackDevelopment1,
                RepositoryFormatPackDevelopment1Subtree,
                )
            norichroot = (
                RepositoryFormatKnit1,            # no rr, no subtree
                RepositoryFormatKnitPack1,        # no rr, no subtree
                RepositoryFormatPackDevelopment1, # no rr, no subtree
                RepositoryFormatKnitPack5,        # no rr, no subtree
                )
            richroot = (
                RepositoryFormatKnit3,            # rr, subtree
                RepositoryFormatKnitPack3,        # rr, subtree
                RepositoryFormatKnitPack4,        # rr, no subtree
                RepositoryFormatKnitPack5RichRoot,# rr, no subtree
                RepositoryFormatPackDevelopment1Subtree, # rr, subtree
                )
            # Sanity-check the membership lists against the formats' own flags.
            for format in norichroot:
                if format.rich_root_data:
                    raise AssertionError('Format %s is a rich-root format'
                        ' but is included in the non-rich-root list'
                        % (format,))
            for format in richroot:
                if not format.rich_root_data:
                    raise AssertionError('Format %s is not a rich-root format'
                        ' but is included in the rich-root list'
                        % (format,))
            # TODO: One alternative is to just check format.rich_root_data,
            #       instead of keeping membership lists. However, the formats
            #       *also* have to use the same 'Knit' style of storage
            #       (line-deltas, fulltexts, etc.)
            return (isinstance(source._format, norichroot) and
                isinstance(target._format, richroot))
        except AttributeError:
            # A repository without a _format cannot match either list.
            return False

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import Knit1to2Fetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target,
               self.target._format)
        f = Knit1to2Fetcher(to_repository=self.target,
                            from_repository=self.source,
                            last_revision=revision_id,
                            pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions
2962
class InterDifferingSerializer(InterKnitRepo):
    """Fetch by deserialising and re-serialising each revision.

    Slow but format-agnostic: works whenever both sides share a data model.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with Knit2 source and Knit3 target"""
        if source.supports_rich_root() != target.supports_rich_root():
            return False
        # Ideally, we'd support fetching if the source had no tree references
        # even if it supported them...
        # BUG FIX: getattr does not resolve dotted names, so the previous
        # getattr(source, '_format.supports_tree_reference', False) always
        # returned the default.  Query the attribute on the format object.
        if (getattr(source._format, 'supports_tree_reference', False) and
            not getattr(target._format, 'supports_tree_reference', False)):
            return False
        return True

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        revision_ids = self.target.search_missing_revision_ids(self.source,
            revision_id, find_ghosts=find_ghosts).get_keys()
        # Install in topological order so parents always precede children.
        revision_ids = tsort.topo_sort(
            self.source.get_graph().get_parent_map(revision_ids))
        def revisions_iterator():
            for current_revision_id in revision_ids:
                revision = self.source.get_revision(current_revision_id)
                tree = self.source.revision_tree(current_revision_id)
                try:
                    signature = self.source.get_signature_text(
                        current_revision_id)
                except errors.NoSuchRevision:
                    # Unsigned revisions are expected; carry None through.
                    signature = None
                yield revision, tree, signature
        if pb is None:
            my_pb = ui.ui_factory.nested_progress_bar()
            pb = my_pb
        else:
            # Caller supplied the progress bar; it owns its lifecycle.
            my_pb = None
        try:
            install_revisions(self.target, revisions_iterator(),
                              len(revision_ids), pb)
        finally:
            if my_pb is not None:
                my_pb.finished()
        return len(revision_ids), 0
3011
class InterOtherToRemote(InterRepository):
    """Delegate fetch/copy into a RemoteRepository to the real target repo."""

    def __init__(self, source, target):
        InterRepository.__init__(self, source, target)
        # Lazily-created InterRepository against the target's real repository.
        self._real_inter = None

    @staticmethod
    def is_compatible(source, target):
        if isinstance(target, remote.RemoteRepository):
            return True
        return False

    def _ensure_real_inter(self):
        # Materialise the real (non-remote) target and an optimiser for it.
        if self._real_inter is None:
            self.target._ensure_real()
            real_target = self.target._real_repository
            self._real_inter = InterRepository.get(self.source, real_target)

    def copy_content(self, revision_id=None):
        self._ensure_real_inter()
        self._real_inter.copy_content(revision_id=revision_id)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        self._ensure_real_inter()
        return self._real_inter.fetch(revision_id=revision_id, pb=pb,
            find_ghosts=find_ghosts)

    @classmethod
    def _get_repo_format_to_test(self):
        return None
3043
class InterRemoteToOther(InterRepository):
    """Delegate fetch/copy out of a RemoteRepository to the real source repo."""

    def __init__(self, source, target):
        InterRepository.__init__(self, source, target)
        # Lazily-created InterRepository from the source's real repository.
        self._real_inter = None

    @staticmethod
    def is_compatible(source, target):
        if not isinstance(source, remote.RemoteRepository):
            return False
        # Is source's model compatible with target's model?
        source._ensure_real()
        real_source = source._real_repository
        if isinstance(real_source, remote.RemoteRepository):
            raise NotImplementedError(
                "We don't support remote repos backed by remote repos yet.")
        return InterRepository._same_model(real_source, target)

    def _ensure_real_inter(self):
        if self._real_inter is None:
            self.source._ensure_real()
            real_source = self.source._real_repository
            self._real_inter = InterRepository.get(real_source, self.target)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        self._ensure_real_inter()
        return self._real_inter.fetch(revision_id=revision_id, pb=pb,
            find_ghosts=find_ghosts)

    def copy_content(self, revision_id=None):
        self._ensure_real_inter()
        self._real_inter.copy_content(revision_id=revision_id)

    @classmethod
    def _get_repo_format_to_test(self):
        return None
3082
# Register the optimisers.  Order matters: later registrations are tried
# first by InterRepository.get(), so the most specific converters go last.
InterRepository.register_optimiser(InterDifferingSerializer)
InterRepository.register_optimiser(InterSameDataRepository)
InterRepository.register_optimiser(InterWeaveRepo)
InterRepository.register_optimiser(InterKnitRepo)
InterRepository.register_optimiser(InterModel1and2)
InterRepository.register_optimiser(InterKnit1and2)
InterRepository.register_optimiser(InterPackRepo)
InterRepository.register_optimiser(InterOtherToRemote)
InterRepository.register_optimiser(InterRemoteToOther)
3093
class CopyConverter(object):
    """A repository conversion tool which just performs a copy of the content.

    This is slow but quite reliable.
    """

    def __init__(self, target_format):
        """Create a CopyConverter.

        :param target_format: The format the resulting repository should be.
        """
        self.target_format = target_format

    def convert(self, repo, pb):
        """Perform the conversion of to_convert, giving feedback via pb.

        :param to_convert: The disk object to convert.
        :param pb: a progress bar to use for progress information.
        """
        self.pb = pb
        self.count = 0
        # Four user-visible steps below.
        self.total = 4
        # this is only useful with metadir layouts - separated repo content.
        # trigger an assertion if not such
        repo._format.get_format_string()
        self.repo_dir = repo.bzrdir
        self.step('Moving repository to repository.backup')
        self.repo_dir.transport.move('repository', 'repository.backup')
        backup_transport = self.repo_dir.transport.clone('repository.backup')
        repo._format.check_conversion_target(self.target_format)
        self.source_repo = repo._format.open(self.repo_dir,
            _found=True,
            _override_transport=backup_transport)
        self.step('Creating new repository')
        converted = self.target_format.initialize(self.repo_dir,
                                                  self.source_repo.is_shared())
        converted.lock_write()
        try:
            self.step('Copying content into repository.')
            self.source_repo.copy_content_into(converted)
        finally:
            converted.unlock()
        self.step('Deleting old repository content.')
        self.repo_dir.transport.delete_tree('repository.backup')
        self.pb.note('repository converted')

    def step(self, message):
        """Update the pb by a step."""
        self.count += 1
        self.pb.update(message, self.count, self.total)
3154
def _unescaper(match, _map=_unescape_map):
    """re.sub callback: translate one XML entity match to its character.

    Named entities come from _unescape_map; numeric entities ('#NNN') are
    decoded to UTF-8.  Unknown named entities re-raise KeyError.
    """
    code = match.group(1)
    try:
        return _map[code]
    except KeyError:
        if not code.startswith('#'):
            raise
        return unichr(int(code[1:])).encode('utf8')
3167
def _unescape_xml(data):
    """Unescape predefined XML entities in a string of data."""
    global _unescape_re
    # Compile lazily and cache at module level; this is called per-revision.
    if _unescape_re is None:
        _unescape_re = re.compile('\&([^;]*);')
    return _unescape_re.sub(_unescaper, data)
3175
class _VersionedFileChecker(object):
3177
def __init__(self, repository):
3178
self.repository = repository
3179
self.text_index = self.repository._generate_text_key_index()
3181
def calculate_file_version_parents(self, text_key):
3182
"""Calculate the correct parents for a file version according to
3185
parent_keys = self.text_index[text_key]
3186
if parent_keys == [_mod_revision.NULL_REVISION]:
3188
return tuple(parent_keys)
3190
def check_file_version_parents(self, texts, progress_bar=None):
3191
"""Check the parents stored in a versioned file are correct.
3193
It also detects file versions that are not referenced by their
3194
corresponding revision's inventory.
3196
:returns: A tuple of (wrong_parents, dangling_file_versions).
3197
wrong_parents is a dict mapping {revision_id: (stored_parents,
3198
correct_parents)} for each revision_id where the stored parents
3199
are not correct. dangling_file_versions is a set of (file_id,
3200
revision_id) tuples for versions that are present in this versioned
3201
file, but not used by the corresponding inventory.
3204
self.file_ids = set([file_id for file_id, _ in
3205
self.text_index.iterkeys()])
3206
# text keys is now grouped by file_id
3207
n_weaves = len(self.file_ids)
3208
files_in_revisions = {}
3209
revisions_of_files = {}
3210
n_versions = len(self.text_index)
3211
progress_bar.update('loading text store', 0, n_versions)
3212
parent_map = self.repository.texts.get_parent_map(self.text_index)
3213
# On unlistable transports this could well be empty/error...
3214
text_keys = self.repository.texts.keys()
3215
unused_keys = frozenset(text_keys) - set(self.text_index)
3216
for num, key in enumerate(self.text_index.iterkeys()):
3217
if progress_bar is not None:
3218
progress_bar.update('checking text graph', num, n_versions)
3219
correct_parents = self.calculate_file_version_parents(key)
3221
knit_parents = parent_map[key]
3222
except errors.RevisionNotPresent:
3225
if correct_parents != knit_parents:
3226
wrong_parents[key] = (knit_parents, correct_parents)
3227
return wrong_parents, unused_keys
3230
def _old_get_graph(repository, revision_id):
    """DO NOT USE. That is all. I'm serious.

    Builds a whole-ancestry dict {rev_id: parents} for revision_id with
    ghosts and NULL_REVISION stripped, for unmigrated callers only.
    """
    graph = repository.get_graph()
    revision_graph = dict(((key, value) for key, value in
        graph.iter_ancestry([revision_id]) if value is not None))
    return _strip_NULL_ghosts(revision_graph)
3238
def _strip_NULL_ghosts(revision_graph):
    """Also don't use this. more compatibility code for unmigrated clients.

    Mutates and returns revision_graph with the NULL_REVISION node removed
    and every parent reference to an absent (ghost) node dropped.
    """
    # Filter ghosts, and null:
    if _mod_revision.NULL_REVISION in revision_graph:
        del revision_graph[_mod_revision.NULL_REVISION]
    for key, parents in revision_graph.items():
        revision_graph[key] = tuple(parent for parent in parents if parent
            in revision_graph)
    return revision_graph