# Copyright (C) 2005, 2006, 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

from cStringIO import StringIO

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
    revision as _mod_revision,
from bzrlib.bundle import serializer
from bzrlib.revisiontree import RevisionTree
from bzrlib.store.versioned import VersionedFileStore
from bzrlib.store.text import TextStore
from bzrlib.testament import Testament
from bzrlib.util import bencode
""")

from bzrlib.decorators import needs_read_lock, needs_write_lock
from bzrlib.inter import InterObject
from bzrlib.inventory import Inventory, InventoryDirectory, ROOT_ID
from bzrlib.symbol_versioning import (
    deprecated_method,
    )
from bzrlib.trace import mutter, mutter_callsite, note, warning


# Old formats display a warning, but only once
_deprecation_warning_done = False


class CommitBuilder(object):
    """Provides an interface to build up a commit.

    This allows describing a tree to be committed without needing to
    know the internals of the format of the repository.
    """

    # all clients should supply tree roots.
    record_root_entry = True
    # the default CommitBuilder does not manage trees whose root is versioned.
    _versioned_root = False
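
    # A hedged usage sketch (illustrative, not a contract from this file):
    # builders are normally obtained from a write-locked repository via
    # Repository.get_commit_builder() below, roughly:
    #
    #   builder = repo.get_commit_builder(branch, parents, config)
    #   for path, ie in entries:  # hypothetical walk over inventory entries
    #       builder.record_entry_contents(ie, parent_invs, path, tree,
    #           content_summary)
    #   builder.finish_inventory()
    #   rev_id = builder.commit('commit message')
    #
    # get_commit_builder() starts the write group; commit() ends it.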

    def __init__(self, repository, parents, config, timestamp=None,
                 timezone=None, committer=None, revprops=None,
                 revision_id=None):
        """Initialize a CommitBuilder.

        :param repository: Repository to commit to.
        :param parents: Revision ids of the parents of the new revision.
        :param config: Configuration to use.
        :param timestamp: Optional timestamp recorded for commit.
        :param timezone: Optional timezone for timestamp.
        :param committer: Optional committer to set for commit.
        :param revprops: Optional dictionary of revision properties.
        :param revision_id: Optional revision id.
        """
        self._config = config

        if committer is None:
            self._committer = self._config.username()
        else:
            assert isinstance(committer, basestring), type(committer)
            self._committer = committer

        self.new_inventory = Inventory(None)
        self._new_revision_id = revision_id
        self.parents = parents
        self.repository = repository

        self._revprops = {}
        if revprops is not None:
            self._revprops.update(revprops)

        if timestamp is None:
            timestamp = time.time()
        # Restrict resolution to 1ms
        self._timestamp = round(timestamp, 3)

        if timezone is None:
            self._timezone = osutils.local_time_offset()
        else:
            self._timezone = int(timezone)

        self._generate_revision_if_needed()
        self.__heads = graph.HeadsCache(repository.get_graph()).heads
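        # graph.HeadsCache memoizes heads() queries against the revision
        # graph; _heads() below can reuse it for per-file graphs because the
        # two graphs have an identity relationship (see the _heads docstring).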

    def commit(self, message):
        """Make the actual commit.

        :return: The revision id of the recorded revision.
        """
        rev = _mod_revision.Revision(
                       timestamp=self._timestamp,
                       timezone=self._timezone,
                       committer=self._committer,
                       message=message,
                       inventory_sha1=self.inv_sha1,
                       revision_id=self._new_revision_id,
                       properties=self._revprops)
        rev.parent_ids = self.parents
        self.repository.add_revision(self._new_revision_id, rev,
            self.new_inventory, self._config)
        self.repository.commit_write_group()
        return self._new_revision_id

    def abort(self):
        """Abort the commit that is being built.
        """
        self.repository.abort_write_group()

    def revision_tree(self):
        """Return the tree that was just committed.

        After calling commit() this can be called to get a RevisionTree
        representing the newly committed tree. This is preferred to
        calling Repository.revision_tree() because that may require
        deserializing the inventory, while we already have a copy in
        memory.
        """
        return RevisionTree(self.repository, self.new_inventory,
                            self._new_revision_id)

    def finish_inventory(self):
        """Tell the builder that the inventory is finished."""
        if self.new_inventory.root is None:
            symbol_versioning.warn('Root entry should be supplied to'
                ' record_entry_contents, as of bzr 0.10.',
                DeprecationWarning, stacklevel=2)
            self.new_inventory.add(InventoryDirectory(ROOT_ID, '', None))
        self.new_inventory.revision_id = self._new_revision_id
        self.inv_sha1 = self.repository.add_inventory(
            self._new_revision_id,
            self.new_inventory,
            self.parents
            )

    def _gen_revision_id(self):
        """Return new revision-id."""
        return generate_ids.gen_revision_id(self._config.username(),
                                            self._timestamp)

    def _generate_revision_if_needed(self):
        """Create a revision id if None was supplied.

        If the repository cannot support user-specified revision ids
        they should override this function and raise CannotSetRevisionId
        if _new_revision_id is not None.

        :raises: CannotSetRevisionId
        """
        if self._new_revision_id is None:
            self._new_revision_id = self._gen_revision_id()
            self.random_revid = True
        else:
            self.random_revid = False

    def _heads(self, file_id, revision_ids):
        """Calculate the graph heads for revision_ids in the graph of file_id.

        This can use either a per-file graph or a global revision graph as we
        have an identity relationship between the two graphs.
        """
        return self.__heads(revision_ids)

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """
        # In this revision format, root entries have no knit or weave. When
        # serializing out to disk and back in, root.revision is always
        # _new_revision_id
        ie.revision = self._new_revision_id

    def _get_delta(self, ie, basis_inv, path):
        """Get a delta against the basis inventory for ie."""
        if ie.file_id not in basis_inv:
            # add
            return (None, path, ie.file_id, ie)
        elif ie != basis_inv[ie.file_id]:
            # common but altered
            # TODO: avoid this id2path call.
            return (basis_inv.id2path(ie.file_id), path, ie.file_id, ie)
        else:
            # common and unaltered
            return None
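
    # Note on _get_delta's return shapes (summarizing the branches above):
    # an entry not in the basis yields (None, path, file_id, ie); a changed
    # entry yields (old_path, path, file_id, ie); an unchanged entry yields
    # None. These tuples are the inventory_delta changes described in
    # record_entry_contents below.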

    def record_entry_contents(self, ie, parent_invs, path, tree,
        content_summary):
        """Record the content of ie from tree into the commit if needed.

        Side effect: sets ie.revision when unchanged

        :param ie: An inventory entry present in the commit.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param path: The path the entry is at in the tree.
        :param tree: The tree which contains this entry and should be used to
            obtain content.
        :param content_summary: Summary data from the tree about the paths
            content - stat, length, exec, sha/link target. This is only
            accessed when the entry has a revision of None - that is when it is
            a candidate to commit.
        :return: A tuple (change_delta, version_recorded). change_delta is
            an inventory_delta change for this entry against the basis tree of
            the commit, or None if no change occurred against the basis tree.
            version_recorded is True if a new version of the entry has been
            recorded. For instance, committing a merge where a file was only
            changed on the other side will return (delta, False).
        """
        if self.new_inventory.root is None:
            if ie.parent_id is not None:
                raise errors.RootMissing()
            self._check_root(ie, parent_invs, tree)
        if ie.revision is None:
            kind = content_summary[0]
        else:
            # ie is carried over from a prior commit
            kind = ie.kind
        # XXX: repository specific check for nested tree support goes here - if
        # the repo doesn't want nested trees we skip it?
        if (kind == 'tree-reference' and
            not self.repository._format.supports_tree_reference):
            # mismatch between commit builder logic and repository:
            # this needs the entry creation pushed down into the builder.
            raise NotImplementedError('Missing repository subtree support.')
        self.new_inventory.add(ie)
        # TODO: slow, take it out of the inner loop.
        try:
            basis_inv = parent_invs[0]
        except IndexError:
            basis_inv = Inventory(root_id=None)

        # ie.revision is always None if the InventoryEntry is considered
        # for committing. We may record the previous parents' revision if the
        # content is actually unchanged against a sole head.
        if ie.revision is not None:
            if not self._versioned_root and path == '':
                # repositories that do not version the root set the root's
                # revision to the new commit even when no change occurs, and
                # this masks when a change may have occurred against the basis,
                # so calculate if one happened.
                if ie.file_id in basis_inv:
                    delta = (basis_inv.id2path(ie.file_id), path,
                        ie.file_id, ie)
                else:
                    # add
                    delta = (None, path, ie.file_id, ie)
                return delta, False
            else:
                # we don't need to commit this, because the caller already
                # determined that an existing revision of this file is
                # appropriate.
                return None, (ie.revision == self._new_revision_id)
        # XXX: Friction: parent_candidates should return a list not a dict
        #      so that we don't have to walk the inventories again.
        parent_candidate_entries = ie.parent_candidates(parent_invs)
        head_set = self._heads(ie.file_id, parent_candidate_entries.keys())
        heads = []
        for inv in parent_invs:
            if ie.file_id in inv:
                old_rev = inv[ie.file_id].revision
                if old_rev in head_set:
                    heads.append(inv[ie.file_id].revision)
                    head_set.remove(inv[ie.file_id].revision)

        store = False
        # now we check to see if we need to write a new record to the
        # file-graph.
        # We write a new entry unless there is one head to the ancestors, and
        # the kind-derived content is unchanged.

        # Cheapest check first: no ancestors, or more than one head in the
        # ancestors, we write a new node.
        if len(heads) != 1:
            store = True
        if not store:
            # There is a single head, look it up for comparison
            parent_entry = parent_candidate_entries[heads[0]]
            # if the non-content specific data has changed, we'll be writing a
            # node:
            if (parent_entry.parent_id != ie.parent_id or
                parent_entry.name != ie.name):
                store = True
        # now we need to do content specific checks:
        if not store:
            # if the kind changed, the content obviously has
            if kind != parent_entry.kind:
                store = True
        if kind == 'file':
            assert content_summary[2] is not None, \
                "Files must not have executable = None"
            if not store:
                if (# if the file length changed we have to store:
                    parent_entry.text_size != content_summary[1] or
                    # if the exec bit has changed we have to store:
                    parent_entry.executable != content_summary[2]):
                    store = True
                elif parent_entry.text_sha1 == content_summary[3]:
                    # all meta and content is unchanged (using a hash cache
                    # hit to check the sha)
                    ie.revision = parent_entry.revision
                    ie.text_size = parent_entry.text_size
                    ie.text_sha1 = parent_entry.text_sha1
                    ie.executable = parent_entry.executable
                    return self._get_delta(ie, basis_inv, path), False
                # Either there is only a hash change (no hash cache entry,
                # or same size content change), or there is no change on
                # this file at all.
                # Provide the parent's hash to the store layer, so that if
                # the content is unchanged we will not store a new node.
                nostore_sha = parent_entry.text_sha1
            if store:
                # We want to record a new node regardless of the presence or
                # absence of a content change in the file.
                nostore_sha = None
            ie.executable = content_summary[2]
            lines = tree.get_file(ie.file_id, path).readlines()
            try:
                ie.text_sha1, ie.text_size = self._add_text_to_weave(
                    ie.file_id, lines, heads, nostore_sha)
            except errors.ExistingContent:
                # Turns out that the file content was unchanged, and we were
                # only going to store a new node if it was changed. Carry over
                # the entry.
                ie.revision = parent_entry.revision
                ie.text_size = parent_entry.text_size
                ie.text_sha1 = parent_entry.text_sha1
                ie.executable = parent_entry.executable
                return self._get_delta(ie, basis_inv, path), False
        elif kind == 'directory':
            if not store:
                # all data is meta here, nothing specific to directory, so
                # carry over:
                ie.revision = parent_entry.revision
                return self._get_delta(ie, basis_inv, path), False
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        elif kind == 'symlink':
            current_link_target = content_summary[3]
            if not store:
                # symlink target is not generic metadata, check if it has
                # changed.
                if current_link_target != parent_entry.symlink_target:
                    store = True
            if not store:
                # unchanged, carry over.
                ie.revision = parent_entry.revision
                ie.symlink_target = parent_entry.symlink_target
                return self._get_delta(ie, basis_inv, path), False
            ie.symlink_target = current_link_target
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        elif kind == 'tree-reference':
            if not store:
                if content_summary[3] != parent_entry.reference_revision:
                    store = True
            if not store:
                # unchanged, carry over.
                ie.reference_revision = parent_entry.reference_revision
                ie.revision = parent_entry.revision
                return self._get_delta(ie, basis_inv, path), False
            ie.reference_revision = content_summary[3]
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        else:
            raise NotImplementedError('unknown kind')
        ie.revision = self._new_revision_id
        return self._get_delta(ie, basis_inv, path), True

    def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
        versionedfile = self.repository.weave_store.get_weave_or_empty(
            file_id, self.repository.get_transaction())
        # Don't change this to add_lines - add_lines_with_ghosts is cheaper
        # than add_lines, and allows committing when a parent is ghosted for
        # some reason.
        # Note: as we read the content directly from the tree, we know it's not
        # been turned into unicode or badly split - but a broken tree
        # implementation could give us bad output from readlines() so this is
        # not a guarantee of safety. What would be better is always checking
        # the content during test suite execution. RBC 20070912
        try:
            return versionedfile.add_lines_with_ghosts(
                self._new_revision_id, parents, new_lines,
                nostore_sha=nostore_sha, random_id=self.random_revid,
                check_content=False)[0:2]
        finally:
            versionedfile.clear_cache()
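
    # How nostore_sha interacts with the store (see record_entry_contents):
    # when nostore_sha is the parent entry's sha1 and the new text hashes to
    # the same value, add_lines_with_ghosts raises errors.ExistingContent
    # instead of storing a duplicate, and the caller carries the parent's
    # entry over unchanged.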


class RootCommitBuilder(CommitBuilder):
    """This CommitBuilder actually records the root id."""

    # the root entry gets versioned properly by this builder.
    _versioned_root = True

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """


######################################################################

class Repository(object):
    """Repository holding history for one or more branches.

    The repository holds and retrieves historical information including
    revisions and file history. It's normally accessed only by the Branch,
    which views a particular line of development through that history.

    The Repository builds on top of Stores and a Transport, which respectively
    describe the disk data format and the way of accessing the (possibly
    remote) disk.
    """

    # What class to use for a CommitBuilder. Often it's simpler to change this
    # in a Repository class subclass rather than to override
    # get_commit_builder.
    _commit_builder_class = CommitBuilder
    # The search regex used by xml based repositories to determine what things
    # were changed in a single commit.
    _file_ids_altered_regex = lazy_regex.lazy_compile(
        r'file_id="(?P<file_id>[^"]+)"'
        r'.* revision="(?P<revision_id>[^"]+)"'
        )
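    # For illustration (assumed xml shape, matching the regex above): a line
    # such as '<file file_id="foo-id" revision="rev-1" .../>' in a serialized
    # inventory would yield file_id 'foo-id' and revision_id 'rev-1'.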

    def abort_write_group(self):
        """Abort the contents accrued within the current write group.

        :seealso: start_write_group.
        """
        if self._write_group is not self.get_transaction():
            # has an unlock or relock occurred ?
            raise errors.BzrError('mismatched lock context and write group.')
        self._abort_write_group()
        self._write_group = None

    def _abort_write_group(self):
        """Template method for per-repository write group cleanup.

        This is called during abort before the write group is considered to be
        finished and should cleanup any internal state accrued during the write
        group. There is no requirement that data handed to the repository be
        *not* made available - this is not a rollback - but neither should any
        attempt be made to ensure that data added is fully committed. Abort is
        invoked when an error has occurred so further disk or network operations
        may not be possible or may error and if possible should not be
        attempted.
        """
    def add_inventory(self, revision_id, inv, parents):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
                        is known to have and are in the repository already.

        Returns the sha1 of the serialized inventory.
        """
        assert self.is_in_write_group()
        _mod_revision.check_not_reserved_id(revision_id)
        assert inv.revision_id is None or inv.revision_id == revision_id, \
            "Mismatch between inventory revision" \
            " id and insertion revid (%r, %r)" % (inv.revision_id, revision_id)
        assert inv.root is not None
        inv_lines = self._serialise_inventory_to_lines(inv)
        inv_vf = self.get_inventory_weave()
        return self._inventory_add_lines(inv_vf, revision_id, parents,
            inv_lines, check_content=False)

    def _inventory_add_lines(self, inv_vf, revision_id, parents, lines,
        check_content=True):
        """Store lines in inv_vf and return the sha1 of the inventory."""
        final_parents = []
        for parent in parents:
            if parent in inv_vf:
                final_parents.append(parent)
        return inv_vf.add_lines(revision_id, final_parents, lines,
            check_content=check_content)[0]

    def add_revision(self, revision_id, rev, inv=None, config=None):
        """Add rev to the revision store as revision_id.

        :param revision_id: the revision id to use.
        :param rev: The revision object.
        :param inv: The inventory for the revision. If None, it will be looked
                    up in the inventory store.
        :param config: If None, no digital signature will be created.
                       If supplied, its signature_needed method will be used
                       to determine if a signature should be made.
        """
        # TODO: jam 20070210 Shouldn't we check rev.revision_id and
        #       rev.parent_ids?
        _mod_revision.check_not_reserved_id(revision_id)
        if config is not None and config.signature_needed():
            if inv is None:
                inv = self.get_inventory(revision_id)
            plaintext = Testament(rev, inv).as_short_text()
            self.store_revision_signature(
                gpg.GPGStrategy(config), plaintext, revision_id)
        if revision_id not in self.get_inventory_weave():
            if inv is None:
                raise errors.WeaveRevisionNotPresent(revision_id,
                                                     self.get_inventory_weave())
            else:
                # yes, this is not suitable for adding with ghosts.
                self.add_inventory(revision_id, inv, rev.parent_ids)
        self._revision_store.add_revision(rev, self.get_transaction())

    def _add_revision_text(self, revision_id, text):
        revision = self._revision_store._serializer.read_revision_from_string(
            text)
        self._revision_store._add_revision(revision, StringIO(text),
                                           self.get_transaction())

    def all_revision_ids(self):
        """Returns a list of all the revision ids in the repository.

        This is deprecated because code should generally work on the graph
        reachable from a particular revision, and ignore any other revisions
        that might be present. There is no direct replacement method.
        """
        if 'evil' in debug.debug_flags:
            mutter_callsite(2, "all_revision_ids is linear with history.")
        return self._all_revision_ids()

    def _all_revision_ids(self):
        """Returns a list of all the revision ids in the repository.

        These are in as much topological order as the underlying store can
        present.
        """
        raise NotImplementedError(self._all_revision_ids)

    def break_lock(self):
        """Break a lock if one is present from another instance.

        Uses the ui factory to ask for confirmation if the lock may be from
        an active process.
        """
        self.control_files.break_lock()

    def _eliminate_revisions_not_present(self, revision_ids):
        """Check every revision id in revision_ids to see if we have it.

        Returns a set of the present revisions.
        """
        result = []
        for id in revision_ids:
            if self.has_revision(id):
                result.append(id)
        return result

    @staticmethod
    def create(a_bzrdir):
        """Construct the current default format repository in a_bzrdir."""
        return RepositoryFormat.get_default_format().initialize(a_bzrdir)

    def __init__(self, _format, a_bzrdir, control_files, _revision_store, control_store, text_store):
        """Instantiate a Repository.

        :param _format: The format of the repository on disk.
        :param a_bzrdir: The BzrDir of the repository.

        In the future we will have a single api for all stores for
        getting file texts, inventories and revisions, then
        this construct will accept instances of those things.
        """
        super(Repository, self).__init__()
        self._format = _format
        # the following are part of the public API for Repository:
        self.bzrdir = a_bzrdir
        self.control_files = control_files
        self._revision_store = _revision_store
        # backwards compatibility
        self.weave_store = text_store
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = False
        self._reconcile_backsup_inventory = True
        # not right yet - should be more semantically clear ?
        self.control_store = control_store
        self.control_weaves = control_store
        # TODO: make sure to construct the right store classes, etc, depending
        # on whether escaping is required.
        self._warn_if_deprecated()
        self._write_group = None
        self.base = control_files._transport.base

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__,
                           self.base)

    def has_same_location(self, other):
        """Returns a boolean indicating if this repository is at the same
        location as another repository.

        This might return False even when two repository objects are accessing
        the same physical repository via different URLs.
        """
        if self.__class__ is not other.__class__:
            return False
        return (self.control_files._transport.base ==
                other.control_files._transport.base)

    def is_in_write_group(self):
        """Return True if there is an open write group.

        :seealso: start_write_group.
        """
        return self._write_group is not None

    def is_locked(self):
        return self.control_files.is_locked()

    def is_write_locked(self):
        """Return True if this object is write locked."""
        return self.is_locked() and self.control_files._lock_mode == 'w'

    def lock_write(self, token=None):
        """Lock this repository for writing.

        This causes caching within the repository object to start accumulating
        data during reads, and allows a 'write_group' to be obtained. Write
        groups must be used for actual data insertion.

        :param token: if this is already locked, then lock_write will fail
            unless the token matches the existing lock.
        :returns: a token if this instance supports tokens, otherwise None.
        :raises TokenLockingNotSupported: when a token is given but this
            instance doesn't support using token locks.
        :raises MismatchedToken: if the specified token doesn't match the token
            of the existing lock.
        :seealso: start_write_group.

        A token should be passed in if you know that you have locked the object
        some other way, and need to synchronise this object's state with that
        fact.

        XXX: this docstring is duplicated in many places, e.g. lockable_files.py
        """
        result = self.control_files.lock_write(token=token)
        self._refresh_data()
        return result

    def lock_read(self):
        self.control_files.lock_read()
        self._refresh_data()

    def get_physical_lock_status(self):
        return self.control_files.get_physical_lock_status()

    def leave_lock_in_place(self):
        """Tell this repository not to release the physical lock when this
        object is unlocked.

        If lock_write doesn't return a token, then this method is not supported.
        """
        self.control_files.leave_in_place()

    def dont_leave_lock_in_place(self):
        """Tell this repository to release the physical lock when this
        object is unlocked, even if it didn't originally acquire it.

        If lock_write doesn't return a token, then this method is not supported.
        """
        self.control_files.dont_leave_in_place()

    def gather_stats(self, revid=None, committers=None):
        """Gather statistics from a revision id.

        :param revid: The revision id to gather statistics from, if None, then
            no revision specific statistics are gathered.
        :param committers: Optional parameter controlling whether to grab
            a count of committers from the revision specific statistics.
        :return: A dictionary of statistics. Currently this contains:
            committers: The number of committers if requested.
            firstrev: A tuple with timestamp, timezone for the penultimate left
                most ancestor of revid, if revid is not the NULL_REVISION.
            latestrev: A tuple with timestamp, timezone for revid, if revid is
                not the NULL_REVISION.
            revisions: The total revision count in the repository.
            size: An estimated disk size of the repository in bytes.
        """
        result = {}
        if revid and committers:
            result['committers'] = 0
        if revid and revid != _mod_revision.NULL_REVISION:
            if committers:
                all_committers = set()
            revisions = self.get_ancestry(revid)
            # pop the leading None
            revisions.pop(0)
            first_revision = None
            if not committers:
                # ignore the revisions in the middle - just grab first and last
                revisions = revisions[0], revisions[-1]
            for revision in self.get_revisions(revisions):
                if not first_revision:
                    first_revision = revision
                if committers:
                    all_committers.add(revision.committer)
            last_revision = revision
            if committers:
                result['committers'] = len(all_committers)
            result['firstrev'] = (first_revision.timestamp,
                first_revision.timezone)
            result['latestrev'] = (last_revision.timestamp,
                last_revision.timezone)

        # now gather global repository information
        if self.bzrdir.root_transport.listable():
            c, t = self._revision_store.total_size(self.get_transaction())
            result['revisions'] = c
            result['size'] = t
        return result

    def find_branches(self, using=False):
        """Find branches underneath this repository.

        :param using: If True, list only branches using this repository.
        """

        class Evaluator(object):

            def __init__(self):
                self.first_call = True

            def __call__(self, bzrdir):
                # On the first call, the parameter is always the bzrdir
                # containing the current repo.
                if not self.first_call:
                    try:
                        repository = bzrdir.open_repository()
                    except errors.NoRepositoryPresent:
                        pass
                    else:
                        return False, (None, repository)
                self.first_call = False
                try:
                    value = (bzrdir.open_branch(), None)
                except errors.NotBranchError:
                    value = (None, None)
                return True, value

        branches = []
        for branch, repository in bzrdir.BzrDir.find_bzrdirs(
                self.bzrdir.root_transport, evaluate=Evaluator()):
            if branch is not None:
                branches.append(branch)
            if not using and repository is not None:
                branches.extend(repository.find_branches())
        return branches

    def get_data_stream(self, revision_ids):
        raise NotImplementedError(self.get_data_stream)

    def insert_data_stream(self, stream):
        """XXX What does this really do?

        Is it a substitute for fetch?
        Should it manage its own write group ?
        """
        for item_key, bytes in stream:
            if item_key[0] == 'file':
                (file_id,) = item_key[1:]
                knit = self.weave_store.get_weave_or_empty(
                    file_id, self.get_transaction())
            elif item_key == ('inventory',):
                knit = self.get_inventory_weave()
            elif item_key == ('revisions',):
                knit = self._revision_store.get_revision_file(
                    self.get_transaction())
            elif item_key == ('signatures',):
                knit = self._revision_store.get_signature_file(
                    self.get_transaction())
            else:
                raise RepositoryDataStreamError(
                    "Unrecognised data stream key '%s'" % (item_key,))
            decoded_list = bencode.bdecode(bytes)
            format = decoded_list.pop(0)
            data_list = []
            knit_bytes = ''
            for version, options, parents, some_bytes in decoded_list:
                data_list.append((version, options, len(some_bytes), parents))
                knit_bytes += some_bytes
            knit.insert_data_stream(
                (format, data_list, StringIO(knit_bytes).read))
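
    # A sketch of the stream shape consumed above (inferred from this loop,
    # not a formal spec): each stream item is (item_key, bytes), where bytes
    # bdecodes to [format, (version, options, parents, text), ...]; the texts
    # are concatenated and re-exposed to the knit through a file-like read.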

    def missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
        """Return the revision ids that other has that this does not.

        These are returned in topological order.

        revision_id: only return revision ids included by revision_id.
        """
        return InterRepository.get(other, self).missing_revision_ids(
            revision_id, find_ghosts)

    @staticmethod
    def open(base):
        """Open the repository rooted at base.

        For instance, if the repository is at URL/.bzr/repository,
        Repository.open(URL) -> a Repository instance.
        """
        control = bzrdir.BzrDir.open(base)
        return control.open_repository()

    def copy_content_into(self, destination, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This is a destructive operation! Do not use it on existing
        repositories.
        """
        return InterRepository.get(self, destination).copy_content(revision_id)

    def commit_write_group(self):
        """Commit the contents accrued within the current write group.

        :seealso: start_write_group.
        """
        if self._write_group is not self.get_transaction():
            # has an unlock or relock occurred ?
            raise errors.BzrError('mismatched lock context %r and '
                'write group %r.' %
                (self.get_transaction(), self._write_group))
        self._commit_write_group()
        self._write_group = None

    def _commit_write_group(self):
        """Template method for per-repository write group cleanup.

        This is called before the write group is considered to be
        finished and should ensure that all data handed to the repository
        for writing during the write group is safely committed (to the
        extent possible considering file system caching etc).
        """

    def fetch(self, source, revision_id=None, pb=None, find_ghosts=False):
        """Fetch the content required to construct revision_id from source.

        If revision_id is None all content is copied.
        :param find_ghosts: Find and copy revisions in the source that are
            ghosts in the target (and not reachable directly by walking out to
            the first-present revision in target from revision_id).
        """
        # fast path same-url fetch operations
        if self.has_same_location(source):
            # check that last_revision is in 'from' and then return a
            # no-operation.
            if (revision_id is not None and
                not _mod_revision.is_null(revision_id)):
                self.get_revision(revision_id)
            return 0, []
        inter = InterRepository.get(source, self)
        try:
            return inter.fetch(revision_id=revision_id, pb=pb, find_ghosts=find_ghosts)
        except NotImplementedError:
            raise errors.IncompatibleRepositories(source, self)

    def create_bundle(self, target, base, fileobj, format=None):
        return serializer.write_bundle(self, target, base, fileobj, format)

    def get_commit_builder(self, branch, parents, config, timestamp=None,
                           timezone=None, committer=None, revprops=None,
                           revision_id=None):
        """Obtain a CommitBuilder for this repository.

        :param branch: Branch to commit to.
        :param parents: Revision ids of the parents of the new revision.
        :param config: Configuration to use.
        :param timestamp: Optional timestamp recorded for commit.
        :param timezone: Optional timezone for timestamp.
        :param committer: Optional committer to set for commit.
        :param revprops: Optional dictionary of revision properties.
        :param revision_id: Optional revision id.
        """
        result = self._commit_builder_class(self, parents, config,
            timestamp, timezone, committer, revprops, revision_id)
        self.start_write_group()
        return result

    def unlock(self):
        if (self.control_files._lock_count == 1 and
            self.control_files._lock_mode == 'w'):
            if self._write_group is not None:
                self.abort_write_group()
                self.control_files.unlock()
                raise errors.BzrError(
                    'Must end write groups before releasing write locks.')
        self.control_files.unlock()

    def clone(self, a_bzrdir, revision_id=None):
        """Clone this repository into a_bzrdir using the current format.

        Currently no check is made that the format of this repository and
        the bzrdir format are compatible. FIXME RBC 20060201.

        :return: The newly created destination repository.
        """
        # TODO: deprecate after 0.16; cloning this with all its settings is
        # probably not very useful -- mbp 20070423
        dest_repo = self._create_sprouting_repo(a_bzrdir, shared=self.is_shared())
        self.copy_content_into(dest_repo, revision_id)
        return dest_repo

    def start_write_group(self):
        """Start a write group in the repository.

        Write groups are used by repositories which do not have a 1:1 mapping
        between file ids and backend store to manage the insertion of data from
        both fetch and commit operations.

        A write lock is required around the start_write_group/commit_write_group
        for the support of lock-requiring repository formats.

        One can only insert data into a repository inside a write group.
        """
        if not self.is_write_locked():
            raise errors.NotWriteLocked(self)
        if self._write_group:
            raise errors.BzrError('already in a write group')
        self._start_write_group()
        # so we can detect unlock/relock - the write group is now entered.
        self._write_group = self.get_transaction()
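        # Storing the transaction object itself (rather than a flag) lets
        # commit_write_group and abort_write_group detect an intervening
        # unlock and relock: a new lock produces a new transaction, so their
        # identity check against get_transaction() fails with a
        # 'mismatched lock context' error.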

    def _start_write_group(self):
        """Template method for per-repository write group startup.

        This is called before the write group is considered to be
        entered.
        """

    def sprout(self, to_bzrdir, revision_id=None):
        """Create a descendant repository for new development.

        Unlike clone, this does not copy the settings of the repository.
        """
        dest_repo = self._create_sprouting_repo(to_bzrdir, shared=False)
        dest_repo.fetch(self, revision_id=revision_id)
        return dest_repo

    def _create_sprouting_repo(self, a_bzrdir, shared):
        if not isinstance(a_bzrdir._format, self.bzrdir._format.__class__):
            # use target default format.
            dest_repo = a_bzrdir.create_repository()
        else:
            # Most control formats need the repository to be specifically
            # created, but on some old all-in-one formats it's not needed
            try:
                dest_repo = self._format.initialize(a_bzrdir, shared=shared)
            except errors.UninitializableFormat:
                dest_repo = a_bzrdir.open_repository()
        return dest_repo

    def has_revision(self, revision_id):
        """True if this repository has a copy of the revision."""
        if 'evil' in debug.debug_flags:
            mutter_callsite(3, "has_revision is a LBYL symptom.")
        return self._revision_store.has_revision_id(revision_id,
                                                    self.get_transaction())

    def get_revision(self, revision_id):
        """Return the Revision object for a named revision."""
        return self.get_revisions([revision_id])[0]

    def get_revision_reconcile(self, revision_id):
        """'reconcile' helper routine that allows access to a revision always.

        This variant of get_revision does not cross check the weave graph
        against the revision one as get_revision does: but it should only
        be used by reconcile, or reconcile-alike commands that are correcting
        or testing the revision graph.
        """
        return self._get_revisions([revision_id])[0]

    def get_revisions(self, revision_ids):
        """Get many revisions at once."""
        return self._get_revisions(revision_ids)

    def _get_revisions(self, revision_ids):
        """Core work logic to get many revisions without sanity checks."""
        for rev_id in revision_ids:
            if not rev_id or not isinstance(rev_id, basestring):
                raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
        revs = self._revision_store.get_revisions(revision_ids,
                                                  self.get_transaction())
        for rev in revs:
            assert not isinstance(rev.revision_id, unicode)
            for parent_id in rev.parent_ids:
                assert not isinstance(parent_id, unicode)
        return revs

    def get_revision_xml(self, revision_id):
        # TODO: jam 20070210 This shouldn't be necessary since get_revision
        #       would have already done it.
        # TODO: jam 20070210 Just use _serializer.write_revision_to_string()
        rev = self.get_revision(revision_id)
        rev_tmp = StringIO()
        # the current serializer.
        self._revision_store._serializer.write_revision(rev, rev_tmp)
        rev_tmp.seek(0)
        return rev_tmp.getvalue()

    def get_deltas_for_revisions(self, revisions):
        """Produce a generator of revision deltas.

        Note that the input is a sequence of REVISIONS, not revision_ids.
        Trees will be held in memory until the generator exits.
        Each delta is relative to the revision's lefthand predecessor.
        """
        required_trees = set()
        for revision in revisions:
            required_trees.add(revision.revision_id)
            required_trees.update(revision.parent_ids[:1])
        trees = dict((t.get_revision_id(), t) for
                     t in self.revision_trees(required_trees))
        for revision in revisions:
            if not revision.parent_ids:
                old_tree = self.revision_tree(None)
            else:
                old_tree = trees[revision.parent_ids[0]]
            yield trees[revision.revision_id].changes_from(old_tree)

    def get_revision_delta(self, revision_id):
        """Return the delta for one revision.

        The delta is relative to the left-hand predecessor of the
        revision.
        """
        r = self.get_revision(revision_id)
        return list(self.get_deltas_for_revisions([r]))[0]

    def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
        signature = gpg_strategy.sign(plaintext)
        self.add_signature_text(revision_id, signature)

    def add_signature_text(self, revision_id, signature):
        self._revision_store.add_revision_signature_text(revision_id,
                                                         signature,
                                                         self.get_transaction())

    def find_text_key_references(self):
        """Find the text key references within the repository.

        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. The inventory texts from all present
            revision ids are assessed to generate this report.
        """
        revision_ids = self.all_revision_ids()
        w = self.get_inventory_weave()
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._find_text_key_references_from_xml_inventory_lines(
                w.iter_lines_added_or_present_in_versions(revision_ids, pb=pb))
        finally:
            pb.finished()

    def _find_text_key_references_from_xml_inventory_lines(self,
        line_iterator):
        """Core routine for extracting references to texts from inventories.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. Note that if that revision_id was
            not part of the line_iterator's output then False will be given -
            even though it may actually refer to that key.
        """
        if not self._serializer.support_altered_by_hack:
            raise AssertionError(
                "_find_text_key_references_from_xml_inventory_lines only "
                "supported for branches which store inventory as unnested xml"
                ", not on %r" % self)
        result = {}

        # this code needs to read every new line in every inventory for the
        # inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
        # not present in one of those inventories is unnecessary but not
        # harmful because we are filtering by the revision id marker in the
        # inventory lines : we only select file ids altered in one of those
        # revisions. We don't need to see all lines in the inventory because
        # only those added in an inventory in rev X can contain a revision=X
        # line.
        unescape_revid_cache = {}
        unescape_fileid_cache = {}

        # jam 20061218 In a big fetch, this handles hundreds of thousands
        # of lines, so it has had a lot of inlining and optimizing done.
        # Sorry that it is a little bit messy.
        # Move several functions to be local variables, since this is a long
        # running loop.
        search = self._file_ids_altered_regex.search
        unescape = _unescape_xml
        setdefault = result.setdefault
        for line, version_id in line_iterator:
            match = search(line)
            if match is None:
                continue
            # One call to match.group() returning multiple items is quite a
            # bit faster than 2 calls to match.group() each returning 1
            file_id, revision_id = match.group('file_id', 'revision_id')

            # Inlining the cache lookups helps a lot when you make 170,000
            # lines and 350k ids, versus 8.4 unique ids.
            # Using a cache helps in 2 ways:
            #   1) Avoids unnecessary decoding calls
            #   2) Re-uses cached strings, which helps in future set and
            #      equality checks.
            # (2) is enough that removing encoding entirely along with
            # the cache (so we are using plain strings) results in no
            # performance improvement.
            try:
                revision_id = unescape_revid_cache[revision_id]
            except KeyError:
                unescaped = unescape(revision_id)
                unescape_revid_cache[revision_id] = unescaped
                revision_id = unescaped

            # Note that unconditionally unescaping means that we deserialise
            # every fileid, which for general 'pull' is not great, but we don't
            # really want to have so many fulltexts that this matters anyway.
            try:
                file_id = unescape_fileid_cache[file_id]
            except KeyError:
                unescaped = unescape(file_id)
                unescape_fileid_cache[file_id] = unescaped
                file_id = unescaped

            key = (file_id, revision_id)
            setdefault(key, False)
            if revision_id == version_id:
                result[key] = True
        return result
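
    # Illustrative result of the routine above: {('file-id', 'rev-1'): True}
    # means rev-1's own inventory referenced that text; False means the key
    # was seen only via some other revision's inventory in line_iterator.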

    def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
        revision_ids):
        """Helper routine for fileids_altered_by_revision_ids.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :param revision_ids: The revision ids to filter for. This should be a
            set or other type which supports efficient __contains__ lookups, as
            the revision id from each parsed line will be looked up in the
            revision_ids filter.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids that
            altered it listed explicitly.
        """
        result = {}
        setdefault = result.setdefault
        for file_id, revision_id in \
            self._find_text_key_references_from_xml_inventory_lines(
                line_iterator).iterkeys():
            # once data is all ensured-consistent; then this is
            # if revision_id == version_id
            if revision_id in revision_ids:
                setdefault(file_id, set()).add(revision_id)
        return result

    def fileids_altered_by_revision_ids(self, revision_ids):
        """Find the file ids and versions affected by revisions.

        :param revisions: an iterable containing revision ids.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids that
            altered it listed explicitly.
        """
        selected_revision_ids = set(revision_ids)
        w = self.get_inventory_weave()
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._find_file_ids_from_xml_inventory_lines(
                w.iter_lines_added_or_present_in_versions(
                    selected_revision_ids, pb=pb),
                selected_revision_ids)
        finally:
            pb.finished()

    def iter_files_bytes(self, desired_files):
        """Iterate through file versions.

        Files will not necessarily be returned in the order they occur in
        desired_files. No specific order is guaranteed.

        Yields pairs of identifier, bytes_iterator. identifier is an opaque
        value supplied by the caller as part of desired_files. It should
        uniquely identify the file version in the caller's context. (Examples:
        an index number or a TreeTransform trans_id.)

        bytes_iterator is an iterable of bytestrings for the file. The
        kind of iterable and length of the bytestrings are unspecified, but for
        this implementation, it is a list of lines produced by
        VersionedFile.get_lines().

        :param desired_files: a list of (file_id, revision_id, identifier)
            triples
        """
        transaction = self.get_transaction()
        for file_id, revision_id, callable_data in desired_files:
            try:
                weave = self.weave_store.get_weave(file_id, transaction)
            except errors.NoSuchFile:
                raise errors.NoSuchIdInRepository(self, file_id)
            yield callable_data, weave.get_lines(revision_id)
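
    # A hedged usage sketch for iter_files_bytes (the identifiers are
    # caller-chosen, per the docstring above):
    #
    #   wanted = [('file-id', 'rev-1', 0), ('other-id', 'rev-2', 1)]
    #   for identifier, lines in repo.iter_files_bytes(wanted):
    #       ...  # identifier is 0 or 1; lines come from VersionedFile.get_lines()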

    def _generate_text_key_index(self, text_key_references=None,
        ancestors=None):
        """Generate a new text key index for the repository.

        This is an expensive function that will take considerable time to run.

        :return: A dict mapping text keys ((file_id, revision_id) tuples) to a
            list of parents, also text keys. When a given key has no parents,
            the parents list will be [NULL_REVISION].
        """
        # All revisions, to find inventory parents.
        if ancestors is None:
            revision_graph = self.get_revision_graph_with_ghosts()
            ancestors = revision_graph.get_ancestors()
        if text_key_references is None:
            text_key_references = self.find_text_key_references()
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_generate_text_key_index(ancestors,
                text_key_references, pb)
        finally:
            pb.finished()

    def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
        """Helper for _generate_text_key_index to avoid deep nesting."""
        revision_order = tsort.topo_sort(ancestors)
        invalid_keys = set()
        revision_keys = {}
        for revision_id in revision_order:
            revision_keys[revision_id] = set()
        text_count = len(text_key_references)
        # a cache of the text keys to allow reuse; costs a dict of all the
        # keys, but saves a 2-tuple for every child of a given key.
        text_key_cache = {}
        for text_key, valid in text_key_references.iteritems():
            if not valid:
                invalid_keys.add(text_key)
            else:
                revision_keys[text_key[1]].add(text_key)
            text_key_cache[text_key] = text_key
        del text_key_references
        text_index = {}
        text_graph = graph.Graph(graph.DictParentsProvider(text_index))
        NULL_REVISION = _mod_revision.NULL_REVISION
        # Set a cache with a size of 10 - this suffices for bzr.dev but may be
        # too small for large or very branchy trees. However, for 55K path
        # trees, it would be easy to use too much memory trivially. Ideally we
        # could gauge this by looking at available real memory etc, but this is
        # always a tricky proposition.
        inventory_cache = lru_cache.LRUCache(10)
        batch_size = 10 # should be ~150MB on a 55K path tree
        batch_count = len(revision_order) / batch_size + 1
        processed_texts = 0
        pb.update("Calculating text parents.", processed_texts, text_count)
        for offset in xrange(batch_count):
            to_query = revision_order[offset * batch_size:(offset + 1) *
                batch_size]
            if not to_query:
                break
            for rev_tree in self.revision_trees(to_query):
                revision_id = rev_tree.get_revision_id()
                parent_ids = ancestors[revision_id]
                for text_key in revision_keys[revision_id]:
                    pb.update("Calculating text parents.", processed_texts)
                    processed_texts += 1
                    candidate_parents = []
                    for parent_id in parent_ids:
                        parent_text_key = (text_key[0], parent_id)
                        try:
                            check_parent = parent_text_key not in \
                                revision_keys[parent_id]
                        except KeyError:
                            # the parent parent_id is a ghost:
                            check_parent = False
                            # truncate the derived graph against this ghost.
                            parent_text_key = None
                        if check_parent:
                            # look at the parent commit details inventories to
                            # determine possible candidates in the per file graph.
                            try:
                                inv = inventory_cache[parent_id]
                            except KeyError:
                                inv = self.revision_tree(parent_id).inventory
                                inventory_cache[parent_id] = inv
                            parent_entry = inv._byid.get(text_key[0], None)
                            if parent_entry is not None:
                                parent_text_key = (
                                    text_key[0], parent_entry.revision)
                            else:
                                parent_text_key = None
                        if parent_text_key is not None:
                            candidate_parents.append(
                                text_key_cache[parent_text_key])
                    parent_heads = text_graph.heads(candidate_parents)
                    new_parents = list(parent_heads)
                    new_parents.sort(key=lambda x: candidate_parents.index(x))
                    if new_parents == []:
                        new_parents = [NULL_REVISION]
                    text_index[text_key] = new_parents
        for text_key in invalid_keys:
            text_index[text_key] = [NULL_REVISION]
        return text_index

    def item_keys_introduced_by(self, revision_ids, _files_pb=None):
        """Get an iterable listing the keys of all the data introduced by a set
        of revision IDs.

        The keys will be ordered so that the corresponding items can be safely
        fetched and inserted in that order.

        :returns: An iterable producing tuples of (knit-kind, file-id,
            versions).  knit-kind is one of 'file', 'inventory', 'signatures',
            'revisions'.  file-id is None unless knit-kind is 'file'.
        """
        # XXX: it's a bit weird to control the inventory weave caching in this
        # generator. Ideally the caching would be done in fetch.py I think. Or
        # maybe this generator should explicitly have the contract that it
        # should not be iterated until the previously yielded item has been
        # processed.
        inv_w = self.get_inventory_weave()
        inv_w.enable_cache()

        # file ids that changed
        file_ids = self.fileids_altered_by_revision_ids(revision_ids)
        count = 0
        num_file_ids = len(file_ids)
        for file_id, altered_versions in file_ids.iteritems():
            if _files_pb is not None:
                _files_pb.update("fetch texts", count, num_file_ids)
            count += 1
            yield ("file", file_id, altered_versions)
        # We're done with the files_pb. Note that it is finished by the
        # caller, just as it was created by the caller.

        # inventory
        yield ("inventory", None, revision_ids)

        # signatures
        revisions_with_signatures = set()
        for rev_id in revision_ids:
            try:
                self.get_signature_text(rev_id)
            except errors.NoSuchRevision:
                # not signed.
                pass
            else:
                revisions_with_signatures.add(rev_id)
        yield ("signatures", None, revisions_with_signatures)

        # revisions
        yield ("revisions", None, revision_ids)

    def get_inventory_weave(self):
        return self.control_weaves.get_weave('inventory',
            self.get_transaction())

    def get_inventory(self, revision_id):
        """Get Inventory object by hash."""
        return self.deserialise_inventory(
            revision_id, self.get_inventory_xml(revision_id))

    def deserialise_inventory(self, revision_id, xml):
        """Transform the xml into an inventory object.

        :param revision_id: The expected revision id of the inventory.
        :param xml: A serialised inventory.
        """
        return self._serializer.read_inventory_from_string(xml, revision_id)

    def serialise_inventory(self, inv):
        return self._serializer.write_inventory_to_string(inv)

    def _serialise_inventory_to_lines(self, inv):
        return self._serializer.write_inventory_to_lines(inv)

    def get_serializer_format(self):
        return self._serializer.format_num

    def get_inventory_xml(self, revision_id):
        """Get inventory XML as a file object."""
        try:
            assert isinstance(revision_id, str), type(revision_id)
            iw = self.get_inventory_weave()
            return iw.get_text(revision_id)
        except IndexError:
            raise errors.HistoryMissing(self, 'inventory', revision_id)

    def get_inventory_sha1(self, revision_id):
        """Return the sha1 hash of the inventory entry.
        """
        return self.get_revision(revision_id).inventory_sha1

    def get_revision_graph(self, revision_id=None):
        """Return a dictionary containing the revision graph.

        NB: This method should not be used as it accesses the entire graph all
        at once, which is much more data than most operations should require.

        :param revision_id: The revision_id to get a graph from. If None, then
            the entire revision graph is returned. This is a deprecated mode of
            operation and will be removed in the future.
        :return: a dictionary of revision_id->revision_parents_list.
        """
        raise NotImplementedError(self.get_revision_graph)

    def get_revision_graph_with_ghosts(self, revision_ids=None):
        """Return a graph of the revisions with ghosts marked as applicable.

        :param revision_ids: an iterable of revisions to graph or None for all.
        :return: a Graph object with the graph reachable from revision_ids.
        """
        if 'evil' in debug.debug_flags:
            mutter_callsite(3,
                "get_revision_graph_with_ghosts scales with size of history.")
        result = deprecated_graph.Graph()
        if not revision_ids:
            pending = set(self.all_revision_ids())
            required = set([])
        else:
            pending = set(revision_ids)
            # special case NULL_REVISION
            if _mod_revision.NULL_REVISION in pending:
                pending.remove(_mod_revision.NULL_REVISION)
            required = set(pending)
        done = set([])
        while len(pending):
            revision_id = pending.pop()
            try:
                rev = self.get_revision(revision_id)
            except errors.NoSuchRevision:
                if revision_id in required:
                    raise
                # a ghost
                result.add_ghost(revision_id)
                continue
            for parent_id in rev.parent_ids:
                # is this queued or done ?
                if (parent_id not in pending and
                    parent_id not in done):
                    # no, queue it.
                    pending.add(parent_id)
            result.add_node(revision_id, rev.parent_ids)
            done.add(revision_id)
        return result

    def _get_history_vf(self):
        """Get a versionedfile whose history graph reflects all revisions.

        For weave repositories, this is the inventory weave.
        """
        return self.get_inventory_weave()

    def iter_reverse_revision_history(self, revision_id):
        """Iterate backwards through revision ids in the lefthand history.

        :param revision_id: The revision id to start with.  All its lefthand
            ancestors will be traversed.
        """
        if revision_id in (None, _mod_revision.NULL_REVISION):
            return
        next_id = revision_id
        versionedfile = self._get_history_vf()
        while True:
            yield next_id
            parents = versionedfile.get_parents(next_id)
            if len(parents) == 0:
                return
            else:
                next_id = parents[0]

    def get_revision_inventory(self, revision_id):
        """Return inventory of a past revision."""
        # TODO: Unify this with get_inventory()
        # bzr 0.0.6 and later imposes the constraint that the inventory_id
        # must be the same as its revision, so this is trivial.
        if revision_id is None:
            # This does not make sense: if there is no revision,
            # then it is the current tree inventory surely ?!
            # and thus get_root_id() is something that looks at the last
            # commit on the branch, and the get_root_id is an inventory check.
            raise NotImplementedError
            # return Inventory(self.get_root_id())
        else:
            return self.get_inventory(revision_id)

    def is_shared(self):
        """Return True if this repository is flagged as a shared repository."""
        raise NotImplementedError(self.is_shared)

    def reconcile(self, other=None, thorough=False):
        """Reconcile this repository."""
        from bzrlib.reconcile import RepoReconciler
        reconciler = RepoReconciler(self, thorough=thorough)
        reconciler.reconcile()
        return reconciler

    def _refresh_data(self):
        """Helper called from lock_* to ensure coherency with disk.

        The default implementation does nothing; it is however possible
        for repositories to maintain loaded indices across multiple locks
        by checking inside their implementation of this method to see
        whether their indices are still valid. This depends of course on
        the disk format being validatable in this manner.
        """

    def revision_tree(self, revision_id):
        """Return Tree for a revision on this branch.

        `revision_id` may be None for the empty tree revision.
        """
        # TODO: refactor this to use an existing revision object
        # so we don't need to read it in twice.
        if revision_id is None or revision_id == _mod_revision.NULL_REVISION:
            return RevisionTree(self, Inventory(root_id=None),
                                _mod_revision.NULL_REVISION)
        else:
            inv = self.get_revision_inventory(revision_id)
            return RevisionTree(self, inv, revision_id)

    def revision_trees(self, revision_ids):
        """Return Tree for a revision on this branch.

        `revision_id` may not be None or 'null:'"""
        assert None not in revision_ids
        assert _mod_revision.NULL_REVISION not in revision_ids
        texts = self.get_inventory_weave().get_texts(revision_ids)
        for text, revision_id in zip(texts, revision_ids):
            inv = self.deserialise_inventory(revision_id, text)
            yield RevisionTree(self, inv, revision_id)

    def get_ancestry(self, revision_id, topo_sorted=True):
        """Return a list of revision-ids integrated by a revision.

        The first element of the list is always None, indicating the origin
        revision.  This might change when we have history horizons, or
        perhaps we should have a new API.

        This is topologically sorted.
        """
        if _mod_revision.is_null(revision_id):
            return [None]
        if not self.has_revision(revision_id):
            raise errors.NoSuchRevision(self, revision_id)
        w = self.get_inventory_weave()
        candidates = w.get_ancestry(revision_id, topo_sorted)
        return [None] + candidates # self._eliminate_revisions_not_present(candidates)

    def pack(self):
        """Compress the data within the repository.

        This operation only makes sense for some repository types. For other
        types it should be a no-op that just returns.

        This stub method does not require a lock, but subclasses should use
        @needs_write_lock; as this is a long-running call, it's reasonable to
        implicitly lock for the user.
        """

    def print_file(self, file, revision_id):
        """Print `file` to stdout.

        FIXME RBC 20060125 as John Meinel points out this is a bad api
        - it writes to stdout, it assumes that that is valid etc. Fix
        by creating a new more flexible convenience function.
        """
        tree = self.revision_tree(revision_id)
        # use inventory as it was in that revision
        file_id = tree.inventory.path2id(file)
        if not file_id:
            # TODO: jam 20060427 Write a test for this code path
            #       it had a bug in it, and was raising the wrong
            #       exception.
            raise errors.BzrError("%r is not present in revision %s" % (file, revision_id))
        tree.print_file(file_id)

    def get_transaction(self):
        return self.control_files.get_transaction()

    def revision_parents(self, revision_id):
        return self.get_inventory_weave().parent_names(revision_id)

    @deprecated_method(symbol_versioning.one_one)
    def get_parents(self, revision_ids):
        """See StackedParentsProvider.get_parents"""
        parent_map = self.get_parent_map(revision_ids)
        return [parent_map.get(r, None) for r in revision_ids]

    def get_parent_map(self, keys):
        """See graph._StackedParentsProvider.get_parent_map"""
        parent_map = {}
        for revision_id in keys:
            if revision_id == _mod_revision.NULL_REVISION:
                parent_map[revision_id] = []
            else:
                try:
                    parent_ids = self.get_revision(revision_id).parent_ids
                except errors.NoSuchRevision:
                    pass
                else:
                    if len(parent_ids) == 0:
                        parent_ids = [_mod_revision.NULL_REVISION]
                    parent_map[revision_id] = parent_ids
        return parent_map
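
    # Illustrative sketch (not part of this module's API): how a caller would
    # use the parents-provider contract above; the revision ids are
    # hypothetical.
    #
    #   repo.lock_read()
    #   try:
    #       parent_map = repo.get_parent_map(['rev-2', 'rev-1', 'ghost-id'])
    #       # e.g. {'rev-2': ['rev-1'], 'rev-1': ['null:']}; unknown keys such
    #       # as 'ghost-id' are silently omitted, and a revision with no
    #       # parents maps to [NULL_REVISION].
    #   finally:
    #       repo.unlock()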

    def _make_parents_provider(self):
        return self

    def get_graph(self, other_repository=None):
        """Return the graph walker for this repository format"""
        parents_provider = self._make_parents_provider()
        if (other_repository is not None and
            other_repository.bzrdir.transport.base !=
            self.bzrdir.transport.base):
            parents_provider = graph._StackedParentsProvider(
                [parents_provider, other_repository._make_parents_provider()])
        return graph.Graph(parents_provider)
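
    # Illustrative sketch (not part of this module): the returned Graph
    # answers ancestry queries across both providers; the ids below are
    # hypothetical.
    #
    #   g = repo.get_graph(other_repository=other_repo)
    #   heads = g.heads(['rev-a', 'rev-b'])  # the non-dominated revisions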

    def _get_versioned_file_checker(self):
        """Return an object suitable for checking versioned files."""
        return _VersionedFileChecker(self)

    def set_make_working_trees(self, new_value):
        """Set the policy flag for making working trees when creating branches.

        This only applies to branches that use this repository.

        The default is 'True'.
        :param new_value: True to restore the default, False to disable making
                          working trees.
        """
        raise NotImplementedError(self.set_make_working_trees)

    def make_working_trees(self):
        """Returns the policy for making working trees on new branches."""
        raise NotImplementedError(self.make_working_trees)

    @needs_write_lock
    def sign_revision(self, revision_id, gpg_strategy):
        plaintext = Testament.from_revision(self, revision_id).as_short_text()
        self.store_revision_signature(gpg_strategy, plaintext, revision_id)

    @needs_read_lock
    def has_signature_for_revision_id(self, revision_id):
        """Query for a revision signature for revision_id in the repository."""
        return self._revision_store.has_signature(revision_id,
                                                  self.get_transaction())

    @needs_read_lock
    def get_signature_text(self, revision_id):
        """Return the text for a signature."""
        return self._revision_store.get_signature_text(revision_id,
                                                       self.get_transaction())

    @needs_read_lock
    def check(self, revision_ids=None):
        """Check consistency of all history of given revision_ids.

        Different repository implementations should override _check().

        :param revision_ids: A non-empty list of revision_ids whose ancestry
             will be checked. Typically the last revision_id of a branch.
        """
        return self._check(revision_ids)

    def _check(self, revision_ids):
        result = check.Check(self)
        result.check()
        return result

    def _warn_if_deprecated(self):
        global _deprecation_warning_done
        if _deprecation_warning_done:
            return
        _deprecation_warning_done = True
        warning("Format %s for %s is deprecated - please use 'bzr upgrade' to get better performance"
                % (self._format, self.bzrdir.transport.base))

    def supports_rich_root(self):
        return self._format.rich_root_data

    def _check_ascii_revisionid(self, revision_id, method):
        """Private helper for ascii-only repositories."""
        # weave repositories refuse to store revisionids that are non-ascii.
        if revision_id is not None:
            # weaves require ascii revision ids.
            if isinstance(revision_id, unicode):
                try:
                    revision_id.encode('ascii')
                except UnicodeEncodeError:
                    raise errors.NonAsciiRevisionId(method, self)
            else:
                try:
                    revision_id.decode('ascii')
                except UnicodeDecodeError:
                    raise errors.NonAsciiRevisionId(method, self)

    def revision_graph_can_have_wrong_parents(self):
        """Is it possible for this repository to have a revision graph with
        incorrect parents?

        If True, then this repository must also implement
        _find_inconsistent_revision_parents so that check and reconcile can
        check for inconsistencies before proceeding with other checks that may
        depend on the revision index being consistent.
        """
        raise NotImplementedError(self.revision_graph_can_have_wrong_parents)


# remove these delegates a while after bzr 0.15
def __make_delegated(name, from_module):
    def _deprecated_repository_forwarder():
        symbol_versioning.warn('%s moved to %s in bzr 0.15'
            % (name, from_module),
            DeprecationWarning,
            stacklevel=2)
        m = __import__(from_module, globals(), locals(), [name])
        try:
            return getattr(m, name)
        except AttributeError:
            raise AttributeError('module %s has no name %s'
                    % (m, name))
    globals()[name] = _deprecated_repository_forwarder

for _name in [
        'AllInOneRepository',
        'WeaveMetaDirRepository',
        'PreSplitOutRepositoryFormat',
        'RepositoryFormat4',
        'RepositoryFormat5',
        'RepositoryFormat6',
        'RepositoryFormat7',
        ]:
    __make_delegated(_name, 'bzrlib.repofmt.weaverepo')

for _name in [
        'KnitRepository',
        'RepositoryFormatKnit',
        'RepositoryFormatKnit1',
        ]:
    __make_delegated(_name, 'bzrlib.repofmt.knitrepo')


def install_revision(repository, rev, revision_tree):
    """Install all revision data into a repository."""
    install_revisions(repository, [(rev, revision_tree, None)])


def install_revisions(repository, iterable):
    """Install all revision data into a repository.

    Accepts an iterable of revision, tree, signature tuples.  The signature
    may be None.
    """
    repository.start_write_group()
    try:
        for revision, revision_tree, signature in iterable:
            _install_revision(repository, revision, revision_tree, signature)
    except:
        repository.abort_write_group()
        raise
    else:
        repository.commit_write_group()
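
# Illustrative sketch (not part of this module): installing one revision and
# its tree into a writable repository; `rev` and `tree` are hypothetical
# objects obtained from another repository. The write group is managed for
# you; only the write lock is the caller's responsibility.
#
#   target.lock_write()
#   try:
#       install_revisions(target, [(rev, tree, None)])
#   finally:
#       target.unlock()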


def _install_revision(repository, rev, revision_tree, signature):
    """Install all revision data into a repository."""
    present_parents = []
    parent_trees = {}
    for p_id in rev.parent_ids:
        if repository.has_revision(p_id):
            present_parents.append(p_id)
            parent_trees[p_id] = repository.revision_tree(p_id)
        else:
            parent_trees[p_id] = repository.revision_tree(None)

    inv = revision_tree.inventory
    entries = inv.iter_entries()
    # backwards compatibility hack: skip the root id.
    if not repository.supports_rich_root():
        path, root = entries.next()
        if root.revision != rev.revision_id:
            raise errors.IncompatibleRevision(repr(repository))
    # Add the texts that are not already present
    for path, ie in entries:
        w = repository.weave_store.get_weave_or_empty(ie.file_id,
                repository.get_transaction())
        if ie.revision not in w:
            text_parents = []
            # FIXME: TODO: The following loop *may* be overlapping/duplicate
            # with InventoryEntry.find_previous_heads(). if it is, then there
            # is a latent bug here where the parents may have ancestors of each
            # other.
            for revision, tree in parent_trees.iteritems():
                if ie.file_id not in tree:
                    continue
                parent_id = tree.inventory[ie.file_id].revision
                if parent_id in text_parents:
                    continue
                text_parents.append(parent_id)

            vfile = repository.weave_store.get_weave_or_empty(ie.file_id,
                repository.get_transaction())
            lines = revision_tree.get_file(ie.file_id).readlines()
            vfile.add_lines(rev.revision_id, text_parents, lines)
    try:
        # install the inventory
        repository.add_inventory(rev.revision_id, inv, present_parents)
    except errors.RevisionAlreadyPresent:
        pass
    if signature is not None:
        repository.add_signature_text(rev.revision_id, signature)
    repository.add_revision(rev.revision_id, rev, inv)


class MetaDirRepository(Repository):
    """Repositories in the new meta-dir layout."""

    def __init__(self, _format, a_bzrdir, control_files, _revision_store, control_store, text_store):
        super(MetaDirRepository, self).__init__(_format,
                                                a_bzrdir,
                                                control_files,
                                                _revision_store,
                                                control_store,
                                                text_store)

        dir_mode = self.control_files._dir_mode
        file_mode = self.control_files._file_mode

    @needs_read_lock
    def is_shared(self):
        """Return True if this repository is flagged as a shared repository."""
        return self.control_files._transport.has('shared-storage')

    @needs_write_lock
    def set_make_working_trees(self, new_value):
        """Set the policy flag for making working trees when creating branches.

        This only applies to branches that use this repository.

        The default is 'True'.
        :param new_value: True to restore the default, False to disable making
                          working trees.
        """
        if new_value:
            try:
                self.control_files._transport.delete('no-working-trees')
            except errors.NoSuchFile:
                pass
        else:
            self.control_files.put_utf8('no-working-trees', '')

    def make_working_trees(self):
        """Returns the policy for making working trees on new branches."""
        return not self.control_files._transport.has('no-working-trees')


class RepositoryFormatRegistry(registry.Registry):
    """Registry of RepositoryFormats."""

    def get(self, format_string):
        r = registry.Registry.get(self, format_string)
        if callable(r):
            r = r()
        return r


format_registry = RepositoryFormatRegistry()
"""Registry of formats, indexed by their identifying format string.

This can contain either format instances themselves, or classes/factories that
can be called to obtain one.
"""
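
# Illustrative sketch (not part of this module): format strings map to format
# objects (or factories) via this registry; the lookup below assumes the knit
# format string registered elsewhere in this module.
#
#   fmt = format_registry.get('Bazaar-NG Knit Repository Format 1')
#   print fmt.get_format_description()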


#####################################################################
# Repository Formats

class RepositoryFormat(object):
    """A repository format.

    Formats provide three things:
     * An initialization routine to construct repository data on disk.
     * a format string which is used when the BzrDir supports versioned
       branches.
     * an open routine which returns a Repository instance.

    There is one and only one Format subclass for each on-disk format. But
    there can be one Repository subclass that is used for several different
    formats. The _format attribute on a Repository instance can be used to
    determine the disk format.

    Formats are placed in a dict by their format string for reference
    during opening. These should be subclasses of RepositoryFormat
    for consistency.

    Once a format is deprecated, just deprecate the initialize and open
    methods on the format class. Do not deprecate the object, as the
    object will be created every system load.

    Common instance attributes:
    _matchingbzrdir - the bzrdir format that the repository format was
    originally written to work with. This can be used if manually
    constructing a bzrdir and repository, or more commonly for test suite
    parameterization.
    """

    # Set to True or False in derived classes. True indicates that the format
    # supports ghosts gracefully.
    supports_ghosts = None

    def __repr__(self):
        return "<%s>" % self.__class__.__name__

    def __eq__(self, other):
        # format objects are generally stateless
        return isinstance(other, self.__class__)

    def __ne__(self, other):
        return not self == other

    @classmethod
    def find_format(klass, a_bzrdir):
        """Return the format for the repository object in a_bzrdir.

        This is used by bzr native formats that have a "format" file in
        the repository.  Other methods may be used by different types of
        control directory.
        """
        try:
            transport = a_bzrdir.get_repository_transport(None)
            format_string = transport.get("format").read()
            return format_registry.get(format_string)
        except errors.NoSuchFile:
            raise errors.NoRepositoryPresent(a_bzrdir)
        except KeyError:
            raise errors.UnknownFormatError(format=format_string)

    @classmethod
    def register_format(klass, format):
        format_registry.register(format.get_format_string(), format)

    @classmethod
    def unregister_format(klass, format):
        format_registry.remove(format.get_format_string())

    @classmethod
    def get_default_format(klass):
        """Return the current default format."""
        from bzrlib import bzrdir
        return bzrdir.format_registry.make_bzrdir('default').repository_format
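
    # Illustrative sketch (not part of this module): the default repository
    # format is derived from the 'default' bzrdir format, so the two always
    # stay in sync.
    #
    #   default = RepositoryFormat.get_default_format()
    #   print default.get_format_description()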

    def _get_control_store(self, repo_transport, control_files):
        """Return the control store for this repository."""
        raise NotImplementedError(self._get_control_store)

    def get_format_string(self):
        """Return the ASCII format string that identifies this format.

        Note that in pre format ?? repositories the format string is
        not permitted nor written to disk.
        """
        raise NotImplementedError(self.get_format_string)

    def get_format_description(self):
        """Return the short description for this format."""
        raise NotImplementedError(self.get_format_description)

    def _get_revision_store(self, repo_transport, control_files):
        """Return the revision store object for this a_bzrdir."""
        raise NotImplementedError(self._get_revision_store)

    def _get_text_rev_store(self,
                            transport,
                            control_files,
                            name,
                            compressed=True,
                            prefixed=False,
                            serializer=None):
        """Common logic for getting a revision store for a repository.

        See self._get_revision_store for the subclass-overridable method to
        get the store for a repository.
        """
        from bzrlib.store.revision.text import TextRevisionStore
        dir_mode = control_files._dir_mode
        file_mode = control_files._file_mode
        text_store = TextStore(transport.clone(name),
                               prefixed=prefixed,
                               compressed=compressed,
                               dir_mode=dir_mode,
                               file_mode=file_mode)
        _revision_store = TextRevisionStore(text_store, serializer)
        return _revision_store

    # TODO: this shouldn't be in the base class, it's specific to things that
    # use weaves or knits -- mbp 20070207
    def _get_versioned_file_store(self,
                                  name,
                                  transport,
                                  control_files,
                                  prefixed=True,
                                  versionedfile_class=None,
                                  versionedfile_kwargs={},
                                  escaped=False):
        if versionedfile_class is None:
            versionedfile_class = self._versionedfile_class
        weave_transport = control_files._transport.clone(name)
        dir_mode = control_files._dir_mode
        file_mode = control_files._file_mode
        return VersionedFileStore(weave_transport, prefixed=prefixed,
                                  dir_mode=dir_mode,
                                  file_mode=file_mode,
                                  versionedfile_class=versionedfile_class,
                                  versionedfile_kwargs=versionedfile_kwargs,
                                  escaped=escaped)

    def initialize(self, a_bzrdir, shared=False):
        """Initialize a repository of this format in a_bzrdir.

        :param a_bzrdir: The bzrdir in which to put the new repository.
        :param shared: The repository should be initialized as a sharable one.
        :returns: The new repository object.

        This may raise UninitializableFormat if shared repositories are not
        compatible with a_bzrdir.
        """
        raise NotImplementedError(self.initialize)

    def is_supported(self):
        """Is this format supported?

        Supported formats must be initializable and openable.
        Unsupported formats may not support initialization or committing or
        some other features depending on the reason for not being supported.
        """
        return True

    def check_conversion_target(self, target_format):
        raise NotImplementedError(self.check_conversion_target)

    def open(self, a_bzrdir, _found=False):
        """Return an instance of this format for the bzrdir a_bzrdir.

        _found is a private parameter, do not use it.
        """
        raise NotImplementedError(self.open)


class MetaDirRepositoryFormat(RepositoryFormat):
    """Common base class for the new repositories using the metadir layout."""

    rich_root_data = False
    supports_tree_reference = False
    _matchingbzrdir = bzrdir.BzrDirMetaFormat1()

    def __init__(self):
        super(MetaDirRepositoryFormat, self).__init__()

    def _create_control_files(self, a_bzrdir):
        """Create the required files and the initial control_files object."""
        # FIXME: RBC 20060125 don't peek under the covers
        # NB: no need to escape relative paths that are url safe.
        repository_transport = a_bzrdir.get_repository_transport(self)
        control_files = lockable_files.LockableFiles(repository_transport,
                                'lock', lockdir.LockDir)
        control_files.create_lock()
        return control_files

    def _upload_blank_content(self, a_bzrdir, dirs, files, utf8_files, shared):
        """Upload the initial blank content."""
        control_files = self._create_control_files(a_bzrdir)
        control_files.lock_write()
        try:
            control_files._transport.mkdir_multi(dirs,
                    mode=control_files._dir_mode)
            for file, content in files:
                control_files.put(file, content)
            for file, content in utf8_files:
                control_files.put_utf8(file, content)
            if shared:
                control_files.put_utf8('shared-storage', '')
        finally:
            control_files.unlock()


# formats which have no format string are not discoverable
# and not independently creatable, so are not registered.  They're
# all in bzrlib.repofmt.weaverepo now.  When an instance of one of these is
# needed, it's constructed directly by the BzrDir.  Non-native formats where
# the repository is not separately opened are similar.

format_registry.register_lazy(
    'Bazaar-NG Repository format 7',
    'bzrlib.repofmt.weaverepo',
    'RepositoryFormat7'
    )

format_registry.register_lazy(
    'Bazaar-NG Knit Repository Format 1',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit1',
    )

format_registry.register_lazy(
    'Bazaar Knit Repository Format 3 (bzr 0.15)\n',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit3',
    )

format_registry.register_lazy(
    'Bazaar Knit Repository Format 4 (bzr 1.0)\n',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit4',
    )

# Pack-based formats. There is one format for pre-subtrees, and one for
# post-subtrees to allow ease of testing.
# NOTE: These are experimental in 0.92.
format_registry.register_lazy(
    'Bazaar pack repository format 1 (needs bzr 0.92)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack1',
    )
format_registry.register_lazy(
    'Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack3',
    )
format_registry.register_lazy(
    'Bazaar pack repository format 1 with rich root (needs bzr 1.0)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack4',
    )


class InterRepository(InterObject):
    """This class represents operations taking place between two repositories.

    Its instances have methods like copy_content and fetch, and contain
    references to the source and target repositories these operations can be
    carried out on.

    Often we will provide convenience methods on 'repository' which carry out
    operations with another repository - they will always forward to
    InterRepository.get(other).method_name(parameters).
    """

    _optimisers = []
    """The available optimised InterRepository types."""

    def copy_content(self, revision_id=None):
        raise NotImplementedError(self.copy_content)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """Fetch the content required to construct revision_id.

        The content is copied from self.source to self.target.

        :param revision_id: if None all content is copied, if NULL_REVISION no
                            content is copied.
        :param pb: optional progress bar to use for progress reports. If not
                   provided a default one will be created.

        Returns the copied revision count and the failed revisions in a tuple:
        (copied, failures).
        """
        raise NotImplementedError(self.fetch)

    @needs_read_lock
    def missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """Return the revision ids that source has that target does not.

        These are returned in topological order.

        :param revision_id: only return revision ids included by this
                            revision_id.
        """
        # generic, possibly worst case, slow code path.
        target_ids = set(self.target.all_revision_ids())
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            assert source_ids[0] is None
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        result_set = set(source_ids).difference(target_ids)
        # this may look like a no-op: it's not. It preserves the ordering
        # other_ids had while only returning the members from other_ids
        # that we've decided we need.
        return [rev_id for rev_id in source_ids if rev_id in result_set]
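
    # Worked example for the ordering-preserving filter above (hypothetical
    # ids): with source_ids = ['r1', 'r2', 'r3'] and the target already
    # holding 'r2', result_set is set(['r1', 'r3']) and the returned list is
    # ['r1', 'r3'] - still in topological order, unlike the bare set.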

    @staticmethod
    def _same_model(source, target):
        """True if source and target have the same data representation."""
        if source.supports_rich_root() != target.supports_rich_root():
            return False
        if source._serializer != target._serializer:
            return False
        return True


class InterSameDataRepository(InterRepository):
    """Code for converting between repositories that represent the same data.

    Data format and model must match for this to work.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        """Repository format for testing with.

        InterSameData can pull from subtree to subtree and from non-subtree to
        non-subtree, so we test this with the richest repository format.
        """
        from bzrlib.repofmt import knitrepo
        return knitrepo.RepositoryFormatKnit3()

    @staticmethod
    def is_compatible(source, target):
        return InterRepository._same_model(source, target)

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This copies both the repository's revision data, and configuration
        information such as the make_working_trees setting.

        This is a destructive operation! Do not use it on existing
        repositories.

        :param revision_id: Only copy the content needed to construct
                            revision_id and its parents.
        """
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except NotImplementedError:
            pass
        # but don't bother fetching if we have the needed data now.
        if (revision_id not in (None, _mod_revision.NULL_REVISION) and
            self.target.has_revision(revision_id)):
            return
        self.target.fetch(self.source, revision_id=revision_id)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import GenericRepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target,
               self.target._format)
        f = GenericRepoFetcher(to_repository=self.target,
                               from_repository=self.source,
                               last_revision=revision_id,
                               pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions


class InterWeaveRepo(InterSameDataRepository):
    """Optimised code paths between Weave based repositories.

    This should be in bzrlib/repofmt/weaverepo.py but we have not yet
    implemented lazy inter-object optimisation.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import weaverepo
        return weaverepo.RepositoryFormat7()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Weave formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.weaverepo import (
            RepositoryFormat5,
            RepositoryFormat6,
            RepositoryFormat7,
            )
        try:
            return (isinstance(source._format, (RepositoryFormat5,
                                                RepositoryFormat6,
                                                RepositoryFormat7)) and
                    isinstance(target._format, (RepositoryFormat5,
                                                RepositoryFormat6,
                                                RepositoryFormat7)))
        except AttributeError:
            return False

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """See InterRepository.copy_content()."""
        # weave specific optimised path:
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except NotImplementedError:
            pass
        # FIXME do not peek!
        if self.source.control_files._transport.listable():
            pb = ui.ui_factory.nested_progress_bar()
            try:
                self.target.weave_store.copy_all_ids(
                    self.source.weave_store,
                    pb=pb,
                    from_transaction=self.source.get_transaction(),
                    to_transaction=self.target.get_transaction())
                pb.update('copying inventory', 0, 1)
                self.target.control_weaves.copy_multi(
                    self.source.control_weaves, ['inventory'],
                    from_transaction=self.source.get_transaction(),
                    to_transaction=self.target.get_transaction())
                self.target._revision_store.text_store.copy_all_ids(
                    self.source._revision_store.text_store,
                    pb=pb)
            finally:
                pb.finished()
        else:
            self.target.fetch(self.source, revision_id=revision_id)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import GenericRepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        f = GenericRepoFetcher(to_repository=self.target,
                               from_repository=self.source,
                               last_revision=revision_id,
                               pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @needs_read_lock
    def missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids()."""
        # we want all revisions to satisfy revision_id in source.
        # but we don't want to stat every file here and there.
        # we want, then, all revisions other needs to satisfy revision_id
        # checked, but not those that we have locally.
        # so the first thing is to get a subset of the revisions to
        # satisfy revision_id in source, and then eliminate those that
        # we do already have.
        # this is slow on high latency connection to self, but as this
        # disk format scales terribly for push anyway due to rewriting
        # inventory.weave, this is considered acceptable.
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            assert source_ids[0] is None
            source_ids.pop(0)
        else:
            source_ids = self.source._all_possible_ids()
        source_ids_set = set(source_ids)
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target._all_possible_ids())
        possibly_present_revisions = target_ids.intersection(source_ids_set)
        actually_present_revisions = set(
            self.target._eliminate_revisions_not_present(possibly_present_revisions))
        required_revisions = source_ids_set.difference(actually_present_revisions)
        required_topo_revisions = [rev_id for rev_id in source_ids
                                   if rev_id in required_revisions]
        if revision_id is not None:
            # we used get_ancestry to determine source_ids then we are assured all
            # revisions referenced are present as they are installed in topological order.
            # and the tip revision was validated by get_ancestry.
            return required_topo_revisions
        else:
            # if we just grabbed the possibly available ids, then
            # we only have an estimate of what's available and need to validate
            # that against the revision records.
            return self.source._eliminate_revisions_not_present(required_topo_revisions)


class InterKnitRepo(InterSameDataRepository):
    """Optimised code paths between Knit based repositories."""

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import knitrepo
        return knitrepo.RepositoryFormatKnit1()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Knit formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.knitrepo import RepositoryFormatKnit
        try:
            are_knits = (isinstance(source._format, RepositoryFormatKnit) and
                isinstance(target._format, RepositoryFormatKnit))
        except AttributeError:
            return False
        return are_knits and InterRepository._same_model(source, target)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import KnitRepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        f = KnitRepoFetcher(to_repository=self.target,
                            from_repository=self.source,
                            last_revision=revision_id,
                            pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @needs_read_lock
    def missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids()."""
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            assert source_ids[0] is None
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        source_ids_set = set(source_ids)
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target.all_revision_ids())
        possibly_present_revisions = target_ids.intersection(source_ids_set)
        actually_present_revisions = set(
            self.target._eliminate_revisions_not_present(possibly_present_revisions))
        required_revisions = source_ids_set.difference(actually_present_revisions)
        required_topo_revisions = [rev_id for rev_id in source_ids
                                   if rev_id in required_revisions]
        if revision_id is not None:
            # we used get_ancestry to determine source_ids then we are assured all
            # revisions referenced are present as they are installed in topological order.
            # and the tip revision was validated by get_ancestry.
            return required_topo_revisions
        else:
            # if we just grabbed the possibly available ids, then
            # we only have an estimate of what's available and need to validate
            # that against the revision records.
            return self.source._eliminate_revisions_not_present(required_topo_revisions)


class InterPackRepo(InterSameDataRepository):
    """Optimised code paths between Pack based repositories."""

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import pack_repo
        return pack_repo.RepositoryFormatKnitPack1()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Pack formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.pack_repo import RepositoryFormatPack
        try:
            are_packs = (isinstance(source._format, RepositoryFormatPack) and
                isinstance(target._format, RepositoryFormatPack))
        except AttributeError:
            return False
        return are_packs and InterRepository._same_model(source, target)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.repofmt.pack_repo import Packer
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        self.count_copied = 0
        if revision_id is None:
            # TODO:
            # everything to do - use pack logic
            # to fetch from all packs to one without
            # inventory parsing etc, IFF nothing to be copied is in the target.
            # till then:
            revision_ids = self.source.all_revision_ids()
            # implementing the TODO will involve:
            # - detecting when all of a pack is selected
            # - avoiding as much as possible pre-selection, so the
            #   more-core routines such as create_pack_from_packs can filter in
            #   a just-in-time fashion. (though having a HEADS list on a
            #   repository might make this a lot easier, because we could
            #   sensibly detect 'new revisions' without doing a full index scan.
        elif _mod_revision.is_null(revision_id):
            # nothing to do:
            return (0, [])
        else:
            try:
                revision_ids = self.missing_revision_ids(revision_id,
                    find_ghosts=find_ghosts)
            except errors.NoSuchRevision:
                raise errors.InstallFailed([revision_id])
        packs = self.source._pack_collection.all_packs()
        pack = Packer(self.target._pack_collection, packs, '.fetch',
            revision_ids).pack()
        if pack is not None:
            self.target._pack_collection._save_pack_names()
            # Trigger an autopack. This may duplicate effort as we've just done
            # a pack creation, but for now it is simpler to think about as
            # 'upload data, then repack if needed'.
            self.target._pack_collection.autopack()
            return (pack.get_revision_count(), [])
        else:
            return (0, [])

    @needs_read_lock
    def missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids().

        :param find_ghosts: Find ghosts throughout the ancestry of
            revision_id.
        """
        if not find_ghosts and revision_id is not None:
            graph = self.source.get_graph()
            missing_revs = set()
            searcher = graph._make_breadth_first_searcher([revision_id])
            target_index = (
                self.target._pack_collection.revision_index.combined_index)
            null_set = frozenset([_mod_revision.NULL_REVISION])
            while True:
                try:
                    next_revs = set(searcher.next())
                except StopIteration:
                    break
                next_revs.difference_update(null_set)
                target_keys = [(key,) for key in next_revs]
                have_revs = frozenset(node[1][0] for node in
                    target_index.iter_entries(target_keys))
                missing_revs.update(next_revs - have_revs)
                searcher.stop_searching_any(have_revs)
                if next_revs - have_revs == set([revision_id]):
                    # we saw the start rev itself, but no parents from it (or
                    # next_revs would have been updated to e.g. set()). We remove
                    # have_revs because if we found revision_id locally we
                    # stop_searching at the first time around.
                    raise errors.NoSuchRevision(self.source, revision_id)
            return missing_revs
        elif revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            assert source_ids[0] is None
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target.all_revision_ids())
        return [r for r in source_ids if (r not in target_ids)]


class InterModel1and2(InterRepository):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        if not source.supports_rich_root() and target.supports_rich_root():
            return True
        else:
            return False

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import Model1toKnit2Fetcher
        f = Model1toKnit2Fetcher(to_repository=self.target,
                                 from_repository=self.source,
                                 last_revision=revision_id,
                                 pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This is a destructive operation! Do not use it on existing
        repositories.

        :param revision_id: Only copy the content needed to construct
                            revision_id and its parents.
        """
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except NotImplementedError:
            pass
        # but don't bother fetching if we have the needed data now.
        if (revision_id not in (None, _mod_revision.NULL_REVISION) and
            self.target.has_revision(revision_id)):
            return
        self.target.fetch(self.source, revision_id=revision_id)


class InterKnit1and2(InterKnitRepo):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with Knit1 source and Knit3 target"""
        try:
            from bzrlib.repofmt.knitrepo import (RepositoryFormatKnit1,
                RepositoryFormatKnit3)
            from bzrlib.repofmt.pack_repo import (RepositoryFormatKnitPack1,
                RepositoryFormatKnitPack3)
            return (isinstance(source._format,
                    (RepositoryFormatKnit1, RepositoryFormatKnitPack1)) and
                isinstance(target._format,
                    (RepositoryFormatKnit3, RepositoryFormatKnitPack3))
                )
        except AttributeError:
            return False

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import Knit1to2Fetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target,
               self.target._format)
        f = Knit1to2Fetcher(to_repository=self.target,
                            from_repository=self.source,
                            last_revision=revision_id,
                            pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions


class InterDifferingSerializer(InterKnitRepo):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with Knit2 source and Knit3 target"""
        if source.supports_rich_root() != target.supports_rich_root():
            return False
        # Ideally, we'd support fetching if the source had no tree references
        # even if it supported them...
        if (getattr(source._format, 'supports_tree_reference', False) and
            not getattr(target._format, 'supports_tree_reference', False)):
            return False
        return True

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        revision_ids = self.target.missing_revision_ids(self.source,
                                                        revision_id)
        def revisions_iterator():
            for current_revision_id in revision_ids:
                revision = self.source.get_revision(current_revision_id)
                tree = self.source.revision_tree(current_revision_id)
                try:
                    signature = self.source.get_signature_text(
                        current_revision_id)
                except errors.NoSuchRevision:
                    signature = None
                yield revision, tree, signature
        install_revisions(self.target, revisions_iterator())
        return len(revision_ids), 0


class InterRemoteToOther(InterRepository):

    def __init__(self, source, target):
        InterRepository.__init__(self, source, target)
        self._real_inter = None

    @staticmethod
    def is_compatible(source, target):
        if not isinstance(source, remote.RemoteRepository):
            return False
        source._ensure_real()
        real_source = source._real_repository
        # Is source's model compatible with target's model, and are they the
        # same format?  Currently we can only optimise fetching from an
        # identical model & format repo.
        assert not isinstance(real_source, remote.RemoteRepository), (
            "We don't support remote repos backed by remote repos yet.")
        return real_source._format == target._format

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import RemoteToOtherFetcher
        mutter("Using fetch logic to copy between %s(remote) and %s(%s)",
               self.source, self.target, self.target._format)
        # TODO: jam 20070210 This should be an assert, not a translate
        revision_id = osutils.safe_revision_id(revision_id)
        f = RemoteToOtherFetcher(to_repository=self.target,
                                 from_repository=self.source,
                                 last_revision=revision_id,
                                 pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @classmethod
    def _get_repo_format_to_test(self):
        return None


class InterOtherToRemote(InterRepository):

    def __init__(self, source, target):
        InterRepository.__init__(self, source, target)
        self._real_inter = None

    @staticmethod
    def is_compatible(source, target):
        if isinstance(target, remote.RemoteRepository):
            return True
        return False

    def _ensure_real_inter(self):
        if self._real_inter is None:
            self.target._ensure_real()
            real_target = self.target._real_repository
            self._real_inter = InterRepository.get(self.source, real_target)

    def copy_content(self, revision_id=None):
        self._ensure_real_inter()
        self._real_inter.copy_content(revision_id=revision_id)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        self._ensure_real_inter()
        self._real_inter.fetch(revision_id=revision_id, pb=pb)

    @classmethod
    def _get_repo_format_to_test(self):
        return None


InterRepository.register_optimiser(InterDifferingSerializer)
InterRepository.register_optimiser(InterSameDataRepository)
InterRepository.register_optimiser(InterWeaveRepo)
InterRepository.register_optimiser(InterKnitRepo)
InterRepository.register_optimiser(InterModel1and2)
InterRepository.register_optimiser(InterKnit1and2)
InterRepository.register_optimiser(InterPackRepo)
InterRepository.register_optimiser(InterRemoteToOther)
InterRepository.register_optimiser(InterOtherToRemote)


class CopyConverter(object):
    """A repository conversion tool which just performs a copy of the content.

    This is slow but quite reliable.
    """

    def __init__(self, target_format):
        """Create a CopyConverter.

        :param target_format: The format the resulting repository should be.
        """
        self.target_format = target_format

    def convert(self, repo, pb):
        """Perform the conversion of repo, giving feedback via pb.

        :param repo: The repository to convert.
        :param pb: a progress bar to use for progress information.
        """
        self.pb = pb
        self.count = 0
        self.total = 4
        # this is only useful with metadir layouts - separated repo content.
        # trigger an assertion if not such
        repo._format.get_format_string()
        self.repo_dir = repo.bzrdir
        self.step('Moving repository to repository.backup')
        self.repo_dir.transport.move('repository', 'repository.backup')
        backup_transport = self.repo_dir.transport.clone('repository.backup')
        repo._format.check_conversion_target(self.target_format)
        self.source_repo = repo._format.open(self.repo_dir,
            _found=True,
            _override_transport=backup_transport)
        self.step('Creating new repository')
        converted = self.target_format.initialize(self.repo_dir,
                                                  self.source_repo.is_shared())
        converted.lock_write()
        try:
            self.step('Copying content into repository.')
            self.source_repo.copy_content_into(converted)
        finally:
            converted.unlock()
        self.step('Deleting old repository content.')
        self.repo_dir.transport.delete_tree('repository.backup')
        self.pb.note('repository converted')

    def step(self, message):
        """Update the pb by a step."""
        self.count += 1
        self.pb.update(message, self.count, self.total)
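
    # Illustrative sketch (not part of this module): converting a metadir
    # repository in place; `repo`, `target_format` and `pb` are hypothetical
    # stand-ins for a repository, a RepositoryFormat and a progress bar.
    #
    #   converter = CopyConverter(target_format)
    #   converter.convert(repo, pb)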


_unescape_map = {
    'apos': "'",
    'quot': '"',
    'amp': '&',
    'lt': '<',
    'gt': '>',
}


def _unescaper(match, _map=_unescape_map):
    code = match.group(1)
    try:
        return _map[code]
    except KeyError:
        if not code.startswith('#'):
            raise
        return unichr(int(code[1:])).encode('utf8')


_unescape_re = None


def _unescape_xml(data):
    """Unescape predefined XML entities in a string of data."""
    global _unescape_re
    if _unescape_re is None:
        _unescape_re = re.compile('\&([^;]*);')
    return _unescape_re.sub(_unescaper, data)
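
# Illustrative examples (not part of this module's API):
#
#   _unescape_xml('a &amp; b &lt; c')   # -> 'a & b < c'
#   _unescape_xml('caf&#233;')          # -> 'caf\xc3\xa9' (utf8 bytes)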


class _VersionedFileChecker(object):

    def __init__(self, repository):
        self.repository = repository
        self.text_index = self.repository._generate_text_key_index()

    def calculate_file_version_parents(self, revision_id, file_id):
        """Calculate the correct parents for a file version according to
        the inventories.
        """
        parent_keys = self.text_index[(file_id, revision_id)]
        if parent_keys == [_mod_revision.NULL_REVISION]:
            return ()
        # strip the file_id, for the weave api
        return tuple([revision_id for file_id, revision_id in parent_keys])

    def check_file_version_parents(self, weave, file_id):
        """Check the parents stored in a versioned file are correct.

        It also detects file versions that are not referenced by their
        corresponding revision's inventory.

        :returns: A tuple of (wrong_parents, dangling_file_versions).
            wrong_parents is a dict mapping {revision_id: (stored_parents,
            correct_parents)} for each revision_id where the stored parents
            are not correct.  dangling_file_versions is a set of (file_id,
            revision_id) tuples for versions that are present in this versioned
            file, but not used by the corresponding inventory.
        """
        wrong_parents = {}
        unused_versions = set()
        for num, revision_id in enumerate(weave.versions()):
            try:
                correct_parents = self.calculate_file_version_parents(
                    revision_id, file_id)
            except KeyError:
                # The version is not part of the used keys.
                unused_versions.add(revision_id)
            else:
                try:
                    knit_parents = tuple(weave.get_parents(revision_id))
                except errors.RevisionNotPresent:
                    knit_parents = None
                if correct_parents != knit_parents:
                    wrong_parents[revision_id] = (knit_parents, correct_parents)
        return wrong_parents, unused_versions
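
    # Illustrative sketch (not part of this module): running the checker over
    # one versioned file; `repo`, `weave` and the file id are hypothetical.
    #
    #   checker = _VersionedFileChecker(repo)
    #   wrong, unused = checker.check_file_version_parents(weave, 'a-file-id')
    #   for revision_id, (stored, correct) in wrong.iteritems():
    #       print revision_id, stored, correct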