# Copyright (C) 2005, 2006, 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

from cStringIO import StringIO

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
import time

from bzrlib import (
    bzrdir,
    debug,
    deprecated_graph,
    errors,
    generate_ids,
    gpg,
    graph,
    lazy_regex,
    lru_cache,
    osutils,
    revision as _mod_revision,
    symbol_versioning,
    tsort,
    ui,
    )
from bzrlib.bundle import serializer
from bzrlib.revisiontree import RevisionTree
from bzrlib.store.versioned import VersionedFileStore
from bzrlib.store.text import TextStore
from bzrlib.testament import Testament
from bzrlib.util import bencode
""")

from bzrlib.decorators import needs_read_lock, needs_write_lock
from bzrlib.inter import InterObject
from bzrlib.inventory import Inventory, InventoryDirectory, ROOT_ID
from bzrlib.symbol_versioning import (
    deprecated_method,
    )
from bzrlib.trace import mutter, mutter_callsite, note, warning


# Old formats display a warning, but only once
_deprecation_warning_done = False


class CommitBuilder(object):
    """Provides an interface to build up a commit.

    This allows describing a tree to be committed without needing to
    know the internals of the format of the repository.
    """

    # all clients should supply tree roots.
    record_root_entry = True
    # the default CommitBuilder does not manage trees whose root is versioned.
    _versioned_root = False

    def __init__(self, repository, parents, config, timestamp=None,
                 timezone=None, committer=None, revprops=None,
                 revision_id=None):
        """Initiate a CommitBuilder.

        :param repository: Repository to commit to.
        :param parents: Revision ids of the parents of the new revision.
        :param config: Configuration to use.
        :param timestamp: Optional timestamp recorded for commit.
        :param timezone: Optional timezone for timestamp.
        :param committer: Optional committer to set for commit.
        :param revprops: Optional dictionary of revision properties.
        :param revision_id: Optional revision id.
        """
        self._config = config

        if committer is None:
            self._committer = self._config.username()
        else:
            self._committer = committer

        self.new_inventory = Inventory(None)
        self._new_revision_id = revision_id
        self.parents = parents
        self.repository = repository

        self._revprops = {}
        if revprops is not None:
            self._revprops.update(revprops)

        if timestamp is None:
            timestamp = time.time()
        # Restrict resolution to 1ms
        self._timestamp = round(timestamp, 3)

        if timezone is None:
            self._timezone = osutils.local_time_offset()
        else:
            self._timezone = int(timezone)

        self._generate_revision_if_needed()
        self.__heads = graph.HeadsCache(repository.get_graph()).heads

    def commit(self, message):
        """Make the actual commit.

        :return: The revision id of the recorded revision.
        """
        rev = _mod_revision.Revision(
                       timestamp=self._timestamp,
                       timezone=self._timezone,
                       committer=self._committer,
                       message=message,
                       inventory_sha1=self.inv_sha1,
                       revision_id=self._new_revision_id,
                       properties=self._revprops)
        rev.parent_ids = self.parents
        self.repository.add_revision(self._new_revision_id, rev,
            self.new_inventory, self._config)
        self.repository.commit_write_group()
        return self._new_revision_id

    def abort(self):
        """Abort the commit that is being built."""
        self.repository.abort_write_group()
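
    # Illustrative usage sketch (not part of the original module): the normal
    # lifecycle of a CommitBuilder, assuming a write-locked ``repository``,
    # a ``tree`` being committed, and an ``entries`` iterable of (path, ie)
    # pairs prepared by the caller:
    #
    #   builder = repository.get_commit_builder(branch, parents, config)
    #   try:
    #       for path, ie in entries:
    #           builder.record_entry_contents(ie, parent_invs, path, tree,
    #               tree.path_content_summary(path))
    #       builder.finish_inventory()
    #       new_revid = builder.commit('commit message')
    #   except:
    #       builder.abort()
    #       raise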

    def revision_tree(self):
        """Return the tree that was just committed.

        After calling commit() this can be called to get a RevisionTree
        representing the newly committed tree. This is preferred to
        calling Repository.revision_tree() because that may require
        deserializing the inventory, while we already have a copy in
        memory.
        """
        return RevisionTree(self.repository, self.new_inventory,
                            self._new_revision_id)

    def finish_inventory(self):
        """Tell the builder that the inventory is finished."""
        if self.new_inventory.root is None:
            symbol_versioning.warn('Root entry should be supplied to'
                ' record_entry_contents, as of bzr 0.10.',
                DeprecationWarning, stacklevel=2)
            self.new_inventory.add(InventoryDirectory(ROOT_ID, '', None))
        self.new_inventory.revision_id = self._new_revision_id
        self.inv_sha1 = self.repository.add_inventory(
            self._new_revision_id,
            self.new_inventory,
            self.parents
            )

    def _gen_revision_id(self):
        """Return new revision-id."""
        return generate_ids.gen_revision_id(self._config.username(),
                                            self._timestamp)

    def _generate_revision_if_needed(self):
        """Create a revision id if None was supplied.

        If the repository cannot support user-specified revision ids
        they should override this function and raise CannotSetRevisionId
        if _new_revision_id is not None.

        :raises: CannotSetRevisionId
        """
        if self._new_revision_id is None:
            self._new_revision_id = self._gen_revision_id()
            self.random_revid = True
        else:
            self.random_revid = False

    def _heads(self, file_id, revision_ids):
        """Calculate the graph heads for revision_ids in the graph of file_id.

        This can use either a per-file graph or a global revision graph as we
        have an identity relationship between the two graphs.
        """
        return self.__heads(revision_ids)

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """
        # In this revision format, root entries have no knit or weave. When
        # serializing out to disk and back in, root.revision is always
        # _new_revision_id.
        ie.revision = self._new_revision_id

    def _get_delta(self, ie, basis_inv, path):
        """Get a delta against the basis inventory for ie."""
        if ie.file_id not in basis_inv:
            # add
            return (None, path, ie.file_id, ie)
        elif ie != basis_inv[ie.file_id]:
            # common but altered
            # TODO: avoid this id2path call.
            return (basis_inv.id2path(ie.file_id), path, ie.file_id, ie)
        else:
            # common, unaltered
            return None
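
    # Illustrative note (not part of the original source): the delta tuples
    # returned above follow the inventory-delta convention
    # (old_path, new_path, file_id, new_entry), for example:
    #
    #   (None, 'a.txt', 'a-id', ie)      # newly added file
    #   ('a.txt', 'b.txt', 'a-id', ie)   # changed (here: renamed) file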

    def record_entry_contents(self, ie, parent_invs, path, tree,
        content_summary):
        """Record the content of ie from tree into the commit if needed.

        Side effect: sets ie.revision when unchanged

        :param ie: An inventory entry present in the commit.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param path: The path the entry is at in the tree.
        :param tree: The tree which contains this entry and should be used to
            obtain content.
        :param content_summary: Summary data from the tree about the paths
            content - stat, length, exec, sha/link target. This is only
            accessed when the entry has a revision of None - that is when it is
            a candidate to commit.
        :return: A tuple (change_delta, version_recorded). change_delta is
            an inventory_delta change for this entry against the basis tree of
            the commit, or None if no change occurred against the basis tree.
            version_recorded is True if a new version of the entry has been
            recorded. For instance, committing a merge where a file was only
            changed on the other side will return (delta, False).
        """
        if self.new_inventory.root is None:
            if ie.parent_id is not None:
                raise errors.RootMissing()
            self._check_root(ie, parent_invs, tree)
        if ie.revision is None:
            kind = content_summary[0]
        else:
            # ie is carried over from a prior commit
            kind = ie.kind
        # XXX: repository specific check for nested tree support goes here - if
        # the repo doesn't want nested trees we skip it ?
        if (kind == 'tree-reference' and
            not self.repository._format.supports_tree_reference):
            # mismatch between commit builder logic and repository:
            # this needs the entry creation pushed down into the builder.
            raise NotImplementedError('Missing repository subtree support.')
        self.new_inventory.add(ie)

        # TODO: slow, take it out of the inner loop.
        try:
            basis_inv = parent_invs[0]
        except IndexError:
            basis_inv = Inventory(root_id=None)

        # ie.revision is always None if the InventoryEntry is considered
        # for committing. We may record the previous parents revision if the
        # content is actually unchanged against a sole head.
        if ie.revision is not None:
            if not self._versioned_root and path == '':
                # repositories that do not version the root set the root's
                # revision to the new commit even when no change occurs, and
                # this masks when a change may have occurred against the basis,
                # so calculate if one happened.
                if ie.file_id in basis_inv:
                    delta = (basis_inv.id2path(ie.file_id), path,
                        ie.file_id, ie)
                else:
                    # add
                    delta = (None, path, ie.file_id, ie)
                return delta, False
            else:
                # we don't need to commit this, because the caller already
                # determined that an existing revision of this file is
                # appropriate.
                return None, (ie.revision == self._new_revision_id)
        # XXX: Friction: parent_candidates should return a list not a dict
        #      so that we don't have to walk the inventories again.
        parent_candidate_entries = ie.parent_candidates(parent_invs)
        head_set = self._heads(ie.file_id, parent_candidate_entries.keys())
        heads = []
        for inv in parent_invs:
            if ie.file_id in inv:
                old_rev = inv[ie.file_id].revision
                if old_rev in head_set:
                    heads.append(inv[ie.file_id].revision)
                    head_set.remove(inv[ie.file_id].revision)

        store = False
        # now we check to see if we need to write a new record to the
        # file-graph.
        # We write a new entry unless there is one head to the ancestors, and
        # the kind-derived content is unchanged.

        # Cheapest check first: no ancestors, or more than one head in the
        # ancestors, we write a new node.
        if len(heads) != 1:
            store = True
        if not store:
            # There is a single head, look it up for comparison
            parent_entry = parent_candidate_entries[heads[0]]
            # if the non-content specific data has changed, we'll be writing a
            # node:
            if (parent_entry.parent_id != ie.parent_id or
                parent_entry.name != ie.name):
                store = True
        # now we need to do content specific checks:
        if not store:
            # if the kind changed the content obviously has
            if kind != parent_entry.kind:
                store = True
        if kind == 'file':
            if content_summary[2] is None:
                raise ValueError("Files must not have executable = None")
            if not store:
                if (# if the file length changed we have to store:
                    parent_entry.text_size != content_summary[1] or
                    # if the exec bit has changed we have to store:
                    parent_entry.executable != content_summary[2]):
                    store = True
                elif parent_entry.text_sha1 == content_summary[3]:
                    # all meta and content is unchanged (using a hash cache
                    # hit to check the sha)
                    ie.revision = parent_entry.revision
                    ie.text_size = parent_entry.text_size
                    ie.text_sha1 = parent_entry.text_sha1
                    ie.executable = parent_entry.executable
                    return self._get_delta(ie, basis_inv, path), False
                else:
                    # Either there is only a hash change (no hash cache entry,
                    # or same size content change), or there is no change on
                    # this file at all.
                    # Provide the parent's hash to the store layer, so that if
                    # the content is unchanged we will not store a new node.
                    nostore_sha = parent_entry.text_sha1
            if store:
                # We want to record a new node regardless of the presence or
                # absence of a content change in the file.
                nostore_sha = None
            ie.executable = content_summary[2]
            lines = tree.get_file(ie.file_id, path).readlines()
            try:
                ie.text_sha1, ie.text_size = self._add_text_to_weave(
                    ie.file_id, lines, heads, nostore_sha)
            except errors.ExistingContent:
                # Turns out that the file content was unchanged, and we were
                # only going to store a new node if it was changed. Carry over
                # the entry.
                ie.revision = parent_entry.revision
                ie.text_size = parent_entry.text_size
                ie.text_sha1 = parent_entry.text_sha1
                ie.executable = parent_entry.executable
                return self._get_delta(ie, basis_inv, path), False
        elif kind == 'directory':
            if not store:
                # all data is meta here, nothing specific to directory, so
                # carry over:
                ie.revision = parent_entry.revision
                return self._get_delta(ie, basis_inv, path), False
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        elif kind == 'symlink':
            current_link_target = content_summary[3]
            if not store:
                # symlink target is not generic metadata, check if it has
                # changed.
                if current_link_target != parent_entry.symlink_target:
                    store = True
            if not store:
                # unchanged, carry over.
                ie.revision = parent_entry.revision
                ie.symlink_target = parent_entry.symlink_target
                return self._get_delta(ie, basis_inv, path), False
            ie.symlink_target = current_link_target
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        elif kind == 'tree-reference':
            if not store:
                if content_summary[3] != parent_entry.reference_revision:
                    store = True
            if not store:
                # unchanged, carry over.
                ie.reference_revision = parent_entry.reference_revision
                ie.revision = parent_entry.revision
                return self._get_delta(ie, basis_inv, path), False
            ie.reference_revision = content_summary[3]
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        else:
            raise NotImplementedError('unknown kind')
        ie.revision = self._new_revision_id
        return self._get_delta(ie, basis_inv, path), True
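
    # Illustrative note (not part of the original source): as used above,
    # content_summary is a 4-tuple whose slots depend on the entry's kind
    # (assumed shapes, inferred from the checks in record_entry_contents):
    #
    #   ('file', text_size, executable, sha1_or_None)
    #   ('directory', None, None, None)
    #   ('symlink', None, None, link_target)
    #   ('tree-reference', None, None, reference_revision)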

    def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
        versionedfile = self.repository.weave_store.get_weave_or_empty(
            file_id, self.repository.get_transaction())
        # Don't change this to add_lines - add_lines_with_ghosts is cheaper
        # than add_lines, and allows committing when a parent is ghosted for
        # some reason.
        # Note: as we read the content directly from the tree, we know it's not
        # been turned into unicode or badly split - but a broken tree
        # implementation could give us bad output from readlines() so this is
        # not a guarantee of safety. What would be better is always checking
        # the content during test suite execution. RBC 20070912
        return versionedfile.add_lines_with_ghosts(
            self._new_revision_id, parents, new_lines,
            nostore_sha=nostore_sha, random_id=self.random_revid,
            check_content=False)[0:2]


class RootCommitBuilder(CommitBuilder):
    """This commitbuilder actually records the root id."""

    # the root entry gets versioned properly by this builder.
    _versioned_root = True

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """


######################################################################
# Repositories


class Repository(object):
    """Repository holding history for one or more branches.

    The repository holds and retrieves historical information including
    revisions and file history. It's normally accessed only by the Branch,
    which views a particular line of development through that history.

    The Repository builds on top of Stores and a Transport, which respectively
    describe the disk data format and the way of accessing the (possibly
    remote) disk.

    :ivar _transport: Transport for file access to repository, typically
        pointing to .bzr/repository.
    """

    # What class to use for a CommitBuilder. Often it's simpler to change this
    # in a Repository class subclass rather than to override
    # get_commit_builder.
    _commit_builder_class = CommitBuilder
    # The search regex used by xml based repositories to determine what things
    # were changed in a single commit.
    _file_ids_altered_regex = lazy_regex.lazy_compile(
        r'file_id="(?P<file_id>[^"]+)"'
        r'.* revision="(?P<revision_id>[^"]+)"'
        )
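
    # Illustrative note (not part of the original source): the regex above is
    # intended to pick file_id/revision_id pairs out of serialised inventory
    # lines such as this hypothetical example:
    #
    #   <file file_id="a-20080101-xyz" ... revision="joe@example.com-1" .../>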

    def abort_write_group(self):
        """Abort the contents accrued within the current write group.

        :seealso: start_write_group.
        """
        if self._write_group is not self.get_transaction():
            # has an unlock or relock occurred ?
            raise errors.BzrError('mismatched lock context and write group.')
        self._abort_write_group()
        self._write_group = None

    def _abort_write_group(self):
        """Template method for per-repository write group cleanup.

        This is called during abort before the write group is considered to be
        finished and should clean up any internal state accrued during the
        write group. There is no requirement that data handed to the
        repository be *not* made available - this is not a rollback - but
        neither should any attempt be made to ensure that data added is fully
        committed. Abort is invoked when an error has occurred so further
        disk or network operations may not be possible or may error and if
        possible should not be attempted.
        """

    def add_inventory(self, revision_id, inv, parents):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
                        is known to have and are in the repository already.

        :returns: The validator (which is a sha1 digest, though what is sha'd
            is repository format specific) of the serialized inventory.
        """
        if not self.is_in_write_group():
            raise AssertionError("%r not in write group" % (self,))
        _mod_revision.check_not_reserved_id(revision_id)
        if not (inv.revision_id is None or inv.revision_id == revision_id):
            raise AssertionError(
                "Mismatch between inventory revision"
                " id and insertion revid (%r, %r)"
                % (inv.revision_id, revision_id))
        if inv.root is None:
            raise AssertionError()
        inv_lines = self._serialise_inventory_to_lines(inv)
        inv_vf = self.get_inventory_weave()
        return self._inventory_add_lines(inv_vf, revision_id, parents,
            inv_lines, check_content=False)

    def _inventory_add_lines(self, inv_vf, revision_id, parents, lines,
        check_content=True):
        """Store lines in inv_vf and return the sha1 of the inventory."""
        final_parents = []
        for parent in parents:
            if parent in inv_vf:
                final_parents.append(parent)
        return inv_vf.add_lines(revision_id, final_parents, lines,
            check_content=check_content)[0]

    def add_revision(self, revision_id, rev, inv=None, config=None):
        """Add rev to the revision store as revision_id.

        :param revision_id: the revision id to use.
        :param rev: The revision object.
        :param inv: The inventory for the revision. if None, it will be looked
                    up in the inventory store.
        :param config: If None no digital signature will be created.
                       If supplied its signature_needed method will be used
                       to determine if a signature should be made.
        """
        # TODO: jam 20070210 Shouldn't we check rev.revision_id and
        #       rev.parent_ids?
        _mod_revision.check_not_reserved_id(revision_id)
        if config is not None and config.signature_needed():
            if inv is None:
                inv = self.get_inventory(revision_id)
            plaintext = Testament(rev, inv).as_short_text()
            self.store_revision_signature(
                gpg.GPGStrategy(config), plaintext, revision_id)
        inventory_vf = self.get_inventory_weave()
        if not revision_id in inventory_vf:
            if inv is None:
                raise errors.WeaveRevisionNotPresent(revision_id,
                                                     inventory_vf)
            else:
                # yes, this is not suitable for adding with ghosts.
                rev.inventory_sha1 = self.add_inventory(revision_id, inv,
                                                        rev.parent_ids)
        else:
            rev.inventory_sha1 = inventory_vf.get_sha1s([revision_id])[0]
        self._revision_store.add_revision(rev, self.get_transaction())
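
    # Illustrative usage sketch (not part of the original source): the
    # expected calling pattern around add_inventory/add_revision, assuming a
    # repository ``repo`` and a fully populated Revision object ``rev``:
    #
    #   repo.lock_write()
    #   repo.start_write_group()
    #   try:
    #       repo.add_inventory(rev.revision_id, inv, rev.parent_ids)
    #       repo.add_revision(rev.revision_id, rev, inv=inv)
    #   except:
    #       repo.abort_write_group()
    #       repo.unlock()
    #       raise
    #   repo.commit_write_group()
    #   repo.unlock()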

    def _add_revision_text(self, revision_id, text):
        revision = self._revision_store._serializer.read_revision_from_string(
            text)
        self._revision_store._add_revision(revision, StringIO(text),
                                           self.get_transaction())

    def all_revision_ids(self):
        """Returns a list of all the revision ids in the repository.

        This is deprecated because code should generally work on the graph
        reachable from a particular revision, and ignore any other revisions
        that might be present. There is no direct replacement method.
        """
        if 'evil' in debug.debug_flags:
            mutter_callsite(2, "all_revision_ids is linear with history.")
        return self._all_revision_ids()

    def _all_revision_ids(self):
        """Returns a list of all the revision ids in the repository.

        These are in as much topological order as the underlying store can
        present.
        """
        raise NotImplementedError(self._all_revision_ids)

    def break_lock(self):
        """Break a lock if one is present from another instance.

        Uses the ui factory to ask for confirmation if the lock may be from
        an active process.
        """
        self.control_files.break_lock()

    @needs_read_lock
    def _eliminate_revisions_not_present(self, revision_ids):
        """Check every revision id in revision_ids to see if we have it.

        Returns a set of the present revisions.
        """
        graph = self.get_graph()
        parent_map = graph.get_parent_map(revision_ids)
        # The old API returned a list, should this actually be a set?
        return parent_map.keys()

    @staticmethod
    def create(a_bzrdir):
        """Construct the current default format repository in a_bzrdir."""
        return RepositoryFormat.get_default_format().initialize(a_bzrdir)

    def __init__(self, _format, a_bzrdir, control_files,
                 _revision_store, control_store, text_store):
        """Instantiate a Repository.

        :param _format: The format of the repository on disk.
        :param a_bzrdir: The BzrDir of the repository.

        In the future we will have a single api for all stores for
        getting file texts, inventories and revisions, then
        this construct will accept instances of those things.
        """
        super(Repository, self).__init__()
        self._format = _format
        # the following are part of the public API for Repository:
        self.bzrdir = a_bzrdir
        self.control_files = control_files
        self._transport = control_files._transport
        self.base = self._transport.base
        self._revision_store = _revision_store
        # backwards compatibility
        self.weave_store = text_store
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = False
        self._reconcile_backsup_inventory = True
        # not right yet - should be more semantically clear ?
        self.control_store = control_store
        self.control_weaves = control_store
        # TODO: make sure to construct the right store classes, etc, depending
        # on whether escaping is required.
        self._warn_if_deprecated()
        self._write_group = None

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__,
                           self.base)

    def has_same_location(self, other):
        """Returns a boolean indicating if this repository is at the same
        location as another repository.

        This might return False even when two repository objects are accessing
        the same physical repository via different URLs.
        """
        if self.__class__ is not other.__class__:
            return False
        return (self._transport.base == other._transport.base)

    def is_in_write_group(self):
        """Return True if there is an open write group.

        :seealso: start_write_group.
        """
        return self._write_group is not None

    def is_locked(self):
        return self.control_files.is_locked()

    def is_write_locked(self):
        """Return True if this object is write locked."""
        return self.is_locked() and self.control_files._lock_mode == 'w'

    def lock_write(self, token=None):
        """Lock this repository for writing.

        This causes caching within the repository object to start accumulating
        data during reads, and allows a 'write_group' to be obtained. Write
        groups must be used for actual data insertion.

        :param token: if this is already locked, then lock_write will fail
            unless the token matches the existing lock.
        :returns: a token if this instance supports tokens, otherwise None.
        :raises TokenLockingNotSupported: when a token is given but this
            instance doesn't support using token locks.
        :raises MismatchedToken: if the specified token doesn't match the token
            of the existing lock.
        :seealso: start_write_group.

        A token should be passed in if you know that you have locked the object
        some other way, and need to synchronise this object's state with that
        fact.

        XXX: this docstring is duplicated in many places, e.g. lockable_files.py
        """
        result = self.control_files.lock_write(token=token)
        self._refresh_data()
        return result

    def lock_read(self):
        self.control_files.lock_read()
        self._refresh_data()

    def get_physical_lock_status(self):
        return self.control_files.get_physical_lock_status()

    def leave_lock_in_place(self):
        """Tell this repository not to release the physical lock when this
        object is unlocked.

        If lock_write doesn't return a token, then this method is not
        supported.
        """
        self.control_files.leave_in_place()

    def dont_leave_lock_in_place(self):
        """Tell this repository to release the physical lock when this
        object is unlocked, even if it didn't originally acquire it.

        If lock_write doesn't return a token, then this method is not
        supported.
        """
        self.control_files.dont_leave_in_place()
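
    # Illustrative sketch (not part of the original source): one way the
    # token API above can be used to share a physical lock between two
    # Repository objects for the same location:
    #
    #   token = repo.lock_write()
    #   try:
    #       if token is not None:
    #           other_repo.lock_write(token=token)
    #           other_repo.unlock()
    #   finally:
    #       repo.unlock()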

    @needs_read_lock
    def gather_stats(self, revid=None, committers=None):
        """Gather statistics from a revision id.

        :param revid: The revision id to gather statistics from, if None, then
            no revision specific statistics are gathered.
        :param committers: Optional parameter controlling whether to grab
            a count of committers from the revision specific statistics.
        :return: A dictionary of statistics. Currently this contains:
            committers: The number of committers if requested.
            firstrev: A tuple with timestamp, timezone for the penultimate left
                most ancestor of revid, if revid is not the NULL_REVISION.
            latestrev: A tuple with timestamp, timezone for revid, if revid is
                not the NULL_REVISION.
            revisions: The total revision count in the repository.
            size: An estimate of the disk size of the repository in bytes.
        """
        result = {}
        if revid and committers:
            result['committers'] = 0
        if revid and revid != _mod_revision.NULL_REVISION:
            if committers:
                all_committers = set()
            revisions = self.get_ancestry(revid)
            # pop the leading None
            revisions.pop(0)
            first_revision = None
            if not committers:
                # ignore the revisions in the middle - just grab first and last
                revisions = revisions[0], revisions[-1]
            for revision in self.get_revisions(revisions):
                if not first_revision:
                    first_revision = revision
                if committers:
                    all_committers.add(revision.committer)
            last_revision = revision
            if committers:
                result['committers'] = len(all_committers)
            result['firstrev'] = (first_revision.timestamp,
                first_revision.timezone)
            result['latestrev'] = (last_revision.timestamp,
                last_revision.timezone)

        # now gather global repository information
        if self.bzrdir.root_transport.listable():
            c, t = self._revision_store.total_size(self.get_transaction())
            result['revisions'] = c
            result['size'] = t
        return result
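
    # Illustrative result shape (not part of the original source), assuming
    # revid and committers were supplied and the transport is listable:
    #
    #   {'committers': 2,
    #    'firstrev': (1199145600.0, 0),
    #    'latestrev': (1204329600.0, 3600),
    #    'revisions': 42,
    #    'size': 123456}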

    def find_branches(self, using=False):
        """Find branches underneath this repository.

        This will include branches inside other branches.

        :param using: If True, list only branches using this repository.
        """
        if using and not self.is_shared():
            try:
                return [self.bzrdir.open_branch()]
            except errors.NotBranchError:
                return []
        class Evaluator(object):

            def __init__(self):
                self.first_call = True

            def __call__(self, bzrdir):
                # On the first call, the parameter is always the bzrdir
                # containing the current repo.
                if not self.first_call:
                    try:
                        repository = bzrdir.open_repository()
                    except errors.NoRepositoryPresent:
                        pass
                    else:
                        return False, (None, repository)
                self.first_call = False
                try:
                    value = (bzrdir.open_branch(), None)
                except errors.NotBranchError:
                    value = (None, None)
                return True, value

        branches = []
        for branch, repository in bzrdir.BzrDir.find_bzrdirs(
                self.bzrdir.root_transport, evaluate=Evaluator()):
            if branch is not None:
                branches.append(branch)
            if not using and repository is not None:
                branches.extend(repository.find_branches())
        return branches

    def get_data_stream(self, revision_ids):
        raise NotImplementedError(self.get_data_stream)

    def get_data_stream_for_search(self, search_result):
        """Get a data stream that can be inserted to a repository.

        :param search_result: A bzrlib.graph.SearchResult selecting the
            revisions to get.
        :return: A data stream that can be inserted into a repository using
            insert_data_stream.
        """
        raise NotImplementedError(self.get_data_stream_for_search)

    def insert_data_stream(self, stream):
        """XXX What does this really do?

        Is it a substitute for fetch?
        Should it manage its own write group ?
        """
        for item_key, bytes in stream:
            if item_key[0] == 'file':
                (file_id,) = item_key[1:]
                knit = self.weave_store.get_weave_or_empty(
                    file_id, self.get_transaction())
            elif item_key == ('inventory',):
                knit = self.get_inventory_weave()
            elif item_key == ('revisions',):
                knit = self._revision_store.get_revision_file(
                    self.get_transaction())
            elif item_key == ('signatures',):
                knit = self._revision_store.get_signature_file(
                    self.get_transaction())
            else:
                raise errors.RepositoryDataStreamError(
                    "Unrecognised data stream key '%s'" % (item_key,))
            decoded_list = bencode.bdecode(bytes)
            format = decoded_list.pop(0)
            data_list = []
            knit_bytes = ''
            for version, options, parents, some_bytes in decoded_list:
                data_list.append((version, options, len(some_bytes), parents))
                knit_bytes += some_bytes
            buffer = StringIO(knit_bytes)
            def reader_func(count):
                if count is None:
                    return buffer.read()
                else:
                    return buffer.read(count)
            knit.insert_data_stream(
                (format, data_list, reader_func))

    @needs_read_lock
    def search_missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
        """Return the revision ids that other has that this does not.

        These are returned in topological order.

        revision_id: only return revision ids included by revision_id.
        """
        return InterRepository.get(other, self).search_missing_revision_ids(
            revision_id, find_ghosts)

    @deprecated_method(symbol_versioning.one_two)
    @needs_read_lock
    def missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
        """Return the revision ids that other has that this does not.

        These are returned in topological order.

        revision_id: only return revision ids included by revision_id.
        """
        keys = self.search_missing_revision_ids(
            other, revision_id, find_ghosts).get_keys()
        other.lock_read()
        try:
            parents = other.get_graph().get_parent_map(keys)
        finally:
            other.unlock()
        return tsort.topo_sort(parents)

    @staticmethod
    def open(base):
        """Open the repository rooted at base.

        For instance, if the repository is at URL/.bzr/repository,
        Repository.open(URL) -> a Repository instance.
        """
        control = bzrdir.BzrDir.open(base)
        return control.open_repository()

    def copy_content_into(self, destination, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This is a destructive operation! Do not use it on existing
        repositories.
        """
        return InterRepository.get(self, destination).copy_content(revision_id)

    def commit_write_group(self):
        """Commit the contents accrued within the current write group.

        :seealso: start_write_group.
        """
        if self._write_group is not self.get_transaction():
            # has an unlock or relock occurred ?
            raise errors.BzrError('mismatched lock context %r and '
                'write group %r.' %
                (self.get_transaction(), self._write_group))
        self._commit_write_group()
        self._write_group = None

    def _commit_write_group(self):
        """Template method for per-repository write group cleanup.

        This is called before the write group is considered to be
        finished and should ensure that all data handed to the repository
        for writing during the write group is safely committed (to the
        extent possible considering file system caching etc).
        """

    def fetch(self, source, revision_id=None, pb=None, find_ghosts=False):
        """Fetch the content required to construct revision_id from source.

        If revision_id is None all content is copied.

        :param find_ghosts: Find and copy revisions in the source that are
            ghosts in the target (and not reachable directly by walking out to
            the first-present revision in target from revision_id).
        """
        # fast path same-url fetch operations
        if self.has_same_location(source):
            # check that last_revision is in 'from' and then return a
            # no-operation.
            if (revision_id is not None and
                not _mod_revision.is_null(revision_id)):
                self.get_revision(revision_id)
            return 0, []
        inter = InterRepository.get(source, self)
        try:
            return inter.fetch(revision_id=revision_id, pb=pb, find_ghosts=find_ghosts)
        except NotImplementedError:
            raise errors.IncompatibleRepositories(source, self)

    def create_bundle(self, target, base, fileobj, format=None):
        return serializer.write_bundle(self, target, base, fileobj, format)

    def get_commit_builder(self, branch, parents, config, timestamp=None,
                           timezone=None, committer=None, revprops=None,
                           revision_id=None):
        """Obtain a CommitBuilder for this repository.

        :param branch: Branch to commit to.
        :param parents: Revision ids of the parents of the new revision.
        :param config: Configuration to use.
        :param timestamp: Optional timestamp recorded for commit.
        :param timezone: Optional timezone for timestamp.
        :param committer: Optional committer to set for commit.
        :param revprops: Optional dictionary of revision properties.
        :param revision_id: Optional revision id.
        """
        result = self._commit_builder_class(self, parents, config,
            timestamp, timezone, committer, revprops, revision_id)
        self.start_write_group()
        return result

    def unlock(self):
        if (self.control_files._lock_count == 1 and
            self.control_files._lock_mode == 'w'):
            if self._write_group is not None:
                self.abort_write_group()
                self.control_files.unlock()
                raise errors.BzrError(
                    'Must end write groups before releasing write locks.')
        self.control_files.unlock()

    @needs_read_lock
    def clone(self, a_bzrdir, revision_id=None):
        """Clone this repository into a_bzrdir using the current format.

        Currently no check is made that the format of this repository and
        the bzrdir format are compatible. FIXME RBC 20060201.

        :return: The newly created destination repository.
        """
        # TODO: deprecate after 0.16; cloning this with all its settings is
        # probably not very useful -- mbp 20070423
        dest_repo = self._create_sprouting_repo(a_bzrdir, shared=self.is_shared())
        self.copy_content_into(dest_repo, revision_id)
        return dest_repo

    def start_write_group(self):
        """Start a write group in the repository.

        Write groups are used by repositories which do not have a 1:1 mapping
        between file ids and backend store to manage the insertion of data from
        both fetch and commit operations.

        A write lock is required around the start_write_group/commit_write_group
        for the support of lock-requiring repository formats.

        One can only insert data into a repository inside a write group.

        :return: None.
        """
        if not self.is_write_locked():
            raise errors.NotWriteLocked(self)
        if self._write_group:
            raise errors.BzrError('already in a write group')
        self._start_write_group()
        # so we can detect unlock/relock - the write group is now entered.
        self._write_group = self.get_transaction()

    def _start_write_group(self):
        """Template method for per-repository write group startup.

        This is called before the write group is considered to be
        entered.
        """

    @needs_read_lock
    def sprout(self, to_bzrdir, revision_id=None):
        """Create a descendant repository for new development.

        Unlike clone, this does not copy the settings of the repository.
        """
        dest_repo = self._create_sprouting_repo(to_bzrdir, shared=False)
        dest_repo.fetch(self, revision_id=revision_id)
        return dest_repo

    def _create_sprouting_repo(self, a_bzrdir, shared):
        if not isinstance(a_bzrdir._format, self.bzrdir._format.__class__):
            # use target default format.
            dest_repo = a_bzrdir.create_repository()
        else:
            # Most control formats need the repository to be specifically
            # created, but on some old all-in-one formats it's not needed
            try:
                dest_repo = self._format.initialize(a_bzrdir, shared=shared)
            except errors.UninitializableFormat:
                dest_repo = a_bzrdir.open_repository()
        return dest_repo

    @needs_read_lock
    def has_revision(self, revision_id):
        """True if this repository has a copy of the revision."""
        return revision_id in self.has_revisions((revision_id,))

    def has_revisions(self, revision_ids):
        """Probe to find out the presence of multiple revisions.

        :param revision_ids: An iterable of revision_ids.
        :return: A set of the revision_ids that were present.
        """
        raise NotImplementedError(self.has_revisions)

    @needs_read_lock
    def get_revision(self, revision_id):
        """Return the Revision object for a named revision."""
        return self.get_revisions([revision_id])[0]

    @needs_read_lock
    def get_revision_reconcile(self, revision_id):
        """'reconcile' helper routine that allows access to a revision always.

        This variant of get_revision does not cross check the weave graph
        against the revision one as get_revision does: but it should only
        be used by reconcile, or reconcile-alike commands that are correcting
        or testing the revision graph.
        """
        return self._get_revisions([revision_id])[0]

    @needs_read_lock
    def get_revisions(self, revision_ids):
        """Get many revisions at once."""
        return self._get_revisions(revision_ids)

    @needs_read_lock
    def _get_revisions(self, revision_ids):
        """Core work logic to get many revisions without sanity checks."""
        for rev_id in revision_ids:
            if not rev_id or not isinstance(rev_id, basestring):
                raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
        revs = self._revision_store.get_revisions(revision_ids,
                                                  self.get_transaction())
        return revs

    @needs_read_lock
    def get_revision_xml(self, revision_id):
        # TODO: jam 20070210 This shouldn't be necessary since get_revision
        #       would have already done it.
        # TODO: jam 20070210 Just use _serializer.write_revision_to_string()
        rev = self.get_revision(revision_id)
        rev_tmp = StringIO()
        # the current serializer..
        self._revision_store._serializer.write_revision(rev, rev_tmp)
        rev_tmp.seek(0)
        return rev_tmp.getvalue()

    def get_deltas_for_revisions(self, revisions):
        """Produce a generator of revision deltas.

        Note that the input is a sequence of REVISIONS, not revision_ids.
        Trees will be held in memory until the generator exits.
        Each delta is relative to the revision's lefthand predecessor.
        """
        required_trees = set()
        for revision in revisions:
            required_trees.add(revision.revision_id)
            required_trees.update(revision.parent_ids[:1])
        trees = dict((t.get_revision_id(), t) for
                     t in self.revision_trees(required_trees))
        for revision in revisions:
            if not revision.parent_ids:
                old_tree = self.revision_tree(None)
            else:
                old_tree = trees[revision.parent_ids[0]]
            yield trees[revision.revision_id].changes_from(old_tree)

    @needs_read_lock
    def get_revision_delta(self, revision_id):
        """Return the delta for one revision.

        The delta is relative to the left-hand predecessor of the
        revision.
        """
        r = self.get_revision(revision_id)
        return list(self.get_deltas_for_revisions([r]))[0]
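
    # Illustrative usage (not part of the original source), assuming a
    # read-locked repository ``repo`` and a known revision id:
    #
    #   delta = repo.get_revision_delta('some-revision-id')
    #   for path, file_id, kind in delta.added:
    #       print 'added', path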

    @needs_write_lock
    def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
        signature = gpg_strategy.sign(plaintext)
        self.add_signature_text(revision_id, signature)

    @needs_write_lock
    def add_signature_text(self, revision_id, signature):
        self._revision_store.add_revision_signature_text(revision_id,
                                                         signature,
                                                         self.get_transaction())

    def find_text_key_references(self):
        """Find the text key references within the repository.

        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. The inventory texts from all present
            revision ids are assessed to generate this report.
        """
        revision_ids = self.all_revision_ids()
        w = self.get_inventory_weave()
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._find_text_key_references_from_xml_inventory_lines(
                w.iter_lines_added_or_present_in_versions(revision_ids, pb=pb))
        finally:
            pb.finished()

    def _find_text_key_references_from_xml_inventory_lines(self,
        line_iterator):
        """Core routine for extracting references to texts from inventories.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. Note that if that revision_id was
            not part of the line_iterator's output then False will be given -
            even though it may actually refer to that key.
        """
        if not self._serializer.support_altered_by_hack:
            raise AssertionError(
                "_find_text_key_references_from_xml_inventory_lines only "
                "supported for branches which store inventory as unnested xml"
                ", not on %r" % self)
        result = {}

        # this code needs to read every new line in every inventory for the
        # inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
        # not present in one of those inventories is unnecessary but not
        # harmful because we are filtering by the revision id marker in the
        # inventory lines : we only select file ids altered in one of those
        # revisions. We don't need to see all lines in the inventory because
        # only those added in an inventory in rev X can contain a revision=X
        # line.
        unescape_revid_cache = {}
        unescape_fileid_cache = {}

        # jam 20061218 In a big fetch, this handles hundreds of thousands
        # of lines, so it has had a lot of inlining and optimizing done.
        # Sorry that it is a little bit messy.
        # Move several functions to be local variables, since this is a long
        # running loop.
        search = self._file_ids_altered_regex.search
        unescape = _unescape_xml
        setdefault = result.setdefault
        for line, version_id in line_iterator:
            match = search(line)
            if match is None:
                continue
            # One call to match.group() returning multiple items is quite a
            # bit faster than 2 calls to match.group() each returning 1
            file_id, revision_id = match.group('file_id', 'revision_id')

            # Inlining the cache lookups helps a lot when you make 170,000
            # lines and 350k ids, versus 8.4 unique ids.
            # Using a cache helps in 2 ways:
            #   1) Avoids unnecessary decoding calls
            #   2) Re-uses cached strings, which helps in future set and
            #      equality checks.
            # (2) is enough that removing encoding entirely along with
            # the cache (so we are using plain strings) results in no
            # performance improvement.
            try:
                revision_id = unescape_revid_cache[revision_id]
            except KeyError:
                unescaped = unescape(revision_id)
                unescape_revid_cache[revision_id] = unescaped
                revision_id = unescaped

            # Note that unconditionally unescaping means that we deserialise
            # every fileid, which for general 'pull' is not great, but we
            # don't really want to have so many fulltexts that this matters
            # anyway. RBC 20071114.
            try:
                file_id = unescape_fileid_cache[file_id]
            except KeyError:
                unescaped = unescape(file_id)
                unescape_fileid_cache[file_id] = unescaped
                file_id = unescaped

            key = (file_id, revision_id)
            setdefault(key, False)
            if revision_id == version_id:
                result[key] = True
        return result

    def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
        revision_ids):
        """Helper routine for fileids_altered_by_revision_ids.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :param revision_ids: The revision ids to filter for. This should be a
            set or other type which supports efficient __contains__ lookups,
            as the revision id from each parsed line will be looked up in the
            revision_ids filter.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids
            that altered it listed explicitly.
        """
        result = {}
        setdefault = result.setdefault
        for file_id, revision_id in \
            self._find_text_key_references_from_xml_inventory_lines(
                line_iterator).iterkeys():
            # once data is all ensured-consistent; then this is
            # if revision_id == version_id
            if revision_id in revision_ids:
                setdefault(file_id, set()).add(revision_id)
        return result

    def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
        """Find the file ids and versions affected by revisions.

        :param revisions: an iterable containing revision ids.
        :param _inv_weave: The inventory weave from this repository or None.
            If None, the inventory weave will be opened automatically.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids
            that altered it listed explicitly.
        """
        selected_revision_ids = set(revision_ids)
        w = _inv_weave or self.get_inventory_weave()
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._find_file_ids_from_xml_inventory_lines(
                w.iter_lines_added_or_present_in_versions(
                    selected_revision_ids, pb=pb),
                selected_revision_ids)
        finally:
            pb.finished()
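
    # Illustrative result shape (not part of the original source): with two
    # revisions touching overlapping files, one might get, e.g.:
    #
    #   {'file-id-1': set(['rev-1', 'rev-2']),
    #    'file-id-2': set(['rev-2'])}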

    def iter_files_bytes(self, desired_files):
        """Iterate through file versions.

        Files will not necessarily be returned in the order they occur in
        desired_files. No specific order is guaranteed.

        Yields pairs of identifier, bytes_iterator. identifier is an opaque
        value supplied by the caller as part of desired_files. It should
        uniquely identify the file version in the caller's context. (Examples:
        an index number or a TreeTransform trans_id.)

        bytes_iterator is an iterable of bytestrings for the file. The
        kind of iterable and length of the bytestrings are unspecified, but for
        this implementation, it is a list of lines produced by
        VersionedFile.get_lines().

        :param desired_files: a list of (file_id, revision_id, identifier)
            triples
        """
        transaction = self.get_transaction()
        for file_id, revision_id, callable_data in desired_files:
            try:
                weave = self.weave_store.get_weave(file_id, transaction)
            except errors.NoSuchFile:
                raise errors.NoSuchIdInRepository(self, file_id)
            yield callable_data, weave.get_lines(revision_id)
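
    # Illustrative usage (not part of the original source):
    #
    #   desired = [('file-id-1', 'rev-1', 'a'), ('file-id-2', 'rev-1', 'b')]
    #   for identifier, lines in repo.iter_files_bytes(desired):
    #       text = ''.join(lines)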

    def _generate_text_key_index(self, text_key_references=None,
        ancestors=None):
        """Generate a new text key index for the repository.

        This is an expensive function that will take considerable time to run.

        :return: A dict mapping text keys ((file_id, revision_id) tuples) to a
            list of parents, also text keys. When a given key has no parents,
            the parents list will be [NULL_REVISION].
        """
        # All revisions, to find inventory parents.
        if ancestors is None:
            graph = self.get_graph()
            ancestors = graph.get_parent_map(self.all_revision_ids())
        if text_key_references is None:
            text_key_references = self.find_text_key_references()
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_generate_text_key_index(ancestors,
                text_key_references, pb)
        finally:
            pb.finished()

    def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
        """Helper for _generate_text_key_index to avoid deep nesting."""
        revision_order = tsort.topo_sort(ancestors)
        invalid_keys = set()
        revision_keys = {}
        for revision_id in revision_order:
            revision_keys[revision_id] = set()
        text_count = len(text_key_references)
        # a cache of the text keys to allow reuse; costs a dict of all the
        # keys, but saves a 2-tuple for every child of a given key.
        text_key_cache = {}
        for text_key, valid in text_key_references.iteritems():
            if not valid:
                invalid_keys.add(text_key)
            else:
                revision_keys[text_key[1]].add(text_key)
            text_key_cache[text_key] = text_key
        del text_key_references
        text_index = {}
        text_graph = graph.Graph(graph.DictParentsProvider(text_index))
        NULL_REVISION = _mod_revision.NULL_REVISION
        # Set a cache with a size of 10 - this suffices for bzr.dev but may be
        # too small for large or very branchy trees. However, for 55K path
        # trees, it would be easy to use too much memory trivially. Ideally we
        # could gauge this by looking at available real memory etc, but this is
        # always a tricky proposition.
        inventory_cache = lru_cache.LRUCache(10)
        batch_size = 10 # should be ~150MB on a 55K path tree
        batch_count = len(revision_order) / batch_size + 1
        processed_texts = 0
        pb.update("Calculating text parents.", processed_texts, text_count)
        for offset in xrange(batch_count):
            to_query = revision_order[offset * batch_size:(offset + 1) *
                batch_size]
            if not to_query:
                break
            for rev_tree in self.revision_trees(to_query):
                revision_id = rev_tree.get_revision_id()
                parent_ids = ancestors[revision_id]
                for text_key in revision_keys[revision_id]:
                    pb.update("Calculating text parents.", processed_texts)
                    processed_texts += 1
                    candidate_parents = []
                    for parent_id in parent_ids:
                        parent_text_key = (text_key[0], parent_id)
                        try:
                            check_parent = parent_text_key not in \
                                revision_keys[parent_id]
                        except KeyError:
                            # the parent parent_id is a ghost:
                            check_parent = False
                            # truncate the derived graph against this ghost.
                            parent_text_key = None
                        if check_parent:
                            # look at the parent commit details inventories to
                            # determine possible candidates in the per file graph.
                            # TODO: cache here.
                            try:
                                inv = inventory_cache[parent_id]
                            except KeyError:
                                inv = self.revision_tree(parent_id).inventory
                                inventory_cache[parent_id] = inv
                            parent_entry = inv._byid.get(text_key[0], None)
                            if parent_entry is not None:
                                parent_text_key = (
                                    text_key[0], parent_entry.revision)
                            else:
                                parent_text_key = None
                        if parent_text_key is not None:
                            candidate_parents.append(
                                text_key_cache[parent_text_key])
                    parent_heads = text_graph.heads(candidate_parents)
                    new_parents = list(parent_heads)
                    new_parents.sort(key=lambda x:candidate_parents.index(x))
                    if new_parents == []:
                        new_parents = [NULL_REVISION]
                    text_index[text_key] = new_parents

        for text_key in invalid_keys:
            text_index[text_key] = [NULL_REVISION]
        return text_index
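
    # Illustrative result shape (not part of the original source): each text
    # key maps to its parent text keys, with NULL_REVISION ('null:') for
    # parentless keys, e.g.:
    #
    #   {('file-id-1', 'rev-1'): ['null:'],
    #    ('file-id-1', 'rev-2'): [('file-id-1', 'rev-1')]}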

    def item_keys_introduced_by(self, revision_ids, _files_pb=None):
        """Get an iterable listing the keys of all the data introduced by a set
        of revision IDs.

        The keys will be ordered so that the corresponding items can be safely
        fetched and inserted in that order.

        :returns: An iterable producing tuples of (knit-kind, file-id,
            versions). knit-kind is one of 'file', 'inventory', 'signatures',
            'revisions'. file-id is None unless knit-kind is 'file'.
        """
        # XXX: it's a bit weird to control the inventory weave caching in this
        # generator. Ideally the caching would be done in fetch.py I think. Or
        # maybe this generator should explicitly have the contract that it
        # should not be iterated until the previously yielded item has been
        # processed?
        inv_w = self.get_inventory_weave()

        # file ids that changed
        file_ids = self.fileids_altered_by_revision_ids(revision_ids, inv_w)
        count = 0
        num_file_ids = len(file_ids)
        for file_id, altered_versions in file_ids.iteritems():
            if _files_pb is not None:
                _files_pb.update("fetch texts", count, num_file_ids)
            count += 1
            yield ("file", file_id, altered_versions)
        # We're done with the files_pb. Note that it is finished by the
        # caller, just as it was created by the caller.
        del _files_pb

        # inventory
        yield ("inventory", None, revision_ids)

        # signatures
        revisions_with_signatures = set()
        for rev_id in revision_ids:
            try:
                self.get_signature_text(rev_id)
            except errors.NoSuchRevision:
                # not signed.
                pass
            else:
                revisions_with_signatures.add(rev_id)
        yield ("signatures", None, revisions_with_signatures)

        # revisions
        yield ("revisions", None, revision_ids)

    @needs_read_lock
    def get_inventory_weave(self):
        return self.control_weaves.get_weave('inventory',
            self.get_transaction())

    @needs_read_lock
    def get_inventory(self, revision_id):
        """Get Inventory object by revision id."""
        return self.iter_inventories([revision_id]).next()

    def iter_inventories(self, revision_ids):
        """Get many inventories by revision_ids.

        This will buffer some or all of the texts used in constructing the
        inventories in memory, but will only parse a single inventory at a
        time.

        :return: An iterator of inventories.
        """
        if ((None in revision_ids)
            or (_mod_revision.NULL_REVISION in revision_ids)):
            raise ValueError('cannot get null revision inventory')
        return self._iter_inventories(revision_ids)

    def _iter_inventories(self, revision_ids):
        """single-document based inventory iteration."""
        texts = self.get_inventory_weave().get_texts(revision_ids)
        for text, revision_id in zip(texts, revision_ids):
            yield self.deserialise_inventory(revision_id, text)

    def deserialise_inventory(self, revision_id, xml):
        """Transform the xml into an inventory object.

        :param revision_id: The expected revision id of the inventory.
        :param xml: A serialised inventory.
        """
        result = self._serializer.read_inventory_from_string(xml, revision_id)
        if result.revision_id != revision_id:
            raise AssertionError('revision id mismatch %s != %s' % (
                result.revision_id, revision_id))
        return result

    def serialise_inventory(self, inv):
        return self._serializer.write_inventory_to_string(inv)

    def _serialise_inventory_to_lines(self, inv):
        return self._serializer.write_inventory_to_lines(inv)

    def get_serializer_format(self):
        return self._serializer.format_num

    @needs_read_lock
    def get_inventory_xml(self, revision_id):
        """Get inventory XML as a file object."""
        try:
            iw = self.get_inventory_weave()
            return iw.get_text(revision_id)
        except IndexError:
            raise errors.HistoryMissing(self, 'inventory', revision_id)

    @needs_read_lock
    def get_inventory_sha1(self, revision_id):
        """Return the sha1 hash of the inventory entry."""
        return self.get_revision(revision_id).inventory_sha1

    @deprecated_method(symbol_versioning.one_four)
    def get_revision_graph(self, revision_id=None):
        """Return a dictionary containing the revision graph.

        NB: This method should not be used as it accesses the entire graph all
        at once, which is much more data than most operations should require.

        :param revision_id: The revision_id to get a graph from. If None, then
            the entire revision graph is returned. This is a deprecated mode of
            operation and will be removed in the future.
        :return: a dictionary of revision_id->revision_parents_list.
        """
        raise NotImplementedError(self.get_revision_graph)

    @deprecated_method(symbol_versioning.one_three)
    def get_revision_graph_with_ghosts(self, revision_ids=None):
        """Return a graph of the revisions with ghosts marked as applicable.

        :param revision_ids: an iterable of revisions to graph or None for all.
        :return: a Graph object with the graph reachable from revision_ids.
        """
        if 'evil' in debug.debug_flags:
            mutter_callsite(3,
                "get_revision_graph_with_ghosts scales with size of history.")
        result = deprecated_graph.Graph()
        if not revision_ids:
            pending = set(self.all_revision_ids())
            required = set([])
        else:
            pending = set(revision_ids)
            # special case NULL_REVISION
            if _mod_revision.NULL_REVISION in pending:
                pending.remove(_mod_revision.NULL_REVISION)
            required = set(pending)
        done = set([])
        while len(pending):
            revision_id = pending.pop()
            try:
                rev = self.get_revision(revision_id)
            except errors.NoSuchRevision:
                if revision_id in required:
                    raise
                # a ghost
                result.add_ghost(revision_id)
                continue
            for parent_id in rev.parent_ids:
                # is this queued or done ?
                if (parent_id not in pending and
                    parent_id not in done):
                    # no, queue it.
                    pending.add(parent_id)
            result.add_node(revision_id, rev.parent_ids)
            done.add(revision_id)
        return result

    def iter_reverse_revision_history(self, revision_id):
        """Iterate backwards through revision ids in the lefthand history.

        :param revision_id: The revision id to start with. All its lefthand
            ancestors will be traversed.
        """
        graph = self.get_graph()
        next_id = revision_id
        while True:
            if next_id in (None, _mod_revision.NULL_REVISION):
                return
            yield next_id
            # Note: The following line may raise KeyError in the event of
            # truncated history. We decided not to have a try:except:raise
            # RevisionNotPresent here until we see a use for it, because of the
            # cost in an inner loop that is by its very nature O(history).
            # Robert Collins 20080326
            parents = graph.get_parent_map([next_id])[next_id]
            if len(parents) == 0:
                return
            else:
                next_id = parents[0]
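
    # Illustrative usage (not part of the original source): walking the
    # mainline back from a tip revision, assuming a read-locked repository:
    #
    #   for revision_id in repo.iter_reverse_revision_history(tip_rev_id):
    #       print revision_id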

    @needs_read_lock
    def get_revision_inventory(self, revision_id):
        """Return inventory of a past revision."""
        # TODO: Unify this with get_inventory()
        # bzr 0.0.6 and later imposes the constraint that the inventory_id
        # must be the same as its revision, so this is trivial.
        if revision_id is None:
            # This does not make sense: if there is no revision,
            # then it is the current tree inventory surely ?!
            # and thus get_root_id() is something that looks at the last
            # commit on the branch, and the get_root_id is an inventory check.
            raise NotImplementedError
            # return Inventory(self.get_root_id())
        else:
            return self.get_inventory(revision_id)

    @needs_read_lock
    def is_shared(self):
        """Return True if this repository is flagged as a shared repository."""
        raise NotImplementedError(self.is_shared)

    @needs_write_lock
    def reconcile(self, other=None, thorough=False):
        """Reconcile this repository."""
        from bzrlib.reconcile import RepoReconciler
        reconciler = RepoReconciler(self, thorough=thorough)
        reconciler.reconcile()
        return reconciler

    def _refresh_data(self):
        """Helper called from lock_* to ensure coherency with disk.

        The default implementation does nothing; it is however possible
        for repositories to maintain loaded indices across multiple locks
        by checking inside their implementation of this method to see
        whether their indices are still valid. This depends of course on
        the disk format being validatable in this manner.
        """

    @needs_read_lock
    def revision_tree(self, revision_id):
        """Return Tree for a revision on this branch.

        `revision_id` may be None for the empty tree revision.
        """
        # TODO: refactor this to use an existing revision object
        # so we don't need to read it in twice.
        if revision_id is None or revision_id == _mod_revision.NULL_REVISION:
            return RevisionTree(self, Inventory(root_id=None),
                                _mod_revision.NULL_REVISION)
        else:
            inv = self.get_revision_inventory(revision_id)
            return RevisionTree(self, inv, revision_id)
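
    # Illustrative sketch, not part of the original module: reading a file's
    # text as it existed in a given revision.  `repo`, `rev_id` and the
    # 'README' path are assumed names:
    #
    #   tree = repo.revision_tree(rev_id)
    #   file_id = tree.inventory.path2id('README')
    #   if file_id is not None:
    #       text = tree.get_file(file_id).read()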

    def revision_trees(self, revision_ids):
        """Return Trees for revisions on this branch.

        `revision_ids` may not contain None or 'null:'.
        """
        inventories = self.iter_inventories(revision_ids)
        for inv in inventories:
            yield RevisionTree(self, inv, inv.revision_id)

    @needs_read_lock
    def get_ancestry(self, revision_id, topo_sorted=True):
        """Return a list of revision-ids integrated by a revision.

        The first element of the list is always None, indicating the origin
        revision.  This might change when we have history horizons, or
        perhaps we should have a new API.

        This is topologically sorted.
        """
        if _mod_revision.is_null(revision_id):
            return [None]
        if not self.has_revision(revision_id):
            raise errors.NoSuchRevision(self, revision_id)
        w = self.get_inventory_weave()
        candidates = w.get_ancestry(revision_id, topo_sorted)
        return [None] + candidates # self._eliminate_revisions_not_present(candidates)

    def pack(self):
        """Compress the data within the repository.

        This operation only makes sense for some repository types. For other
        types it should be a no-op that just returns.

        This stub method does not require a lock, but subclasses should use
        @needs_write_lock as this is a long running call it's reasonable to
        implicitly lock for the user.
        """

    @needs_read_lock
    def print_file(self, file, revision_id):
        """Print `file` to stdout.

        FIXME RBC 20060125 as John Meinel points out this is a bad api
        - it writes to stdout, it assumes that that is valid etc. Fix
        by creating a new more flexible convenience function.
        """
        tree = self.revision_tree(revision_id)
        # use inventory as it was in that revision
        file_id = tree.inventory.path2id(file)
        if not file_id:
            # TODO: jam 20060427 Write a test for this code path
            #       it had a bug in it, and was raising the wrong
            #       exception.
            raise errors.BzrError("%r is not present in revision %s" % (file, revision_id))
        tree.print_file(file_id)

    def get_transaction(self):
        return self.control_files.get_transaction()

    @deprecated_method(symbol_versioning.one_five)
    def revision_parents(self, revision_id):
        return self.get_inventory_weave().parent_names(revision_id)

    @deprecated_method(symbol_versioning.one_one)
    def get_parents(self, revision_ids):
        """See StackedParentsProvider.get_parents"""
        parent_map = self.get_parent_map(revision_ids)
        return [parent_map.get(r, None) for r in revision_ids]

    def get_parent_map(self, keys):
        """See graph._StackedParentsProvider.get_parent_map"""
        parent_map = {}
        for revision_id in keys:
            if revision_id == _mod_revision.NULL_REVISION:
                parent_map[revision_id] = ()
            else:
                try:
                    parent_id_list = self.get_revision(revision_id).parent_ids
                except errors.NoSuchRevision:
                    # absent revisions are simply omitted from the map.
                    pass
                else:
                    if len(parent_id_list) == 0:
                        parent_ids = (_mod_revision.NULL_REVISION,)
                    else:
                        parent_ids = tuple(parent_id_list)
                    parent_map[revision_id] = parent_ids
        return parent_map
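
    # Illustrative sketch, not part of the original module: absent revisions
    # are silently omitted from the returned map, parentless revisions map to
    # (NULL_REVISION,) and NULL_REVISION itself maps to ().  `repo` and the
    # revision ids are assumed names:
    #
    #   parent_map = repo.get_parent_map(['rev-2', 'missing-rev'])
    #   # parent_map == {'rev-2': ('rev-1',)}; 'missing-rev' is dropped.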

    def _make_parents_provider(self):
        return self

    def get_graph(self, other_repository=None):
        """Return the graph walker for this repository format."""
        parents_provider = self._make_parents_provider()
        if (other_repository is not None and
            not self.has_same_location(other_repository)):
            parents_provider = graph._StackedParentsProvider(
                [parents_provider, other_repository._make_parents_provider()])
        return graph.Graph(parents_provider)
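
    # Illustrative sketch, not part of the original module: a combined graph
    # over two repositories, useful when reasoning about revisions that only
    # exist in one of them.  `repo` and `other_repo` are assumed names:
    #
    #   g = repo.get_graph(other_repo)
    #   heads = g.heads(['rev-a', 'rev-b'])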

    def _get_versioned_file_checker(self):
        """Return an object suitable for checking versioned files."""
        return _VersionedFileChecker(self)

    def revision_ids_to_search_result(self, result_set):
        """Convert a set of revision ids to a graph SearchResult."""
        result_parents = set()
        for parents in self.get_graph().get_parent_map(
            result_set).itervalues():
            result_parents.update(parents)
        included_keys = result_set.intersection(result_parents)
        start_keys = result_set.difference(included_keys)
        exclude_keys = result_parents.difference(result_set)
        result = graph.SearchResult(start_keys, exclude_keys,
            len(result_set), result_set)
        return result
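
    # Illustrative sketch, not part of the original module: a SearchResult
    # describes a revision set by its graph boundary, so the same set can be
    # recreated elsewhere.  `repo` and `revision_ids` are assumed names:
    #
    #   result = repo.revision_ids_to_search_result(set(revision_ids))
    #   keys = result.get_keys()    # the same revision ids back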

    def set_make_working_trees(self, new_value):
        """Set the policy flag for making working trees when creating branches.

        This only applies to branches that use this repository.

        The default is 'True'.
        :param new_value: True to restore the default, False to disable making
                          working trees.
        """
        raise NotImplementedError(self.set_make_working_trees)

    def make_working_trees(self):
        """Returns the policy for making working trees on new branches."""
        raise NotImplementedError(self.make_working_trees)

    @needs_write_lock
    def sign_revision(self, revision_id, gpg_strategy):
        plaintext = Testament.from_revision(self, revision_id).as_short_text()
        self.store_revision_signature(gpg_strategy, plaintext, revision_id)

    @needs_read_lock
    def has_signature_for_revision_id(self, revision_id):
        """Query for a revision signature for revision_id in the repository."""
        return self._revision_store.has_signature(revision_id,
                                                  self.get_transaction())

    @needs_read_lock
    def get_signature_text(self, revision_id):
        """Return the text for a signature."""
        return self._revision_store.get_signature_text(revision_id,
                                                       self.get_transaction())

    @needs_read_lock
    def check(self, revision_ids=None):
        """Check consistency of all history of given revision_ids.

        Different repository implementations should override _check().

        :param revision_ids: A non-empty list of revision_ids whose ancestry
             will be checked.  Typically the last revision_id of a branch.
        """
        return self._check(revision_ids)

    def _check(self, revision_ids):
        result = check.Check(self)
        result.check()
        return result

    def _warn_if_deprecated(self):
        global _deprecation_warning_done
        if _deprecation_warning_done:
            return
        _deprecation_warning_done = True
        warning("Format %s for %s is deprecated - please use 'bzr upgrade' to get better performance"
                % (self._format, self.bzrdir.transport.base))

    def supports_rich_root(self):
        return self._format.rich_root_data

    def _check_ascii_revisionid(self, revision_id, method):
        """Private helper for ascii-only repositories."""
        # weave repositories refuse to store revisionids that are non-ascii.
        if revision_id is not None:
            # weaves require ascii revision ids.
            if isinstance(revision_id, unicode):
                try:
                    revision_id.encode('ascii')
                except UnicodeEncodeError:
                    raise errors.NonAsciiRevisionId(method, self)
            else:
                try:
                    revision_id.decode('ascii')
                except UnicodeDecodeError:
                    raise errors.NonAsciiRevisionId(method, self)

    def revision_graph_can_have_wrong_parents(self):
        """Is it possible for this repository to have a revision graph with
        incorrect parents?

        If True, then this repository must also implement
        _find_inconsistent_revision_parents so that check and reconcile can
        check for inconsistencies before proceeding with other checks that may
        depend on the revision index being consistent.
        """
        raise NotImplementedError(self.revision_graph_can_have_wrong_parents)


# remove these delegates a while after bzr 0.15
def __make_delegated(name, from_module):
    def _deprecated_repository_forwarder():
        symbol_versioning.warn('%s moved to %s in bzr 0.15'
            % (name, from_module),
            DeprecationWarning,
            stacklevel=2)
        m = __import__(from_module, globals(), locals(), [name])
        try:
            return getattr(m, name)
        except AttributeError:
            raise AttributeError('module %s has no name %s'
                                 % (m, name))
    globals()[name] = _deprecated_repository_forwarder

for _name in [
        'AllInOneRepository',
        'WeaveMetaDirRepository',
        'PreSplitOutRepositoryFormat',
        'RepositoryFormat4',
        'RepositoryFormat5',
        'RepositoryFormat6',
        'RepositoryFormat7',
        ]:
    __make_delegated(_name, 'bzrlib.repofmt.weaverepo')

for _name in [
        'KnitRepository',
        'RepositoryFormatKnit',
        'RepositoryFormatKnit1',
        ]:
    __make_delegated(_name, 'bzrlib.repofmt.knitrepo')


def install_revision(repository, rev, revision_tree):
    """Install all revision data into a repository."""
    install_revisions(repository, [(rev, revision_tree, None)])


def install_revisions(repository, iterable, num_revisions=None, pb=None):
    """Install all revision data into a repository.

    Accepts an iterable of revision, tree, signature tuples.  The signature
    may be None.
    """
    repository.start_write_group()
    try:
        for n, (revision, revision_tree, signature) in enumerate(iterable):
            _install_revision(repository, revision, revision_tree, signature)
            if pb is not None:
                pb.update('Transferring revisions', n + 1, num_revisions)
    except:
        repository.abort_write_group()
        raise
    else:
        repository.commit_write_group()
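
# Illustrative sketch, not part of the original module: copying a handful of
# revisions with their trees and (optional) signatures into a write-locked
# repository.  `target_repo`, `source_repo` and `ids` are assumed names:
#
#   triples = [(source_repo.get_revision(r),
#               source_repo.revision_tree(r), None) for r in ids]
#   install_revisions(target_repo, triples, num_revisions=len(ids))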


def _install_revision(repository, rev, revision_tree, signature):
    """Install all revision data into a repository."""
    present_parents = []
    parent_trees = {}
    for p_id in rev.parent_ids:
        if repository.has_revision(p_id):
            present_parents.append(p_id)
            parent_trees[p_id] = repository.revision_tree(p_id)
        else:
            parent_trees[p_id] = repository.revision_tree(None)

    inv = revision_tree.inventory
    entries = inv.iter_entries()
    # backwards compatibility hack: skip the root id.
    if not repository.supports_rich_root():
        path, root = entries.next()
        if root.revision != rev.revision_id:
            raise errors.IncompatibleRevision(repr(repository))
    # Add the texts that are not already present
    for path, ie in entries:
        w = repository.weave_store.get_weave_or_empty(ie.file_id,
                repository.get_transaction())
        if ie.revision not in w:
            text_parents = []
            # FIXME: TODO: The following loop *may* be overlapping/duplicate
            # with InventoryEntry.find_previous_heads(). if it is, then there
            # is a latent bug here where the parents may have ancestors of each
            # other.
            for revision, tree in parent_trees.iteritems():
                if ie.file_id not in tree:
                    continue
                parent_id = tree.inventory[ie.file_id].revision
                if parent_id in text_parents:
                    continue
                text_parents.append(parent_id)

            vfile = repository.weave_store.get_weave_or_empty(ie.file_id,
                repository.get_transaction())
            lines = revision_tree.get_file(ie.file_id).readlines()
            vfile.add_lines(rev.revision_id, text_parents, lines)
    try:
        # install the inventory
        repository.add_inventory(rev.revision_id, inv, present_parents)
    except errors.RevisionAlreadyPresent:
        pass
    if signature is not None:
        repository.add_signature_text(rev.revision_id, signature)
    repository.add_revision(rev.revision_id, rev, inv)


class MetaDirRepository(Repository):
    """Repositories in the new meta-dir layout.

    :ivar _transport: Transport for access to repository control files,
        typically pointing to .bzr/repository.
    """

    def __init__(self, _format, a_bzrdir, control_files, _revision_store, control_store, text_store):
        super(MetaDirRepository, self).__init__(_format,
                                                a_bzrdir,
                                                control_files,
                                                _revision_store,
                                                control_store,
                                                text_store)
        self._transport = control_files._transport

    @needs_read_lock
    def is_shared(self):
        """Return True if this repository is flagged as a shared repository."""
        return self._transport.has('shared-storage')

    @needs_write_lock
    def set_make_working_trees(self, new_value):
        """Set the policy flag for making working trees when creating branches.

        This only applies to branches that use this repository.

        The default is 'True'.
        :param new_value: True to restore the default, False to disable making
                          working trees.
        """
        if new_value:
            try:
                self._transport.delete('no-working-trees')
            except errors.NoSuchFile:
                pass
        else:
            self._transport.put_bytes('no-working-trees', '',
                mode=self.control_files._file_mode)

    def make_working_trees(self):
        """Returns the policy for making working trees on new branches."""
        return not self._transport.has('no-working-trees')


class MetaDirVersionedFileRepository(MetaDirRepository):
    """Repositories in a meta-dir, that work via versioned file objects."""

    def __init__(self, _format, a_bzrdir, control_files, _revision_store, control_store, text_store):
        super(MetaDirVersionedFileRepository, self).__init__(_format, a_bzrdir,
            control_files, _revision_store, control_store, text_store)
        _revision_store.get_scope = self.get_transaction
        control_store.get_scope = self.get_transaction
        text_store.get_scope = self.get_transaction


class RepositoryFormatRegistry(registry.Registry):
    """Registry of RepositoryFormats."""

    def get(self, format_string):
        r = registry.Registry.get(self, format_string)
        if callable(r):
            r = r()
        return r


format_registry = RepositoryFormatRegistry()
"""Registry of formats, indexed by their identifying format string.

This can contain either format instances themselves, or classes/factories that
can be called to obtain one.
"""
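
# Illustrative sketch, not part of the original module: looking up a format
# object from the string stored in a repository's 'format' file.  Because
# the registry calls callables, lazily registered classes come back as
# instances:
#
#   fmt = format_registry.get('Bazaar Knit Repository Format 3 (bzr 0.15)\n')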


#####################################################################
# Repository Formats

class RepositoryFormat(object):
    """A repository format.

    Formats provide three things:
     * An initialization routine to construct repository data on disk.
     * a format string which is used when the BzrDir supports versioned
       children.
     * an open routine which returns a Repository instance.

    There is one and only one Format subclass for each on-disk format. But
    there can be one Repository subclass that is used for several different
    formats. The _format attribute on a Repository instance can be used to
    determine the disk format.

    Formats are placed in a dict by their format string for reference
    during opening. These should be subclasses of RepositoryFormat
    for consistency.

    Once a format is deprecated, just deprecate the initialize and open
    methods on the format class. Do not deprecate the object, as the
    object will be created every system load.

    Common instance attributes:
    _matchingbzrdir - the bzrdir format that the repository format was
    originally written to work with. This can be used if manually
    constructing a bzrdir and repository, or more commonly for test suite
    parameterization.
    """

    # Set to True or False in derived classes. True indicates that the format
    # supports ghosts gracefully.
    supports_ghosts = None
    # Can this repository be given external locations to lookup additional
    # data. Set to True or False in derived classes.
    supports_external_lookups = None

    def __repr__(self):
        return "<%s>" % self.__class__.__name__

    def __eq__(self, other):
        # format objects are generally stateless
        return isinstance(other, self.__class__)

    def __ne__(self, other):
        return not self == other

    @classmethod
    def find_format(klass, a_bzrdir):
        """Return the format for the repository object in a_bzrdir.

        This is used by bzr native formats that have a "format" file in
        the repository.  Other methods may be used by different types of
        control directory.
        """
        try:
            transport = a_bzrdir.get_repository_transport(None)
            format_string = transport.get("format").read()
            return format_registry.get(format_string)
        except errors.NoSuchFile:
            raise errors.NoRepositoryPresent(a_bzrdir)
        except KeyError:
            raise errors.UnknownFormatError(format=format_string,
                                            kind='repository')
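
    # Illustrative sketch, not part of the original module: probing a bzrdir
    # for the format of the repository it contains.  `a_bzrdir` is an assumed
    # open BzrDir:
    #
    #   fmt = RepositoryFormat.find_format(a_bzrdir)
    #   print fmt.get_format_description()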

    @classmethod
    def register_format(klass, format):
        format_registry.register(format.get_format_string(), format)

    @classmethod
    def unregister_format(klass, format):
        format_registry.remove(format.get_format_string())

    @classmethod
    def get_default_format(klass):
        """Return the current default format."""
        from bzrlib import bzrdir
        return bzrdir.format_registry.make_bzrdir('default').repository_format

    def _get_control_store(self, repo_transport, control_files):
        """Return the control store for this repository."""
        raise NotImplementedError(self._get_control_store)

    def get_format_string(self):
        """Return the ASCII format string that identifies this format.

        Note that in pre format ?? repositories the format string is
        not permitted nor written to disk.
        """
        raise NotImplementedError(self.get_format_string)

    def get_format_description(self):
        """Return the short description for this format."""
        raise NotImplementedError(self.get_format_description)

    def _get_revision_store(self, repo_transport, control_files):
        """Return the revision store object for this a_bzrdir."""
        raise NotImplementedError(self._get_revision_store)

    def _get_text_rev_store(self,
                            transport,
                            control_files,
                            name,
                            compressed=True,
                            prefixed=False,
                            serializer=None):
        """Common logic for getting a revision store for a repository.

        see self._get_revision_store for the subclass-overridable method to
        get the store for a repository.
        """
        from bzrlib.store.revision.text import TextRevisionStore
        dir_mode = control_files._dir_mode
        file_mode = control_files._file_mode
        text_store = TextStore(transport.clone(name),
                               prefixed=prefixed,
                               compressed=compressed,
                               dir_mode=dir_mode,
                               file_mode=file_mode)
        _revision_store = TextRevisionStore(text_store, serializer)
        return _revision_store

    # TODO: this shouldn't be in the base class, it's specific to things that
    # use weaves or knits -- mbp 20070207
    def _get_versioned_file_store(self,
                                  name,
                                  transport,
                                  control_files,
                                  prefixed=True,
                                  versionedfile_class=None,
                                  versionedfile_kwargs={},
                                  escaped=False):
        if versionedfile_class is None:
            versionedfile_class = self._versionedfile_class
        weave_transport = control_files._transport.clone(name)
        dir_mode = control_files._dir_mode
        file_mode = control_files._file_mode
        return VersionedFileStore(weave_transport, prefixed=prefixed,
                                  dir_mode=dir_mode,
                                  file_mode=file_mode,
                                  versionedfile_class=versionedfile_class,
                                  versionedfile_kwargs=versionedfile_kwargs,
                                  escaped=escaped)

    def initialize(self, a_bzrdir, shared=False):
        """Initialize a repository of this format in a_bzrdir.

        :param a_bzrdir: The bzrdir to put the new repository in.
        :param shared: The repository should be initialized as a sharable one.
        :returns: The new repository object.

        This may raise UninitializableFormat if shared repositories are not
        compatible with a_bzrdir.
        """
        raise NotImplementedError(self.initialize)

    def is_supported(self):
        """Is this format supported?

        Supported formats must be initializable and openable.
        Unsupported formats may not support initialization or committing or
        some other features depending on the reason for not being supported.
        """
        return True

    def check_conversion_target(self, target_format):
        raise NotImplementedError(self.check_conversion_target)

    def open(self, a_bzrdir, _found=False):
        """Return an instance of this format for the bzrdir a_bzrdir.

        _found is a private parameter, do not use it.
        """
        raise NotImplementedError(self.open)


class MetaDirRepositoryFormat(RepositoryFormat):
    """Common base class for the new repositories using the metadir layout."""

    rich_root_data = False
    supports_tree_reference = False
    supports_external_lookups = False
    _matchingbzrdir = bzrdir.BzrDirMetaFormat1()

    def __init__(self):
        super(MetaDirRepositoryFormat, self).__init__()

    def _create_control_files(self, a_bzrdir):
        """Create the required files and the initial control_files object."""
        # FIXME: RBC 20060125 don't peek under the covers
        # NB: no need to escape relative paths that are url safe.
        repository_transport = a_bzrdir.get_repository_transport(self)
        control_files = lockable_files.LockableFiles(repository_transport,
                                'lock', lockdir.LockDir)
        control_files.create_lock()
        return control_files

    def _upload_blank_content(self, a_bzrdir, dirs, files, utf8_files, shared):
        """Upload the initial blank content."""
        control_files = self._create_control_files(a_bzrdir)
        control_files.lock_write()
        transport = control_files._transport
        if shared == True:
            utf8_files += [('shared-storage', '')]
        try:
            transport.mkdir_multi(dirs, mode=control_files._dir_mode)
            for (filename, content_stream) in files:
                transport.put_file(filename, content_stream,
                    mode=control_files._file_mode)
            for (filename, content_bytes) in utf8_files:
                transport.put_bytes_non_atomic(filename, content_bytes,
                    mode=control_files._file_mode)
        finally:
            control_files.unlock()


# formats which have no format string are not discoverable
# and not independently creatable, so are not registered.  They're
# all in bzrlib.repofmt.weaverepo now.  When an instance of one of these is
# needed, it's constructed directly by the BzrDir.  Non-native formats where
# the repository is not separately opened are similar.

format_registry.register_lazy(
    'Bazaar-NG Repository format 7',
    'bzrlib.repofmt.weaverepo',
    'RepositoryFormat7'
    )

format_registry.register_lazy(
    'Bazaar-NG Knit Repository Format 1',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit1',
    )

format_registry.register_lazy(
    'Bazaar Knit Repository Format 3 (bzr 0.15)\n',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit3',
    )

format_registry.register_lazy(
    'Bazaar Knit Repository Format 4 (bzr 1.0)\n',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit4',
    )

# Pack-based formats. There is one format for pre-subtrees, and one for
# post-subtrees to allow ease of testing.
# NOTE: These are experimental in 0.92. Stable in 1.0 and above
format_registry.register_lazy(
    'Bazaar pack repository format 1 (needs bzr 0.92)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack1',
    )
format_registry.register_lazy(
    'Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack3',
    )
format_registry.register_lazy(
    'Bazaar pack repository format 1 with rich root (needs bzr 1.0)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack4',
    )

# Development formats.
# development 0 - stub to introduce development versioning scheme.
format_registry.register_lazy(
    "Bazaar development format 0 (needs bzr.dev from before 1.3)\n",
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatPackDevelopment0',
    )
format_registry.register_lazy(
    ("Bazaar development format 0 with subtree support "
        "(needs bzr.dev from before 1.3)\n"),
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatPackDevelopment0Subtree',
    )
# 1.3->1.4 go below here


class InterRepository(InterObject):
    """This class represents operations taking place between two repositories.

    Its instances have methods like copy_content and fetch, and contain
    references to the source and target repositories these operations can be
    carried out on.

    Often we will provide convenience methods on 'repository' which carry out
    operations with another repository - they will always forward to
    InterRepository.get(other).method_name(parameters).
    """

    _optimisers = []
    """The available optimised InterRepository types."""

    def copy_content(self, revision_id=None):
        raise NotImplementedError(self.copy_content)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """Fetch the content required to construct revision_id.

        The content is copied from self.source to self.target.

        :param revision_id: if None all content is copied, if NULL_REVISION no
                            content is copied.
        :param pb: optional progress bar to use for progress reports. If not
                   provided a default one will be created.

        Returns the copied revision count and the failed revisions in a tuple:
        (copied, failures).
        """
        raise NotImplementedError(self.fetch)

    def _walk_to_common_revisions(self, revision_ids):
        """Walk out from revision_ids in source to revisions target has.

        :param revision_ids: The start point for the search.
        :return: A set of revision ids.
        """
        target_graph = self.target.get_graph()
        revision_ids = frozenset(revision_ids)
        if set(target_graph.get_parent_map(revision_ids)) == revision_ids:
            return graph.SearchResult(revision_ids, set(), 0, set())
        missing_revs = set()
        source_graph = self.source.get_graph()
        # ensure we don't pay silly lookup costs.
        searcher = source_graph._make_breadth_first_searcher(revision_ids)
        null_set = frozenset([_mod_revision.NULL_REVISION])
        while True:
            try:
                next_revs, ghosts = searcher.next_with_ghosts()
            except StopIteration:
                break
            if revision_ids.intersection(ghosts):
                absent_ids = set(revision_ids.intersection(ghosts))
                # If all absent_ids are present in target, no error is needed.
                absent_ids.difference_update(
                    set(target_graph.get_parent_map(absent_ids)))
                if absent_ids:
                    raise errors.NoSuchRevision(self.source, absent_ids.pop())
            # we don't care about other ghosts as we can't fetch them and
            # haven't been asked to.
            next_revs = set(next_revs)
            # we always have NULL_REVISION present.
            have_revs = set(target_graph.get_parent_map(next_revs)).union(null_set)
            missing_revs.update(next_revs - have_revs)
            searcher.stop_searching_any(have_revs)
        return searcher.get_result()

    @deprecated_method(symbol_versioning.one_two)
    @needs_read_lock
    def missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """Return the revision ids that source has that target does not.

        These are returned in topological order.

        :param revision_id: only return revision ids included by this
                            revision_id.
        :param find_ghosts: If True find missing revisions in deep history
            rather than just finding the surface difference.
        """
        return list(self.search_missing_revision_ids(
            revision_id, find_ghosts).get_keys())

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """Return the revision ids that source has that target does not.

        :param revision_id: only return revision ids included by this
                            revision_id.
        :param find_ghosts: If True find missing revisions in deep history
            rather than just finding the surface difference.
        :return: A bzrlib.graph.SearchResult.
        """
        # stop searching at found target revisions.
        if not find_ghosts and revision_id is not None:
            return self._walk_to_common_revisions([revision_id])
        # generic, possibly worst case, slow code path.
        target_ids = set(self.target.all_revision_ids())
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        result_set = set(source_ids).difference(target_ids)
        return self.source.revision_ids_to_search_result(result_set)
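
    # Illustrative sketch, not part of the original module: computing what a
    # push would need to transfer.  `source_repo` and `target_repo` are
    # assumed open, locked repositories:
    #
    #   inter = InterRepository.get(source_repo, target_repo)
    #   missing = inter.search_missing_revision_ids(find_ghosts=False)
    #   print len(missing.get_keys()), 'revisions to transfer'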

    @staticmethod
    def _same_model(source, target):
        """True if source and target have the same data representation."""
        if source.supports_rich_root() != target.supports_rich_root():
            return False
        if source._serializer != target._serializer:
            return False
        return True


class InterSameDataRepository(InterRepository):
    """Code for converting between repositories that represent the same data.

    Data format and model must match for this to work.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        """Repository format for testing with.

        InterSameData can pull from subtree to subtree and from non-subtree to
        non-subtree, so we test this with the richest repository format.
        """
        from bzrlib.repofmt import knitrepo
        return knitrepo.RepositoryFormatKnit3()

    @staticmethod
    def is_compatible(source, target):
        return InterRepository._same_model(source, target)

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This copies both the repository's revision data and configuration
        information such as the make_working_trees setting.

        This is a destructive operation! Do not use it on existing
        repositories.

        :param revision_id: Only copy the content needed to construct
                            revision_id and its parents.
        """
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except NotImplementedError:
            pass
        # but don't bother fetching if we have the needed data now.
        if (revision_id not in (None, _mod_revision.NULL_REVISION) and
            self.target.has_revision(revision_id)):
            return
        self.target.fetch(self.source, revision_id=revision_id)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import GenericRepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target,
               self.target._format)
        f = GenericRepoFetcher(to_repository=self.target,
                               from_repository=self.source,
                               last_revision=revision_id,
                               pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions


class InterWeaveRepo(InterSameDataRepository):
    """Optimised code paths between Weave based repositories.

    This should be in bzrlib/repofmt/weaverepo.py but we have not yet
    implemented lazy inter-object optimisation.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import weaverepo
        return weaverepo.RepositoryFormat7()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Weave formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.weaverepo import (
                RepositoryFormat5,
                RepositoryFormat6,
                RepositoryFormat7,
                )
        try:
            return (isinstance(source._format, (RepositoryFormat5,
                                                RepositoryFormat6,
                                                RepositoryFormat7)) and
                    isinstance(target._format, (RepositoryFormat5,
                                                RepositoryFormat6,
                                                RepositoryFormat7)))
        except AttributeError:
            return False

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """See InterRepository.copy_content()."""
        # weave specific optimised path:
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except (errors.RepositoryUpgradeRequired, NotImplementedError):
            pass
        # FIXME do not peek!
        if self.source._transport.listable():
            pb = ui.ui_factory.nested_progress_bar()
            try:
                self.target.weave_store.copy_all_ids(
                    self.source.weave_store,
                    pb=pb,
                    from_transaction=self.source.get_transaction(),
                    to_transaction=self.target.get_transaction())
                pb.update('copying inventory', 0, 1)
                self.target.control_weaves.copy_multi(
                    self.source.control_weaves, ['inventory'],
                    from_transaction=self.source.get_transaction(),
                    to_transaction=self.target.get_transaction())
                self.target._revision_store.text_store.copy_all_ids(
                    self.source._revision_store.text_store,
                    pb=pb)
            finally:
                pb.finished()
        else:
            self.target.fetch(self.source, revision_id=revision_id)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import GenericRepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        f = GenericRepoFetcher(to_repository=self.target,
                               from_repository=self.source,
                               last_revision=revision_id,
                               pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids()."""
        # we want all revisions to satisfy revision_id in source.
        # but we don't want to stat every file here and there.
        # what we want, then, is all revisions other needs to satisfy
        # revision_id checked, but not those that we have locally.
        # so the first thing is to get a subset of the revisions to
        # satisfy revision_id in source, and then eliminate those that
        # we do already have.
        # this is slow on high latency connection to self, but as this
        # disk format scales terribly for push anyway due to rewriting
        # inventory.weave, this is considered acceptable.
        # - RBC 20060209
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source._all_possible_ids()
        source_ids_set = set(source_ids)
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target._all_possible_ids())
        possibly_present_revisions = target_ids.intersection(source_ids_set)
        actually_present_revisions = set(
            self.target._eliminate_revisions_not_present(possibly_present_revisions))
        required_revisions = source_ids_set.difference(actually_present_revisions)
        if revision_id is not None:
            # we used get_ancestry to determine source_ids then we are assured all
            # revisions referenced are present as they are installed in topological order.
            # and the tip revision was validated by get_ancestry.
            result_set = required_revisions
        else:
            # if we just grabbed the possibly available ids, then
            # we only have an estimate of whats available and need to validate
            # that against the revision records.
            result_set = set(
                self.source._eliminate_revisions_not_present(required_revisions))
        return self.source.revision_ids_to_search_result(result_set)


class InterKnitRepo(InterSameDataRepository):
    """Optimised code paths between Knit based repositories."""

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import knitrepo
        return knitrepo.RepositoryFormatKnit1()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Knit formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.knitrepo import RepositoryFormatKnit
        try:
            are_knits = (isinstance(source._format, RepositoryFormatKnit) and
                isinstance(target._format, RepositoryFormatKnit))
        except AttributeError:
            return False
        return are_knits and InterRepository._same_model(source, target)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import KnitRepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        f = KnitRepoFetcher(to_repository=self.target,
                            from_repository=self.source,
                            last_revision=revision_id,
                            pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids()."""
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        source_ids_set = set(source_ids)
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target.all_revision_ids())
        possibly_present_revisions = target_ids.intersection(source_ids_set)
        actually_present_revisions = set(
            self.target._eliminate_revisions_not_present(possibly_present_revisions))
        required_revisions = source_ids_set.difference(actually_present_revisions)
        if revision_id is not None:
            # we used get_ancestry to determine source_ids then we are assured all
            # revisions referenced are present as they are installed in topological order.
            # and the tip revision was validated by get_ancestry.
            result_set = required_revisions
        else:
            # if we just grabbed the possibly available ids, then
            # we only have an estimate of whats available and need to validate
            # that against the revision records.
            result_set = set(
                self.source._eliminate_revisions_not_present(required_revisions))
        return self.source.revision_ids_to_search_result(result_set)


class InterPackRepo(InterSameDataRepository):
    """Optimised code paths between Pack based repositories."""

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import pack_repo
        return pack_repo.RepositoryFormatKnitPack1()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Pack formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.pack_repo import RepositoryFormatPack
        try:
            are_packs = (isinstance(source._format, RepositoryFormatPack) and
                isinstance(target._format, RepositoryFormatPack))
        except AttributeError:
            return False
        return are_packs and InterRepository._same_model(source, target)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.repofmt.pack_repo import Packer
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        self.count_copied = 0
        if revision_id is None:
            # TODO:
            # everything to do - use pack logic
            # to fetch from all packs to one without
            # inventory parsing etc, IFF nothing to be copied is in the target.
            # till then:
            revision_ids = self.source.all_revision_ids()
            revision_keys = [(revid,) for revid in revision_ids]
            index = self.target._pack_collection.revision_index.combined_index
            present_revision_ids = set(item[1][0] for item in
                index.iter_entries(revision_keys))
            revision_ids = set(revision_ids) - present_revision_ids
            # implementing the TODO will involve:
            # - detecting when all of a pack is selected
            # - avoiding as much as possible pre-selection, so the
            # more-core routines such as create_pack_from_packs can filter in
            # a just-in-time fashion. (though having a HEADS list on a
            # repository might make this a lot easier, because we could
            # sensibly detect 'new revisions' without doing a full index scan.
        elif _mod_revision.is_null(revision_id):
            # nothing to do:
            return (0, [])
        else:
            try:
                revision_ids = self.search_missing_revision_ids(revision_id,
                    find_ghosts=find_ghosts).get_keys()
            except errors.NoSuchRevision:
                raise errors.InstallFailed([revision_id])
            if len(revision_ids) == 0:
                return (0, [])
        packs = self.source._pack_collection.all_packs()
        pack = Packer(self.target._pack_collection, packs, '.fetch',
            revision_ids).pack()
        if pack is not None:
            self.target._pack_collection._save_pack_names()
            # Trigger an autopack. This may duplicate effort as we've just done
            # a pack creation, but for now it is simpler to think about as
            # 'upload data, then repack if needed'.
            self.target._pack_collection.autopack()
            return (pack.get_revision_count(), [])
        else:
            return (0, [])

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids().

        :param find_ghosts: Find ghosts throughout the ancestry of
            revision_id.
        """
        if not find_ghosts and revision_id is not None:
            return self._walk_to_common_revisions([revision_id])
        elif revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target.all_revision_ids())
        result_set = set(source_ids).difference(target_ids)
        return self.source.revision_ids_to_search_result(result_set)


class InterModel1and2(InterRepository):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        if not source.supports_rich_root() and target.supports_rich_root():
            return True
        else:
            return False

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import Model1toKnit2Fetcher
        f = Model1toKnit2Fetcher(to_repository=self.target,
                                 from_repository=self.source,
                                 last_revision=revision_id,
                                 pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This is a destructive operation! Do not use it on existing
        repositories.

        :param revision_id: Only copy the content needed to construct
                            revision_id and its parents.
        """
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except NotImplementedError:
            pass
        # but don't bother fetching if we have the needed data now.
        if (revision_id not in (None, _mod_revision.NULL_REVISION) and
            self.target.has_revision(revision_id)):
            return
        self.target.fetch(self.source, revision_id=revision_id)


class InterKnit1and2(InterKnitRepo):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with Knit1 source and Knit3 target"""
        from bzrlib.repofmt.knitrepo import RepositoryFormatKnit3
        try:
            from bzrlib.repofmt.knitrepo import (RepositoryFormatKnit1,
                RepositoryFormatKnit3)
            from bzrlib.repofmt.pack_repo import (
                RepositoryFormatKnitPack1,
                RepositoryFormatKnitPack3,
                RepositoryFormatPackDevelopment0,
                RepositoryFormatPackDevelopment0Subtree,
                )
            nosubtrees = (
                RepositoryFormatKnit1,
                RepositoryFormatKnitPack1,
                RepositoryFormatPackDevelopment0,
                )
            subtrees = (
                RepositoryFormatKnit3,
                RepositoryFormatKnitPack3,
                RepositoryFormatPackDevelopment0Subtree,
                )
            return (isinstance(source._format, nosubtrees) and
                isinstance(target._format, subtrees))
        except AttributeError:
            return False

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import Knit1to2Fetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target,
               self.target._format)
        f = Knit1to2Fetcher(to_repository=self.target,
                            from_repository=self.source,
                            last_revision=revision_id,
                            pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions


class InterDifferingSerializer(InterKnitRepo):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with Knit2 source and Knit3 target"""
        if source.supports_rich_root() != target.supports_rich_root():
            return False
        # Ideally, we'd support fetching if the source had no tree references
        # even if it supported them...
        if (getattr(source._format, 'supports_tree_reference', False) and
            not getattr(target._format, 'supports_tree_reference', False)):
            return False
        return True

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        revision_ids = self.target.search_missing_revision_ids(self.source,
            revision_id, find_ghosts=find_ghosts).get_keys()
        revision_ids = tsort.topo_sort(
            self.source.get_graph().get_parent_map(revision_ids))
        def revisions_iterator():
            for current_revision_id in revision_ids:
                revision = self.source.get_revision(current_revision_id)
                tree = self.source.revision_tree(current_revision_id)
                try:
                    signature = self.source.get_signature_text(
                        current_revision_id)
                except errors.NoSuchRevision:
                    signature = None
                yield revision, tree, signature
        if pb is None:
            my_pb = ui.ui_factory.nested_progress_bar()
            pb = my_pb
        else:
            my_pb = None
        try:
            install_revisions(self.target, revisions_iterator(),
                              len(revision_ids), pb)
        finally:
            if my_pb is not None:
                my_pb.finished()
        return len(revision_ids), 0


class InterRemoteToOther(InterRepository):

    def __init__(self, source, target):
        InterRepository.__init__(self, source, target)
        self._real_inter = None

    @staticmethod
    def is_compatible(source, target):
        if not isinstance(source, remote.RemoteRepository):
            return False
        # Is source's model compatible with target's model?
        source._ensure_real()
        real_source = source._real_repository
        if isinstance(real_source, remote.RemoteRepository):
            raise NotImplementedError(
                "We don't support remote repos backed by remote repos yet.")
        return InterRepository._same_model(real_source, target)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import RemoteToOtherFetcher
        mutter("Using fetch logic to copy between %s(remote) and %s(%s)",
               self.source, self.target, self.target._format)
        # TODO: jam 20070210 This should be an assert, not a translate
        revision_id = osutils.safe_revision_id(revision_id)
        f = RemoteToOtherFetcher(to_repository=self.target,
                                 from_repository=self.source,
                                 last_revision=revision_id,
                                 pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @classmethod
    def _get_repo_format_to_test(self):
        return None


class InterOtherToRemote(InterRepository):

    def __init__(self, source, target):
        InterRepository.__init__(self, source, target)
        self._real_inter = None

    @staticmethod
    def is_compatible(source, target):
        if isinstance(target, remote.RemoteRepository):
            return True
        return False

    def _ensure_real_inter(self):
        if self._real_inter is None:
            self.target._ensure_real()
            real_target = self.target._real_repository
            self._real_inter = InterRepository.get(self.source, real_target)

    def copy_content(self, revision_id=None):
        self._ensure_real_inter()
        self._real_inter.copy_content(revision_id=revision_id)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        self._ensure_real_inter()
        self._real_inter.fetch(revision_id=revision_id, pb=pb,
            find_ghosts=find_ghosts)

    @classmethod
    def _get_repo_format_to_test(self):
        return None


InterRepository.register_optimiser(InterDifferingSerializer)
InterRepository.register_optimiser(InterSameDataRepository)
InterRepository.register_optimiser(InterWeaveRepo)
InterRepository.register_optimiser(InterKnitRepo)
InterRepository.register_optimiser(InterModel1and2)
InterRepository.register_optimiser(InterKnit1and2)
InterRepository.register_optimiser(InterPackRepo)
InterRepository.register_optimiser(InterRemoteToOther)
InterRepository.register_optimiser(InterOtherToRemote)
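
# Illustrative sketch, not part of the original module: InterRepository.get
# tries the optimisers registered above and uses the first one whose
# is_compatible(source, target) returns True, falling back to the generic
# InterRepository.  `source_repo` and `target_repo` are assumed names:
#
#   inter = InterRepository.get(source_repo, target_repo)
#   copied, failures = inter.fetch(revision_id=None)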


class CopyConverter(object):
    """A repository conversion tool which just performs a copy of the content.

    This is slow but quite reliable.
    """

    def __init__(self, target_format):
        """Create a CopyConverter.

        :param target_format: The format the resulting repository should be.
        """
        self.target_format = target_format

    def convert(self, repo, pb):
        """Perform the conversion of to_convert, giving feedback via pb.

        :param to_convert: The disk object to convert.
        :param pb: a progress bar to use for progress information.
        """
        self.pb = pb
        self.count = 0
        self.total = 4
        # this is only useful with metadir layouts - separated repo content.
        # trigger an assertion if not such
        repo._format.get_format_string()
        self.repo_dir = repo.bzrdir
        self.step('Moving repository to repository.backup')
        self.repo_dir.transport.move('repository', 'repository.backup')
        backup_transport = self.repo_dir.transport.clone('repository.backup')
        repo._format.check_conversion_target(self.target_format)
        self.source_repo = repo._format.open(self.repo_dir,
            _found=True,
            _override_transport=backup_transport)
        self.step('Creating new repository')
        converted = self.target_format.initialize(self.repo_dir,
                                                  self.source_repo.is_shared())
        converted.lock_write()
        try:
            self.step('Copying content into repository.')
            self.source_repo.copy_content_into(converted)
        finally:
            converted.unlock()
        self.step('Deleting old repository content.')
        self.repo_dir.transport.delete_tree('repository.backup')
        self.pb.note('repository converted')

    def step(self, message):
        """Update the pb by a step."""
        self.count += 1
        self.pb.update(message, self.count, self.total)
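
    # Illustrative sketch, not part of the original module: converting a
    # metadir repository in place.  `repo` is an assumed open Repository,
    # `target_format` a RepositoryFormat instance, and `pb` comes from
    # ui.ui_factory.nested_progress_bar():
    #
    #   converter = CopyConverter(target_format)
    #   converter.convert(repo, pb)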


_unescape_map = {
    'apos':"'",
    'quot':'"',
    'amp':'&',
    'lt':'<',
    'gt':'>'
}


def _unescaper(match, _map=_unescape_map):
    code = match.group(1)
    try:
        return _map[code]
    except KeyError:
        if not code.startswith('#'):
            raise
        return unichr(int(code[1:])).encode('utf8')


_unescape_re = None


def _unescape_xml(data):
    """Unescape predefined XML entities in a string of data."""
    global _unescape_re
    if _unescape_re is None:
        _unescape_re = re.compile('\&([^;]*);')
    return _unescape_re.sub(_unescaper, data)
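
# Illustrative sketch, not part of the original module: only the five
# predefined XML entities and numeric character references are unescaped:
#
#   _unescape_xml('&lt;b&gt; &amp; &#65;')   # returns '<b> & A'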


class _VersionedFileChecker(object):

    def __init__(self, repository):
        self.repository = repository
        self.text_index = self.repository._generate_text_key_index()

    def calculate_file_version_parents(self, revision_id, file_id):
        """Calculate the correct parents for a file version according to
        the inventories.
        """
        parent_keys = self.text_index[(file_id, revision_id)]
        if parent_keys == [_mod_revision.NULL_REVISION]:
            return ()
        # strip the file_id, for the weave api
        return tuple([revision_id for file_id, revision_id in parent_keys])

    def check_file_version_parents(self, weave, file_id):
        """Check the parents stored in a versioned file are correct.

        It also detects file versions that are not referenced by their
        corresponding revision's inventory.

        :returns: A tuple of (wrong_parents, dangling_file_versions).
            wrong_parents is a dict mapping {revision_id: (stored_parents,
            correct_parents)} for each revision_id where the stored parents
            are not correct.  dangling_file_versions is a set of (file_id,
            revision_id) tuples for versions that are present in this versioned
            file, but not used by the corresponding inventory.
        """
        wrong_parents = {}
        unused_versions = set()
        versions = weave.versions()
        parent_map = weave.get_parent_map(versions)
        for num, revision_id in enumerate(versions):
            try:
                correct_parents = self.calculate_file_version_parents(
                    revision_id, file_id)
            except KeyError:
                # The version is not part of the used keys.
                unused_versions.add(revision_id)
            else:
                try:
                    knit_parents = tuple(parent_map[revision_id])
                except errors.RevisionNotPresent:
                    knit_parents = None
                if correct_parents != knit_parents:
                    wrong_parents[revision_id] = (knit_parents, correct_parents)
        return wrong_parents, unused_versions


def _old_get_graph(repository, revision_id):
    """DO NOT USE. That is all. I'm serious."""
    graph = repository.get_graph()
    revision_graph = dict(((key, value) for key, value in
        graph.iter_ancestry([revision_id]) if value is not None))
    return _strip_NULL_ghosts(revision_graph)


def _strip_NULL_ghosts(revision_graph):
    """Also don't use this. More compatibility code for unmigrated clients."""
    # Filter ghosts, and null:
    if _mod_revision.NULL_REVISION in revision_graph:
        del revision_graph[_mod_revision.NULL_REVISION]
    for key, parents in revision_graph.items():
        revision_graph[key] = tuple(parent for parent in parents if parent
            in revision_graph)
    return revision_graph