# Copyright (C) 2005, 2006, 2007, 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

from cStringIO import StringIO

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
import time

from bzrlib import (
    bzrdir,
    debug,
    errors,
    generate_ids,
    gpg,
    graph,
    lazy_regex,
    lru_cache,
    osutils,
    revision as _mod_revision,
    tsort,
    ui,
    )
""")

from bzrlib.bundle import serializer
from bzrlib.revisiontree import RevisionTree
from bzrlib.store.versioned import VersionedFileStore
from bzrlib.store.text import TextStore
from bzrlib.testament import Testament
from bzrlib.util import bencode
from bzrlib.decorators import needs_read_lock, needs_write_lock
from bzrlib.inter import InterObject
from bzrlib.inventory import Inventory, InventoryDirectory, ROOT_ID
from bzrlib.symbol_versioning import (
    deprecated_method,
    one_one,
    one_two,
    one_six,
    )
from bzrlib.trace import mutter, mutter_callsite, note, warning


# Old formats display a warning, but only once
_deprecation_warning_done = False


class CommitBuilder(object):
    """Provides an interface to build up a commit.

    This allows describing a tree to be committed without needing to
    know the internals of the format of the repository.
    """

    # all clients should supply tree roots.
    record_root_entry = True
    # the default CommitBuilder does not manage trees whose root is versioned.
    _versioned_root = False

    def __init__(self, repository, parents, config, timestamp=None,
                 timezone=None, committer=None, revprops=None,
                 revision_id=None):
        """Initiate a CommitBuilder.

        :param repository: Repository to commit to.
        :param parents: Revision ids of the parents of the new revision.
        :param config: Configuration to use.
        :param timestamp: Optional timestamp recorded for commit.
        :param timezone: Optional timezone for timestamp.
        :param committer: Optional committer to set for commit.
        :param revprops: Optional dictionary of revision properties.
        :param revision_id: Optional revision id.
        """
        self._config = config

        if committer is None:
            self._committer = self._config.username()
        else:
            self._committer = committer

        self.new_inventory = Inventory(None)
        self._new_revision_id = revision_id
        self.parents = parents
        self.repository = repository

        self._revprops = {}
        if revprops is not None:
            self._revprops.update(revprops)

        if timestamp is None:
            timestamp = time.time()
        # Restrict resolution to 1ms
        self._timestamp = round(timestamp, 3)

        if timezone is None:
            self._timezone = osutils.local_time_offset()
        else:
            self._timezone = int(timezone)

        self._generate_revision_if_needed()
        self.__heads = graph.HeadsCache(repository.get_graph()).heads

    def commit(self, message):
        """Make the actual commit.

        :return: The revision id of the recorded revision.
        """
        rev = _mod_revision.Revision(
            timestamp=self._timestamp,
            timezone=self._timezone,
            committer=self._committer,
            message=message,
            inventory_sha1=self.inv_sha1,
            revision_id=self._new_revision_id,
            properties=self._revprops)
        rev.parent_ids = self.parents
        self.repository.add_revision(self._new_revision_id, rev,
            self.new_inventory, self._config)
        self.repository.commit_write_group()
        return self._new_revision_id
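
    # Illustrative sketch only, not part of the original source: the typical
    # lifecycle of a CommitBuilder, assuming a write-locked branch `branch`
    # and hypothetical `entries`, `parent_invs` and `work_tree` values:
    #
    #   builder = branch.repository.get_commit_builder(
    #       branch, parents, branch.get_config())
    #   for path, ie, content_summary in entries:
    #       builder.record_entry_contents(ie, parent_invs, path, work_tree,
    #           content_summary)
    #   builder.finish_inventory()
    #   new_revision_id = builder.commit('commit message')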

    def abort(self):
        """Abort the commit that is being built."""
        self.repository.abort_write_group()

    def revision_tree(self):
        """Return the tree that was just committed.

        After calling commit() this can be called to get a RevisionTree
        representing the newly committed tree. This is preferred to
        calling Repository.revision_tree() because that may require
        deserializing the inventory, while we already have a copy in
        memory.
        """
        return RevisionTree(self.repository, self.new_inventory,
                            self._new_revision_id)

    def finish_inventory(self):
        """Tell the builder that the inventory is finished."""
        if self.new_inventory.root is None:
            raise AssertionError('Root entry should be supplied to'
                ' record_entry_contents, as of bzr 0.10.')
            self.new_inventory.add(InventoryDirectory(ROOT_ID, '', None))
        self.new_inventory.revision_id = self._new_revision_id
        self.inv_sha1 = self.repository.add_inventory(
            self._new_revision_id,
            self.new_inventory,
            self.parents
            )

    def _gen_revision_id(self):
        """Return new revision-id."""
        return generate_ids.gen_revision_id(self._config.username(),
                                            self._timestamp)

    def _generate_revision_if_needed(self):
        """Create a revision id if None was supplied.

        If the repository can not support user-specified revision ids
        they should override this function and raise CannotSetRevisionId
        if _new_revision_id is not None.

        :raises: CannotSetRevisionId
        """
        if self._new_revision_id is None:
            self._new_revision_id = self._gen_revision_id()
            self.random_revid = True
        else:
            self.random_revid = False

    def _heads(self, file_id, revision_ids):
        """Calculate the graph heads for revision_ids in the graph of file_id.

        This can use either a per-file graph or a global revision graph as we
        have an identity relationship between the two graphs.
        """
        return self.__heads(revision_ids)

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """
        # In this revision format, root entries have no knit or weave. When
        # serializing out to disk and back in, root.revision is always
        # _new_revision_id.
        ie.revision = self._new_revision_id

    def _get_delta(self, ie, basis_inv, path):
        """Get a delta against the basis inventory for ie."""
        if ie.file_id not in basis_inv:
            # add
            return (None, path, ie.file_id, ie)
        elif ie != basis_inv[ie.file_id]:
            # common but altered
            # TODO: avoid this id2path call.
            return (basis_inv.id2path(ie.file_id), path, ie.file_id, ie)
        else:
            # common, unaltered
            return None

    def record_entry_contents(self, ie, parent_invs, path, tree,
        content_summary):
        """Record the content of ie from tree into the commit if needed.

        Side effect: sets ie.revision when unchanged

        :param ie: An inventory entry present in the commit.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param path: The path the entry is at in the tree.
        :param tree: The tree which contains this entry and should be used to
            obtain content.
        :param content_summary: Summary data from the tree about the paths
            content - stat, length, exec, sha/link target. This is only
            accessed when the entry has a revision of None - that is when it is
            a candidate to commit.
        :return: A tuple (change_delta, version_recorded). change_delta is
            an inventory_delta change for this entry against the basis tree of
            the commit, or None if no change occurred against the basis tree.
            version_recorded is True if a new version of the entry has been
            recorded. For instance, committing a merge where a file was only
            changed on the other side will return (delta, False).
        """
        if self.new_inventory.root is None:
            if ie.parent_id is not None:
                raise errors.RootMissing()
            self._check_root(ie, parent_invs, tree)
        if ie.revision is None:
            kind = content_summary[0]
        else:
            # ie is carried over from a prior commit
            kind = ie.kind
        # XXX: repository specific check for nested tree support goes here - if
        # the repo doesn't want nested trees we skip it ?
        if (kind == 'tree-reference' and
            not self.repository._format.supports_tree_reference):
            # mismatch between commit builder logic and repository:
            # this needs the entry creation pushed down into the builder.
            raise NotImplementedError('Missing repository subtree support.')
        self.new_inventory.add(ie)

        # TODO: slow, take it out of the inner loop.
        try:
            basis_inv = parent_invs[0]
        except IndexError:
            basis_inv = Inventory(root_id=None)

        # ie.revision is always None if the InventoryEntry is considered
        # for committing. We may record the previous parent's revision if the
        # content is actually unchanged against a sole head.
        if ie.revision is not None:
            if not self._versioned_root and path == '':
                # repositories that do not version the root set the root's
                # revision to the new commit even when no change occurs, and
                # this masks when a change may have occurred against the basis,
                # so calculate if one happened.
                if ie.file_id in basis_inv:
                    delta = (basis_inv.id2path(ie.file_id), path,
                        ie.file_id, ie)
                else:
                    # add
                    delta = (None, path, ie.file_id, ie)
                return delta, False
            else:
                # we don't need to commit this, because the caller already
                # determined that an existing revision of this file is
                # appropriate.
                return None, (ie.revision == self._new_revision_id)
        # XXX: Friction: parent_candidates should return a list not a dict
        #      so that we don't have to walk the inventories again.
        parent_candidate_entries = ie.parent_candidates(parent_invs)
        head_set = self._heads(ie.file_id, parent_candidate_entries.keys())
        heads = []
        for inv in parent_invs:
            if ie.file_id in inv:
                old_rev = inv[ie.file_id].revision
                if old_rev in head_set:
                    heads.append(inv[ie.file_id].revision)
                    head_set.remove(inv[ie.file_id].revision)

        store = False
        # now we check to see if we need to write a new record to the
        # repository.
        # We write a new entry unless there is one head to the ancestors, and
        # the kind-derived content is unchanged.

        # Cheapest check first: no ancestors, or more than one head in the
        # ancestors, we write a new node.
        if len(heads) != 1:
            store = True
        if not store:
            # There is a single head, look it up for comparison
            parent_entry = parent_candidate_entries[heads[0]]
            # if the non-content specific data has changed, we'll be writing a
            # node:
            if (parent_entry.parent_id != ie.parent_id or
                parent_entry.name != ie.name):
                store = True
        # now we need to do content specific checks:
        if not store:
            # if the kind changed the content obviously has
            if kind != parent_entry.kind:
                store = True
        if kind == 'file':
            if content_summary[2] is None:
                raise ValueError("Files must not have executable = None")
            if not store:
                if (# if the file length changed we have to store:
                    parent_entry.text_size != content_summary[1] or
                    # if the exec bit has changed we have to store:
                    parent_entry.executable != content_summary[2]):
                    store = True
                elif parent_entry.text_sha1 == content_summary[3]:
                    # all meta and content is unchanged (using a hash cache
                    # hit to check the sha)
                    ie.revision = parent_entry.revision
                    ie.text_size = parent_entry.text_size
                    ie.text_sha1 = parent_entry.text_sha1
                    ie.executable = parent_entry.executable
                    return self._get_delta(ie, basis_inv, path), False
                else:
                    # Either there is only a hash change (no hash cache entry,
                    # or same size content change), or there is no change on
                    # this file at all.
                    # Provide the parent's hash to the store layer, so that if
                    # the content is unchanged we will not store a new node.
                    nostore_sha = parent_entry.text_sha1
            if store:
                # We want to record a new node regardless of the presence or
                # absence of a content change in the file.
                nostore_sha = None
            ie.executable = content_summary[2]
            lines = tree.get_file(ie.file_id, path).readlines()
            try:
                ie.text_sha1, ie.text_size = self._add_text_to_weave(
                    ie.file_id, lines, heads, nostore_sha)
            except errors.ExistingContent:
                # Turns out that the file content was unchanged, and we were
                # only going to store a new node if it was changed. Carry over
                # the entry.
                ie.revision = parent_entry.revision
                ie.text_size = parent_entry.text_size
                ie.text_sha1 = parent_entry.text_sha1
                ie.executable = parent_entry.executable
                return self._get_delta(ie, basis_inv, path), False
        elif kind == 'directory':
            if not store:
                # all data is meta here, nothing specific to directory, so
                # carry over:
                ie.revision = parent_entry.revision
                return self._get_delta(ie, basis_inv, path), False
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        elif kind == 'symlink':
            current_link_target = content_summary[3]
            if not store:
                # symlink target is not generic metadata, check if it has
                # changed.
                if current_link_target != parent_entry.symlink_target:
                    store = True
            if not store:
                # unchanged, carry over.
                ie.revision = parent_entry.revision
                ie.symlink_target = parent_entry.symlink_target
                return self._get_delta(ie, basis_inv, path), False
            ie.symlink_target = current_link_target
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        elif kind == 'tree-reference':
            if not store:
                if content_summary[3] != parent_entry.reference_revision:
                    store = True
            if not store:
                # unchanged, carry over.
                ie.reference_revision = parent_entry.reference_revision
                ie.revision = parent_entry.revision
                return self._get_delta(ie, basis_inv, path), False
            ie.reference_revision = content_summary[3]
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        else:
            raise NotImplementedError('unknown kind')
        ie.revision = self._new_revision_id
        return self._get_delta(ie, basis_inv, path), True
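
    # For illustration (assumed shapes, mirroring _get_delta above), the
    # change_delta returned is one of:
    #   (None, path, file_id, entry)      # entry added relative to the basis
    #   (old_path, path, file_id, entry)  # entry changed or moved
    #   None                              # no change against the basis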

    def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
        # Note: as we read the content directly from the tree, we know it's not
        # been turned into unicode or badly split - but a broken tree
        # implementation could give us bad output from readlines() so this is
        # not a guarantee of safety. What would be better is always checking
        # the content during test suite execution. RBC 20070912
        parent_keys = tuple((file_id, parent) for parent in parents)
        return self.repository.texts.add_lines(
            (file_id, self._new_revision_id), parent_keys, new_lines,
            nostore_sha=nostore_sha, random_id=self.random_revid,
            check_content=False)[0:2]


class RootCommitBuilder(CommitBuilder):
    """This CommitBuilder actually records the root id."""

    # the root entry gets versioned properly by this builder.
    _versioned_root = True

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """


######################################################################


class Repository(object):
    """Repository holding history for one or more branches.

    The repository holds and retrieves historical information including
    revisions and file history. It's normally accessed only by the Branch,
    which views a particular line of development through that history.

    The Repository builds on top of some byte storage facilities (the revisions,
    signatures, inventories and texts attributes) and a Transport, which
    respectively provide byte storage and a means to access the (possibly
    remote) disk.

    The byte storage facilities are addressed via tuples, which we refer to
    as 'keys' throughout the code base. Revision_keys, inventory_keys and
    signature_keys are all 1-tuples: (revision_id,). text_keys are two-tuples:
    (file_id, revision_id). We use this interface because it allows low
    friction with the underlying code that implements disk indices, network
    encoding and other parts of bzrlib.

    :ivar revisions: A bzrlib.versionedfile.VersionedFiles instance containing
        the serialised revisions for the repository. This can be used to obtain
        revision graph information or to access raw serialised revisions.
        The result of trying to insert data into the repository via this store
        is undefined: it should be considered read-only except for implementors
        of repositories.
    :ivar signatures: A bzrlib.versionedfile.VersionedFiles instance containing
        the serialised signatures for the repository. This can be used to
        obtain access to raw serialised signatures. The result of trying to
        insert data into the repository via this store is undefined: it should
        be considered read-only except for implementors of repositories.
    :ivar inventories: A bzrlib.versionedfile.VersionedFiles instance containing
        the serialised inventories for the repository. This can be used to
        obtain unserialised inventories. The result of trying to insert data
        into the repository via this store is undefined: it should be
        considered read-only except for implementors of repositories.
    :ivar texts: A bzrlib.versionedfile.VersionedFiles instance containing the
        texts of files and directories for the repository. This can be used to
        obtain file texts or file graphs. Note that Repository.iter_file_bytes
        is usually a better interface for accessing file texts.
        The result of trying to insert data into the repository via this store
        is undefined: it should be considered read-only except for implementors
        of repositories.
    :ivar _transport: Transport for file access to repository, typically
        pointing to .bzr/repository.
    """
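
    # A sketch of the key addressing described above (illustrative values):
    #   revision_key  = ('rev-123',)            # 1-tuple into self.revisions
    #   signature_key = ('rev-123',)            # 1-tuple into self.signatures
    #   inventory_key = ('rev-123',)            # 1-tuple into self.inventories
    #   text_key      = ('file-abc', 'rev-123') # 2-tuple into self.texts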

    # What class to use for a CommitBuilder. Often it's simpler to change this
    # in a Repository class subclass rather than to override
    # get_commit_builder.
    _commit_builder_class = CommitBuilder
    # The search regex used by xml based repositories to determine what things
    # were changed in a single commit.
    _file_ids_altered_regex = lazy_regex.lazy_compile(
        r'file_id="(?P<file_id>[^"]+)"'
        r'.* revision="(?P<revision_id>[^"]+)"'
        )
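
    # For example (hypothetical serialised inventory fragment), a line such as
    #   <file file_id="foo-20080101-abc" name="foo" revision="rev-123"/>
    # yields match.group('file_id') == 'foo-20080101-abc' and
    # match.group('revision_id') == 'rev-123'.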

    def abort_write_group(self):
        """Abort the contents accrued within the current write group.

        :seealso: start_write_group.
        """
        if self._write_group is not self.get_transaction():
            # has an unlock or relock occurred ?
            raise errors.BzrError('mismatched lock context and write group.')
        self._abort_write_group()
        self._write_group = None

    def _abort_write_group(self):
        """Template method for per-repository write group cleanup.

        This is called during abort before the write group is considered to be
        finished and should cleanup any internal state accrued during the write
        group. There is no requirement that data handed to the repository be
        *not* made available - this is not a rollback - but neither should any
        attempt be made to ensure that data added is fully committed. Abort is
        invoked when an error has occurred so further disk or network operations
        may not be possible or may error and if possible should not be
        attempted.
        """

    def add_fallback_repository(self, repository):
        """Add a repository to use for looking up data not held locally.

        :param repository: A repository.
        """
        if not self._format.supports_external_lookups:
            raise errors.UnstackableRepositoryFormat(self._format, self.base)
        if not self._add_fallback_repository_check(repository):
            raise errors.IncompatibleRepositories(self, repository)
        self._fallback_repositories.append(repository)
        self.texts.add_fallback_versioned_files(repository.texts)
        self.inventories.add_fallback_versioned_files(repository.inventories)
        self.revisions.add_fallback_versioned_files(repository.revisions)
        self.signatures.add_fallback_versioned_files(repository.signatures)
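
    # Illustrative only: stacking `repo` on a hypothetical fallback, after
    # which lookups missing locally consult the fallback's stores as well:
    #
    #   fallback = Repository.open('http://example.com/base-repo')
    #   repo.add_fallback_repository(fallback)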

    def _add_fallback_repository_check(self, repository):
        """Check that this repository can fallback to repository safely.

        :param repository: A repository to fallback to.
        :return: True if the repositories can stack ok.
        """
        return InterRepository._same_model(self, repository)

    def add_inventory(self, revision_id, inv, parents):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
                        is known to have and are in the repository already.

        :returns: The validator (which is a sha1 digest, though what is sha'd is
            repository format specific) of the serialized inventory.
        """
        if not self.is_in_write_group():
            raise AssertionError("%r not in write group" % (self,))
        _mod_revision.check_not_reserved_id(revision_id)
        if not (inv.revision_id is None or inv.revision_id == revision_id):
            raise AssertionError(
                "Mismatch between inventory revision"
                " id and insertion revid (%r, %r)"
                % (inv.revision_id, revision_id))
        if inv.root is None:
            raise AssertionError()
        inv_lines = self._serialise_inventory_to_lines(inv)
        return self._inventory_add_lines(revision_id, parents,
            inv_lines, check_content=False)

    def _inventory_add_lines(self, revision_id, parents, lines,
        check_content=True):
        """Store lines in inv_vf and return the sha1 of the inventory."""
        parents = [(parent,) for parent in parents]
        return self.inventories.add_lines((revision_id,), parents, lines,
            check_content=check_content)[0]

    def add_revision(self, revision_id, rev, inv=None, config=None):
        """Add rev to the revision store as revision_id.

        :param revision_id: the revision id to use.
        :param rev: The revision object.
        :param inv: The inventory for the revision. If None, it will be looked
                    up in the inventory store.
        :param config: If None no digital signature will be created.
                       If supplied its signature_needed method will be used
                       to determine if a signature should be made.
        """
        # TODO: jam 20070210 Shouldn't we check rev.revision_id and
        #       rev.parent_ids?
        _mod_revision.check_not_reserved_id(revision_id)
        if config is not None and config.signature_needed():
            if inv is None:
                inv = self.get_inventory(revision_id)
            plaintext = Testament(rev, inv).as_short_text()
            self.store_revision_signature(
                gpg.GPGStrategy(config), plaintext, revision_id)
        # check inventory present
        if not self.inventories.get_parent_map([(revision_id,)]):
            if inv is None:
                raise errors.WeaveRevisionNotPresent(revision_id,
                                                     self.inventories)
            else:
                # yes, this is not suitable for adding with ghosts.
                rev.inventory_sha1 = self.add_inventory(revision_id, inv,
                                                        rev.parent_ids)
        else:
            key = (revision_id,)
            rev.inventory_sha1 = self.inventories.get_sha1s([key])[key]
        self._add_revision(rev)

    def _add_revision(self, revision):
        text = self._serializer.write_revision_to_string(revision)
        key = (revision.revision_id,)
        parents = tuple((parent,) for parent in revision.parent_ids)
        self.revisions.add_lines(key, parents, osutils.split_lines(text))

    def all_revision_ids(self):
        """Returns a list of all the revision ids in the repository.

        This is conceptually deprecated because code should generally work on
        the graph reachable from a particular revision, and ignore any other
        revisions that might be present. There is no direct replacement
        method.
        """
        if 'evil' in debug.debug_flags:
            mutter_callsite(2, "all_revision_ids is linear with history.")
        return self._all_revision_ids()

    def _all_revision_ids(self):
        """Returns a list of all the revision ids in the repository.

        These are in as much topological order as the underlying store can
        present.
        """
        raise NotImplementedError(self._all_revision_ids)

    def break_lock(self):
        """Break a lock if one is present from another instance.

        Uses the ui factory to ask for confirmation if the lock may be from
        an active process.
        """
        self.control_files.break_lock()

    @needs_read_lock
    def _eliminate_revisions_not_present(self, revision_ids):
        """Check every revision id in revision_ids to see if we have it.

        Returns a set of the present revisions.
        """
        graph = self.get_graph()
        parent_map = graph.get_parent_map(revision_ids)
        # The old API returned a list, should this actually be a set?
        return parent_map.keys()

    @staticmethod
    def create(a_bzrdir):
        """Construct the current default format repository in a_bzrdir."""
        return RepositoryFormat.get_default_format().initialize(a_bzrdir)

    def __init__(self, _format, a_bzrdir, control_files):
        """Instantiate a Repository.

        :param _format: The format of the repository on disk.
        :param a_bzrdir: The BzrDir of the repository.

        In the future we will have a single api for all stores for
        getting file texts, inventories and revisions, then
        this construct will accept instances of those things.
        """
        super(Repository, self).__init__()
        self._format = _format
        # the following are part of the public API for Repository:
        self.bzrdir = a_bzrdir
        self.control_files = control_files
        self._transport = control_files._transport
        self.base = self._transport.base
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = False
        self._reconcile_backsup_inventory = True
        # not right yet - should be more semantically clear ?
        #
        # TODO: make sure to construct the right store classes, etc, depending
        # on whether escaping is required.
        self._warn_if_deprecated()
        self._write_group = None
        # Additional places to query for data.
        self._fallback_repositories = []
        # What order should fetch operations request streams in?
        # The default is unsorted as that is the cheapest for an origin to
        # provide.
        self._fetch_order = 'unsorted'
        # Does this repository use deltas that can be fetched as-deltas ?
        # (E.g. knits, where the knit deltas can be transplanted intact.)
        # We default to False, which will ensure that enough data to get
        # a full text out of any fetch stream will be grabbed.
        self._fetch_uses_deltas = False

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__,
                           self.base)

    def has_same_location(self, other):
        """Returns a boolean indicating if this repository is at the same
        location as another repository.

        This might return False even when two repository objects are accessing
        the same physical repository via different URLs.
        """
        if self.__class__ is not other.__class__:
            return False
        return (self._transport.base == other._transport.base)

    def is_in_write_group(self):
        """Return True if there is an open write group.

        :seealso: start_write_group.
        """
        return self._write_group is not None

    def is_locked(self):
        return self.control_files.is_locked()

    def is_write_locked(self):
        """Return True if this object is write locked."""
        return self.is_locked() and self.control_files._lock_mode == 'w'

    def lock_write(self, token=None):
        """Lock this repository for writing.

        This causes caching within the repository object to start accumulating
        data during reads, and allows a 'write_group' to be obtained. Write
        groups must be used for actual data insertion.

        :param token: if this is already locked, then lock_write will fail
            unless the token matches the existing lock.
        :returns: a token if this instance supports tokens, otherwise None.
        :raises TokenLockingNotSupported: when a token is given but this
            instance doesn't support using token locks.
        :raises MismatchedToken: if the specified token doesn't match the token
            of the existing lock.
        :seealso: start_write_group.

        A token should be passed in if you know that you have locked the object
        some other way, and need to synchronise this object's state with that
        fact.

        XXX: this docstring is duplicated in many places, e.g. lockable_files.py
        """
        result = self.control_files.lock_write(token=token)
        for repo in self._fallback_repositories:
            # Writes don't affect fallback repos
            repo.lock_read()
        self._refresh_data()
        return result

    def lock_read(self):
        self.control_files.lock_read()
        for repo in self._fallback_repositories:
            repo.lock_read()
        self._refresh_data()

    def get_physical_lock_status(self):
        return self.control_files.get_physical_lock_status()

    def leave_lock_in_place(self):
        """Tell this repository not to release the physical lock when this
        object is unlocked.

        If lock_write doesn't return a token, then this method is not supported.
        """
        self.control_files.leave_in_place()

    def dont_leave_lock_in_place(self):
        """Tell this repository to release the physical lock when this
        object is unlocked, even if it didn't originally acquire it.

        If lock_write doesn't return a token, then this method is not supported.
        """
        self.control_files.dont_leave_in_place()

    @needs_read_lock
    def gather_stats(self, revid=None, committers=None):
        """Gather statistics from a revision id.

        :param revid: The revision id to gather statistics from, if None, then
            no revision specific statistics are gathered.
        :param committers: Optional parameter controlling whether to grab
            a count of committers from the revision specific statistics.
        :return: A dictionary of statistics. Currently this contains:
            committers: The number of committers if requested.
            firstrev: A tuple with timestamp, timezone for the penultimate left
                most ancestor of revid, if revid is not the NULL_REVISION.
            latestrev: A tuple with timestamp, timezone for revid, if revid is
                not the NULL_REVISION.
            revisions: The total revision count in the repository.
            size: An estimated disk size of the repository in bytes.
        """
        result = {}
        if revid and committers:
            result['committers'] = 0
        if revid and revid != _mod_revision.NULL_REVISION:
            if committers:
                all_committers = set()
            revisions = self.get_ancestry(revid)
            # pop the leading None
            revisions.pop(0)
            first_revision = None
            if not committers:
                # ignore the revisions in the middle - just grab first and last
                revisions = revisions[0], revisions[-1]
            for revision in self.get_revisions(revisions):
                if not first_revision:
                    first_revision = revision
                if committers:
                    all_committers.add(revision.committer)
            last_revision = revision
            if committers:
                result['committers'] = len(all_committers)
            result['firstrev'] = (first_revision.timestamp,
                first_revision.timezone)
            result['latestrev'] = (last_revision.timestamp,
                last_revision.timezone)

        # now gather global repository information
        # XXX: This is available for many repos regardless of listability.
        if self.bzrdir.root_transport.listable():
            # XXX: do we want to __define len__() ?
            # Maybe the versionedfiles object should provide a different
            # method to get the number of keys.
            result['revisions'] = len(self.revisions.keys())
        return result
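
    # A sketch of a possible gather_stats() result (illustrative values):
    #   {'revisions': 1432,
    #    'firstrev': (1199145600.0, 0),
    #    'latestrev': (1209312000.0, 3600),
    #    'committers': 7}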

    def find_branches(self, using=False):
        """Find branches underneath this repository.

        This will include branches inside other branches.

        :param using: If True, list only branches using this repository.
        """
        if using and not self.is_shared():
            try:
                return [self.bzrdir.open_branch()]
            except errors.NotBranchError:
                return []
        class Evaluator(object):

            def __init__(self):
                self.first_call = True

            def __call__(self, bzrdir):
                # On the first call, the parameter is always the bzrdir
                # containing the current repo.
                if not self.first_call:
                    try:
                        repository = bzrdir.open_repository()
                    except errors.NoRepositoryPresent:
                        pass
                    else:
                        return False, (None, repository)
                self.first_call = False
                try:
                    value = (bzrdir.open_branch(), None)
                except errors.NotBranchError:
                    value = (None, None)
                return True, value

        branches = []
        for branch, repository in bzrdir.BzrDir.find_bzrdirs(
                self.bzrdir.root_transport, evaluate=Evaluator()):
            if branch is not None:
                branches.append(branch)
            if not using and repository is not None:
                branches.extend(repository.find_branches())
        return branches

    @needs_read_lock
    def search_missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
        """Return the revision ids that other has that this does not.

        These are returned in topological order.

        revision_id: only return revision ids included by revision_id.
        """
        return InterRepository.get(other, self).search_missing_revision_ids(
            revision_id, find_ghosts)

    @deprecated_method(one_two)
    @needs_read_lock
    def missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
        """Return the revision ids that other has that this does not.

        These are returned in topological order.

        revision_id: only return revision ids included by revision_id.
        """
        keys = self.search_missing_revision_ids(
            other, revision_id, find_ghosts).get_keys()
        other.lock_read()
        try:
            parents = other.get_graph().get_parent_map(keys)
        finally:
            other.unlock()
        return tsort.topo_sort(parents)

    @staticmethod
    def open(base):
        """Open the repository rooted at base.

        For instance, if the repository is at URL/.bzr/repository,
        Repository.open(URL) -> a Repository instance.
        """
        control = bzrdir.BzrDir.open(base)
        return control.open_repository()

    def copy_content_into(self, destination, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This is a destructive operation! Do not use it on existing
        repositories.
        """
        return InterRepository.get(self, destination).copy_content(revision_id)

    def commit_write_group(self):
        """Commit the contents accrued within the current write group.

        :seealso: start_write_group.
        """
        if self._write_group is not self.get_transaction():
            # has an unlock or relock occurred ?
            raise errors.BzrError('mismatched lock context %r and '
                'write group %r.' %
                (self.get_transaction(), self._write_group))
        self._commit_write_group()
        self._write_group = None

    def _commit_write_group(self):
        """Template method for per-repository write group cleanup.

        This is called before the write group is considered to be
        finished and should ensure that all data handed to the repository
        for writing during the write group is safely committed (to the
        extent possible considering file system caching etc).
        """

    def fetch(self, source, revision_id=None, pb=None, find_ghosts=False):
        """Fetch the content required to construct revision_id from source.

        If revision_id is None all content is copied.
        :param find_ghosts: Find and copy revisions in the source that are
            ghosts in the target (and not reachable directly by walking out to
            the first-present revision in target from revision_id).
        """
        # fast path same-url fetch operations
        if self.has_same_location(source):
            # check that last_revision is in 'from' and then return a
            # no-operation.
            if (revision_id is not None and
                not _mod_revision.is_null(revision_id)):
                self.get_revision(revision_id)
            return 0, []
        inter = InterRepository.get(source, self)
        try:
            return inter.fetch(revision_id=revision_id, pb=pb, find_ghosts=find_ghosts)
        except NotImplementedError:
            raise errors.IncompatibleRepositories(source, self)

    def create_bundle(self, target, base, fileobj, format=None):
        return serializer.write_bundle(self, target, base, fileobj, format)

    def get_commit_builder(self, branch, parents, config, timestamp=None,
                           timezone=None, committer=None, revprops=None,
                           revision_id=None):
        """Obtain a CommitBuilder for this repository.

        :param branch: Branch to commit to.
        :param parents: Revision ids of the parents of the new revision.
        :param config: Configuration to use.
        :param timestamp: Optional timestamp recorded for commit.
        :param timezone: Optional timezone for timestamp.
        :param committer: Optional committer to set for commit.
        :param revprops: Optional dictionary of revision properties.
        :param revision_id: Optional revision id.
        """
        result = self._commit_builder_class(self, parents, config,
            timestamp, timezone, committer, revprops, revision_id)
        self.start_write_group()
        return result

    def unlock(self):
        if (self.control_files._lock_count == 1 and
            self.control_files._lock_mode == 'w'):
            if self._write_group is not None:
                self.abort_write_group()
                self.control_files.unlock()
                raise errors.BzrError(
                    'Must end write groups before releasing write locks.')
        self.control_files.unlock()
        for repo in self._fallback_repositories:
            repo.unlock()

    @needs_read_lock
    def clone(self, a_bzrdir, revision_id=None):
        """Clone this repository into a_bzrdir using the current format.

        Currently no check is made that the format of this repository and
        the bzrdir format are compatible. FIXME RBC 20060201.

        :return: The newly created destination repository.
        """
        # TODO: deprecate after 0.16; cloning this with all its settings is
        # probably not very useful -- mbp 20070423
        dest_repo = self._create_sprouting_repo(a_bzrdir, shared=self.is_shared())
        self.copy_content_into(dest_repo, revision_id)
        return dest_repo

    def start_write_group(self):
        """Start a write group in the repository.

        Write groups are used by repositories which do not have a 1:1 mapping
        between file ids and backend store to manage the insertion of data from
        both fetch and commit operations.

        A write lock is required around the start_write_group/commit_write_group
        for the support of lock-requiring repository formats.

        One can only insert data into a repository inside a write group.

        :return: None.
        """
        if not self.is_write_locked():
            raise errors.NotWriteLocked(self)
        if self._write_group:
            raise errors.BzrError('already in a write group')
        self._start_write_group()
        # so we can detect unlock/relock - the write group is now entered.
        self._write_group = self.get_transaction()
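
    # Illustrative only: the write-group protocol around data insertion,
    # assuming `repo` is a hypothetical write-capable Repository instance:
    #
    #   repo.lock_write()
    #   try:
    #       repo.start_write_group()
    #       try:
    #           repo.add_revision(...)  # insertions happen inside the group
    #       except:
    #           repo.abort_write_group()
    #           raise
    #       else:
    #           repo.commit_write_group()
    #   finally:
    #       repo.unlock()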

    def _start_write_group(self):
        """Template method for per-repository write group startup.

        This is called before the write group is considered to be
        entered.
        """

    @needs_read_lock
    def sprout(self, to_bzrdir, revision_id=None):
        """Create a descendent repository for new development.

        Unlike clone, this does not copy the settings of the repository.
        """
        dest_repo = self._create_sprouting_repo(to_bzrdir, shared=False)
        dest_repo.fetch(self, revision_id=revision_id)
        return dest_repo

    def _create_sprouting_repo(self, a_bzrdir, shared):
        if not isinstance(a_bzrdir._format, self.bzrdir._format.__class__):
            # use target default format.
            dest_repo = a_bzrdir.create_repository()
        else:
            # Most control formats need the repository to be specifically
            # created, but on some old all-in-one formats it's not needed
            try:
                dest_repo = self._format.initialize(a_bzrdir, shared=shared)
            except errors.UninitializableFormat:
                dest_repo = a_bzrdir.open_repository()
        return dest_repo

    @needs_read_lock
    def has_revision(self, revision_id):
        """True if this repository has a copy of the revision."""
        return revision_id in self.has_revisions((revision_id,))

    @needs_read_lock
    def has_revisions(self, revision_ids):
        """Probe to find out the presence of multiple revisions.

        :param revision_ids: An iterable of revision_ids.
        :return: A set of the revision_ids that were present.
        """
        parent_map = self.revisions.get_parent_map(
            [(rev_id,) for rev_id in revision_ids])
        result = set()
        if _mod_revision.NULL_REVISION in revision_ids:
            result.add(_mod_revision.NULL_REVISION)
        result.update([key[0] for key in parent_map])
        return result

    @needs_read_lock
    def get_revision(self, revision_id):
        """Return the Revision object for a named revision."""
        return self.get_revisions([revision_id])[0]

    @needs_read_lock
    def get_revision_reconcile(self, revision_id):
        """'reconcile' helper routine that allows access to a revision always.

        This variant of get_revision does not cross check the weave graph
        against the revision one as get_revision does: but it should only
        be used by reconcile, or reconcile-alike commands that are correcting
        or testing the revision graph.
        """
        return self._get_revisions([revision_id])[0]

    @needs_read_lock
    def get_revisions(self, revision_ids):
        """Get many revisions at once."""
        return self._get_revisions(revision_ids)

    @needs_read_lock
    def _get_revisions(self, revision_ids):
        """Core work logic to get many revisions without sanity checks."""
        for rev_id in revision_ids:
            if not rev_id or not isinstance(rev_id, basestring):
                raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
        keys = [(key,) for key in revision_ids]
        stream = self.revisions.get_record_stream(keys, 'unordered', True)
        revs = {}
        for record in stream:
            if record.storage_kind == 'absent':
                raise errors.NoSuchRevision(self, record.key[0])
            text = record.get_bytes_as('fulltext')
            rev = self._serializer.read_revision_from_string(text)
            revs[record.key[0]] = rev
        return [revs[revid] for revid in revision_ids]

    @needs_read_lock
    def get_revision_xml(self, revision_id):
        # TODO: jam 20070210 This shouldn't be necessary since get_revision
        #       would have already done it.
        # TODO: jam 20070210 Just use _serializer.write_revision_to_string()
        rev = self.get_revision(revision_id)
        rev_tmp = StringIO()
        # the current serializer..
        self._serializer.write_revision(rev, rev_tmp)
        rev_tmp.seek(0)
        return rev_tmp.getvalue()

    def get_deltas_for_revisions(self, revisions):
        """Produce a generator of revision deltas.

        Note that the input is a sequence of REVISIONS, not revision_ids.
        Trees will be held in memory until the generator exits.
        Each delta is relative to the revision's lefthand predecessor.
        """
        required_trees = set()
        for revision in revisions:
            required_trees.add(revision.revision_id)
            required_trees.update(revision.parent_ids[:1])
        trees = dict((t.get_revision_id(), t) for
                     t in self.revision_trees(required_trees))
        for revision in revisions:
            if not revision.parent_ids:
                old_tree = self.revision_tree(None)
            else:
                old_tree = trees[revision.parent_ids[0]]
            yield trees[revision.revision_id].changes_from(old_tree)

    @needs_read_lock
    def get_revision_delta(self, revision_id):
        """Return the delta for one revision.

        The delta is relative to the left-hand predecessor of the
        revision.
        """
        r = self.get_revision(revision_id)
        return list(self.get_deltas_for_revisions([r]))[0]

    @needs_write_lock
    def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
        signature = gpg_strategy.sign(plaintext)
        self.add_signature_text(revision_id, signature)

    @needs_write_lock
    def add_signature_text(self, revision_id, signature):
        self.signatures.add_lines((revision_id,), (),
            osutils.split_lines(signature))

    def find_text_key_references(self):
        """Find the text key references within the repository.

        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. The inventory texts from all present
            revision ids are assessed to generate this report.
        """
        revision_keys = self.revisions.keys()
        w = self.inventories
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._find_text_key_references_from_xml_inventory_lines(
                w.iter_lines_added_or_present_in_keys(revision_keys, pb=pb))
        finally:
            pb.finished()

    def _find_text_key_references_from_xml_inventory_lines(self,
        line_iterator):
        """Core routine for extracting references to texts from inventories.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. Note that if that revision_id was
            not part of the line_iterator's output then False will be given -
            even though it may actually refer to that key.
        """
        if not self._serializer.support_altered_by_hack:
            raise AssertionError(
                "_find_text_key_references_from_xml_inventory_lines only "
                "supported for branches which store inventory as unnested xml"
                ", not on %r" % self)
        result = {}

        # this code needs to read every new line in every inventory for the
        # inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
        # not present in one of those inventories is unnecessary but not
        # harmful because we are filtering by the revision id marker in the
        # inventory lines: we only select file ids altered in one of those
        # revisions. We don't need to see all lines in the inventory because
        # only those added in an inventory in rev X can contain a revision=X
        # line.
        unescape_revid_cache = {}
        unescape_fileid_cache = {}

        # jam 20061218 In a big fetch, this handles hundreds of thousands
        # of lines, so it has had a lot of inlining and optimizing done.
        # Sorry that it is a little bit messy.
        # Move several functions to be local variables, since this is a long
        # running loop.
        search = self._file_ids_altered_regex.search
        unescape = _unescape_xml
        setdefault = result.setdefault
        for line, line_key in line_iterator:
            match = search(line)
            if match is None:
                continue
            # One call to match.group() returning multiple items is quite a
            # bit faster than 2 calls to match.group() each returning 1
            file_id, revision_id = match.group('file_id', 'revision_id')

            # Inlining the cache lookups helps a lot when you make 170,000
            # lines and 350k ids, versus 8.4 unique ids.
            # Using a cache helps in 2 ways:
            #   1) Avoids unnecessary decoding calls
            #   2) Re-uses cached strings, which helps in future set and
            #      equality checks.
            # (2) is enough that removing encoding entirely along with
            # the cache (so we are using plain strings) results in no
            # performance improvement.
            try:
                revision_id = unescape_revid_cache[revision_id]
            except KeyError:
                unescaped = unescape(revision_id)
                unescape_revid_cache[revision_id] = unescaped
                revision_id = unescaped

            # Note that unconditionally unescaping means that we deserialise
            # every fileid, which for general 'pull' is not great, but we don't
            # really want to have so many fulltexts that this matters anyway.
            try:
                file_id = unescape_fileid_cache[file_id]
            except KeyError:
                unescaped = unescape(file_id)
                unescape_fileid_cache[file_id] = unescaped
                file_id = unescaped

            key = (file_id, revision_id)
            setdefault(key, False)
            if revision_id == line_key[-1]:
                result[key] = True
        return result

    def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
        revision_ids):
        """Helper routine for fileids_altered_by_revision_ids.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :param revision_ids: The revision ids to filter for. This should be a
            set or other type which supports efficient __contains__ lookups, as
            the revision id from each parsed line will be looked up in the
            revision_ids filter.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids that
            altered it listed explicitly.
        """
        result = {}
        setdefault = result.setdefault
        for key in \
            self._find_text_key_references_from_xml_inventory_lines(
                line_iterator).iterkeys():
            # once data is all ensured-consistent; then this is
            # if revision_id == version_id
            if key[-1:] in revision_ids:
                setdefault(key[0], set()).add(key[-1])
        return result

    def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
        """Find the file ids and versions affected by revisions.

        :param revisions: an iterable containing revision ids.
        :param _inv_weave: The inventory weave from this repository or None.
            If None, the inventory weave will be opened automatically.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids that
            altered it listed explicitly.
        """
        selected_keys = set((revid,) for revid in revision_ids)
        w = _inv_weave or self.inventories
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._find_file_ids_from_xml_inventory_lines(
                w.iter_lines_added_or_present_in_keys(
                    selected_keys, pb=pb),
                selected_keys)
        finally:
            pb.finished()

    def iter_files_bytes(self, desired_files):
        """Iterate through file versions.

        Files will not necessarily be returned in the order they occur in
        desired_files.  No specific order is guaranteed.

        Yields pairs of identifier, bytes_iterator.  identifier is an opaque
        value supplied by the caller as part of desired_files.  It should
        uniquely identify the file version in the caller's context.  (Examples:
        an index number or a TreeTransform trans_id.)

        bytes_iterator is an iterable of bytestrings for the file.  The
        kind of iterable and length of the bytestrings are unspecified, but for
        this implementation, it is a list of bytes produced by
        VersionedFile.get_record_stream().

        :param desired_files: a list of (file_id, revision_id, identifier)
            triples
        """
        transaction = self.get_transaction()
        text_keys = {}
        for file_id, revision_id, callable_data in desired_files:
            text_keys[(file_id, revision_id)] = callable_data
        for record in self.texts.get_record_stream(text_keys, 'unordered', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self)
            yield text_keys[record.key], record.get_bytes_as('fulltext')
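
    # Illustrative only: fetching two file texts with caller-chosen
    # identifiers (hypothetical ids):
    #
    #   desired_files = [('file-abc', 'rev-1', 'any-token-1'),
    #                    ('file-xyz', 'rev-2', 'any-token-2')]
    #   for identifier, bytes_iterator in repo.iter_files_bytes(desired_files):
    #       text = ''.join(bytes_iterator)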

    def _generate_text_key_index(self, text_key_references=None,
        ancestors=None):
        """Generate a new text key index for the repository.

        This is an expensive function that will take considerable time to run.

        :return: A dict mapping text keys ((file_id, revision_id) tuples) to a
            list of parents, also text keys. When a given key has no parents,
            the parents list will be [NULL_REVISION].
        """
        # All revisions, to find inventory parents.
        if ancestors is None:
            graph = self.get_graph()
            ancestors = graph.get_parent_map(self.all_revision_ids())
        if text_key_references is None:
            text_key_references = self.find_text_key_references()
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_generate_text_key_index(ancestors,
                text_key_references, pb)
        finally:
            pb.finished()

    def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
        """Helper for _generate_text_key_index to avoid deep nesting."""
        revision_order = tsort.topo_sort(ancestors)
        invalid_keys = set()
        revision_keys = {}
        for revision_id in revision_order:
            revision_keys[revision_id] = set()
        text_count = len(text_key_references)
        # a cache of the text keys to allow reuse; costs a dict of all the
        # keys, but saves a 2-tuple for every child of a given key.
        text_key_cache = {}
        for text_key, valid in text_key_references.iteritems():
            if not valid:
                invalid_keys.add(text_key)
            else:
                revision_keys[text_key[1]].add(text_key)
            text_key_cache[text_key] = text_key
        del text_key_references
        text_index = {}
        text_graph = graph.Graph(graph.DictParentsProvider(text_index))
        NULL_REVISION = _mod_revision.NULL_REVISION
        # Set a cache with a size of 10 - this suffices for bzr.dev but may be
        # too small for large or very branchy trees. However, for 55K path
        # trees, it would be easy to use too much memory trivially. Ideally we
        # could gauge this by looking at available real memory etc, but this is
        # always a tricky proposition.
        inventory_cache = lru_cache.LRUCache(10)
        batch_size = 10 # should be ~150MB on a 55K path tree
        batch_count = len(revision_order) / batch_size + 1
        processed_texts = 0
        pb.update("Calculating text parents.", processed_texts, text_count)
        for offset in xrange(batch_count):
            to_query = revision_order[offset * batch_size:(offset + 1) *
                batch_size]
            if not to_query:
                break
            for rev_tree in self.revision_trees(to_query):
                revision_id = rev_tree.get_revision_id()
                parent_ids = ancestors[revision_id]
                for text_key in revision_keys[revision_id]:
                    pb.update("Calculating text parents.", processed_texts)
                    processed_texts += 1
                    candidate_parents = []
                    for parent_id in parent_ids:
                        parent_text_key = (text_key[0], parent_id)
                        check_parent = parent_text_key not in \
                            revision_keys[parent_id]
                        if parent_id not in ancestors:
                            # the parent parent_id is a ghost:
                            check_parent = False
                            # truncate the derived graph against this ghost.
                            parent_text_key = None
                        if check_parent:
                            # look at the parent commit details inventories to
                            # determine possible candidates in the per file graph.
                            try:
                                inv = inventory_cache[parent_id]
                            except KeyError:
                                inv = self.revision_tree(parent_id).inventory
                                inventory_cache[parent_id] = inv
                            parent_entry = inv._byid.get(text_key[0], None)
                            if parent_entry is not None:
                                parent_text_key = (
                                    text_key[0], parent_entry.revision)
                            else:
                                parent_text_key = None
                        if parent_text_key is not None:
                            candidate_parents.append(
                                text_key_cache[parent_text_key])
                    parent_heads = text_graph.heads(candidate_parents)
                    new_parents = list(parent_heads)
                    new_parents.sort(key=lambda x:candidate_parents.index(x))
                    if new_parents == []:
                        new_parents = [NULL_REVISION]
                    text_index[text_key] = new_parents
        for text_key in invalid_keys:
            text_index[text_key] = [NULL_REVISION]
        return text_index
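
    # A sketch of the resulting text key index (illustrative values):
    #   {('file-abc', 'rev-2'): [('file-abc', 'rev-1')],
    #    ('file-abc', 'rev-1'): ['null:']}
    # i.e. each text key maps to its parent text keys, with NULL_REVISION
    # standing in when a key has no parents.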

    def item_keys_introduced_by(self, revision_ids, _files_pb=None):
        """Get an iterable listing the keys of all the data introduced by a set
        of revision IDs.

        The keys will be ordered so that the corresponding items can be safely
        fetched and inserted in that order.

        :returns: An iterable producing tuples of (knit-kind, file-id,
            versions).  knit-kind is one of 'file', 'inventory', 'signatures',
            'revisions'.  file-id is None unless knit-kind is 'file'.
        """
        # XXX: it's a bit weird to control the inventory weave caching in this
        # generator. Ideally the caching would be done in fetch.py I think. Or
        # maybe this generator should explicitly have the contract that it
        # should not be iterated until the previously yielded item has been
        # processed?
        inv_w = self.inventories

        # file ids that changed
        file_ids = self.fileids_altered_by_revision_ids(revision_ids, inv_w)
        count = 0
        num_file_ids = len(file_ids)
        for file_id, altered_versions in file_ids.iteritems():
            if _files_pb is not None:
                _files_pb.update("fetch texts", count, num_file_ids)
            count += 1
            yield ("file", file_id, altered_versions)
        # We're done with the files_pb.  Note that it is finished by the caller,
        # just as it was created by the caller.

        # inventory
        yield ("inventory", None, revision_ids)

        # signatures
        revisions_with_signatures = set()
        for rev_id in revision_ids:
            try:
                self.get_signature_text(rev_id)
            except errors.NoSuchRevision:
                # not signed.
                pass
            else:
                revisions_with_signatures.add(rev_id)
        yield ("signatures", None, revisions_with_signatures)

        # revisions
        yield ("revisions", None, revision_ids)

    @needs_read_lock
    def get_inventory(self, revision_id):
        """Get Inventory object by revision id."""
        return self.iter_inventories([revision_id]).next()

    def iter_inventories(self, revision_ids):
        """Get many inventories by revision_ids.

        This will buffer some or all of the texts used in constructing the
        inventories in memory, but will only parse a single inventory at a
        time.

        :return: An iterator of inventories.
        """
        if ((None in revision_ids)
            or (_mod_revision.NULL_REVISION in revision_ids)):
            raise ValueError('cannot get null revision inventory')
        return self._iter_inventories(revision_ids)

    def _iter_inventories(self, revision_ids):
        """single-document based inventory iteration."""
        for text, revision_id in self._iter_inventory_xmls(revision_ids):
            yield self.deserialise_inventory(revision_id, text)

    def _iter_inventory_xmls(self, revision_ids):
        keys = [(revision_id,) for revision_id in revision_ids]
        stream = self.inventories.get_record_stream(keys, 'unordered', True)
        texts = {}
        for record in stream:
            if record.storage_kind != 'absent':
                texts[record.key] = record.get_bytes_as('fulltext')
            else:
                raise errors.NoSuchRevision(self, record.key)
        for key in keys:
            yield texts[key], key[-1]

    def deserialise_inventory(self, revision_id, xml):
        """Transform the xml into an inventory object.

        :param revision_id: The expected revision id of the inventory.
        :param xml: A serialised inventory.
        """
        result = self._serializer.read_inventory_from_string(xml, revision_id)
        if result.revision_id != revision_id:
            raise AssertionError('revision id mismatch %s != %s' % (
                result.revision_id, revision_id))
        return result

    def serialise_inventory(self, inv):
        return self._serializer.write_inventory_to_string(inv)

    def _serialise_inventory_to_lines(self, inv):
        return self._serializer.write_inventory_to_lines(inv)

    def get_serializer_format(self):
        return self._serializer.format_num

    @needs_read_lock
    def get_inventory_xml(self, revision_id):
        """Get inventory XML as a file object."""
        texts = self._iter_inventory_xmls([revision_id])
        try:
            text, revision_id = texts.next()
        except StopIteration:
            raise errors.HistoryMissing(self, 'inventory', revision_id)
        return text

    @needs_read_lock
    def get_inventory_sha1(self, revision_id):
        """Return the sha1 hash of the inventory entry."""
        return self.get_revision(revision_id).inventory_sha1

    def iter_reverse_revision_history(self, revision_id):
        """Iterate backwards through revision ids in the lefthand history.

        :param revision_id: The revision id to start with.  All its lefthand
            ancestors will be traversed.
        """
        graph = self.get_graph()
        next_id = revision_id
        while True:
            if next_id in (None, _mod_revision.NULL_REVISION):
                return
            yield next_id
            # Note: The following line may raise KeyError in the event of
            # truncated history. We decided not to have a try:except:raise
            # RevisionNotPresent here until we see a use for it, because of the
            # cost in an inner loop that is by its very nature O(history).
            # Robert Collins 20080326
            parents = graph.get_parent_map([next_id])[next_id]
            if len(parents) == 0:
                return
            else:
                next_id = parents[0]

    @needs_read_lock
    def get_revision_inventory(self, revision_id):
        """Return inventory of a past revision."""
        # TODO: Unify this with get_inventory()
        # bzr 0.0.6 and later imposes the constraint that the inventory_id
        # must be the same as its revision, so this is trivial.
        if revision_id is None:
            # This does not make sense: if there is no revision,
            # then it is the current tree inventory surely ?!
            # and thus get_root_id() is something that looks at the last
            # commit on the branch, and the get_root_id is an inventory check.
            raise NotImplementedError
            # return Inventory(self.get_root_id())
        else:
            return self.get_inventory(revision_id)

    def is_shared(self):
        """Return True if this repository is flagged as a shared repository."""
        raise NotImplementedError(self.is_shared)

    @needs_write_lock
    def reconcile(self, other=None, thorough=False):
        """Reconcile this repository."""
        from bzrlib.reconcile import RepoReconciler
        reconciler = RepoReconciler(self, thorough=thorough)
        reconciler.reconcile()
        return reconciler

    def _refresh_data(self):
        """Helper called from lock_* to ensure coherency with disk.

        The default implementation does nothing; it is however possible
        for repositories to maintain loaded indices across multiple locks
        by checking inside their implementation of this method to see
        whether their indices are still valid. This depends of course on
        the disk format being validatable in this manner.
        """
1650

    @needs_read_lock
    def revision_tree(self, revision_id):
        """Return Tree for a revision on this branch.

        `revision_id` may be None for the empty tree revision.
        """
        # TODO: refactor this to use an existing revision object
        # so we don't need to read it in twice.
        if revision_id is None or revision_id == _mod_revision.NULL_REVISION:
            return RevisionTree(self, Inventory(root_id=None),
                                _mod_revision.NULL_REVISION)
        else:
            inv = self.get_revision_inventory(revision_id)
            return RevisionTree(self, inv, revision_id)

    def revision_trees(self, revision_ids):
        """Return Trees for revisions on this branch.

        `revision_id` may not be None or 'null:'"""
        inventories = self.iter_inventories(revision_ids)
        for inv in inventories:
            yield RevisionTree(self, inv, inv.revision_id)

    @needs_read_lock
    def get_ancestry(self, revision_id, topo_sorted=True):
        """Return a list of revision-ids integrated by a revision.

        The first element of the list is always None, indicating the origin
        revision.  This might change when we have history horizons, or
        perhaps we should have a new API.

        This is topologically sorted.
        """
        if _mod_revision.is_null(revision_id):
            return [None]
        if not self.has_revision(revision_id):
            raise errors.NoSuchRevision(self, revision_id)
        graph = self.get_graph()
        keys = set()
        search = graph._make_breadth_first_searcher([revision_id])
        while True:
            try:
                found, ghosts = search.next_with_ghosts()
            except StopIteration:
                break
            keys.update(found)
        if _mod_revision.NULL_REVISION in keys:
            keys.remove(_mod_revision.NULL_REVISION)
        if topo_sorted:
            parent_map = graph.get_parent_map(keys)
            keys = tsort.topo_sort(parent_map)
        return [None] + list(keys)

    def pack(self):
        """Compress the data within the repository.

        This operation only makes sense for some repository types. For other
        types it should be a no-op that just returns.

        This stub method does not require a lock, but subclasses should use
        @needs_write_lock as this is a long running call it's reasonable to
        implicitly lock for the user.
        """

    @deprecated_method(one_six)
    def print_file(self, file, revision_id):
        """Print `file` to stdout.

        FIXME RBC 20060125 as John Meinel points out this is a bad api
        - it writes to stdout, it assumes that that is valid etc. Fix
        by creating a new more flexible convenience function.
        """
        tree = self.revision_tree(revision_id)
        # use inventory as it was in that revision
        file_id = tree.inventory.path2id(file)
        if not file_id:
            # TODO: jam 20060427 Write a test for this code path
            #       it had a bug in it, and was raising the wrong
            #       exception.
            raise errors.BzrError("%r is not present in revision %s" % (file, revision_id))
        tree.print_file(file_id)

    def get_transaction(self):
        return self.control_files.get_transaction()

    @deprecated_method(one_one)
    def get_parents(self, revision_ids):
        """See StackedParentsProvider.get_parents"""
        parent_map = self.get_parent_map(revision_ids)
        return [parent_map.get(r, None) for r in revision_ids]

    def get_parent_map(self, revision_ids):
        """See graph._StackedParentsProvider.get_parent_map"""
        # revisions index works in keys; this just works in revisions
        # therefore wrap and unwrap
        query_keys = []
        result = {}
        for revision_id in revision_ids:
            if revision_id == _mod_revision.NULL_REVISION:
                result[revision_id] = ()
            elif revision_id is None:
                raise ValueError('get_parent_map(None) is not valid')
            else:
                query_keys.append((revision_id,))
        for ((revision_id,), parent_keys) in \
                self.revisions.get_parent_map(query_keys).iteritems():
            if parent_keys:
                result[revision_id] = tuple(parent_revid
                    for (parent_revid,) in parent_keys)
            else:
                result[revision_id] = (_mod_revision.NULL_REVISION,)
        return result
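
    # Illustrative sketch (not part of the original source): the wrap/unwrap
    # done above.  The revisions index speaks in 1-tuple keys such as
    # ('rev-2',) with parent keys like (('rev-1',),), while this method speaks
    # in plain revision ids:
    #
    #     repo.get_parent_map(['rev-2', _mod_revision.NULL_REVISION])
    #     # => {'rev-2': ('rev-1',), 'null:': ()}
    #
    # A ghost (a revision id the index has no entry for) is silently left out
    # of the result rather than raising an error.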

    def _make_parents_provider(self):
        return self

    def get_graph(self, other_repository=None):
        """Return the graph walker for this repository format"""
        parents_provider = self._make_parents_provider()
        if (other_repository is not None and
            not self.has_same_location(other_repository)):
            parents_provider = graph._StackedParentsProvider(
                [parents_provider, other_repository._make_parents_provider()])
        return graph.Graph(parents_provider)

    def _get_versioned_file_checker(self):
        """Return an object suitable for checking versioned files."""
        return _VersionedFileChecker(self)

    def revision_ids_to_search_result(self, result_set):
        """Convert a set of revision ids to a graph SearchResult."""
        result_parents = set()
        for parents in self.get_graph().get_parent_map(
            result_set).itervalues():
            result_parents.update(parents)
        included_keys = result_set.intersection(result_parents)
        start_keys = result_set.difference(included_keys)
        exclude_keys = result_parents.difference(result_set)
        result = graph.SearchResult(start_keys, exclude_keys,
            len(result_set), result_set)
        return result

    def set_make_working_trees(self, new_value):
        """Set the policy flag for making working trees when creating branches.

        This only applies to branches that use this repository.

        The default is 'True'.
        :param new_value: True to restore the default, False to disable making
                          working trees.
        """
        raise NotImplementedError(self.set_make_working_trees)

    def make_working_trees(self):
        """Returns the policy for making working trees on new branches."""
        raise NotImplementedError(self.make_working_trees)

    @needs_write_lock
    def sign_revision(self, revision_id, gpg_strategy):
        plaintext = Testament.from_revision(self, revision_id).as_short_text()
        self.store_revision_signature(gpg_strategy, plaintext, revision_id)

    @needs_read_lock
    def has_signature_for_revision_id(self, revision_id):
        """Query for a revision signature for revision_id in the repository."""
        if not self.has_revision(revision_id):
            raise errors.NoSuchRevision(self, revision_id)
        sig_present = (1 == len(
            self.signatures.get_parent_map([(revision_id,)])))
        return sig_present

    @needs_read_lock
    def get_signature_text(self, revision_id):
        """Return the text for a signature."""
        stream = self.signatures.get_record_stream([(revision_id,)],
            'unordered', True)
        record = stream.next()
        if record.storage_kind == 'absent':
            raise errors.NoSuchRevision(self, revision_id)
        return record.get_bytes_as('fulltext')

    @needs_read_lock
    def check(self, revision_ids=None):
        """Check consistency of all history of given revision_ids.

        Different repository implementations should override _check().

        :param revision_ids: A non-empty list of revision_ids whose ancestry
             will be checked.  Typically the last revision_id of a branch.
        """
        return self._check(revision_ids)

    def _check(self, revision_ids):
        result = check.Check(self)
        result.check()
        return result

    def _warn_if_deprecated(self):
        global _deprecation_warning_done
        if _deprecation_warning_done:
            return
        _deprecation_warning_done = True
        warning("Format %s for %s is deprecated - please use 'bzr upgrade' to get better performance"
                % (self._format, self.bzrdir.transport.base))

    def supports_rich_root(self):
        return self._format.rich_root_data

    def _check_ascii_revisionid(self, revision_id, method):
        """Private helper for ascii-only repositories."""
        # weave repositories refuse to store revisionids that are non-ascii.
        if revision_id is not None:
            # weaves require ascii revision ids.
            if isinstance(revision_id, unicode):
                try:
                    revision_id.encode('ascii')
                except UnicodeEncodeError:
                    raise errors.NonAsciiRevisionId(method, self)
            else:
                try:
                    revision_id.decode('ascii')
                except UnicodeDecodeError:
                    raise errors.NonAsciiRevisionId(method, self)

    def revision_graph_can_have_wrong_parents(self):
        """Is it possible for this repository to have a revision graph with
        incorrect parents?

        If True, then this repository must also implement
        _find_inconsistent_revision_parents so that check and reconcile can
        check for inconsistencies before proceeding with other checks that may
        depend on the revision index being consistent.
        """
        raise NotImplementedError(self.revision_graph_can_have_wrong_parents)


# remove these delegates a while after bzr 0.15
def __make_delegated(name, from_module):
    def _deprecated_repository_forwarder():
        symbol_versioning.warn('%s moved to %s in bzr 0.15'
            % (name, from_module),
            DeprecationWarning,
            stacklevel=2)
        m = __import__(from_module, globals(), locals(), [name])
        try:
            return getattr(m, name)
        except AttributeError:
            raise AttributeError('module %s has no name %s'
                % (m, name))
    globals()[name] = _deprecated_repository_forwarder

for _name in [
        'AllInOneRepository',
        'WeaveMetaDirRepository',
        'PreSplitOutRepositoryFormat',
        'RepositoryFormat4',
        'RepositoryFormat5',
        'RepositoryFormat6',
        'RepositoryFormat7',
        ]:
    __make_delegated(_name, 'bzrlib.repofmt.weaverepo')

for _name in [
        'KnitRepository',
        'RepositoryFormatKnit',
        'RepositoryFormatKnit1',
        ]:
    __make_delegated(_name, 'bzrlib.repofmt.knitrepo')


def install_revision(repository, rev, revision_tree):
    """Install all revision data into a repository."""
    install_revisions(repository, [(rev, revision_tree, None)])


def install_revisions(repository, iterable, num_revisions=None, pb=None):
    """Install all revision data into a repository.

    Accepts an iterable of revision, tree, signature tuples.  The signature
    may be None.
    """
    repository.start_write_group()
    try:
        for n, (revision, revision_tree, signature) in enumerate(iterable):
            _install_revision(repository, revision, revision_tree, signature)
            if pb is not None:
                pb.update('Transferring revisions', n + 1, num_revisions)
    except:
        repository.abort_write_group()
        raise
    else:
        repository.commit_write_group()
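

# Illustrative sketch (not part of the original source): install_revisions()
# is preferred over repeated install_revision() calls because it wraps the
# whole batch in one write group.  `source_repo`, `target_repo` and
# `revision_ids` (a list) are assumed to be supplied by the caller.
def _example_copy_revisions(source_repo, target_repo, revision_ids):
    """Copy revisions one at a time via install_revisions (slow but simple)."""
    def triples():
        for revision_id in revision_ids:
            rev = source_repo.get_revision(revision_id)
            tree = source_repo.revision_tree(revision_id)
            yield rev, tree, None   # None means 'no signature'
    # install_revisions opens a write group, so a write lock must be held.
    target_repo.lock_write()
    try:
        install_revisions(target_repo, triples(), len(revision_ids))
    finally:
        target_repo.unlock()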


def _install_revision(repository, rev, revision_tree, signature):
    """Install all revision data into a repository."""
    present_parents = []
    parent_trees = {}
    for p_id in rev.parent_ids:
        if repository.has_revision(p_id):
            present_parents.append(p_id)
            parent_trees[p_id] = repository.revision_tree(p_id)
        else:
            parent_trees[p_id] = repository.revision_tree(None)

    inv = revision_tree.inventory
    entries = inv.iter_entries()
    # backwards compatibility hack: skip the root id.
    if not repository.supports_rich_root():
        path, root = entries.next()
        if root.revision != rev.revision_id:
            raise errors.IncompatibleRevision(repr(repository))
    text_keys = {}
    for path, ie in entries:
        text_keys[(ie.file_id, ie.revision)] = ie
    text_parent_map = repository.texts.get_parent_map(text_keys)
    missing_texts = set(text_keys) - set(text_parent_map)
    # Add the texts that are not already present
    for text_key in missing_texts:
        ie = text_keys[text_key]
        text_parents = []
        # FIXME: TODO: The following loop overlaps/duplicates that done by
        # commit to determine parents. There is a latent/real bug here where
        # the parents inserted are not those commit would do - in particular
        # they are not filtered by heads(). RBC, AB
        for revision, tree in parent_trees.iteritems():
            if ie.file_id not in tree:
                continue
            parent_id = tree.inventory[ie.file_id].revision
            if parent_id in text_parents:
                continue
            text_parents.append((ie.file_id, parent_id))
        lines = revision_tree.get_file(ie.file_id).readlines()
        repository.texts.add_lines(text_key, text_parents, lines)
    try:
        # install the inventory
        repository.add_inventory(rev.revision_id, inv, present_parents)
    except errors.RevisionAlreadyPresent:
        pass
    if signature is not None:
        repository.add_signature_text(rev.revision_id, signature)
    repository.add_revision(rev.revision_id, rev, inv)


class MetaDirRepository(Repository):
    """Repositories in the new meta-dir layout.

    :ivar _transport: Transport for access to repository control files,
        typically pointing to .bzr/repository.
    """

    def __init__(self, _format, a_bzrdir, control_files):
        super(MetaDirRepository, self).__init__(_format, a_bzrdir, control_files)
        self._transport = control_files._transport

    @needs_read_lock
    def is_shared(self):
        """Return True if this repository is flagged as a shared repository."""
        return self._transport.has('shared-storage')

    @needs_write_lock
    def set_make_working_trees(self, new_value):
        """Set the policy flag for making working trees when creating branches.

        This only applies to branches that use this repository.

        The default is 'True'.
        :param new_value: True to restore the default, False to disable making
                          working trees.
        """
        if new_value:
            try:
                self._transport.delete('no-working-trees')
            except errors.NoSuchFile:
                pass
        else:
            self._transport.put_bytes('no-working-trees', '',
                mode=self.bzrdir._get_file_mode())

    def make_working_trees(self):
        """Returns the policy for making working trees on new branches."""
        return not self._transport.has('no-working-trees')


class MetaDirVersionedFileRepository(MetaDirRepository):
    """Repositories in a meta-dir, that work via versioned file objects."""

    def __init__(self, _format, a_bzrdir, control_files):
        super(MetaDirVersionedFileRepository, self).__init__(_format, a_bzrdir,
            control_files)


class RepositoryFormatRegistry(registry.Registry):
    """Registry of RepositoryFormats."""

    def get(self, format_string):
        r = registry.Registry.get(self, format_string)
        if callable(r):
            r = r()
        return r


format_registry = RepositoryFormatRegistry()
"""Registry of formats, indexed by their identifying format string.

This can contain either format instances themselves, or classes/factories that
can be called to obtain one.
"""
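
# Illustrative sketch (not part of the original source): a plugin would
# normally register its format lazily, so the implementing module is only
# imported when the format string is actually seen on disk.  The format
# string, module path and class name below are hypothetical.
#
#     format_registry.register_lazy(
#         'Example repository format 1 (hypothetical)\n',
#         'bzrlib.plugins.example.repofmt',
#         'RepositoryFormatExample1',
#         )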


#####################################################################
# Repository Formats

class RepositoryFormat(object):
    """A repository format.

    Formats provide three things:
     * An initialization routine to construct repository data on disk.
     * a format string which is used when the BzrDir supports versioned
       branches.
     * an open routine which returns a Repository instance.

    There is one and only one Format subclass for each on-disk format. But
    there can be one Repository subclass that is used for several different
    formats. The _format attribute on a Repository instance can be used to
    determine the disk format.

    Formats are placed in a dict by their format string for reference
    during opening. These should be subclasses of RepositoryFormat
    for consistency.

    Once a format is deprecated, just deprecate the initialize and open
    methods on the format class. Do not deprecate the object, as the
    object will be created every system load.

    Common instance attributes:
    _matchingbzrdir - the bzrdir format that the repository format was
    originally written to work with. This can be used if manually
    constructing a bzrdir and repository, or more commonly for test suite
    parameterization.
    """

    # Set to True or False in derived classes. True indicates that the format
    # supports ghosts gracefully.
    supports_ghosts = None
    # Can this repository be given external locations to lookup additional
    # data. Set to True or False in derived classes.
    supports_external_lookups = None

    def __str__(self):
        return "<%s>" % self.__class__.__name__

    def __eq__(self, other):
        # format objects are generally stateless
        return isinstance(other, self.__class__)

    def __ne__(self, other):
        return not self == other

    @classmethod
    def find_format(klass, a_bzrdir):
        """Return the format for the repository object in a_bzrdir.

        This is used by bzr native formats that have a "format" file in
        the repository.  Other methods may be used by different types of
        control directory.
        """
        try:
            transport = a_bzrdir.get_repository_transport(None)
            format_string = transport.get("format").read()
            return format_registry.get(format_string)
        except errors.NoSuchFile:
            raise errors.NoRepositoryPresent(a_bzrdir)
        except KeyError:
            raise errors.UnknownFormatError(format=format_string,
                                            kind='repository')

    @classmethod
    def register_format(klass, format):
        format_registry.register(format.get_format_string(), format)

    @classmethod
    def unregister_format(klass, format):
        format_registry.remove(format.get_format_string())

    @classmethod
    def get_default_format(klass):
        """Return the current default format."""
        from bzrlib import bzrdir
        return bzrdir.format_registry.make_bzrdir('default').repository_format

    def get_format_string(self):
        """Return the ASCII format string that identifies this format.

        Note that in pre format ?? repositories the format string is
        not permitted nor written to disk.
        """
        raise NotImplementedError(self.get_format_string)

    def get_format_description(self):
        """Return the short description for this format."""
        raise NotImplementedError(self.get_format_description)

    # TODO: this shouldn't be in the base class, it's specific to things that
    # use weaves or knits -- mbp 20070207
    def _get_versioned_file_store(self,
                                  name,
                                  transport,
                                  control_files,
                                  prefixed=True,
                                  versionedfile_class=None,
                                  versionedfile_kwargs={},
                                  escaped=False):
        if versionedfile_class is None:
            versionedfile_class = self._versionedfile_class
        weave_transport = control_files._transport.clone(name)
        dir_mode = control_files._dir_mode
        file_mode = control_files._file_mode
        return VersionedFileStore(weave_transport, prefixed=prefixed,
                                  dir_mode=dir_mode,
                                  file_mode=file_mode,
                                  versionedfile_class=versionedfile_class,
                                  versionedfile_kwargs=versionedfile_kwargs,
                                  escaped=escaped)

    def initialize(self, a_bzrdir, shared=False):
        """Initialize a repository of this format in a_bzrdir.

        :param a_bzrdir: The bzrdir to put the new repository in.
        :param shared: The repository should be initialized as a sharable one.
        :returns: The new repository object.

        This may raise UninitializableFormat if shared repositories are not
        compatible with a_bzrdir.
        """
        raise NotImplementedError(self.initialize)

    def is_supported(self):
        """Is this format supported?

        Supported formats must be initializable and openable.
        Unsupported formats may not support initialization or committing or
        some other features depending on the reason for not being supported.
        """
        return True

    def check_conversion_target(self, target_format):
        raise NotImplementedError(self.check_conversion_target)

    def open(self, a_bzrdir, _found=False):
        """Return an instance of this format for the bzrdir a_bzrdir.

        _found is a private parameter, do not use it.
        """
        raise NotImplementedError(self.open)


class MetaDirRepositoryFormat(RepositoryFormat):
    """Common base class for the new repositories using the metadir layout."""

    rich_root_data = False
    supports_tree_reference = False
    supports_external_lookups = False
    _matchingbzrdir = bzrdir.BzrDirMetaFormat1()

    def __init__(self):
        super(MetaDirRepositoryFormat, self).__init__()

    def _create_control_files(self, a_bzrdir):
        """Create the required files and the initial control_files object."""
        # FIXME: RBC 20060125 don't peek under the covers
        # NB: no need to escape relative paths that are url safe.
        repository_transport = a_bzrdir.get_repository_transport(self)
        control_files = lockable_files.LockableFiles(repository_transport,
                                'lock', lockdir.LockDir)
        control_files.create_lock()
        return control_files

    def _upload_blank_content(self, a_bzrdir, dirs, files, utf8_files, shared):
        """Upload the initial blank content."""
        control_files = self._create_control_files(a_bzrdir)
        control_files.lock_write()
        transport = control_files._transport
        if shared == True:
            utf8_files += [('shared-storage', '')]
        try:
            transport.mkdir_multi(dirs, mode=a_bzrdir._get_dir_mode())
            for (filename, content_stream) in files:
                transport.put_file(filename, content_stream,
                    mode=a_bzrdir._get_file_mode())
            for (filename, content_bytes) in utf8_files:
                transport.put_bytes_non_atomic(filename, content_bytes,
                    mode=a_bzrdir._get_file_mode())
        finally:
            control_files.unlock()


# formats which have no format string are not discoverable
# and not independently creatable, so are not registered.  They're
# all in bzrlib.repofmt.weaverepo now.  When an instance of one of these is
# needed, it's constructed directly by the BzrDir.  Non-native formats where
# the repository is not separately opened are similar.

format_registry.register_lazy(
    'Bazaar-NG Repository format 7',
    'bzrlib.repofmt.weaverepo',
    'RepositoryFormat7'
    )

format_registry.register_lazy(
    'Bazaar-NG Knit Repository Format 1',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit1',
    )

format_registry.register_lazy(
    'Bazaar Knit Repository Format 3 (bzr 0.15)\n',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit3',
    )

format_registry.register_lazy(
    'Bazaar Knit Repository Format 4 (bzr 1.0)\n',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit4',
    )

# Pack-based formats. There is one format for pre-subtrees, and one for
# post-subtrees to allow ease of testing.
# NOTE: These are experimental in 0.92. Stable in 1.0 and above
format_registry.register_lazy(
    'Bazaar pack repository format 1 (needs bzr 0.92)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack1',
    )
format_registry.register_lazy(
    'Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack3',
    )
format_registry.register_lazy(
    'Bazaar pack repository format 1 with rich root (needs bzr 1.0)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack4',
    )
# Development formats.
# development 0 - stub to introduce development versioning scheme.
format_registry.register_lazy(
    "Bazaar development format 0 (needs bzr.dev from before 1.3)\n",
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatPackDevelopment0',
    )
format_registry.register_lazy(
    ("Bazaar development format 0 with subtree support "
        "(needs bzr.dev from before 1.3)\n"),
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatPackDevelopment0Subtree',
    )
format_registry.register_lazy(
    "Bazaar development format 1 (needs bzr.dev from before 1.6)\n",
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatPackDevelopment1',
    )
format_registry.register_lazy(
    ("Bazaar development format 1 with subtree support "
        "(needs bzr.dev from before 1.6)\n"),
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatPackDevelopment1Subtree',
    )
# 1.3->1.4 go below here


class InterRepository(InterObject):
    """This class represents operations taking place between two repositories.

    Its instances have methods like copy_content and fetch, and contain
    references to the source and target repositories these operations can be
    carried out on.

    Often we will provide convenience methods on 'repository' which carry out
    operations with another repository - they will always forward to
    InterRepository.get(other).method_name(parameters).
    """

    _optimisers = []
    """The available optimised InterRepository types."""

    def copy_content(self, revision_id=None):
        raise NotImplementedError(self.copy_content)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """Fetch the content required to construct revision_id.

        The content is copied from self.source to self.target.

        :param revision_id: if None all content is copied, if NULL_REVISION no
                            content is copied.
        :param pb: optional progress bar to use for progress reports. If not
                   provided a default one will be created.

        Returns the copied revision count and the failed revisions in a tuple:
        (copied, failures).
        """
        raise NotImplementedError(self.fetch)

    def _walk_to_common_revisions(self, revision_ids):
        """Walk out from revision_ids in source to revisions target has.

        :param revision_ids: The start point for the search.
        :return: A set of revision ids.
        """
        target_graph = self.target.get_graph()
        revision_ids = frozenset(revision_ids)
        if set(target_graph.get_parent_map(revision_ids)) == revision_ids:
            return graph.SearchResult(revision_ids, set(), 0, set())
        missing_revs = set()
        source_graph = self.source.get_graph()
        # ensure we don't pay silly lookup costs.
        searcher = source_graph._make_breadth_first_searcher(revision_ids)
        null_set = frozenset([_mod_revision.NULL_REVISION])
        while True:
            try:
                next_revs, ghosts = searcher.next_with_ghosts()
            except StopIteration:
                break
            if revision_ids.intersection(ghosts):
                absent_ids = set(revision_ids.intersection(ghosts))
                # If all absent_ids are present in target, no error is needed.
                absent_ids.difference_update(
                    set(target_graph.get_parent_map(absent_ids)))
                if absent_ids:
                    raise errors.NoSuchRevision(self.source, absent_ids.pop())
            # we don't care about other ghosts as we can't fetch them and
            # haven't been asked to.
            next_revs = set(next_revs)
            # we always have NULL_REVISION present.
            have_revs = set(target_graph.get_parent_map(next_revs)).union(null_set)
            missing_revs.update(next_revs - have_revs)
            searcher.stop_searching_any(have_revs)
        return searcher.get_result()

    @deprecated_method(one_two)
    @needs_read_lock
    def missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """Return the revision ids that source has that target does not.

        These are returned in topological order.

        :param revision_id: only return revision ids included by this
                            revision_id.
        :param find_ghosts: If True find missing revisions in deep history
            rather than just finding the surface difference.
        """
        return list(self.search_missing_revision_ids(
            revision_id, find_ghosts).get_keys())

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """Return the revision ids that source has that target does not.

        :param revision_id: only return revision ids included by this
                            revision_id.
        :param find_ghosts: If True find missing revisions in deep history
            rather than just finding the surface difference.
        :return: A bzrlib.graph.SearchResult.
        """
        # stop searching at found target revisions.
        if not find_ghosts and revision_id is not None:
            return self._walk_to_common_revisions([revision_id])
        # generic, possibly worst case, slow code path.
        target_ids = set(self.target.all_revision_ids())
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        result_set = set(source_ids).difference(target_ids)
        return self.source.revision_ids_to_search_result(result_set)

    @staticmethod
    def _same_model(source, target):
        """True if source and target have the same data representation."""
        if source.supports_rich_root() != target.supports_rich_root():
            return False
        if source._serializer != target._serializer:
            return False
        return True


class InterSameDataRepository(InterRepository):
    """Code for converting between repositories that represent the same data.

    Data format and model must match for this to work.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        """Repository format for testing with.

        InterSameData can pull from subtree to subtree and from non-subtree to
        non-subtree, so we test this with the richest repository format.
        """
        from bzrlib.repofmt import knitrepo
        return knitrepo.RepositoryFormatKnit3()

    @staticmethod
    def is_compatible(source, target):
        return InterRepository._same_model(source, target)

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This copies both the repository's revision data, and configuration information
        such as the make_working_trees setting.

        This is a destructive operation! Do not use it on existing
        repositories.

        :param revision_id: Only copy the content needed to construct
                            revision_id and its parents.
        """
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except NotImplementedError:
            pass
        # but don't bother fetching if we have the needed data now.
        if (revision_id not in (None, _mod_revision.NULL_REVISION) and
            self.target.has_revision(revision_id)):
            return
        self.target.fetch(self.source, revision_id=revision_id)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import GenericRepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target,
               self.target._format)
        f = GenericRepoFetcher(to_repository=self.target,
                               from_repository=self.source,
                               last_revision=revision_id,
                               pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions


class InterWeaveRepo(InterSameDataRepository):
    """Optimised code paths between Weave based repositories.

    This should be in bzrlib/repofmt/weaverepo.py but we have not yet
    implemented lazy inter-object optimisation.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import weaverepo
        return weaverepo.RepositoryFormat7()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Weave formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.weaverepo import (
                RepositoryFormat5,
                RepositoryFormat6,
                RepositoryFormat7,
                )
        try:
            return (isinstance(source._format, (RepositoryFormat5,
                                                RepositoryFormat6,
                                                RepositoryFormat7)) and
                    isinstance(target._format, (RepositoryFormat5,
                                                RepositoryFormat6,
                                                RepositoryFormat7)))
        except AttributeError:
            return False

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """See InterRepository.copy_content()."""
        # weave specific optimised path:
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except (errors.RepositoryUpgradeRequired, NotImplemented):
            pass
        # FIXME do not peek!
        if self.source._transport.listable():
            pb = ui.ui_factory.nested_progress_bar()
            try:
                self.target.texts.insert_record_stream(
                    self.source.texts.get_record_stream(
                        self.source.texts.keys(), 'topological', False))
                pb.update('copying inventory', 0, 1)
                self.target.inventories.insert_record_stream(
                    self.source.inventories.get_record_stream(
                        self.source.inventories.keys(), 'topological', False))
                self.target.signatures.insert_record_stream(
                    self.source.signatures.get_record_stream(
                        self.source.signatures.keys(),
                        'unordered', True))
                self.target.revisions.insert_record_stream(
                    self.source.revisions.get_record_stream(
                        self.source.revisions.keys(),
                        'topological', True))
            finally:
                pb.finished()
        else:
            self.target.fetch(self.source, revision_id=revision_id)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import GenericRepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        f = GenericRepoFetcher(to_repository=self.target,
                               from_repository=self.source,
                               last_revision=revision_id,
                               pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids()."""
        # we want all revisions to satisfy revision_id in source.
        # but we don't want to stat every file here and there.
        # we want then, all revisions other needs to satisfy revision_id
        # checked, but not those that we have locally.
        # so the first thing is to get a subset of the revisions to
        # satisfy revision_id in source, and then eliminate those that
        # we do already have.
        # this is slow on high latency connection to self, but as this
        # disk format scales terribly for push anyway due to rewriting
        # inventory.weave, this is considered acceptable.
        # - RBC 20060209
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source._all_possible_ids()
        source_ids_set = set(source_ids)
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target._all_possible_ids())
        possibly_present_revisions = target_ids.intersection(source_ids_set)
        actually_present_revisions = set(
            self.target._eliminate_revisions_not_present(possibly_present_revisions))
        required_revisions = source_ids_set.difference(actually_present_revisions)
        if revision_id is not None:
            # we used get_ancestry to determine source_ids then we are assured all
            # revisions referenced are present as they are installed in topological order.
            # and the tip revision was validated by get_ancestry.
            result_set = required_revisions
        else:
            # if we just grabbed the possibly available ids, then
            # we only have an estimate of whats available and need to validate
            # that against the revision records.
            result_set = set(
                self.source._eliminate_revisions_not_present(required_revisions))
        return self.source.revision_ids_to_search_result(result_set)


class InterKnitRepo(InterSameDataRepository):
    """Optimised code paths between Knit based repositories."""

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import knitrepo
        return knitrepo.RepositoryFormatKnit1()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Knit formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.knitrepo import RepositoryFormatKnit
        try:
            are_knits = (isinstance(source._format, RepositoryFormatKnit) and
                isinstance(target._format, RepositoryFormatKnit))
        except AttributeError:
            return False
        return are_knits and InterRepository._same_model(source, target)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import KnitRepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        f = KnitRepoFetcher(to_repository=self.target,
                            from_repository=self.source,
                            last_revision=revision_id,
                            pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids()."""
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        source_ids_set = set(source_ids)
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target.all_revision_ids())
        possibly_present_revisions = target_ids.intersection(source_ids_set)
        actually_present_revisions = set(
            self.target._eliminate_revisions_not_present(possibly_present_revisions))
        required_revisions = source_ids_set.difference(actually_present_revisions)
        if revision_id is not None:
            # we used get_ancestry to determine source_ids then we are assured all
            # revisions referenced are present as they are installed in topological order.
            # and the tip revision was validated by get_ancestry.
            result_set = required_revisions
        else:
            # if we just grabbed the possibly available ids, then
            # we only have an estimate of whats available and need to validate
            # that against the revision records.
            result_set = set(
                self.source._eliminate_revisions_not_present(required_revisions))
        return self.source.revision_ids_to_search_result(result_set)


class InterPackRepo(InterSameDataRepository):
    """Optimised code paths between Pack based repositories."""

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import pack_repo
        return pack_repo.RepositoryFormatKnitPack1()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Pack formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.pack_repo import RepositoryFormatPack
        try:
            are_packs = (isinstance(source._format, RepositoryFormatPack) and
                isinstance(target._format, RepositoryFormatPack))
        except AttributeError:
            return False
        return are_packs and InterRepository._same_model(source, target)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        if len(self.source._fallback_repositories) > 0:
            from bzrlib.fetch import KnitRepoFetcher
            fetcher = KnitRepoFetcher(self.target, self.source, revision_id,
                                      pb, find_ghosts)
            return fetcher.count_copied, fetcher.failed_revisions
        from bzrlib.repofmt.pack_repo import Packer
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        self.count_copied = 0
        if revision_id is None:
            # TODO:
            # everything to do - use pack logic
            # to fetch from all packs to one without
            # inventory parsing etc, IFF nothing to be copied is in the target.
            # till then:
            source_revision_ids = frozenset(self.source.all_revision_ids())
            revision_ids = source_revision_ids - \
                frozenset(self.target.get_parent_map(source_revision_ids))
            revision_keys = [(revid,) for revid in revision_ids]
            index = self.target._pack_collection.revision_index.combined_index
            present_revision_ids = set(item[1][0] for item in
                index.iter_entries(revision_keys))
            revision_ids = set(revision_ids) - present_revision_ids
            # implementing the TODO will involve:
            # - detecting when all of a pack is selected
            # - avoiding as much as possible pre-selection, so the
            # more-core routines such as create_pack_from_packs can filter in
            # a just-in-time fashion. (though having a HEADS list on a
            # repository might make this a lot easier, because we could
            # sensibly detect 'new revisions' without doing a full index scan.
        elif _mod_revision.is_null(revision_id):
            # nothing to do:
            return (0, [])
        else:
            try:
                revision_ids = self.search_missing_revision_ids(revision_id,
                    find_ghosts=find_ghosts).get_keys()
            except errors.NoSuchRevision:
                raise errors.InstallFailed([revision_id])
            if len(revision_ids) == 0:
                return (0, [])
        packs = self.source._pack_collection.all_packs()
        pack = Packer(self.target._pack_collection, packs, '.fetch',
            revision_ids).pack()
        if pack is not None:
            self.target._pack_collection._save_pack_names()
            # Trigger an autopack. This may duplicate effort as we've just done
            # a pack creation, but for now it is simpler to think about as
            # 'upload data, then repack if needed'.
            self.target._pack_collection.autopack()
            return (pack.get_revision_count(), [])
        else:
            return (0, [])

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids().

        :param find_ghosts: Find ghosts throughout the ancestry of
            revision_id.
        """
        if not find_ghosts and revision_id is not None:
            return self._walk_to_common_revisions([revision_id])
        elif revision_id is not None:
            # Find ghosts: search for revisions pointing from one repository to
            # the other, and vice versa, anywhere in the history of revision_id.
            graph = self.target.get_graph(other_repository=self.source)
            searcher = graph._make_breadth_first_searcher([revision_id])
            found_ids = set()
            while True:
                try:
                    next_revs, ghosts = searcher.next_with_ghosts()
                except StopIteration:
                    break
                if revision_id in ghosts:
                    raise errors.NoSuchRevision(self.source, revision_id)
                found_ids.update(next_revs)
                found_ids.update(ghosts)
            found_ids = frozenset(found_ids)
            # Double query here: should be able to avoid this by changing the
            # graph api further.
            result_set = found_ids - frozenset(
                self.target.get_parent_map(found_ids))
        else:
            source_ids = self.source.all_revision_ids()
            # source_ids is the worst possible case we may need to pull.
            # now we want to filter source_ids against what we actually
            # have in target, but don't try to check for existence where we know
            # we do not have a revision as that would be pointless.
            target_ids = set(self.target.all_revision_ids())
            result_set = set(source_ids).difference(target_ids)
        return self.source.revision_ids_to_search_result(result_set)


class InterModel1and2(InterRepository):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        if not source.supports_rich_root() and target.supports_rich_root():
            return True
        else:
            return False

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import Model1toKnit2Fetcher
        f = Model1toKnit2Fetcher(to_repository=self.target,
                                 from_repository=self.source,
                                 last_revision=revision_id,
                                 pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This is a destructive operation! Do not use it on existing
        repositories.

        :param revision_id: Only copy the content needed to construct
                            revision_id and its parents.
        """
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except NotImplementedError:
            pass
        # but don't bother fetching if we have the needed data now.
        if (revision_id not in (None, _mod_revision.NULL_REVISION) and
            self.target.has_revision(revision_id)):
            return
        self.target.fetch(self.source, revision_id=revision_id)


class InterKnit1and2(InterKnitRepo):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with Knit1 source and Knit3 target"""
        from bzrlib.repofmt.knitrepo import RepositoryFormatKnit3
        try:
            from bzrlib.repofmt.knitrepo import (RepositoryFormatKnit1,
                RepositoryFormatKnit3)
            from bzrlib.repofmt.pack_repo import (
                RepositoryFormatKnitPack1,
                RepositoryFormatKnitPack3,
                RepositoryFormatPackDevelopment0,
                RepositoryFormatPackDevelopment0Subtree,
                )
            nosubtrees = (
                RepositoryFormatKnit1,
                RepositoryFormatKnitPack1,
                RepositoryFormatPackDevelopment0,
                )
            subtrees = (
                RepositoryFormatKnit3,
                RepositoryFormatKnitPack3,
                RepositoryFormatPackDevelopment0Subtree,
                )
            return (isinstance(source._format, nosubtrees) and
                isinstance(target._format, subtrees))
        except AttributeError:
            return False

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import Knit1to2Fetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target,
               self.target._format)
        f = Knit1to2Fetcher(to_repository=self.target,
                            from_repository=self.source,
                            last_revision=revision_id,
                            pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions


class InterDifferingSerializer(InterKnitRepo):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with Knit2 source and Knit3 target"""
        if source.supports_rich_root() != target.supports_rich_root():
            return False
        # Ideally, we'd support fetching if the source had no tree references
        # even if it supported them...
        # (the original getattr calls used the dotted name
        # '_format.supports_tree_reference' as the attribute, which never
        # exists; look the flag up on the format object instead.)
        if (getattr(source._format, 'supports_tree_reference', False) and
            not getattr(target._format, 'supports_tree_reference', False)):
            return False
        return True

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        revision_ids = self.target.search_missing_revision_ids(self.source,
            revision_id, find_ghosts=find_ghosts).get_keys()
        revision_ids = tsort.topo_sort(
            self.source.get_graph().get_parent_map(revision_ids))
        def revisions_iterator():
            for current_revision_id in revision_ids:
                revision = self.source.get_revision(current_revision_id)
                tree = self.source.revision_tree(current_revision_id)
                try:
                    signature = self.source.get_signature_text(
                        current_revision_id)
                except errors.NoSuchRevision:
                    signature = None
                yield revision, tree, signature
        if pb is None:
            my_pb = ui.ui_factory.nested_progress_bar()
            pb = my_pb
        else:
            my_pb = None
        try:
            install_revisions(self.target, revisions_iterator(),
                              len(revision_ids), pb)
        finally:
            if my_pb is not None:
                my_pb.finished()
        return len(revision_ids), 0


class InterOtherToRemote(InterRepository):

    def __init__(self, source, target):
        InterRepository.__init__(self, source, target)
        self._real_inter = None

    @staticmethod
    def is_compatible(source, target):
        if isinstance(target, remote.RemoteRepository):
            return True
        return False

    def _ensure_real_inter(self):
        if self._real_inter is None:
            self.target._ensure_real()
            real_target = self.target._real_repository
            self._real_inter = InterRepository.get(self.source, real_target)

    def copy_content(self, revision_id=None):
        self._ensure_real_inter()
        self._real_inter.copy_content(revision_id=revision_id)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        self._ensure_real_inter()
        return self._real_inter.fetch(revision_id=revision_id, pb=pb,
            find_ghosts=find_ghosts)

    @classmethod
    def _get_repo_format_to_test(self):
        return None


class InterRemoteToOther(InterRepository):

    def __init__(self, source, target):
        InterRepository.__init__(self, source, target)
        self._real_inter = None

    @staticmethod
    def is_compatible(source, target):
        if not isinstance(source, remote.RemoteRepository):
            return False
        # Is source's model compatible with target's model?
        source._ensure_real()
        real_source = source._real_repository
        if isinstance(real_source, remote.RemoteRepository):
            raise NotImplementedError(
                "We don't support remote repos backed by remote repos yet.")
        return InterRepository._same_model(real_source, target)

    def _ensure_real_inter(self):
        if self._real_inter is None:
            self.source._ensure_real()
            real_source = self.source._real_repository
            self._real_inter = InterRepository.get(real_source, self.target)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        self._ensure_real_inter()
        return self._real_inter.fetch(revision_id=revision_id, pb=pb,
            find_ghosts=find_ghosts)

    def copy_content(self, revision_id=None):
        self._ensure_real_inter()
        self._real_inter.copy_content(revision_id=revision_id)

    @classmethod
    def _get_repo_format_to_test(self):
        return None


InterRepository.register_optimiser(InterDifferingSerializer)
InterRepository.register_optimiser(InterSameDataRepository)
InterRepository.register_optimiser(InterWeaveRepo)
InterRepository.register_optimiser(InterKnitRepo)
InterRepository.register_optimiser(InterModel1and2)
InterRepository.register_optimiser(InterKnit1and2)
InterRepository.register_optimiser(InterPackRepo)
InterRepository.register_optimiser(InterOtherToRemote)
InterRepository.register_optimiser(InterRemoteToOther)
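
# Illustrative sketch (not part of the original source): InterRepository.get()
# (inherited from InterObject) walks the registered optimisers and returns an
# instance of the first class whose is_compatible() accepts the pair, falling
# back to the plain InterRepository.  `repo_a` and `repo_b` are assumed to be
# opened by the caller.
#
#     inter = InterRepository.get(repo_a, repo_b)
#     inter.fetch(revision_id=None)    # uses the optimised path if one matched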


class CopyConverter(object):
    """A repository conversion tool which just performs a copy of the content.

    This is slow but quite reliable.
    """

    def __init__(self, target_format):
        """Create a CopyConverter.

        :param target_format: The format the resulting repository should be.
        """
        self.target_format = target_format

    def convert(self, repo, pb):
        """Perform the conversion of to_convert, giving feedback via pb.

        :param to_convert: The disk object to convert.
        :param pb: a progress bar to use for progress information.
        """
        self.pb = pb
        self.count = 0
        self.total = 4
        # this is only useful with metadir layouts - separated repo content.
        # trigger an assertion if not such
        repo._format.get_format_string()
        self.repo_dir = repo.bzrdir
        self.step('Moving repository to repository.backup')
        self.repo_dir.transport.move('repository', 'repository.backup')
        backup_transport = self.repo_dir.transport.clone('repository.backup')
        repo._format.check_conversion_target(self.target_format)
        self.source_repo = repo._format.open(self.repo_dir,
            _found=True,
            _override_transport=backup_transport)
        self.step('Creating new repository')
        converted = self.target_format.initialize(self.repo_dir,
                                                  self.source_repo.is_shared())
        converted.lock_write()
        try:
            self.step('Copying content into repository.')
            self.source_repo.copy_content_into(converted)
        finally:
            converted.unlock()
        self.step('Deleting old repository content.')
        self.repo_dir.transport.delete_tree('repository.backup')
        self.pb.note('repository converted')

    def step(self, message):
        """Update the pb by a step."""
        self.count += 1
        self.pb.update(message, self.count, self.total)


_unescape_map = {
    'apos':"'",
    'quot':'"',
    'amp':'&',
    'lt':'<',
    'gt':'>'
}


def _unescaper(match, _map=_unescape_map):
    code = match.group(1)
    try:
        return _map[code]
    except KeyError:
        if not code.startswith('#'):
            raise
        return unichr(int(code[1:])).encode('utf8')


_unescape_re = None


def _unescape_xml(data):
    """Unescape predefined XML entities in a string of data."""
    global _unescape_re
    if _unescape_re is None:
        _unescape_re = re.compile('\&([^;]*);')
    return _unescape_re.sub(_unescaper, data)
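

# Illustrative examples (not part of the original source) of the behaviour
# above: named entities come from _unescape_map, numeric character references
# are decoded via unichr(), and unknown named entities raise KeyError.
#
#     _unescape_xml('&lt;a&gt;')   # => '<a>'
#     _unescape_xml('&amp;&#65;')  # => '&A'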


class _VersionedFileChecker(object):

    def __init__(self, repository):
        self.repository = repository
        self.text_index = self.repository._generate_text_key_index()

    def calculate_file_version_parents(self, text_key):
        """Calculate the correct parents for a file version according to
        the inventories.
        """
        parent_keys = self.text_index[text_key]
        if parent_keys == [_mod_revision.NULL_REVISION]:
            return ()
        return tuple(parent_keys)

    def check_file_version_parents(self, texts, progress_bar=None):
        """Check the parents stored in a versioned file are correct.

        It also detects file versions that are not referenced by their
        corresponding revision's inventory.

        :returns: A tuple of (wrong_parents, dangling_file_versions).
            wrong_parents is a dict mapping {revision_id: (stored_parents,
            correct_parents)} for each revision_id where the stored parents
            are not correct.  dangling_file_versions is a set of (file_id,
            revision_id) tuples for versions that are present in this versioned
            file, but not used by the corresponding inventory.
        """
        wrong_parents = {}
        self.file_ids = set([file_id for file_id, _ in
            self.text_index.iterkeys()])
        # text keys is now grouped by file_id
        n_weaves = len(self.file_ids)
        files_in_revisions = {}
        revisions_of_files = {}
        n_versions = len(self.text_index)
        progress_bar.update('loading text store', 0, n_versions)
        parent_map = self.repository.texts.get_parent_map(self.text_index)
        # On unlistable transports this could well be empty/error...
        text_keys = self.repository.texts.keys()
        unused_keys = frozenset(text_keys) - set(self.text_index)
        for num, key in enumerate(self.text_index.iterkeys()):
            if progress_bar is not None:
                progress_bar.update('checking text graph', num, n_versions)
            correct_parents = self.calculate_file_version_parents(key)
            try:
                knit_parents = parent_map[key]
            except errors.RevisionNotPresent:
                # Missing text!
                knit_parents = None
            if correct_parents != knit_parents:
                wrong_parents[key] = (knit_parents, correct_parents)
        return wrong_parents, unused_keys


def _old_get_graph(repository, revision_id):
    """DO NOT USE. That is all. I'm serious."""
    graph = repository.get_graph()
    revision_graph = dict(((key, value) for key, value in
        graph.iter_ancestry([revision_id]) if value is not None))
    return _strip_NULL_ghosts(revision_graph)


def _strip_NULL_ghosts(revision_graph):
    """Also don't use this. more compatibility code for unmigrated clients."""
    # Filter ghosts, and null:
    if _mod_revision.NULL_REVISION in revision_graph:
        del revision_graph[_mod_revision.NULL_REVISION]
    for key, parents in revision_graph.items():
        revision_graph[key] = tuple(parent for parent in parents if parent
            in revision_graph)
    return revision_graph
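

# Illustrative sketch (not part of the original source): ghosts and null: are
# stripped in place, so parent tuples only ever reference keys that remain in
# the graph.
#
#     _strip_NULL_ghosts({'rev-2': ('rev-1', 'ghost'), 'rev-1': ('null:',)})
#     # => {'rev-2': ('rev-1',), 'rev-1': ()}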