# Copyright (C) 2005, 2006, 2007, 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
import cStringIO
import time

from bzrlib import (
    bzrdir,
    debug,
    errors,
    generate_ids,
    gpg,
    graph,
    lazy_regex,
    lru_cache,
    osutils,
    revision as _mod_revision,
    tsort,
    ui,
    )
from bzrlib.bundle import serializer
from bzrlib.revisiontree import RevisionTree
from bzrlib.store.versioned import VersionedFileStore
from bzrlib.testament import Testament
""")
from bzrlib import registry
from bzrlib.decorators import needs_read_lock, needs_write_lock
from bzrlib.inter import InterObject
from bzrlib.inventory import Inventory, InventoryDirectory, ROOT_ID
from bzrlib.symbol_versioning import (
    deprecated_method,
    one_two,
    )
from bzrlib.trace import (
    log_exception_quietly, note, mutter, mutter_callsite, warning)


# Old formats display a warning, but only once
_deprecation_warning_done = False


class CommitBuilder(object):
    """Provides an interface to build up a commit.

    This allows describing a tree to be committed without needing to
    know the internals of the format of the repository.
    """

    # all clients should supply tree roots.
    record_root_entry = True
    # the default CommitBuilder does not manage trees whose root is versioned.
    _versioned_root = False

    def __init__(self, repository, parents, config, timestamp=None,
                 timezone=None, committer=None, revprops=None,
                 revision_id=None):
        """Initiate a CommitBuilder.

        :param repository: Repository to commit to.
        :param parents: Revision ids of the parents of the new revision.
        :param config: Configuration to use.
        :param timestamp: Optional timestamp recorded for commit.
        :param timezone: Optional timezone for timestamp.
        :param committer: Optional committer to set for commit.
        :param revprops: Optional dictionary of revision properties.
        :param revision_id: Optional revision id.
        """
        self._config = config

        if committer is None:
            self._committer = self._config.username()
        else:
            self._committer = committer

        self.new_inventory = Inventory(None)
        self._new_revision_id = revision_id
        self.parents = parents
        self.repository = repository

        self._revprops = {}
        if revprops is not None:
            self._validate_revprops(revprops)
            self._revprops.update(revprops)

        if timestamp is None:
            timestamp = time.time()
        # Restrict resolution to 1ms
        self._timestamp = round(timestamp, 3)

        if timezone is None:
            self._timezone = osutils.local_time_offset()
        else:
            self._timezone = int(timezone)

        self._generate_revision_if_needed()
        self.__heads = graph.HeadsCache(repository.get_graph()).heads

    def _validate_unicode_text(self, text, context):
        """Verify things like commit messages don't have bogus characters."""
        if '\r' in text:
            raise ValueError('Invalid value for %s: %r' % (context, text))

    def _validate_revprops(self, revprops):
        for key, value in revprops.iteritems():
            # We know that the XML serializers do not round trip '\r'
            # correctly, so refuse to accept them
            if not isinstance(value, basestring):
                raise ValueError('revision property (%s) is not a valid'
                                 ' (unicode) string: %r' % (key, value))
            self._validate_unicode_text(value,
                                        'revision property (%s)' % (key,))

    def commit(self, message):
        """Make the actual commit.

        :return: The revision id of the recorded revision.
        """
        self._validate_unicode_text(message, 'commit message')
        rev = _mod_revision.Revision(
            timestamp=self._timestamp,
            timezone=self._timezone,
            committer=self._committer,
            message=message,
            inventory_sha1=self.inv_sha1,
            revision_id=self._new_revision_id,
            properties=self._revprops)
        rev.parent_ids = self.parents
        self.repository.add_revision(self._new_revision_id, rev,
            self.new_inventory, self._config)
        self.repository.commit_write_group()
        return self._new_revision_id
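
    # A minimal usage sketch (illustrative only, not part of the API): the
    # expected calling sequence around commit(). 'repository', 'branch',
    # 'parents', 'config' and the per-entry values are assumed to be
    # supplied by the caller.
    #
    #   builder = repository.get_commit_builder(branch, parents, config)
    #   for path, ie in entries_to_commit:
    #       builder.record_entry_contents(ie, parent_invs, path, tree,
    #           content_summary)
    #   builder.finish_inventory()
    #   new_revision_id = builder.commit('commit message')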

    def abort(self):
        """Abort the commit that is being built."""
        self.repository.abort_write_group()

    def revision_tree(self):
        """Return the tree that was just committed.

        After calling commit() this can be called to get a RevisionTree
        representing the newly committed tree. This is preferred to
        calling Repository.revision_tree() because that may require
        deserializing the inventory, while we already have a copy in
        memory.
        """
        return RevisionTree(self.repository, self.new_inventory,
                            self._new_revision_id)

    def finish_inventory(self):
        """Tell the builder that the inventory is finished."""
        if self.new_inventory.root is None:
            raise AssertionError('Root entry should be supplied to'
                ' record_entry_contents, as of bzr 0.10.')
            self.new_inventory.add(InventoryDirectory(ROOT_ID, '', None))
        self.new_inventory.revision_id = self._new_revision_id
        self.inv_sha1 = self.repository.add_inventory(
            self._new_revision_id,
            self.new_inventory,
            self.parents)

    def _gen_revision_id(self):
        """Return new revision-id."""
        return generate_ids.gen_revision_id(self._config.username(),
                                            self._timestamp)

    def _generate_revision_if_needed(self):
        """Create a revision id if None was supplied.

        If the repository can not support user-specified revision ids
        they should override this function and raise CannotSetRevisionId
        if _new_revision_id is not None.

        :raises: CannotSetRevisionId
        """
        if self._new_revision_id is None:
            self._new_revision_id = self._gen_revision_id()
            self.random_revid = True
        else:
            self.random_revid = False

    def _heads(self, file_id, revision_ids):
        """Calculate the graph heads for revision_ids in the graph of file_id.

        This can use either a per-file graph or a global revision graph as we
        have an identity relationship between the two graphs.
        """
        return self.__heads(revision_ids)

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """
        # In this revision format, root entries have no knit or weave. When
        # serializing out to disk and back in, root.revision is always the
        # new revision_id.
        ie.revision = self._new_revision_id

    def _get_delta(self, ie, basis_inv, path):
        """Get a delta against the basis inventory for ie."""
        if ie.file_id not in basis_inv:
            # add
            return (None, path, ie.file_id, ie)
        elif ie != basis_inv[ie.file_id]:
            # common but altered
            # TODO: avoid this id2path call.
            return (basis_inv.id2path(ie.file_id), path, ie.file_id, ie)
        else:
            # common, unaltered
            return None

    def record_entry_contents(self, ie, parent_invs, path, tree,
        content_summary):
        """Record the content of ie from tree into the commit if needed.

        Side effect: sets ie.revision when unchanged

        :param ie: An inventory entry present in the commit.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param path: The path the entry is at in the tree.
        :param tree: The tree which contains this entry and should be used to
            obtain content.
        :param content_summary: Summary data from the tree about the paths
            content - stat, length, exec, sha/link target. This is only
            accessed when the entry has a revision of None - that is when it is
            a candidate to commit.
        :return: A tuple (change_delta, version_recorded, fs_hash).
            change_delta is an inventory_delta change for this entry against
            the basis tree of the commit, or None if no change occurred against
            the basis.
            version_recorded is True if a new version of the entry has been
            recorded. For instance, committing a merge where a file was only
            changed on the other side will return (delta, False).
            fs_hash is either None, or the hash details for the path (currently
            a tuple of the contents sha1 and the statvalue returned by
            tree.get_file_with_stat()).
        """
        if self.new_inventory.root is None:
            if ie.parent_id is not None:
                raise errors.RootMissing()
            self._check_root(ie, parent_invs, tree)
        if ie.revision is None:
            kind = content_summary[0]
        else:
            # ie is carried over from a prior commit
            kind = ie.kind
        # XXX: repository specific check for nested tree support goes here - if
        # the repo doesn't want nested trees we skip it ?
        if (kind == 'tree-reference' and
            not self.repository._format.supports_tree_reference):
            # mismatch between commit builder logic and repository:
            # this needs the entry creation pushed down into the builder.
            raise NotImplementedError('Missing repository subtree support.')
        self.new_inventory.add(ie)

        # TODO: slow, take it out of the inner loop.
        try:
            basis_inv = parent_invs[0]
        except IndexError:
            basis_inv = Inventory(root_id=None)

        # ie.revision is always None if the InventoryEntry is considered
        # for committing. We may record the previous parents revision if the
        # content is actually unchanged against a sole head.
        if ie.revision is not None:
            if not self._versioned_root and path == '':
                # repositories that do not version the root set the root's
                # revision to the new commit even when no change occurs, and
                # this masks when a change may have occurred against the basis,
                # so calculate if one happened.
                if ie.file_id in basis_inv:
                    delta = (basis_inv.id2path(ie.file_id), path,
                        ie.file_id, ie)
                else:
                    # add
                    delta = (None, path, ie.file_id, ie)
                return delta, False, None
            else:
                # we don't need to commit this, because the caller already
                # determined that an existing revision of this file is
                # appropriate. If it's not being considered for committing then
                # it and all its parents to the root must be unaltered, so
                # there is no change against the basis.
                if ie.revision == self._new_revision_id:
                    raise AssertionError("Impossible situation, a skipped "
                        "inventory entry (%r) claims to be modified in this "
                        "commit (%r).", (ie, self._new_revision_id))
                return None, False, None
        # XXX: Friction: parent_candidates should return a list not a dict
        #      so that we don't have to walk the inventories again.
        parent_candidate_entries = ie.parent_candidates(parent_invs)
        head_set = self._heads(ie.file_id, parent_candidate_entries.keys())
        heads = []
        for inv in parent_invs:
            if ie.file_id in inv:
                old_rev = inv[ie.file_id].revision
                if old_rev in head_set:
                    heads.append(inv[ie.file_id].revision)
                    head_set.remove(inv[ie.file_id].revision)

        store = False
        # now we check to see if we need to write a new record to the
        # file-graph.
        # We write a new entry unless there is one head to the ancestors, and
        # the kind-derived content is unchanged.

        # Cheapest check first: no ancestors, or more than one head in the
        # ancestors, we write a new node.
        if len(heads) != 1:
            store = True
        if not store:
            # There is a single head, look it up for comparison
            parent_entry = parent_candidate_entries[heads[0]]
            # if the non-content specific data has changed, we'll be writing a
            # node:
            if (parent_entry.parent_id != ie.parent_id or
                parent_entry.name != ie.name):
                store = True
        # now we need to do content specific checks:
        if not store:
            # if the kind changed the content obviously has
            if kind != parent_entry.kind:
                store = True
        # Stat cache fingerprint feedback for the caller - None as we usually
        # don't generate one.
        fingerprint = None
        if kind == 'file':
            if content_summary[2] is None:
                raise ValueError("Files must not have executable = None")
            if not store:
                if (# if the file length changed we have to store:
                    parent_entry.text_size != content_summary[1] or
                    # if the exec bit has changed we have to store:
                    parent_entry.executable != content_summary[2]):
                    store = True
                elif parent_entry.text_sha1 == content_summary[3]:
                    # all meta and content is unchanged (using a hash cache
                    # hit to check the sha)
                    ie.revision = parent_entry.revision
                    ie.text_size = parent_entry.text_size
                    ie.text_sha1 = parent_entry.text_sha1
                    ie.executable = parent_entry.executable
                    return self._get_delta(ie, basis_inv, path), False, None
                # Either there is only a hash change (no hash cache entry,
                # or same size content change), or there is no change on
                # this file at all.
                # Provide the parent's hash to the store layer, so that if the
                # content is unchanged we will not store a new node.
                nostore_sha = parent_entry.text_sha1
            if store:
                # We want to record a new node regardless of the presence or
                # absence of a content change in the file.
                nostore_sha = None
            ie.executable = content_summary[2]
            file_obj, stat_value = tree.get_file_with_stat(ie.file_id, path)
            try:
                lines = file_obj.readlines()
            finally:
                file_obj.close()
            try:
                ie.text_sha1, ie.text_size = self._add_text_to_weave(
                    ie.file_id, lines, heads, nostore_sha)
                # Let the caller know we generated a stat fingerprint.
                fingerprint = (ie.text_sha1, stat_value)
            except errors.ExistingContent:
                # Turns out that the file content was unchanged, and we were
                # only going to store a new node if it was changed. Carry over
                # the entry.
                ie.revision = parent_entry.revision
                ie.text_size = parent_entry.text_size
                ie.text_sha1 = parent_entry.text_sha1
                ie.executable = parent_entry.executable
                return self._get_delta(ie, basis_inv, path), False, None
        elif kind == 'directory':
            if not store:
                # all data is meta here, nothing specific to directory, so
                # carry over:
                ie.revision = parent_entry.revision
                return self._get_delta(ie, basis_inv, path), False, None
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        elif kind == 'symlink':
            current_link_target = content_summary[3]
            if not store:
                # symlink target is not generic metadata, check if it has
                # changed.
                if current_link_target != parent_entry.symlink_target:
                    store = True
            if not store:
                # unchanged, carry over.
                ie.revision = parent_entry.revision
                ie.symlink_target = parent_entry.symlink_target
                return self._get_delta(ie, basis_inv, path), False, None
            ie.symlink_target = current_link_target
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        elif kind == 'tree-reference':
            if not store:
                if content_summary[3] != parent_entry.reference_revision:
                    store = True
            if not store:
                # unchanged, carry over.
                ie.reference_revision = parent_entry.reference_revision
                ie.revision = parent_entry.revision
                return self._get_delta(ie, basis_inv, path), False, None
            ie.reference_revision = content_summary[3]
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        else:
            raise NotImplementedError('unknown kind')
        ie.revision = self._new_revision_id
        return self._get_delta(ie, basis_inv, path), True, fingerprint

    def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
        # Note: as we read the content directly from the tree, we know it's not
        # been turned into unicode or badly split - but a broken tree
        # implementation could give us bad output from readlines() so this is
        # not a guarantee of safety. What would be better is always checking
        # the content during test suite execution. RBC 20070912
        parent_keys = tuple((file_id, parent) for parent in parents)
        return self.repository.texts.add_lines(
            (file_id, self._new_revision_id), parent_keys, new_lines,
            nostore_sha=nostore_sha, random_id=self.random_revid,
            check_content=False)[0:2]
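
    # Illustrative sketch of the key convention used above (the ids are
    # hypothetical): a file text is stored under the 2-tuple key
    # (file_id, revision_id), and its per-file graph parents are the same
    # file_id paired with each parent revision.
    #
    #   key = ('file-id', 'new-rev')
    #   parent_keys = (('file-id', 'parent-rev-1'), ('file-id', 'parent-rev-2'))
    #   sha1, length = repository.texts.add_lines(key, parent_keys, lines)[0:2]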


class RootCommitBuilder(CommitBuilder):
    """This commitbuilder actually records the root id."""

    # the root entry gets versioned properly by this builder.
    _versioned_root = True

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """


######################################################################
# Repositories

class Repository(object):
    """Repository holding history for one or more branches.

    The repository holds and retrieves historical information including
    revisions and file history. It's normally accessed only by the Branch,
    which views a particular line of development through that history.

    The Repository builds on top of some byte storage facilities (the
    revisions, signatures, inventories and texts attributes) and a Transport,
    which respectively provide byte storage and a means to access the
    (possibly remote) disk.

    The byte storage facilities are addressed via tuples, which we refer to
    as 'keys' throughout the code base. Revision_keys, inventory_keys and
    signature_keys are all 1-tuples: (revision_id,). text_keys are two-tuples:
    (file_id, revision_id). We use this interface because it allows low
    friction with the underlying code that implements disk indices, network
    encoding and other parts of bzrlib.

    :ivar revisions: A bzrlib.versionedfile.VersionedFiles instance containing
        the serialised revisions for the repository. This can be used to obtain
        revision graph information or to access raw serialised revisions.
        The result of trying to insert data into the repository via this store
        is undefined: it should be considered read-only except for implementors
        of repositories.
    :ivar signatures: A bzrlib.versionedfile.VersionedFiles instance containing
        the serialised signatures for the repository. This can be used to
        obtain access to raw serialised signatures. The result of trying to
        insert data into the repository via this store is undefined: it should
        be considered read-only except for implementors of repositories.
    :ivar inventories: A bzrlib.versionedfile.VersionedFiles instance containing
        the serialised inventories for the repository. This can be used to
        obtain unserialised inventories. The result of trying to insert data
        into the repository via this store is undefined: it should be
        considered read-only except for implementors of repositories.
    :ivar texts: A bzrlib.versionedfile.VersionedFiles instance containing the
        texts of files and directories for the repository. This can be used to
        obtain file texts or file graphs. Note that Repository.iter_file_bytes
        is usually a better interface for accessing file texts.
        The result of trying to insert data into the repository via this store
        is undefined: it should be considered read-only except for implementors
        of repositories.
    :ivar _transport: Transport for file access to repository, typically
        pointing to .bzr/repository.
    """
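
    # Illustrative sketch of the key addressing described above (the ids are
    # hypothetical): revision, inventory and signature keys are 1-tuples,
    # text keys are 2-tuples.
    #
    #   parent_map = repo.revisions.get_parent_map([('rev-id',)])
    #   stream = repo.texts.get_record_stream([('file-id', 'rev-id')],
    #       'unordered', True)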

    # What class to use for a CommitBuilder. Often it's simpler to change this
    # in a Repository class subclass rather than to override
    # get_commit_builder.
    _commit_builder_class = CommitBuilder
    # The search regex used by xml based repositories to determine what things
    # were changed in a single commit.
    _file_ids_altered_regex = lazy_regex.lazy_compile(
        r'file_id="(?P<file_id>[^"]+)"'
        r'.* revision="(?P<revision_id>[^"]+)"'
        )

    def abort_write_group(self, suppress_errors=False):
        """Abort the current write group, discarding the contents accrued
        within it.

        :param suppress_errors: if true, abort_write_group will catch and log
            unexpected errors that happen during the abort, rather than
            allowing them to propagate. Defaults to False.

        :seealso: start_write_group.
        """
        if self._write_group is not self.get_transaction():
            # has an unlock or relock occurred ?
            raise errors.BzrError('mismatched lock context and write group.')
        try:
            self._abort_write_group()
        except Exception, exc:
            self._write_group = None
            if not suppress_errors:
                raise
            mutter('abort_write_group failed')
            log_exception_quietly()
            note('bzr: ERROR (ignored): %s', exc)
        self._write_group = None
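
    # Illustrative sketch: a caller that is already unwinding from an error
    # typically suppresses secondary failures during the abort ('repo' is
    # hypothetical).
    #
    #   try:
    #       ... insert data inside repo's write group ...
    #   except:
    #       repo.abort_write_group(suppress_errors=True)
    #       raise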

    def _abort_write_group(self):
        """Template method for per-repository write group cleanup.

        This is called during abort before the write group is considered to be
        finished and should cleanup any internal state accrued during the write
        group. There is no requirement that data handed to the repository be
        *not* made available - this is not a rollback - but neither should any
        attempt be made to ensure that data added is fully committed. Abort is
        invoked when an error has occurred so further disk or network
        operations may not be possible or may error and if possible should not
        be attempted.
        """

    def add_fallback_repository(self, repository):
        """Add a repository to use for looking up data not held locally.

        :param repository: A repository.
        """
        if not self._format.supports_external_lookups:
            raise errors.UnstackableRepositoryFormat(self._format, self.base)
        self._check_fallback_repository(repository)
        self._fallback_repositories.append(repository)
        self.texts.add_fallback_versioned_files(repository.texts)
        self.inventories.add_fallback_versioned_files(repository.inventories)
        self.revisions.add_fallback_versioned_files(repository.revisions)
        self.signatures.add_fallback_versioned_files(repository.signatures)
        self._fetch_order = 'topological'
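
    # Illustrative sketch (both repositories are hypothetical): once stacked,
    # lookups fall through to the fallback, and fetches are requested in
    # topological order as set above.
    #
    #   repo.add_fallback_repository(other_repo)
    #   # data held only by other_repo is now visible through repo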

    def _check_fallback_repository(self, repository):
        """Check that this repository can fallback to repository safely.

        Raise an error if not.

        :param repository: A repository to fallback to.
        """
        return InterRepository._assert_same_model(self, repository)

    def add_inventory(self, revision_id, inv, parents):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
            is known to have and are in the repository already.

        :returns: The validator (which is a sha1 digest, though what is sha'd
            is repository format specific) of the serialized inventory.
        """
        if not self.is_in_write_group():
            raise AssertionError("%r not in write group" % (self,))
        _mod_revision.check_not_reserved_id(revision_id)
        if not (inv.revision_id is None or inv.revision_id == revision_id):
            raise AssertionError(
                "Mismatch between inventory revision"
                " id and insertion revid (%r, %r)"
                % (inv.revision_id, revision_id))
        if inv.root is None:
            raise AssertionError()
        inv_lines = self._serialise_inventory_to_lines(inv)
        return self._inventory_add_lines(revision_id, parents,
            inv_lines, check_content=False)

    def _inventory_add_lines(self, revision_id, parents, lines,
        check_content=True):
        """Store lines in inv_vf and return the sha1 of the inventory."""
        parents = [(parent,) for parent in parents]
        return self.inventories.add_lines((revision_id,), parents, lines,
            check_content=check_content)[0]

    def add_revision(self, revision_id, rev, inv=None, config=None):
        """Add rev to the revision store as revision_id.

        :param revision_id: the revision id to use.
        :param rev: The revision object.
        :param inv: The inventory for the revision. If None, it will be looked
                    up in the inventory store.
        :param config: If None no digital signature will be created.
                       If supplied its signature_needed method will be used
                       to determine if a signature should be made.
        """
        # TODO: jam 20070210 Shouldn't we check rev.revision_id and
        #       rev.parent_ids?
        _mod_revision.check_not_reserved_id(revision_id)
        if config is not None and config.signature_needed():
            if inv is None:
                inv = self.get_inventory(revision_id)
            plaintext = Testament(rev, inv).as_short_text()
            self.store_revision_signature(
                gpg.GPGStrategy(config), plaintext, revision_id)
        # check inventory present
        if not self.inventories.get_parent_map([(revision_id,)]):
            if inv is None:
                raise errors.WeaveRevisionNotPresent(revision_id,
                                                     self.inventories)
            else:
                # yes, this is not suitable for adding with ghosts.
                rev.inventory_sha1 = self.add_inventory(revision_id, inv,
                                                        rev.parent_ids)
        else:
            key = (revision_id,)
            rev.inventory_sha1 = self.inventories.get_sha1s([key])[key]
        self._add_revision(rev)

    def _add_revision(self, revision):
        text = self._serializer.write_revision_to_string(revision)
        key = (revision.revision_id,)
        parents = tuple((parent,) for parent in revision.parent_ids)
        self.revisions.add_lines(key, parents, osutils.split_lines(text))
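
    # Illustrative sketch of the flow above (inside a write group; the ids
    # are hypothetical): the inventory is added first so its sha1 can be
    # recorded on the revision.
    #
    #   sha1 = repo.add_inventory('rev-2', inv, ['rev-1'])
    #   rev = _mod_revision.Revision('rev-2', inventory_sha1=sha1, ...)
    #   repo.add_revision('rev-2', rev, inv=inv)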

    def all_revision_ids(self):
        """Returns a list of all the revision ids in the repository.

        This is conceptually deprecated because code should generally work on
        the graph reachable from a particular revision, and ignore any other
        revisions that might be present. There is no direct replacement
        method.
        """
        if 'evil' in debug.debug_flags:
            mutter_callsite(2, "all_revision_ids is linear with history.")
        return self._all_revision_ids()

    def _all_revision_ids(self):
        """Returns a list of all the revision ids in the repository.

        These are in as much topological order as the underlying store can
        offer.
        """
        raise NotImplementedError(self._all_revision_ids)

    def break_lock(self):
        """Break a lock if one is present from another instance.

        Uses the ui factory to ask for confirmation if the lock may be from
        an active process.
        """
        self.control_files.break_lock()

    @needs_read_lock
    def _eliminate_revisions_not_present(self, revision_ids):
        """Check every revision id in revision_ids to see if we have it.

        Returns a set of the present revisions.
        """
        graph = self.get_graph()
        parent_map = graph.get_parent_map(revision_ids)
        # The old API returned a list, should this actually be a set?
        return parent_map.keys()

    @staticmethod
    def create(a_bzrdir):
        """Construct the current default format repository in a_bzrdir."""
        return RepositoryFormat.get_default_format().initialize(a_bzrdir)

    def __init__(self, _format, a_bzrdir, control_files):
        """Instantiate a Repository.

        :param _format: The format of the repository on disk.
        :param a_bzrdir: The BzrDir of the repository.

        In the future we will have a single api for all stores for
        getting file texts, inventories and revisions, then
        this construct will accept instances of those things.
        """
        super(Repository, self).__init__()
        self._format = _format
        # the following are part of the public API for Repository:
        self.bzrdir = a_bzrdir
        self.control_files = control_files
        self._transport = control_files._transport
        self.base = self._transport.base
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = False
        self._reconcile_backsup_inventory = True
        # not right yet - should be more semantically clear ?
        #
        # TODO: make sure to construct the right store classes, etc, depending
        # on whether escaping is required.
        self._warn_if_deprecated()
        self._write_group = None
        # Additional places to query for data.
        self._fallback_repositories = []
        # What order should fetch operations request streams in?
        # The default is unordered as that is the cheapest for an origin to
        # provide.
        self._fetch_order = 'unordered'
        # Does this repository use deltas that can be fetched as-deltas ?
        # (E.g. knits, where the knit deltas can be transplanted intact.)
        # We default to False, which will ensure that enough data to get
        # a full text out of any fetch stream will be grabbed.
        self._fetch_uses_deltas = False
        # Should fetch trigger a reconcile after the fetch? Only needed for
        # some repository formats that can suffer internal inconsistencies.
        self._fetch_reconcile = False

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__,
                           self.base)

    def has_same_location(self, other):
        """Returns a boolean indicating if this repository is at the same
        location as another repository.

        This might return False even when two repository objects are accessing
        the same physical repository via different URLs.
        """
        if self.__class__ is not other.__class__:
            return False
        return (self._transport.base == other._transport.base)

    def is_in_write_group(self):
        """Return True if there is an open write group.

        :seealso: start_write_group.
        """
        return self._write_group is not None
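
    # Illustrative sketch of the nesting these predicates describe ('repo'
    # is hypothetical): write groups live strictly inside a write lock.
    #
    #   repo.lock_write()
    #   try:
    #       repo.start_write_group()    # is_in_write_group() -> True
    #       try:
    #           ... insert data ...
    #       except:
    #           repo.abort_write_group()
    #           raise
    #       else:
    #           repo.commit_write_group()
    #   finally:
    #       repo.unlock()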

    def is_locked(self):
        """Return True if this object is locked."""
        return self.control_files.is_locked()

    def is_write_locked(self):
        """Return True if this object is write locked."""
        return self.is_locked() and self.control_files._lock_mode == 'w'

    def lock_write(self, token=None):
        """Lock this repository for writing.

        This causes caching within the repository object to start accumulating
        data during reads, and allows a 'write_group' to be obtained. Write
        groups must be used for actual data insertion.

        :param token: if this is already locked, then lock_write will fail
            unless the token matches the existing lock.
        :returns: a token if this instance supports tokens, otherwise None.
        :raises TokenLockingNotSupported: when a token is given but this
            instance doesn't support using token locks.
        :raises MismatchedToken: if the specified token doesn't match the token
            of the existing lock.
        :seealso: start_write_group.

        A token should be passed in if you know that you have locked the object
        some other way, and need to synchronise this object's state with that
        fact.

        XXX: this docstring is duplicated in many places, e.g. lockable_files.py
        """
        result = self.control_files.lock_write(token=token)
        for repo in self._fallback_repositories:
            # Writes don't affect fallback repos
            repo.lock_read()
        self._refresh_data()
        return result

    def lock_read(self):
        self.control_files.lock_read()
        for repo in self._fallback_repositories:
            repo.lock_read()
        self._refresh_data()

    def get_physical_lock_status(self):
        return self.control_files.get_physical_lock_status()

    def leave_lock_in_place(self):
        """Tell this repository not to release the physical lock when this
        object is unlocked.

        If lock_write doesn't return a token, then this method is not supported.
        """
        self.control_files.leave_in_place()

    def dont_leave_lock_in_place(self):
        """Tell this repository to release the physical lock when this
        object is unlocked, even if it didn't originally acquire it.

        If lock_write doesn't return a token, then this method is not supported.
        """
        self.control_files.dont_leave_in_place()

    @needs_read_lock
    def gather_stats(self, revid=None, committers=None):
        """Gather statistics from a revision id.

        :param revid: The revision id to gather statistics from, if None, then
            no revision specific statistics are gathered.
        :param committers: Optional parameter controlling whether to grab
            a count of committers from the revision specific statistics.
        :return: A dictionary of statistics. Currently this contains:
            committers: The number of committers if requested.
            firstrev: A tuple with timestamp, timezone for the penultimate left
                most ancestor of revid, if revid is not the NULL_REVISION.
            latestrev: A tuple with timestamp, timezone for revid, if revid is
                not the NULL_REVISION.
            revisions: The total revision count in the repository.
            size: An estimated disk size of the repository in bytes.
        """
        result = {}
        if revid and committers:
            result['committers'] = 0
        if revid and revid != _mod_revision.NULL_REVISION:
            if committers:
                all_committers = set()
            revisions = self.get_ancestry(revid)
            # pop the leading None
            revisions.pop(0)
            first_revision = None
            if not committers:
                # ignore the revisions in the middle - just grab first and last
                revisions = revisions[0], revisions[-1]
            for revision in self.get_revisions(revisions):
                if not first_revision:
                    first_revision = revision
                if committers:
                    all_committers.add(revision.committer)
            last_revision = revision
            if committers:
                result['committers'] = len(all_committers)
            result['firstrev'] = (first_revision.timestamp,
                first_revision.timezone)
            result['latestrev'] = (last_revision.timestamp,
                last_revision.timezone)

        # now gather global repository information
        # XXX: This is available for many repos regardless of listability.
        if self.bzrdir.root_transport.listable():
            # XXX: do we want to __define len__() ?
            # Maybe the versionedfiles object should provide a different
            # method to get the number of keys.
            result['revisions'] = len(self.revisions.keys())
        return result
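
    # Illustrative sketch of consuming the statistics ('repo' and the revid
    # are hypothetical); only the keys documented above may be present.
    #
    #   stats = repo.gather_stats(revid='rev-id', committers=True)
    #   timestamp, timezone = stats['latestrev']
    #   total = stats.get('revisions')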

    def find_branches(self, using=False):
        """Find branches underneath this repository.

        This will include branches inside other branches.

        :param using: If True, list only branches using this repository.
        """
        if using and not self.is_shared():
            try:
                return [self.bzrdir.open_branch()]
            except errors.NotBranchError:
                return []

        class Evaluator(object):

            def __init__(self):
                self.first_call = True

            def __call__(self, bzrdir):
                # On the first call, the parameter is always the bzrdir
                # containing the current repo.
                if not self.first_call:
                    try:
                        repository = bzrdir.open_repository()
                    except errors.NoRepositoryPresent:
                        pass
                    else:
                        return False, (None, repository)
                self.first_call = False
                try:
                    value = (bzrdir.open_branch(), None)
                except errors.NotBranchError:
                    value = (None, None)
                return True, value

        branches = []
        for branch, repository in bzrdir.BzrDir.find_bzrdirs(
                self.bzrdir.root_transport, evaluate=Evaluator()):
            if branch is not None:
                branches.append(branch)
            if not using and repository is not None:
                branches.extend(repository.find_branches())
        return branches

    @needs_read_lock
    def search_missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
        """Return the revision ids that other has that this does not.

        These are returned in topological order.

        revision_id: only return revision ids included by revision_id.
        """
        return InterRepository.get(other, self).search_missing_revision_ids(
            revision_id, find_ghosts)

    @deprecated_method(one_two)
    @needs_read_lock
    def missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
        """Return the revision ids that other has that this does not.

        These are returned in topological order.

        revision_id: only return revision ids included by revision_id.
        """
        keys = self.search_missing_revision_ids(
            other, revision_id, find_ghosts).get_keys()
        other.lock_read()
        try:
            parents = other.get_graph().get_parent_map(keys)
        finally:
            other.unlock()
        return tsort.topo_sort(parents)

    @staticmethod
    def open(base):
        """Open the repository rooted at base.

        For instance, if the repository is at URL/.bzr/repository,
        Repository.open(URL) -> a Repository instance.
        """
        control = bzrdir.BzrDir.open(base)
        return control.open_repository()

    def copy_content_into(self, destination, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This is a destructive operation! Do not use it on existing
        repositories.
        """
        return InterRepository.get(self, destination).copy_content(revision_id)

    def commit_write_group(self):
        """Commit the contents accrued within the current write group.

        :seealso: start_write_group.
        """
        if self._write_group is not self.get_transaction():
            # has an unlock or relock occurred ?
            raise errors.BzrError('mismatched lock context %r and '
                'write group %r.' %
                (self.get_transaction(), self._write_group))
        self._commit_write_group()
        self._write_group = None

    def _commit_write_group(self):
        """Template method for per-repository write group cleanup.

        This is called before the write group is considered to be
        finished and should ensure that all data handed to the repository
        for writing during the write group is safely committed (to the
        extent possible considering file system caching etc).
        """

    def fetch(self, source, revision_id=None, pb=None, find_ghosts=False):
        """Fetch the content required to construct revision_id from source.

        If revision_id is None all content is copied.

        :param find_ghosts: Find and copy revisions in the source that are
            ghosts in the target (and not reachable directly by walking out to
            the first-present revision in target from revision_id).
        """
        # fast path same-url fetch operations
        if self.has_same_location(source):
            # check that last_revision is in 'from' and then return a
            # no-operation.
            if (revision_id is not None and
                not _mod_revision.is_null(revision_id)):
                self.get_revision(revision_id)
            return
        # if there is no specific appropriate InterRepository, this will get
        # the InterRepository base class, which raises an
        # IncompatibleRepositories when asked to fetch.
        inter = InterRepository.get(source, self)
        return inter.fetch(revision_id=revision_id, pb=pb,
            find_ghosts=find_ghosts)
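
    # Illustrative sketch ('target' and 'source' are hypothetical): copying
    # everything needed to construct one revision, tolerating ghosts.
    #
    #   target.lock_write()
    #   try:
    #       target.fetch(source, revision_id='rev-id', find_ghosts=True)
    #   finally:
    #       target.unlock()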

    def create_bundle(self, target, base, fileobj, format=None):
        return serializer.write_bundle(self, target, base, fileobj, format)

    def get_commit_builder(self, branch, parents, config, timestamp=None,
                           timezone=None, committer=None, revprops=None,
                           revision_id=None):
        """Obtain a CommitBuilder for this repository.

        :param branch: Branch to commit to.
        :param parents: Revision ids of the parents of the new revision.
        :param config: Configuration to use.
        :param timestamp: Optional timestamp recorded for commit.
        :param timezone: Optional timezone for timestamp.
        :param committer: Optional committer to set for commit.
        :param revprops: Optional dictionary of revision properties.
        :param revision_id: Optional revision id.
        """
        result = self._commit_builder_class(self, parents, config,
            timestamp, timezone, committer, revprops, revision_id)
        self.start_write_group()
        return result

    def unlock(self):
        if (self.control_files._lock_count == 1 and
            self.control_files._lock_mode == 'w'):
            if self._write_group is not None:
                self.abort_write_group()
                self.control_files.unlock()
                raise errors.BzrError(
                    'Must end write groups before releasing write locks.')
        self.control_files.unlock()
        for repo in self._fallback_repositories:
            repo.unlock()

    @needs_read_lock
    def clone(self, a_bzrdir, revision_id=None):
        """Clone this repository into a_bzrdir using the current format.

        Currently no check is made that the format of this repository and
        the bzrdir format are compatible. FIXME RBC 20060201.

        :return: The newly created destination repository.
        """
        # TODO: deprecate after 0.16; cloning this with all its settings is
        # probably not very useful -- mbp 20070423
        dest_repo = self._create_sprouting_repo(a_bzrdir, shared=self.is_shared())
        self.copy_content_into(dest_repo, revision_id)
        return dest_repo

    def start_write_group(self):
        """Start a write group in the repository.

        Write groups are used by repositories which do not have a 1:1 mapping
        between file ids and backend store to manage the insertion of data from
        both fetch and commit operations.

        A write lock is required around the start_write_group/commit_write_group
        for the support of lock-requiring repository formats.

        One can only insert data into a repository inside a write group.

        :return: None.
        """
        if not self.is_write_locked():
            raise errors.NotWriteLocked(self)
        if self._write_group:
            raise errors.BzrError('already in a write group')
        self._start_write_group()
        # so we can detect unlock/relock - the write group is now entered.
        self._write_group = self.get_transaction()

    def _start_write_group(self):
        """Template method for per-repository write group startup.

        This is called before the write group is considered to be
        entered.
        """

    @needs_read_lock
    def sprout(self, to_bzrdir, revision_id=None):
        """Create a descendent repository for new development.

        Unlike clone, this does not copy the settings of the repository.
        """
        dest_repo = self._create_sprouting_repo(to_bzrdir, shared=False)
        dest_repo.fetch(self, revision_id=revision_id)
        return dest_repo

    def _create_sprouting_repo(self, a_bzrdir, shared):
        if not isinstance(a_bzrdir._format, self.bzrdir._format.__class__):
            # use target default format.
            dest_repo = a_bzrdir.create_repository()
        else:
            # Most control formats need the repository to be specifically
            # created, but on some old all-in-one formats it's not needed
            try:
                dest_repo = self._format.initialize(a_bzrdir, shared=shared)
            except errors.UninitializableFormat:
                dest_repo = a_bzrdir.open_repository()
        return dest_repo

    @needs_read_lock
    def has_revision(self, revision_id):
        """True if this repository has a copy of the revision."""
        return revision_id in self.has_revisions((revision_id,))

    @needs_read_lock
    def has_revisions(self, revision_ids):
        """Probe to find out the presence of multiple revisions.

        :param revision_ids: An iterable of revision_ids.
        :return: A set of the revision_ids that were present.
        """
        parent_map = self.revisions.get_parent_map(
            [(rev_id,) for rev_id in revision_ids])
        result = set()
        if _mod_revision.NULL_REVISION in revision_ids:
            result.add(_mod_revision.NULL_REVISION)
        result.update([key[0] for key in parent_map])
        return result
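
    # Illustrative sketch ('repo' and the ids are hypothetical): probing
    # many revisions costs one parent-map query rather than one lookup each.
    #
    #   present = repo.has_revisions(['rev-1', 'rev-2', 'ghost'])
    #   missing = set(['rev-1', 'rev-2', 'ghost']) - present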

    @needs_read_lock
    def get_revision(self, revision_id):
        """Return the Revision object for a named revision."""
        return self.get_revisions([revision_id])[0]

    @needs_read_lock
    def get_revision_reconcile(self, revision_id):
        """'reconcile' helper routine that allows access to a revision always.

        This variant of get_revision does not cross check the weave graph
        against the revision one as get_revision does: but it should only
        be used by reconcile, or reconcile-alike commands that are correcting
        or testing the revision graph.
        """
        return self._get_revisions([revision_id])[0]

    @needs_read_lock
    def get_revisions(self, revision_ids):
        """Get many revisions at once."""
        return self._get_revisions(revision_ids)

    @needs_read_lock
    def _get_revisions(self, revision_ids):
        """Core work logic to get many revisions without sanity checks."""
        for rev_id in revision_ids:
            if not rev_id or not isinstance(rev_id, basestring):
                raise errors.InvalidRevisionId(revision_id=rev_id, branch=self)
        keys = [(key,) for key in revision_ids]
        stream = self.revisions.get_record_stream(keys, 'unordered', True)
        revs = {}
        for record in stream:
            if record.storage_kind == 'absent':
                raise errors.NoSuchRevision(self, record.key[0])
            text = record.get_bytes_as('fulltext')
            rev = self._serializer.read_revision_from_string(text)
            revs[record.key[0]] = rev
        return [revs[revid] for revid in revision_ids]
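
    # Illustrative sketch of the record stream contract used above: records
    # arrive in arbitrary order, so callers map them back by key.
    #
    #   stream = repo.revisions.get_record_stream(keys, 'unordered', True)
    #   for record in stream:
    #       if record.storage_kind == 'absent':
    #           ...  # the key is not present anywhere
    #       text = record.get_bytes_as('fulltext')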

    @needs_read_lock
    def get_revision_xml(self, revision_id):
        # TODO: jam 20070210 This shouldn't be necessary since get_revision
        #       would have already done it.
        # TODO: jam 20070210 Just use _serializer.write_revision_to_string()
        rev = self.get_revision(revision_id)
        rev_tmp = cStringIO.StringIO()
        # the current serializer..
        self._serializer.write_revision(rev, rev_tmp)
        rev_tmp.seek(0)
        return rev_tmp.getvalue()

    def get_deltas_for_revisions(self, revisions):
        """Produce a generator of revision deltas.

        Note that the input is a sequence of REVISIONS, not revision_ids.
        Trees will be held in memory until the generator exits.
        Each delta is relative to the revision's lefthand predecessor.
        """
        required_trees = set()
        for revision in revisions:
            required_trees.add(revision.revision_id)
            required_trees.update(revision.parent_ids[:1])
        trees = dict((t.get_revision_id(), t) for
                     t in self.revision_trees(required_trees))
        for revision in revisions:
            if not revision.parent_ids:
                old_tree = self.revision_tree(_mod_revision.NULL_REVISION)
            else:
                old_tree = trees[revision.parent_ids[0]]
            yield trees[revision.revision_id].changes_from(old_tree)

    @needs_read_lock
    def get_revision_delta(self, revision_id):
        """Return the delta for one revision.

        The delta is relative to the left-hand predecessor of the
        revision.
        """
        r = self.get_revision(revision_id)
        return list(self.get_deltas_for_revisions([r]))[0]

    @needs_write_lock
    def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
        signature = gpg_strategy.sign(plaintext)
        self.add_signature_text(revision_id, signature)

    @needs_write_lock
    def add_signature_text(self, revision_id, signature):
        self.signatures.add_lines((revision_id,), (),
            osutils.split_lines(signature))

    def find_text_key_references(self):
        """Find the text key references within the repository.

        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. The inventory texts from all present
            revision ids are assessed to generate this report.
        """
        revision_keys = self.revisions.keys()
        w = self.inventories
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._find_text_key_references_from_xml_inventory_lines(
                w.iter_lines_added_or_present_in_keys(revision_keys, pb=pb))
        finally:
            pb.finished()

    def _find_text_key_references_from_xml_inventory_lines(self,
        line_iterator):
        """Core routine for extracting references to texts from inventories.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. Note that if that revision_id was
            not part of the line_iterator's output then False will be given -
            even though it may actually refer to that key.
        """
        if not self._serializer.support_altered_by_hack:
            raise AssertionError(
                "_find_text_key_references_from_xml_inventory_lines only "
                "supported for branches which store inventory as unnested xml"
                ", not on %r" % self)
        result = {}

        # this code needs to read every new line in every inventory for the
        # inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
        # not present in one of those inventories is unnecessary but not
        # harmful because we are filtering by the revision id marker in the
        # inventory lines : we only select file ids altered in one of those
        # revisions. We don't need to see all lines in the inventory because
        # only those added in an inventory in rev X can contain a revision=X
        # line.
        unescape_revid_cache = {}
        unescape_fileid_cache = {}

        # jam 20061218 In a big fetch, this handles hundreds of thousands
        # of lines, so it has had a lot of inlining and optimizing done.
        # Sorry that it is a little bit messy.
        # Move several functions to be local variables, since this is a long
        # running loop.
        search = self._file_ids_altered_regex.search
        unescape = _unescape_xml
        setdefault = result.setdefault
        for line, line_key in line_iterator:
            match = search(line)
            if match is None:
                continue
            # One call to match.group() returning multiple items is quite a
            # bit faster than 2 calls to match.group() each returning 1
            # item.
            file_id, revision_id = match.group('file_id', 'revision_id')

            # Inlining the cache lookups helps a lot when you make 170,000
            # lines and 350k ids, versus 8.4 unique ids.
            # Using a cache helps in 2 ways:
            #   1) Avoids unnecessary decoding calls
            #   2) Re-uses cached strings, which helps in future set and
            #      equality checks.
            # (2) is enough that removing encoding entirely along with
            # the cache (so we are using plain strings) results in no
            # performance improvement.
            try:
                revision_id = unescape_revid_cache[revision_id]
            except KeyError:
                unescaped = unescape(revision_id)
                unescape_revid_cache[revision_id] = unescaped
                revision_id = unescaped

            # Note that unconditionally unescaping means that we deserialise
            # every fileid, which for general 'pull' is not great, but we don't
            # really want to have so many fulltexts that this matters anyway.
            try:
                file_id = unescape_fileid_cache[file_id]
            except KeyError:
                unescaped = unescape(file_id)
                unescape_fileid_cache[file_id] = unescaped
                file_id = unescaped

            key = (file_id, revision_id)
            setdefault(key, False)
            if revision_id == line_key[-1]:
                result[key] = True
        return result

    def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
        revision_ids):
        """Helper routine for fileids_altered_by_revision_ids.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :param revision_ids: The revision ids to filter for. This should be a
            set or other type which supports efficient __contains__ lookups, as
            the revision id from each parsed line will be looked up in the
            revision_ids filter.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-ids has the exact revision_ids that
            altered it listed explicitly.
        """
        result = {}
        setdefault = result.setdefault
        for key in \
            self._find_text_key_references_from_xml_inventory_lines(
                line_iterator).iterkeys():
            # once data is all ensured-consistent; then this is
            # if revision_id == version_id
            if key[-1:] in revision_ids:
                setdefault(key[0], set()).add(key[-1])
        return result

    def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
        """Find the file ids and versions affected by revisions.

        :param revision_ids: an iterable containing revision ids.
        :param _inv_weave: The inventory weave from this repository or None.
            If None, the inventory weave will be opened automatically.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-ids has the exact revision_ids that
            altered it listed explicitly.
        """
        selected_keys = set((revid,) for revid in revision_ids)
        w = _inv_weave or self.inventories
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._find_file_ids_from_xml_inventory_lines(
                w.iter_lines_added_or_present_in_keys(
                    selected_keys, pb=pb),
                selected_keys)
        finally:
            pb.finished()

    def iter_files_bytes(self, desired_files):
        """Iterate through file versions.

        Files will not necessarily be returned in the order they occur in
        desired_files. No specific order is guaranteed.

        Yields pairs of identifier, bytes_iterator. identifier is an opaque
        value supplied by the caller as part of desired_files. It should
        uniquely identify the file version in the caller's context. (Examples:
        an index number or a TreeTransform trans_id.)

        bytes_iterator is an iterable of bytestrings for the file. The
        kind of iterable and length of the bytestrings are unspecified, but for
        this implementation, it is a list of bytes produced by
        VersionedFile.get_record_stream().

        :param desired_files: a list of (file_id, revision_id, identifier)
            triples
        """
        transaction = self.get_transaction()
        text_keys = {}
        for file_id, revision_id, callable_data in desired_files:
            text_keys[(file_id, revision_id)] = callable_data
        for record in self.texts.get_record_stream(text_keys, 'unordered', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self)
            yield text_keys[record.key], record.get_bytes_as('fulltext')
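
    # Illustrative sketch ('repo' and the triples are hypothetical): the
    # identifiers come back with the bytes, in no particular order.
    #
    #   wanted = [('file-id', 'rev-1', 'id-a'), ('file-id', 'rev-2', 'id-b')]
    #   for identifier, bytes_iterator in repo.iter_files_bytes(wanted):
    #       text = ''.join(bytes_iterator)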
1409
def _generate_text_key_index(self, text_key_references=None,
1411
"""Generate a new text key index for the repository.
1413
This is an expensive function that will take considerable time to run.
1415
:return: A dict mapping text keys ((file_id, revision_id) tuples) to a
1416
list of parents, also text keys. When a given key has no parents,
1417
the parents list will be [NULL_REVISION].
1419
# All revisions, to find inventory parents.
1420
if ancestors is None:
1421
graph = self.get_graph()
1422
ancestors = graph.get_parent_map(self.all_revision_ids())
1423
if text_key_references is None:
1424
text_key_references = self.find_text_key_references()
1425
pb = ui.ui_factory.nested_progress_bar()
1427
return self._do_generate_text_key_index(ancestors,
1428
text_key_references, pb)
1432
def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
1433
"""Helper for _generate_text_key_index to avoid deep nesting."""
1434
revision_order = tsort.topo_sort(ancestors)
1435
invalid_keys = set()
1437
for revision_id in revision_order:
1438
revision_keys[revision_id] = set()
1439
text_count = len(text_key_references)
1440
# a cache of the text keys to allow reuse; costs a dict of all the
1441
# keys, but saves a 2-tuple for every child of a given key.
1443
for text_key, valid in text_key_references.iteritems():
1445
invalid_keys.add(text_key)
1447
revision_keys[text_key[1]].add(text_key)
1448
text_key_cache[text_key] = text_key
1449
del text_key_references
1451
text_graph = graph.Graph(graph.DictParentsProvider(text_index))
1452
NULL_REVISION = _mod_revision.NULL_REVISION
1453
# Set a cache with a size of 10 - this suffices for bzr.dev but may be
1454
# too small for large or very branchy trees. However, for 55K path
1455
# trees, it would be easy to use too much memory trivially. Ideally we
1456
# could gauge this by looking at available real memory etc, but this is
1457
# always a tricky proposition.
1458
inventory_cache = lru_cache.LRUCache(10)
1459
batch_size = 10 # should be ~150MB on a 55K path tree
1460
batch_count = len(revision_order) / batch_size + 1
1462
pb.update("Calculating text parents.", processed_texts, text_count)
1463
for offset in xrange(batch_count):
1464
to_query = revision_order[offset * batch_size:(offset + 1) *
1468
for rev_tree in self.revision_trees(to_query):
1469
revision_id = rev_tree.get_revision_id()
1470
parent_ids = ancestors[revision_id]
1471
for text_key in revision_keys[revision_id]:
1472
pb.update("Calculating text parents.", processed_texts)
1473
processed_texts += 1
1474
candidate_parents = []
1475
for parent_id in parent_ids:
1476
parent_text_key = (text_key[0], parent_id)
1478
check_parent = parent_text_key not in \
1479
revision_keys[parent_id]
1481
# the parent parent_id is a ghost:
1482
check_parent = False
1483
# truncate the derived graph against this ghost.
1484
parent_text_key = None
1486
# look at the parent commit details inventories to
1487
# determine possible candidates in the per file graph.
1490
inv = inventory_cache[parent_id]
1492
inv = self.revision_tree(parent_id).inventory
1493
inventory_cache[parent_id] = inv
1494
parent_entry = inv._byid.get(text_key[0], None)
1495
if parent_entry is not None:
1497
text_key[0], parent_entry.revision)
1499
parent_text_key = None
1500
if parent_text_key is not None:
1501
candidate_parents.append(
1502
text_key_cache[parent_text_key])
1503
parent_heads = text_graph.heads(candidate_parents)
1504
new_parents = list(parent_heads)
1505
new_parents.sort(key=lambda x:candidate_parents.index(x))
1506
if new_parents == []:
1507
new_parents = [NULL_REVISION]
1508
text_index[text_key] = new_parents
1510
for text_key in invalid_keys:
1511
text_index[text_key] = [NULL_REVISION]
1514
def item_keys_introduced_by(self, revision_ids, _files_pb=None):
1515
"""Get an iterable listing the keys of all the data introduced by a set
1518
The keys will be ordered so that the corresponding items can be safely
1519
fetched and inserted in that order.
1521
:returns: An iterable producing tuples of (knit-kind, file-id,
1522
versions). knit-kind is one of 'file', 'inventory', 'signatures',
1523
'revisions'. file-id is None unless knit-kind is 'file'.
1525
# XXX: it's a bit weird to control the inventory weave caching in this
1526
# generator. Ideally the caching would be done in fetch.py I think. Or
1527
# maybe this generator should explicitly have the contract that it
1528
# should not be iterated until the previously yielded item has been
1530
inv_w = self.inventories
1532
# file ids that changed
1533
file_ids = self.fileids_altered_by_revision_ids(revision_ids, inv_w)
1535
num_file_ids = len(file_ids)
1536
for file_id, altered_versions in file_ids.iteritems():
1537
if _files_pb is not None:
1538
_files_pb.update("fetch texts", count, num_file_ids)
1540
yield ("file", file_id, altered_versions)
1541
# We're done with the files_pb. Note that it finished by the caller,
1542
# just as it was created by the caller.
1546
yield ("inventory", None, revision_ids)
1549
# XXX: Note ATM no callers actually pay attention to this return
1550
# instead they just use the list of revision ids and ignore
1551
# missing sigs. Consider removing this work entirely
1552
revisions_with_signatures = set(self.signatures.get_parent_map(
1553
[(r,) for r in revision_ids]))
1554
revisions_with_signatures = set(
1555
[r for (r,) in revisions_with_signatures])
1556
revisions_with_signatures.intersection_update(revision_ids)
1557
yield ("signatures", None, revisions_with_signatures)
1560
yield ("revisions", None, revision_ids)
1563
def get_inventory(self, revision_id):
1564
"""Get Inventory object by revision id."""
1565
return self.iter_inventories([revision_id]).next()
1567
def iter_inventories(self, revision_ids):
1568
"""Get many inventories by revision_ids.
1570
This will buffer some or all of the texts used in constructing the
1571
inventories in memory, but will only parse a single inventory at a
1574
:return: An iterator of inventories.
1576
if ((None in revision_ids)
1577
or (_mod_revision.NULL_REVISION in revision_ids)):
1578
raise ValueError('cannot get null revision inventory')
1579
return self._iter_inventories(revision_ids)
1581
def _iter_inventories(self, revision_ids):
1582
"""single-document based inventory iteration."""
1583
for text, revision_id in self._iter_inventory_xmls(revision_ids):
1584
yield self.deserialise_inventory(revision_id, text)
1586
def _iter_inventory_xmls(self, revision_ids):
1587
keys = [(revision_id,) for revision_id in revision_ids]
1588
stream = self.inventories.get_record_stream(keys, 'unordered', True)
1590
for record in stream:
1591
if record.storage_kind != 'absent':
1592
texts[record.key] = record.get_bytes_as('fulltext')
1594
raise errors.NoSuchRevision(self, record.key)
1596
yield texts[key], key[-1]
1598
    def deserialise_inventory(self, revision_id, xml):
        """Transform the xml into an inventory object.

        :param revision_id: The expected revision id of the inventory.
        :param xml: A serialised inventory.
        """
        result = self._serializer.read_inventory_from_string(xml, revision_id)
        if result.revision_id != revision_id:
            raise AssertionError('revision id mismatch %s != %s' % (
                result.revision_id, revision_id))
        return result

    def serialise_inventory(self, inv):
        return self._serializer.write_inventory_to_string(inv)

    def _serialise_inventory_to_lines(self, inv):
        return self._serializer.write_inventory_to_lines(inv)

    def get_serializer_format(self):
        return self._serializer.format_num

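    # Sketch (assumed usage, not part of the original source): for a given
    # serializer, serialise_inventory and deserialise_inventory are inverses,
    # so a round trip preserves the revision id:
    #
    #   xml = repo.serialise_inventory(repo.get_inventory(rev_id))
    #   inv = repo.deserialise_inventory(rev_id, xml)
    #   assert inv.revision_id == rev_id
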
    @needs_read_lock
    def get_inventory_xml(self, revision_id):
        """Get inventory XML as a file object."""
        texts = self._iter_inventory_xmls([revision_id])
        try:
            text, revision_id = texts.next()
        except StopIteration:
            raise errors.HistoryMissing(self, 'inventory', revision_id)
        return text

    @needs_read_lock
    def get_inventory_sha1(self, revision_id):
        """Return the sha1 hash of the inventory entry."""
        return self.get_revision(revision_id).inventory_sha1

    def iter_reverse_revision_history(self, revision_id):
        """Iterate backwards through revision ids in the lefthand history.

        :param revision_id: The revision id to start with.  All its lefthand
            ancestors will be traversed.
        """
        graph = self.get_graph()
        next_id = revision_id
        while True:
            if next_id in (None, _mod_revision.NULL_REVISION):
                return
            yield next_id
            # Note: The following line may raise KeyError in the event of
            # truncated history. We decided not to have a try:except:raise
            # RevisionNotPresent here until we see a use for it, because of
            # the cost in an inner loop that is by its very nature O(history).
            # Robert Collins 20080326
            parents = graph.get_parent_map([next_id])[next_id]
            if len(parents) == 0:
                return
            else:
                next_id = parents[0]

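    # Sketch (assumed usage): walking the lefthand (mainline) ancestry of a
    # branch tip back to the origin, assuming `tip` names a revision present
    # in this repository:
    #
    #   for rev_id in repo.iter_reverse_revision_history(tip):
    #       print rev_id    # tip first, origin last
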
    @needs_read_lock
    def get_revision_inventory(self, revision_id):
        """Return inventory of a past revision."""
        # TODO: Unify this with get_inventory()
        # bzr 0.0.6 and later imposes the constraint that the inventory_id
        # must be the same as its revision, so this is trivial.
        if revision_id is None:
            # This does not make sense: if there is no revision,
            # then it is the current tree inventory surely ?!
            # and thus get_root_id() is something that looks at the last
            # commit on the branch, and the get_root_id is an inventory check.
            raise NotImplementedError
            # return Inventory(self.get_root_id())
        else:
            return self.get_inventory(revision_id)

    def is_shared(self):
        """Return True if this repository is flagged as a shared repository."""
        raise NotImplementedError(self.is_shared)

    @needs_write_lock
    def reconcile(self, other=None, thorough=False):
        """Reconcile this repository."""
        from bzrlib.reconcile import RepoReconciler
        reconciler = RepoReconciler(self, thorough=thorough)
        reconciler.reconcile()
        return reconciler

    def _refresh_data(self):
        """Helper called from lock_* to ensure coherency with disk.

        The default implementation does nothing; it is however possible
        for repositories to maintain loaded indices across multiple locks
        by checking inside their implementation of this method to see
        whether their indices are still valid.  This depends of course on
        the disk format being validatable in this manner.
        """

    @needs_read_lock
    def revision_tree(self, revision_id):
        """Return Tree for a revision on this branch.

        `revision_id` may be NULL_REVISION for the empty tree revision.
        """
        revision_id = _mod_revision.ensure_null(revision_id)
        # TODO: refactor this to use an existing revision object
        # so we don't need to read it in twice.
        if revision_id == _mod_revision.NULL_REVISION:
            return RevisionTree(self, Inventory(root_id=None),
                                _mod_revision.NULL_REVISION)
        else:
            inv = self.get_revision_inventory(revision_id)
            return RevisionTree(self, inv, revision_id)

    def revision_trees(self, revision_ids):
        """Return Trees for revisions in this repository.

        `revision_ids` must not contain None or NULL_REVISION.
        """
        inventories = self.iter_inventories(revision_ids)
        for inv in inventories:
            yield RevisionTree(self, inv, inv.revision_id)

    @needs_read_lock
    def get_ancestry(self, revision_id, topo_sorted=True):
        """Return a list of revision-ids integrated by a revision.

        The first element of the list is always None, indicating the origin
        revision.  This might change when we have history horizons, or
        perhaps we should have a new API.

        This is topologically sorted.
        """
        if _mod_revision.is_null(revision_id):
            return [None]
        if not self.has_revision(revision_id):
            raise errors.NoSuchRevision(self, revision_id)
        graph = self.get_graph()
        keys = set()
        search = graph._make_breadth_first_searcher([revision_id])
        while True:
            try:
                found, ghosts = search.next_with_ghosts()
            except StopIteration:
                break
            keys.update(found)
        if _mod_revision.NULL_REVISION in keys:
            keys.remove(_mod_revision.NULL_REVISION)
        parent_map = graph.get_parent_map(keys)
        keys = tsort.topo_sort(parent_map)
        return [None] + list(keys)

"""Compress the data within the repository.
1753
This operation only makes sense for some repository types. For other
1754
types it should be a no-op that just returns.
1756
This stub method does not require a lock, but subclasses should use
1757
@needs_write_lock as this is a long running call its reasonable to
1758
implicitly lock for the user.
1762
    @deprecated_method(one_six)
    def print_file(self, file, revision_id):
        """Print `file` to stdout.

        FIXME RBC 20060125 as John Meinel points out this is a bad api
        - it writes to stdout, it assumes that that is valid etc. Fix
        by creating a new more flexible convenience function.
        """
        tree = self.revision_tree(revision_id)
        # use inventory as it was in that revision
        file_id = tree.inventory.path2id(file)
        if not file_id:
            # TODO: jam 20060427 Write a test for this code path
            #       it had a bug in it, and was raising the wrong
            #       exception.
            raise errors.BzrError("%r is not present in revision %s"
                                  % (file, revision_id))
        tree.print_file(file_id)

    def get_transaction(self):
        return self.control_files.get_transaction()

    @deprecated_method(one_one)
    def get_parents(self, revision_ids):
        """See StackedParentsProvider.get_parents"""
        parent_map = self.get_parent_map(revision_ids)
        return [parent_map.get(r, None) for r in revision_ids]

    def get_parent_map(self, revision_ids):
        """See graph._StackedParentsProvider.get_parent_map"""
        # revisions index works in keys; this just works in revisions
        # therefore wrap and unwrap
        query_keys = []
        result = {}
        for revision_id in revision_ids:
            if revision_id == _mod_revision.NULL_REVISION:
                result[revision_id] = ()
            elif revision_id is None:
                raise ValueError('get_parent_map(None) is not valid')
            else:
                query_keys.append((revision_id,))
        for ((revision_id,), parent_keys) in \
                self.revisions.get_parent_map(query_keys).iteritems():
            if parent_keys:
                result[revision_id] = tuple(parent_revid
                    for (parent_revid,) in parent_keys)
            else:
                result[revision_id] = (_mod_revision.NULL_REVISION,)
        return result

    def _make_parents_provider(self):
        return self

    def get_graph(self, other_repository=None):
        """Return the graph walker for this repository format."""
        parents_provider = self._make_parents_provider()
        if (other_repository is not None and
            not self.has_same_location(other_repository)):
            parents_provider = graph._StackedParentsProvider(
                [parents_provider, other_repository._make_parents_provider()])
        return graph.Graph(parents_provider)

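    # Sketch (assumed usage): a graph spanning two repositories, so searches
    # can see revisions present in either; `other_repo` is a hypothetical
    # second repository opened elsewhere:
    #
    #   g = repo.get_graph(other_repository=other_repo)
    #   parent_map = g.get_parent_map([rev_id])
    #   # NULL_REVISION maps to (); ghosts are simply absent from the result.
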
    def _get_versioned_file_checker(self):
        """Return an object suitable for checking versioned files."""
        return _VersionedFileChecker(self)

    def revision_ids_to_search_result(self, result_set):
        """Convert a set of revision ids to a graph SearchResult."""
        result_parents = set()
        for parents in self.get_graph().get_parent_map(
            result_set).itervalues():
            result_parents.update(parents)
        included_keys = result_set.intersection(result_parents)
        start_keys = result_set.difference(included_keys)
        exclude_keys = result_parents.difference(result_set)
        result = graph.SearchResult(start_keys, exclude_keys,
            len(result_set), result_set)
        return result

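    # Editorial note (sketch of the intent, not original text): start_keys
    # are the revisions in result_set that nothing else in result_set
    # reaches, and exclude_keys are the parents falling outside result_set,
    # so the SearchResult describes exactly result_set as a graph search
    # (its get_keys() returns result_set itself).
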
    def set_make_working_trees(self, new_value):
        """Set the policy flag for making working trees when creating branches.

        This only applies to branches that use this repository.

        The default is 'True'.
        :param new_value: True to restore the default, False to disable making
                          working trees.
        """
        raise NotImplementedError(self.set_make_working_trees)

    def make_working_trees(self):
        """Returns the policy for making working trees on new branches."""
        raise NotImplementedError(self.make_working_trees)

    @needs_write_lock
    def sign_revision(self, revision_id, gpg_strategy):
        plaintext = Testament.from_revision(self, revision_id).as_short_text()
        self.store_revision_signature(gpg_strategy, plaintext, revision_id)

    @needs_read_lock
    def has_signature_for_revision_id(self, revision_id):
        """Query for a revision signature for revision_id in the repository."""
        if not self.has_revision(revision_id):
            raise errors.NoSuchRevision(self, revision_id)
        sig_present = (1 == len(
            self.signatures.get_parent_map([(revision_id,)])))
        return sig_present

    @needs_read_lock
    def get_signature_text(self, revision_id):
        """Return the text for a signature."""
        stream = self.signatures.get_record_stream([(revision_id,)],
            'unordered', True)
        record = stream.next()
        if record.storage_kind == 'absent':
            raise errors.NoSuchRevision(self, revision_id)
        return record.get_bytes_as('fulltext')

    @needs_read_lock
    def check(self, revision_ids=None):
        """Check consistency of all history of given revision_ids.

        Different repository implementations should override _check().

        :param revision_ids: A non-empty list of revision_ids whose ancestry
             will be checked.  Typically the last revision_id of a branch.
        """
        return self._check(revision_ids)

    def _check(self, revision_ids):
        result = check.Check(self)
        result.check()
        return result

    def _warn_if_deprecated(self):
        global _deprecation_warning_done
        if _deprecation_warning_done:
            return
        _deprecation_warning_done = True
        warning("Format %s for %s is deprecated - please use 'bzr upgrade'"
                " to get better performance"
                % (self._format, self.bzrdir.transport.base))

    def supports_rich_root(self):
        return self._format.rich_root_data

    def _check_ascii_revisionid(self, revision_id, method):
        """Private helper for ascii-only repositories."""
        # weave repositories refuse to store revisionids that are non-ascii.
        if revision_id is not None:
            # weaves require ascii revision ids.
            if isinstance(revision_id, unicode):
                try:
                    revision_id.encode('ascii')
                except UnicodeEncodeError:
                    raise errors.NonAsciiRevisionId(method, self)
            else:
                try:
                    revision_id.decode('ascii')
                except UnicodeDecodeError:
                    raise errors.NonAsciiRevisionId(method, self)

    def revision_graph_can_have_wrong_parents(self):
        """Is it possible for this repository to have a revision graph with
        incorrect parents?

        If True, then this repository must also implement
        _find_inconsistent_revision_parents so that check and reconcile can
        check for inconsistencies before proceeding with other checks that may
        depend on the revision index being consistent.
        """
        raise NotImplementedError(self.revision_graph_can_have_wrong_parents)


# remove these delegates a while after bzr 0.15
def __make_delegated(name, from_module):
    def _deprecated_repository_forwarder():
        symbol_versioning.warn('%s moved to %s in bzr 0.15'
            % (name, from_module),
            DeprecationWarning,
            stacklevel=2)
        m = __import__(from_module, globals(), locals(), [name])
        try:
            return getattr(m, name)
        except AttributeError:
            raise AttributeError('module %s has no name %s'
                % (m, name))
    globals()[name] = _deprecated_repository_forwarder

for _name in [
        'AllInOneRepository',
        'WeaveMetaDirRepository',
        'PreSplitOutRepositoryFormat',
        'RepositoryFormat4',
        'RepositoryFormat5',
        'RepositoryFormat6',
        'RepositoryFormat7',
        ]:
    __make_delegated(_name, 'bzrlib.repofmt.weaverepo')

for _name in [
        'RepositoryFormatKnit',
        'RepositoryFormatKnit1',
        ]:
    __make_delegated(_name, 'bzrlib.repofmt.knitrepo')

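# Sketch (illustration only): after the loops above run, a module-level name
# such as RepositoryFormat7 is bound to a zero-argument forwarder, so legacy
# code written as
#
#   format_class = RepositoryFormat7()
#
# emits a DeprecationWarning and gets back the object now living at
# bzrlib.repofmt.weaverepo.RepositoryFormat7 (the forwarder returns the
# attribute itself, it does not instantiate it).

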
def install_revision(repository, rev, revision_tree):
    """Install all revision data into a repository."""
    install_revisions(repository, [(rev, revision_tree, None)])


def install_revisions(repository, iterable, num_revisions=None, pb=None):
    """Install all revision data into a repository.

    Accepts an iterable of revision, tree, signature tuples.  The signature
    may be None.
    """
    repository.start_write_group()
    try:
        for n, (revision, revision_tree, signature) in enumerate(iterable):
            _install_revision(repository, revision, revision_tree, signature)
            if pb is not None:
                pb.update('Transferring revisions', n + 1, num_revisions)
    except:
        repository.abort_write_group()
        raise
    else:
        repository.commit_write_group()

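# Sketch (assumed usage): installing one revision with its tree and no
# signature into a write-locked repository opened elsewhere:
#
#   repo.lock_write()
#   try:
#       install_revisions(repo, [(rev, tree, None)], num_revisions=1)
#   finally:
#       repo.unlock()

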
def _install_revision(repository, rev, revision_tree, signature):
    """Install all revision data into a repository."""
    present_parents = []
    parent_trees = {}
    for p_id in rev.parent_ids:
        if repository.has_revision(p_id):
            present_parents.append(p_id)
            parent_trees[p_id] = repository.revision_tree(p_id)
        else:
            parent_trees[p_id] = repository.revision_tree(
                _mod_revision.NULL_REVISION)

    inv = revision_tree.inventory
    entries = inv.iter_entries()
    # backwards compatibility hack: skip the root id.
    if not repository.supports_rich_root():
        path, root = entries.next()
        if root.revision != rev.revision_id:
            raise errors.IncompatibleRevision(repr(repository))
    text_keys = {}
    for path, ie in entries:
        text_keys[(ie.file_id, ie.revision)] = ie
    text_parent_map = repository.texts.get_parent_map(text_keys)
    missing_texts = set(text_keys) - set(text_parent_map)
    # Add the texts that are not already present
    for text_key in missing_texts:
        ie = text_keys[text_key]
        text_parents = []
        # FIXME: TODO: The following loop overlaps/duplicates that done by
        # commit to determine parents. There is a latent/real bug here where
        # the parents inserted are not those commit would do - in particular
        # they are not filtered by heads(). RBC, AB
        for revision, tree in parent_trees.iteritems():
            if ie.file_id not in tree:
                continue
            parent_id = tree.inventory[ie.file_id].revision
            # compare full keys: text_parents stores (file_id, revision)
            # tuples, so a bare parent_id would never match.
            if (ie.file_id, parent_id) in text_parents:
                continue
            text_parents.append((ie.file_id, parent_id))
        lines = revision_tree.get_file(ie.file_id).readlines()
        repository.texts.add_lines(text_key, text_parents, lines)
    try:
        # install the inventory
        repository.add_inventory(rev.revision_id, inv, present_parents)
    except errors.RevisionAlreadyPresent:
        pass
    if signature is not None:
        repository.add_signature_text(rev.revision_id, signature)
    repository.add_revision(rev.revision_id, rev, inv)


class MetaDirRepository(Repository):
    """Repositories in the new meta-dir layout.

    :ivar _transport: Transport for access to repository control files,
        typically pointing to .bzr/repository.
    """

    def __init__(self, _format, a_bzrdir, control_files):
        super(MetaDirRepository, self).__init__(_format, a_bzrdir, control_files)
        self._transport = control_files._transport

    @needs_read_lock
    def is_shared(self):
        """Return True if this repository is flagged as a shared repository."""
        return self._transport.has('shared-storage')

    @needs_write_lock
    def set_make_working_trees(self, new_value):
        """Set the policy flag for making working trees when creating branches.

        This only applies to branches that use this repository.

        The default is 'True'.
        :param new_value: True to restore the default, False to disable making
                          working trees.
        """
        if new_value:
            try:
                self._transport.delete('no-working-trees')
            except errors.NoSuchFile:
                pass
        else:
            self._transport.put_bytes('no-working-trees', '',
                mode=self.bzrdir._get_file_mode())

    def make_working_trees(self):
        """Returns the policy for making working trees on new branches."""
        return not self._transport.has('no-working-trees')


class MetaDirVersionedFileRepository(MetaDirRepository):
    """Repositories in a meta-dir, that work via versioned file objects."""

    def __init__(self, _format, a_bzrdir, control_files):
        super(MetaDirVersionedFileRepository, self).__init__(_format, a_bzrdir,
            control_files)


class RepositoryFormatRegistry(registry.Registry):
    """Registry of RepositoryFormats."""

    def get(self, format_string):
        r = registry.Registry.get(self, format_string)
        if callable(r):
            r = r()
        return r


format_registry = RepositoryFormatRegistry()
"""Registry of formats, indexed by their identifying format string.

This can contain either format instances themselves, or classes/factories
that can be called to obtain one.
"""

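# Sketch (assumed usage): looking a format back up by the string stored in a
# repository's 'format' file; the registry may hold a class or factory,
# which get() resolves by calling it:
#
#   format = format_registry.get('Bazaar-NG Knit Repository Format 1')
#   print format.get_format_description()

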
#####################################################################
# Repository Formats

class RepositoryFormat(object):
    """A repository format.

    Formats provide three things:
     * an initialization routine to construct repository data on disk.
     * a format string which is used when the BzrDir supports versioned
       children.
     * an open routine which returns a Repository instance.

    There is one and only one Format subclass for each on-disk format.  But
    there can be one Repository subclass that is used for several different
    formats.  The _format attribute on a Repository instance can be used to
    determine the disk format.

    Formats are placed in a dict by their format string for reference
    during opening.  These should be subclasses of RepositoryFormat
    for consistency.

    Once a format is deprecated, just deprecate the initialize and open
    methods on the format class.  Do not deprecate the object, as the
    object will be created every system load.

    Common instance attributes:
    _matchingbzrdir - the bzrdir format that the repository format was
    originally written to work with. This can be used if manually
    constructing a bzrdir and repository, or more commonly for test suite
    parameterization.
    """

    # Set to True or False in derived classes. True indicates that the format
    # supports ghosts gracefully.
    supports_ghosts = None
    # Can this repository be given external locations to look up additional
    # data.  Set to True or False in derived classes.
    supports_external_lookups = None

return "<%s>" % self.__class__.__name__
2151
def __eq__(self, other):
2152
# format objects are generally stateless
2153
return isinstance(other, self.__class__)
2155
def __ne__(self, other):
2156
return not self == other
2159
def find_format(klass, a_bzrdir):
2160
"""Return the format for the repository object in a_bzrdir.
2162
This is used by bzr native formats that have a "format" file in
2163
the repository. Other methods may be used by different types of
2167
transport = a_bzrdir.get_repository_transport(None)
2168
format_string = transport.get("format").read()
2169
return format_registry.get(format_string)
2170
except errors.NoSuchFile:
2171
raise errors.NoRepositoryPresent(a_bzrdir)
2173
raise errors.UnknownFormatError(format=format_string,
2177
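    # Sketch (assumed usage): probing an existing control directory for its
    # repository format, assuming `a_bzrdir` was opened elsewhere:
    #
    #   try:
    #       format = RepositoryFormat.find_format(a_bzrdir)
    #   except errors.NoRepositoryPresent:
    #       ...  # no repository lives under this bzrdir
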
    @classmethod
    def register_format(klass, format):
        format_registry.register(format.get_format_string(), format)

    @classmethod
    def unregister_format(klass, format):
        format_registry.remove(format.get_format_string())

    @classmethod
    def get_default_format(klass):
        """Return the current default format."""
        from bzrlib import bzrdir
        return bzrdir.format_registry.make_bzrdir('default').repository_format

    def get_format_string(self):
        """Return the ASCII format string that identifies this format.

        Note that in pre format ?? repositories the format string is
        not permitted nor written to disk.
        """
        raise NotImplementedError(self.get_format_string)

    def get_format_description(self):
        """Return the short description for this format."""
        raise NotImplementedError(self.get_format_description)

    # TODO: this shouldn't be in the base class, it's specific to things that
    # use weaves or knits -- mbp 20070207
    def _get_versioned_file_store(self,
                                  transport,
                                  control_files,
                                  name,
                                  prefixed=True,
                                  versionedfile_class=None,
                                  versionedfile_kwargs={},
                                  escaped=False):
        if versionedfile_class is None:
            versionedfile_class = self._versionedfile_class
        weave_transport = control_files._transport.clone(name)
        dir_mode = control_files._dir_mode
        file_mode = control_files._file_mode
        return VersionedFileStore(weave_transport, prefixed=prefixed,
                                  dir_mode=dir_mode,
                                  file_mode=file_mode,
                                  versionedfile_class=versionedfile_class,
                                  versionedfile_kwargs=versionedfile_kwargs,
                                  escaped=escaped)

    def initialize(self, a_bzrdir, shared=False):
        """Initialize a repository of this format in a_bzrdir.

        :param a_bzrdir: The bzrdir to put the new repository in.
        :param shared: The repository should be initialized as a sharable one.
        :returns: The new repository object.

        This may raise UninitializableFormat if shared repositories are not
        compatible with a_bzrdir.
        """
        raise NotImplementedError(self.initialize)

    def is_supported(self):
        """Is this format supported?

        Supported formats must be initializable and openable.
        Unsupported formats may not support initialization or committing or
        some other features depending on the reason for not being supported.
        """
        return True

    def check_conversion_target(self, target_format):
        raise NotImplementedError(self.check_conversion_target)

    def open(self, a_bzrdir, _found=False):
        """Return an instance of this format for the bzrdir a_bzrdir.

        _found is a private parameter, do not use it.
        """
        raise NotImplementedError(self.open)


class MetaDirRepositoryFormat(RepositoryFormat):
    """Common base class for the new repositories using the metadir layout."""

    rich_root_data = False
    supports_tree_reference = False
    supports_external_lookups = False

    @property
    def _matchingbzrdir(self):
        matching = bzrdir.BzrDirMetaFormat1()
        matching.repository_format = self
        return matching

    def __init__(self):
        super(MetaDirRepositoryFormat, self).__init__()

    def _create_control_files(self, a_bzrdir):
        """Create the required files and the initial control_files object."""
        # FIXME: RBC 20060125 don't peek under the covers
        # NB: no need to escape relative paths that are url safe.
        repository_transport = a_bzrdir.get_repository_transport(self)
        control_files = lockable_files.LockableFiles(repository_transport,
                                'lock', lockdir.LockDir)
        control_files.create_lock()
        return control_files

    def _upload_blank_content(self, a_bzrdir, dirs, files, utf8_files, shared):
        """Upload the initial blank content."""
        control_files = self._create_control_files(a_bzrdir)
        control_files.lock_write()
        transport = control_files._transport
        if shared == True:
            utf8_files += [('shared-storage', '')]
        try:
            transport.mkdir_multi(dirs, mode=a_bzrdir._get_dir_mode())
            for (filename, content_stream) in files:
                transport.put_file(filename, content_stream,
                    mode=a_bzrdir._get_file_mode())
            for (filename, content_bytes) in utf8_files:
                transport.put_bytes_non_atomic(filename, content_bytes,
                    mode=a_bzrdir._get_file_mode())
        finally:
            control_files.unlock()


# formats which have no format string are not discoverable
# and not independently creatable, so are not registered.  They're
# all in bzrlib.repofmt.weaverepo now.  When an instance of one of these is
# needed, it's constructed directly by the BzrDir.  Non-native formats where
# the repository is not separately opened are similar.

format_registry.register_lazy(
    'Bazaar-NG Repository format 7',
    'bzrlib.repofmt.weaverepo',
    'RepositoryFormat7'
    )

format_registry.register_lazy(
    'Bazaar-NG Knit Repository Format 1',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit1',
    )

format_registry.register_lazy(
    'Bazaar Knit Repository Format 3 (bzr 0.15)\n',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit3',
    )

format_registry.register_lazy(
    'Bazaar Knit Repository Format 4 (bzr 1.0)\n',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit4',
    )

# Pack-based formats. There is one format for pre-subtrees, and one for
# post-subtrees to allow ease of testing.
# NOTE: These are experimental in 0.92. Stable in 1.0 and above
format_registry.register_lazy(
    'Bazaar pack repository format 1 (needs bzr 0.92)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack1',
    )
format_registry.register_lazy(
    'Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack3',
    )
format_registry.register_lazy(
    'Bazaar pack repository format 1 with rich root (needs bzr 1.0)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack4',
    )
format_registry.register_lazy(
    'Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack5',
    )
format_registry.register_lazy(
    'Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack5RichRoot',
    )
format_registry.register_lazy(
    'Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack5RichRootBroken',
    )
format_registry.register_lazy(
    'Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack6',
    )
format_registry.register_lazy(
    'Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack6RichRoot',
    )

# Development formats.
# 1.7->1.8 go below here
format_registry.register_lazy(
    "Bazaar development format 2 (needs bzr.dev from before 1.8)\n",
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatPackDevelopment2',
    )
format_registry.register_lazy(
    ("Bazaar development format 2 with subtree support "
        "(needs bzr.dev from before 1.8)\n"),
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatPackDevelopment2Subtree',
    )


class InterRepository(InterObject):
    """This class represents operations taking place between two repositories.

    Its instances have methods like copy_content and fetch, and contain
    references to the source and target repositories these operations can be
    carried out on.

    Often we will provide convenience methods on 'repository' which carry out
    operations with another repository - they will always forward to
    InterRepository.get(other).method_name(parameters).
    """

    _walk_to_common_revisions_batch_size = 1
    _optimisers = []
    """The available optimised InterRepository types."""

    def __init__(self, source, target):
        InterObject.__init__(self, source, target)
        # These two attributes may be overridden by e.g. InterOtherToRemote to
        # provide a faster implementation.
        self.target_get_graph = self.target.get_graph
        self.target_get_parent_map = self.target.get_parent_map

    def copy_content(self, revision_id=None):
        raise NotImplementedError(self.copy_content)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """Fetch the content required to construct revision_id.

        The content is copied from self.source to self.target.

        :param revision_id: if None all content is copied, if NULL_REVISION
                            no content is copied.
        :param pb: optional progress bar to use for progress reports. If not
                   provided a default one will be created.
        :returns: (copied_revision_count, failures).
        """
        # Normally we should find a specific InterRepository subclass to do
        # the fetch; if nothing else then at least InterSameDataRepository.
        # If none of them is suitable it looks like fetching is not possible;
        # we try to give a good message why.  _assert_same_model will probably
        # give a helpful message; otherwise a generic one.
        self._assert_same_model(self.source, self.target)
        raise errors.IncompatibleRepositories(self.source, self.target,
            "no suitable InterRepository found")

    def _walk_to_common_revisions(self, revision_ids):
        """Walk out from revision_ids in source to revisions target has.

        :param revision_ids: The start point for the search.
        :return: A set of revision ids.
        """
        target_graph = self.target_get_graph()
        revision_ids = frozenset(revision_ids)
        # Fast path for the case where all the revisions are already in the
        # target repo.
        # (Although this does incur an extra round trip for the
        # fairly common case where the target doesn't already have the
        # revision we're pushing.)
        if set(target_graph.get_parent_map(revision_ids)) == revision_ids:
            return graph.SearchResult(revision_ids, set(), 0, set())
        missing_revs = set()
        source_graph = self.source.get_graph()
        # ensure we don't pay silly lookup costs.
        searcher = source_graph._make_breadth_first_searcher(revision_ids)
        null_set = frozenset([_mod_revision.NULL_REVISION])
        searcher_exhausted = False
        while True:
            next_revs = set()
            ghosts = set()
            # Iterate the searcher until we have enough next_revs
            while len(next_revs) < self._walk_to_common_revisions_batch_size:
                try:
                    next_revs_part, ghosts_part = searcher.next_with_ghosts()
                    next_revs.update(next_revs_part)
                    ghosts.update(ghosts_part)
                except StopIteration:
                    searcher_exhausted = True
                    break
            # If there are ghosts in the source graph, and the caller asked
            # for them, make sure that they are present in the target.
            # We don't care about other ghosts as we can't fetch them and
            # haven't been asked to.
            ghosts_to_check = set(revision_ids.intersection(ghosts))
            revs_to_get = set(next_revs).union(ghosts_to_check)
            if revs_to_get:
                have_revs = set(target_graph.get_parent_map(revs_to_get))
                # we always have NULL_REVISION present.
                have_revs = have_revs.union(null_set)
                # Check if the target is missing any ghosts we need.
                ghosts_to_check.difference_update(have_revs)
                if ghosts_to_check:
                    # One of the caller's revision_ids is a ghost in both the
                    # source and the target.
                    raise errors.NoSuchRevision(
                        self.source, ghosts_to_check.pop())
                missing_revs.update(next_revs - have_revs)
                # Because we may have walked past the original stop point,
                # make sure everything is stopped
                stop_revs = searcher.find_seen_ancestors(have_revs)
                searcher.stop_searching_any(stop_revs)
            if searcher_exhausted:
                break
        return searcher.get_result()

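    # Editorial note (sketch, not original text): the loop above queries the
    # target's parent map in batches of _walk_to_common_revisions_batch_size,
    # so subclasses that talk to a remote target (InterOtherToRemote and
    # InterPackToRemotePack below) raise the batch size from 1 to 50, trading
    # a little over-fetching for far fewer round trips.
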
    @deprecated_method(one_two)
    @needs_read_lock
    def missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """Return the revision ids that source has that target does not.

        These are returned in topological order.

        :param revision_id: only return revision ids included by this
                            revision_id.
        :param find_ghosts: If True find missing revisions in deep history
            rather than just finding the surface difference.
        """
        return list(self.search_missing_revision_ids(
            revision_id, find_ghosts).get_keys())

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """Return the revision ids that source has that target does not.

        :param revision_id: only return revision ids included by this
                            revision_id.
        :param find_ghosts: If True find missing revisions in deep history
            rather than just finding the surface difference.
        :return: A bzrlib.graph.SearchResult.
        """
        # stop searching at found target revisions.
        if not find_ghosts and revision_id is not None:
            return self._walk_to_common_revisions([revision_id])
        # generic, possibly worst case, slow code path.
        target_ids = set(self.target.all_revision_ids())
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        result_set = set(source_ids).difference(target_ids)
        return self.source.revision_ids_to_search_result(result_set)

    @staticmethod
    def _same_model(source, target):
        """True if source and target have the same data representation.

        Note: this is always called on the base class; overriding it in a
        subclass will have no effect.
        """
        try:
            InterRepository._assert_same_model(source, target)
            return True
        except errors.IncompatibleRepositories, e:
            return False

    @staticmethod
    def _assert_same_model(source, target):
        """Raise an exception if two repositories do not use the same model.
        """
        if source.supports_rich_root() != target.supports_rich_root():
            raise errors.IncompatibleRepositories(source, target,
                "different rich-root support")
        if source._serializer != target._serializer:
            raise errors.IncompatibleRepositories(source, target,
                "different serializers")


class InterSameDataRepository(InterRepository):
    """Code for converting between repositories that represent the same data.

    Data format and model must match for this to work.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        """Repository format for testing with.

        InterSameData can pull from subtree to subtree and from non-subtree to
        non-subtree, so we test this with the richest repository format.
        """
        from bzrlib.repofmt import knitrepo
        return knitrepo.RepositoryFormatKnit3()

    @staticmethod
    def is_compatible(source, target):
        return InterRepository._same_model(source, target)

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This copies both the repository's revision data, and configuration
        information such as the make_working_trees setting.

        This is a destructive operation! Do not use it on existing
        repositories.

        :param revision_id: Only copy the content needed to construct
                            revision_id and its parents.
        """
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except NotImplementedError:
            pass
        # but don't bother fetching if we have the needed data now.
        if (revision_id not in (None, _mod_revision.NULL_REVISION) and
            self.target.has_revision(revision_id)):
            return
        self.target.fetch(self.source, revision_id=revision_id)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import RepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target,
               self.target._format)
        f = RepoFetcher(to_repository=self.target,
                        from_repository=self.source,
                        last_revision=revision_id,
                        pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions


class InterWeaveRepo(InterSameDataRepository):
    """Optimised code paths between Weave based repositories.

    This should be in bzrlib/repofmt/weaverepo.py but we have not yet
    implemented lazy inter-object optimisation.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import weaverepo
        return weaverepo.RepositoryFormat7()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Weave formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.weaverepo import (
            RepositoryFormat5,
            RepositoryFormat6,
            RepositoryFormat7,
            )
        try:
            return (isinstance(source._format, (RepositoryFormat5,
                                                RepositoryFormat6,
                                                RepositoryFormat7)) and
                    isinstance(target._format, (RepositoryFormat5,
                                                RepositoryFormat6,
                                                RepositoryFormat7)))
        except AttributeError:
            return False

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """See InterRepository.copy_content()."""
        # weave specific optimised path:
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except (errors.RepositoryUpgradeRequired, NotImplementedError):
            # (NotImplementedError is the exception class; the original code
            # caught the NotImplemented constant, which never matches.)
            pass
        # FIXME do not peek!
        if self.source._transport.listable():
            pb = ui.ui_factory.nested_progress_bar()
            try:
                self.target.texts.insert_record_stream(
                    self.source.texts.get_record_stream(
                        self.source.texts.keys(), 'topological', False))
                pb.update('copying inventory', 0, 1)
                self.target.inventories.insert_record_stream(
                    self.source.inventories.get_record_stream(
                        self.source.inventories.keys(), 'topological', False))
                self.target.signatures.insert_record_stream(
                    self.source.signatures.get_record_stream(
                        self.source.signatures.keys(),
                        'unordered', True))
                self.target.revisions.insert_record_stream(
                    self.source.revisions.get_record_stream(
                        self.source.revisions.keys(),
                        'topological', True))
            finally:
                pb.finished()
        else:
            self.target.fetch(self.source, revision_id=revision_id)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import RepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        f = RepoFetcher(to_repository=self.target,
                        from_repository=self.source,
                        last_revision=revision_id,
                        pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids()."""
        # we want all revisions to satisfy revision_id in source.
        # but we don't want to stat every file here and there.
        # we want then, all revisions other needs to satisfy revision_id
        # checked, but not those that we have locally.
        # so the first thing is to get a subset of the revisions to
        # satisfy revision_id in source, and then eliminate those that
        # we do already have.
        # this is slow on high latency connection to self, but as this
        # disk format scales terribly for push anyway due to rewriting
        # inventory.weave, this is considered acceptable.
        # - RBC 20060209
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source._all_possible_ids()
        source_ids_set = set(source_ids)
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target._all_possible_ids())
        possibly_present_revisions = target_ids.intersection(source_ids_set)
        actually_present_revisions = set(
            self.target._eliminate_revisions_not_present(possibly_present_revisions))
        required_revisions = source_ids_set.difference(actually_present_revisions)
        if revision_id is not None:
            # we used get_ancestry to determine source_ids then we are
            # assured all revisions referenced are present as they are
            # installed in topological order. And the tip revision was
            # validated by get_ancestry.
            result_set = required_revisions
        else:
            # if we just grabbed the possibly available ids, then
            # we only have an estimate of whats available and need to validate
            # that against the revision records.
            result_set = set(
                self.source._eliminate_revisions_not_present(required_revisions))
        return self.source.revision_ids_to_search_result(result_set)


class InterKnitRepo(InterSameDataRepository):
    """Optimised code paths between Knit based repositories."""

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import knitrepo
        return knitrepo.RepositoryFormatKnit1()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Knit formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.knitrepo import RepositoryFormatKnit
        try:
            are_knits = (isinstance(source._format, RepositoryFormatKnit) and
                         isinstance(target._format, RepositoryFormatKnit))
        except AttributeError:
            return False
        return are_knits and InterRepository._same_model(source, target)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import RepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        f = RepoFetcher(to_repository=self.target,
                        from_repository=self.source,
                        last_revision=revision_id,
                        pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids()."""
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            if source_ids[0] is not None:
                raise AssertionError()
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        source_ids_set = set(source_ids)
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target.all_revision_ids())
        possibly_present_revisions = target_ids.intersection(source_ids_set)
        actually_present_revisions = set(
            self.target._eliminate_revisions_not_present(possibly_present_revisions))
        required_revisions = source_ids_set.difference(actually_present_revisions)
        if revision_id is not None:
            # we used get_ancestry to determine source_ids then we are
            # assured all revisions referenced are present as they are
            # installed in topological order. And the tip revision was
            # validated by get_ancestry.
            result_set = required_revisions
        else:
            # if we just grabbed the possibly available ids, then
            # we only have an estimate of whats available and need to validate
            # that against the revision records.
            result_set = set(
                self.source._eliminate_revisions_not_present(required_revisions))
        return self.source.revision_ids_to_search_result(result_set)


class InterPackRepo(InterSameDataRepository):
    """Optimised code paths between Pack based repositories."""

    @classmethod
    def _get_repo_format_to_test(self):
        from bzrlib.repofmt import pack_repo
        return pack_repo.RepositoryFormatKnitPack1()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Pack formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.pack_repo import RepositoryFormatPack
        try:
            are_packs = (isinstance(source._format, RepositoryFormatPack) and
                         isinstance(target._format, RepositoryFormatPack))
        except AttributeError:
            return False
        return are_packs and InterRepository._same_model(source, target)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        if (len(self.source._fallback_repositories) > 0 or
            len(self.target._fallback_repositories) > 0):
            # The pack layer is not aware of fallback repositories, so when
            # fetching from a stacked repository or into a stacked repository
            # we use the generic fetch logic which uses the VersionedFiles
            # attributes on repository.
            from bzrlib.fetch import RepoFetcher
            fetcher = RepoFetcher(self.target, self.source, revision_id,
                                  pb, find_ghosts)
            return fetcher.count_copied, fetcher.failed_revisions
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        self.count_copied = 0
        if revision_id is None:
            # TODO:
            # everything to do - use pack logic
            # to fetch from all packs to one without
            # inventory parsing etc, IFF nothing to be copied is in the target.
            # till then:
            source_revision_ids = frozenset(self.source.all_revision_ids())
            revision_ids = source_revision_ids - \
                frozenset(self.target_get_parent_map(source_revision_ids))
            revision_keys = [(revid,) for revid in revision_ids]
            target_pack_collection = self._get_target_pack_collection()
            index = target_pack_collection.revision_index.combined_index
            present_revision_ids = set(item[1][0] for item in
                index.iter_entries(revision_keys))
            revision_ids = set(revision_ids) - present_revision_ids
            # implementing the TODO will involve:
            # - detecting when all of a pack is selected
            # - avoiding as much as possible pre-selection, so the
            # more-core routines such as create_pack_from_packs can filter in
            # a just-in-time fashion. (though having a HEADS list on a
            # repository might make this a lot easier, because we could
            # sensibly detect 'new revisions' without doing a full index scan.
        elif _mod_revision.is_null(revision_id):
            # nothing to do:
            return (0, [])
        else:
            try:
                revision_ids = self.search_missing_revision_ids(revision_id,
                    find_ghosts=find_ghosts).get_keys()
            except errors.NoSuchRevision:
                raise errors.InstallFailed([revision_id])
            if len(revision_ids) == 0:
                return (0, [])
        return self._pack(self.source, self.target, revision_ids)

    def _pack(self, source, target, revision_ids):
        from bzrlib.repofmt.pack_repo import Packer
        target_pack_collection = self._get_target_pack_collection()
        packs = source._pack_collection.all_packs()
        pack = Packer(target_pack_collection, packs, '.fetch',
                      revision_ids).pack()
        if pack is not None:
            target_pack_collection._save_pack_names()
            copied_revs = pack.get_revision_count()
            # Trigger an autopack. This may duplicate effort as we've just
            # done a pack creation, but for now it is simpler to think about
            # as 'upload data, then repack if needed'.
            self._autopack()
            return (copied_revs, [])
        else:
            return (0, [])

    def _autopack(self):
        self.target._pack_collection.autopack()

    def _get_target_pack_collection(self):
        return self.target._pack_collection

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.missing_revision_ids().

        :param find_ghosts: Find ghosts throughout the ancestry of
            revision_id.
        """
        if not find_ghosts and revision_id is not None:
            return self._walk_to_common_revisions([revision_id])
        elif revision_id is not None:
            # Find ghosts: search for revisions pointing from one repository
            # to the other, and vice versa, anywhere in the history of
            # revision_id.
            graph = self.target_get_graph(other_repository=self.source)
            searcher = graph._make_breadth_first_searcher([revision_id])
            found_ids = set()
            while True:
                try:
                    next_revs, ghosts = searcher.next_with_ghosts()
                except StopIteration:
                    break
                if revision_id in ghosts:
                    raise errors.NoSuchRevision(self.source, revision_id)
                found_ids.update(next_revs)
                found_ids.update(ghosts)
            found_ids = frozenset(found_ids)
            # Double query here: should be able to avoid this by changing the
            # graph api further.
            result_set = found_ids - frozenset(
                self.target_get_parent_map(found_ids))
        else:
            source_ids = self.source.all_revision_ids()
            # source_ids is the worst possible case we may need to pull.
            # now we want to filter source_ids against what we actually
            # have in target, but don't try to check for existence where we
            # know we do not have a revision as that would be pointless.
            target_ids = set(self.target.all_revision_ids())
            result_set = set(source_ids).difference(target_ids)
        return self.source.revision_ids_to_search_result(result_set)


class InterModel1and2(InterRepository):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        if not source.supports_rich_root() and target.supports_rich_root():
            return True
        else:
            return False

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import Model1toKnit2Fetcher
        f = Model1toKnit2Fetcher(to_repository=self.target,
                                 from_repository=self.source,
                                 last_revision=revision_id,
                                 pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This is a destructive operation! Do not use it on existing
        repositories.

        :param revision_id: Only copy the content needed to construct
                            revision_id and its parents.
        """
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except NotImplementedError:
            pass
        # but don't bother fetching if we have the needed data now.
        if (revision_id not in (None, _mod_revision.NULL_REVISION) and
            self.target.has_revision(revision_id)):
            return
        self.target.fetch(self.source, revision_id=revision_id)


class InterKnit1and2(InterKnitRepo):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with Knit1 source and Knit3 target"""
        try:
            from bzrlib.repofmt.knitrepo import (
                RepositoryFormatKnit1,
                RepositoryFormatKnit3,
                )
            from bzrlib.repofmt.pack_repo import (
                RepositoryFormatKnitPack1,
                RepositoryFormatKnitPack3,
                RepositoryFormatKnitPack4,
                RepositoryFormatKnitPack5,
                RepositoryFormatKnitPack5RichRoot,
                RepositoryFormatKnitPack6,
                RepositoryFormatKnitPack6RichRoot,
                RepositoryFormatPackDevelopment2,
                RepositoryFormatPackDevelopment2Subtree,
                )
            norichroot = (
                RepositoryFormatKnit1,            # no rr, no subtree
                RepositoryFormatKnitPack1,        # no rr, no subtree
                RepositoryFormatPackDevelopment2, # no rr, no subtree
                RepositoryFormatKnitPack5,        # no rr, no subtree
                RepositoryFormatKnitPack6,        # no rr, no subtree
                )
            richroot = (
                RepositoryFormatKnit3,            # rr, subtree
                RepositoryFormatKnitPack3,        # rr, subtree
                RepositoryFormatKnitPack4,        # rr, no subtree
                RepositoryFormatKnitPack5RichRoot,       # rr, no subtree
                RepositoryFormatKnitPack6RichRoot,       # rr, no subtree
                RepositoryFormatPackDevelopment2Subtree, # rr, subtree
                )
            for format in norichroot:
                if format.rich_root_data:
                    raise AssertionError('Format %s is a rich-root format'
                        ' but is included in the non-rich-root list'
                        % format)
            for format in richroot:
                if not format.rich_root_data:
                    raise AssertionError('Format %s is not a rich-root format'
                        ' but is included in the rich-root list'
                        % format)
            # TODO: One alternative is to just check format.rich_root_data,
            #       instead of keeping membership lists. However, the formats
            #       *also* have to use the same 'Knit' style of storage
            #       (line-deltas, fulltexts, etc.)
            return (isinstance(source._format, norichroot) and
                    isinstance(target._format, richroot))
        except AttributeError:
            return False

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        from bzrlib.fetch import Knit1to2Fetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target,
               self.target._format)
        f = Knit1to2Fetcher(to_repository=self.target,
                            from_repository=self.source,
                            last_revision=revision_id,
                            pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions


class InterDifferingSerializer(InterKnitRepo):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with Knit2 source and Knit3 target"""
        if source.supports_rich_root() != target.supports_rich_root():
            return False
        # Ideally, we'd support fetching if the source had no tree references
        # even if it supported them...
        # (Note: the original code passed dotted names to getattr(), which
        # never resolves an attribute; test the format objects directly.)
        if (getattr(source._format, 'supports_tree_reference', False) and
            not getattr(target._format, 'supports_tree_reference', False)):
            return False
        return True

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        revision_ids = self.target.search_missing_revision_ids(self.source,
            revision_id, find_ghosts=find_ghosts).get_keys()
        revision_ids = tsort.topo_sort(
            self.source.get_graph().get_parent_map(revision_ids))
        def revisions_iterator():
            rev_ids = list(revision_ids)
            for offset in xrange(0, len(rev_ids), 100):
                current_revids = rev_ids[offset:offset+100]
                revisions = self.source.get_revisions(current_revids)
                trees = self.source.revision_trees(current_revids)
                keys = [(r,) for r in current_revids]
                sig_stream = self.source.signatures.get_record_stream(
                    keys, 'unordered', True)
                sigs = {}
                for record in versionedfile.filter_absent(sig_stream):
                    sigs[record.key[0]] = record.get_bytes_as('fulltext')
                for rev, tree in zip(revisions, trees):
                    yield rev, tree, sigs.get(rev.revision_id, None)
        if pb is None:
            my_pb = ui.ui_factory.nested_progress_bar()
            pb = my_pb
        else:
            my_pb = None
        try:
            install_revisions(self.target, revisions_iterator(),
                              len(revision_ids), pb)
        finally:
            if my_pb is not None:
                my_pb.finished()
        return len(revision_ids), 0


class InterOtherToRemote(InterRepository):
    """An InterRepository that simply delegates to the 'real' InterRepository
    calculated for (source, target._real_repository).
    """

    _walk_to_common_revisions_batch_size = 50

    def __init__(self, source, target):
        InterRepository.__init__(self, source, target)
        self._real_inter = None

    @staticmethod
    def is_compatible(source, target):
        if isinstance(target, remote.RemoteRepository):
            return True
        return False

    def _ensure_real_inter(self):
        if self._real_inter is None:
            self.target._ensure_real()
            real_target = self.target._real_repository
            self._real_inter = InterRepository.get(self.source, real_target)
            # Make _real_inter use the RemoteRepository for get_parent_map.
            self._real_inter.target_get_graph = self.target.get_graph
            self._real_inter.target_get_parent_map = self.target.get_parent_map

    def copy_content(self, revision_id=None):
        self._ensure_real_inter()
        self._real_inter.copy_content(revision_id=revision_id)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        self._ensure_real_inter()
        return self._real_inter.fetch(revision_id=revision_id, pb=pb,
            find_ghosts=find_ghosts)

    @classmethod
    def _get_repo_format_to_test(self):
        return None


class InterRemoteToOther(InterRepository):

    def __init__(self, source, target):
        InterRepository.__init__(self, source, target)
        self._real_inter = None

    @staticmethod
    def is_compatible(source, target):
        if not isinstance(source, remote.RemoteRepository):
            return False
        # Is source's model compatible with target's model?
        source._ensure_real()
        real_source = source._real_repository
        if isinstance(real_source, remote.RemoteRepository):
            raise NotImplementedError(
                "We don't support remote repos backed by remote repos yet.")
        return InterRepository._same_model(real_source, target)

    def _ensure_real_inter(self):
        if self._real_inter is None:
            self.source._ensure_real()
            real_source = self.source._real_repository
            self._real_inter = InterRepository.get(real_source, self.target)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        self._ensure_real_inter()
        return self._real_inter.fetch(revision_id=revision_id, pb=pb,
            find_ghosts=find_ghosts)

    def copy_content(self, revision_id=None):
        self._ensure_real_inter()
        self._real_inter.copy_content(revision_id=revision_id)

    @classmethod
    def _get_repo_format_to_test(self):
        return None


class InterPackToRemotePack(InterPackRepo):
    """A specialisation of InterPackRepo for a target that is a
    RemoteRepository.

    This will use the get_parent_map RPC rather than plain readvs, and also
    uses an RPC for autopacking.
    """

    _walk_to_common_revisions_batch_size = 50

    @staticmethod
    def is_compatible(source, target):
        from bzrlib.repofmt.pack_repo import RepositoryFormatPack
        if isinstance(source._format, RepositoryFormatPack):
            if isinstance(target, remote.RemoteRepository):
                target._ensure_real()
                if isinstance(target._real_repository._format,
                              RepositoryFormatPack):
                    if InterRepository._same_model(source, target):
                        return True
        return False

    def _autopack(self):
        self.target.autopack()

    def _get_target_pack_collection(self):
        return self.target._real_repository._pack_collection

    @classmethod
    def _get_repo_format_to_test(self):
        return None


InterRepository.register_optimiser(InterDifferingSerializer)
InterRepository.register_optimiser(InterSameDataRepository)
InterRepository.register_optimiser(InterWeaveRepo)
InterRepository.register_optimiser(InterKnitRepo)
InterRepository.register_optimiser(InterModel1and2)
InterRepository.register_optimiser(InterKnit1and2)
InterRepository.register_optimiser(InterPackRepo)
InterRepository.register_optimiser(InterOtherToRemote)
InterRepository.register_optimiser(InterRemoteToOther)
InterRepository.register_optimiser(InterPackToRemotePack)

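# Sketch (assumed usage): InterObject.get walks the registered optimisers and
# returns an instance of the first whose is_compatible(source, target) is
# True, falling back to the generic InterRepository otherwise:
#
#   inter = InterRepository.get(source_repo, target_repo)
#   inter.fetch(revision_id=rev_id)

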


class CopyConverter(object):
    """A repository conversion tool which just performs a copy of the content.

    This is slow but quite reliable.
    """

    def __init__(self, target_format):
        """Create a CopyConverter.

        :param target_format: The format the resulting repository should be.
        """
        self.target_format = target_format

    def convert(self, repo, pb):
        """Perform the conversion of repo, giving feedback via pb.

        :param repo: The disk object to convert.
        :param pb: a progress bar to use for progress information.
        """
        self.pb = pb
        self.count = 0
        self.total = 4
        # this is only useful with metadir layouts - separated repo content.
        # trigger an assertion if not such
        repo._format.get_format_string()
        self.repo_dir = repo.bzrdir
        self.step('Moving repository to repository.backup')
        self.repo_dir.transport.move('repository', 'repository.backup')
        backup_transport = self.repo_dir.transport.clone('repository.backup')
        repo._format.check_conversion_target(self.target_format)
        self.source_repo = repo._format.open(self.repo_dir,
            _found=True,
            _override_transport=backup_transport)
        self.step('Creating new repository')
        converted = self.target_format.initialize(self.repo_dir,
            self.source_repo.is_shared())
        converted.lock_write()
        try:
            self.step('Copying content into repository.')
            self.source_repo.copy_content_into(converted)
        finally:
            converted.unlock()
        self.step('Deleting old repository content.')
        self.repo_dir.transport.delete_tree('repository.backup')
        self.pb.note('repository converted')

    def step(self, message):
        """Update the pb by a step."""
        self.count += 1
        self.pb.update(message, self.count, self.total)
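
# Illustrative use (a sketch; assumes bzrlib.ui is available for the
# progress bar -- the converter itself only needs an object providing
# update() and note()):
#
#   converter = CopyConverter(target_format)
#   pb = ui.ui_factory.nested_progress_bar()
#   try:
#       converter.convert(repo, pb)
#   finally:
#       pb.finished()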


_unescape_map = {
    'apos':"'",
    'quot':'"',
    'amp':'&',
    'lt':'<',
    'gt':'>'
}


def _unescaper(match, _map=_unescape_map):
    code = match.group(1)
    try:
        return _map[code]
    except KeyError:
        if not code.startswith('#'):
            raise
        return unichr(int(code[1:])).encode('utf8')


_unescape_re = None


def _unescape_xml(data):
    """Unescape predefined XML entities in a string of data."""
    global _unescape_re
    if _unescape_re is None:
        _unescape_re = re.compile('\&([^;]*);')
    return _unescape_re.sub(_unescaper, data)
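
# Illustrative behaviour:
#
#   _unescape_xml('a &amp; b &lt; c')  # => 'a & b < c'
#   _unescape_xml('&#169;')            # => '\xc2\xa9' (UTF-8 for u'\xa9')
#
# Entities outside the predefined five and numeric character references
# raise KeyError from _unescaper.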


class _VersionedFileChecker(object):

    def __init__(self, repository):
        self.repository = repository
        self.text_index = self.repository._generate_text_key_index()

    def calculate_file_version_parents(self, text_key):
        """Calculate the correct parents for a file version according to
        the inventories.
        """
        parent_keys = self.text_index[text_key]
        if parent_keys == [_mod_revision.NULL_REVISION]:
            return ()
        return tuple(parent_keys)
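
    # Illustrative: text keys are (file_id, revision_id) pairs, so e.g.
    #   checker.calculate_file_version_parents(('a-file-id', 'rev-2'))
    # returns whatever the index records for that key as a tuple, or ()
    # when only the null revision is recorded.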

    def check_file_version_parents(self, texts, progress_bar=None):
        """Check the parents stored in a versioned file are correct.

        It also detects file versions that are not referenced by their
        corresponding revision's inventory.

        :returns: A tuple of (wrong_parents, dangling_file_versions).
            wrong_parents is a dict mapping {revision_id: (stored_parents,
            correct_parents)} for each revision_id where the stored parents
            are not correct.  dangling_file_versions is a set of (file_id,
            revision_id) tuples for versions that are present in this
            versioned file, but not used by the corresponding inventory.
        """
        wrong_parents = {}
        self.file_ids = set([file_id for file_id, _ in
            self.text_index.iterkeys()])
        # text keys is now grouped by file_id
        n_weaves = len(self.file_ids)
        files_in_revisions = {}
        revisions_of_files = {}
        n_versions = len(self.text_index)
        if progress_bar is not None:
            progress_bar.update('loading text store', 0, n_versions)
        parent_map = self.repository.texts.get_parent_map(self.text_index)
        # On unlistable transports this could well be empty/error...
        text_keys = self.repository.texts.keys()
        unused_keys = frozenset(text_keys) - set(self.text_index)
        for num, key in enumerate(self.text_index.iterkeys()):
            if progress_bar is not None:
                progress_bar.update('checking text graph', num, n_versions)
            correct_parents = self.calculate_file_version_parents(key)
            try:
                knit_parents = parent_map[key]
            except errors.RevisionNotPresent:
                # Missing text!
                knit_parents = None
            if correct_parents != knit_parents:
                wrong_parents[key] = (knit_parents, correct_parents)
        return wrong_parents, unused_keys
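
# Illustrative use (a sketch):
#
#   checker = _VersionedFileChecker(repository)
#   wrong_parents, unused = checker.check_file_version_parents(
#       repository.texts, progress_bar=pb)
#   for key, (stored, correct) in wrong_parents.iteritems():
#       note('%r stored parents %r; correct parents %r'
#            % (key, stored, correct))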


def _old_get_graph(repository, revision_id):
    """DO NOT USE. That is all. I'm serious."""
    graph = repository.get_graph()
    revision_graph = dict(((key, value) for key, value in
        graph.iter_ancestry([revision_id]) if value is not None))
    return _strip_NULL_ghosts(revision_graph)


def _strip_NULL_ghosts(revision_graph):
    """Also don't use this. More compatibility code for unmigrated clients."""
    # Filter ghosts, and null:
    if _mod_revision.NULL_REVISION in revision_graph:
        del revision_graph[_mod_revision.NULL_REVISION]
    for key, parents in revision_graph.items():
        revision_graph[key] = tuple(parent for parent in parents if parent
            in revision_graph)
    return revision_graph
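
# Illustrative behaviour: the null revision and ghost parents are stripped,
# e.g.:
#
#   _strip_NULL_ghosts({'rev2': ('rev1', 'a-ghost'), 'rev1': ('null:',)})
#   # => {'rev2': ('rev1',), 'rev1': ()}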