1
# Copyright (C) 2005 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from copy import deepcopy
18
from cStringIO import StringIO
19
from unittest import TestSuite
20
import xml.sax.saxutils
23
import bzrlib.bzrdir as bzrdir
24
from bzrlib.decorators import needs_read_lock, needs_write_lock
25
import bzrlib.errors as errors
26
from bzrlib.errors import InvalidRevisionId
27
from bzrlib.lockable_files import LockableFiles
28
from bzrlib.osutils import safe_unicode
29
from bzrlib.revision import NULL_REVISION
30
from bzrlib.store import copy_all
31
from bzrlib.store.weave import WeaveStore
32
from bzrlib.store.text import TextStore
33
from bzrlib.symbol_versioning import *
34
from bzrlib.trace import mutter
35
from bzrlib.tree import RevisionTree
36
from bzrlib.testament import Testament
37
from bzrlib.tree import EmptyTree
41
class Repository(object):
42
"""Repository holding history for one or more branches.
44
The repository holds and retrieves historical information including
45
revisions and file history. It's normally accessed only by the Branch,
46
which views a particular line of development through that history.
48
The Repository builds on top of Stores and a Transport, which respectively
49
describe the disk data format and the way of accessing the (possibly
54
def all_revision_ids(self):
55
"""Returns a list of all the revision ids in the repository.
57
It would be nice to have this topologically sorted, but it's not yet.
59
possible_ids = self.get_inventory_weave().names()
61
for id in possible_ids:
62
if self.has_revision(id):
68
"""Construct the current default format repository in a_bzrdir."""
69
return RepositoryFormat.get_default_format().initialize(a_bzrdir)
71
def __init__(self, transport, branch_format, _format=None, a_bzrdir=None):
73
if transport is not None:
74
warn("Repository.__init__(..., transport=XXX): The transport parameter is "
75
"deprecated and was never in a supported release. Please use "
76
"bzrdir.open_repository() or bzrdir.open_branch().repository.",
79
self.control_files = LockableFiles(transport.clone(bzrlib.BZRDIR), 'README')
81
# TODO: clone into repository if needed
82
self.control_files = LockableFiles(a_bzrdir.get_repository_transport(None), 'README')
84
dir_mode = self.control_files._dir_mode
85
file_mode = self.control_files._file_mode
86
self._format = _format
87
self.bzrdir = a_bzrdir
89
def get_weave(name, prefixed=False):
91
name = safe_unicode(name)
94
relpath = self.control_files._escape(name)
95
weave_transport = self.control_files._transport.clone(relpath)
96
ws = WeaveStore(weave_transport, prefixed=prefixed,
99
if self.control_files._transport.should_cache():
100
ws.enable_cache = True
104
def get_store(name, compressed=True, prefixed=False):
105
# FIXME: This approach of assuming stores are all entirely compressed
106
# or entirely uncompressed is tidy, but breaks upgrade from
107
# some existing branches where there's a mixture; we probably
108
# still want the option to look for both.
110
name = safe_unicode(name)
113
relpath = self.control_files._escape(name)
114
store = TextStore(self.control_files._transport.clone(relpath),
115
prefixed=prefixed, compressed=compressed,
118
#if self._transport.should_cache():
119
# cache_path = os.path.join(self.cache_root, name)
120
# os.mkdir(cache_path)
121
# store = bzrlib.store.CachedStore(store, cache_path)
124
if branch_format is not None:
125
# circular dependencies:
126
from bzrlib.branch import (BzrBranchFormat4,
130
if isinstance(branch_format, BzrBranchFormat4):
131
self._format = RepositoryFormat4()
132
elif isinstance(branch_format, BzrBranchFormat5):
133
self._format = RepositoryFormat5()
134
elif isinstance(branch_format, BzrBranchFormat6):
135
self._format = RepositoryFormat6()
138
if isinstance(self._format, RepositoryFormat4):
139
self.inventory_store = get_store('inventory-store')
140
self.text_store = get_store('text-store')
141
self.revision_store = get_store('revision-store')
142
elif isinstance(self._format, RepositoryFormat5):
143
self.control_weaves = get_weave('')
144
self.weave_store = get_weave('weaves')
145
self.revision_store = get_store('revision-store', compressed=False)
146
elif isinstance(self._format, RepositoryFormat6):
147
self.control_weaves = get_weave('')
148
self.weave_store = get_weave('weaves', prefixed=True)
149
self.revision_store = get_store('revision-store', compressed=False,
151
elif isinstance(self._format, RepositoryFormat7):
152
self.control_weaves = get_weave('')
153
self.weave_store = get_weave('weaves', prefixed=True)
154
self.revision_store = get_store('revision-store', compressed=False,
156
self.revision_store.register_suffix('sig')
158
def lock_write(self):
    """Take a write lock on this repository.

    The lock is acquired by delegating to ``self.control_files.lock_write()``;
    nothing is returned.
    """
    self.control_files.lock_write()
162
self.control_files.lock_read()
166
"""Open the repository rooted at base.
168
For instance, if the repository is at URL/.bzr/repository,
169
Repository.open(URL) -> a Repository instance.
171
control = bzrdir.BzrDir.open(base)
172
return control.open_repository()
174
def push_stores(self, to, revision=NULL_REVISION):
175
"""FIXME: document and find a consistent name with other classes."""
176
if (not isinstance(self._format, RepositoryFormat4) or
177
self._format != to._format):
178
from bzrlib.fetch import RepoFetcher
179
mutter("Using fetch logic to push between %s(%s) and %s(%s)",
180
self, self._format, to, to._format)
181
RepoFetcher(to_repository=to, from_repository=self,
182
last_revision=revision)
185
# format 4 to format 4 logic only.
186
store_pairs = ((self.text_store, to.text_store),
187
(self.inventory_store, to.inventory_store),
188
(self.revision_store, to.revision_store))
190
for from_store, to_store in store_pairs:
191
copy_all(from_store, to_store)
192
except UnlistableStore:
193
raise UnlistableBranch(from_store)
196
self.control_files.unlock()
199
def clone(self, a_bzrdir):
200
"""Clone this repository into a_bzrdir using the current format.
202
Currently no check is made that the format of this repository and
203
the bzrdir format are compatible. FIXME RBC 20060201.
205
result = self._format.initialize(a_bzrdir)
210
def copy(self, destination):
211
destination.lock_write()
213
destination.control_weaves.copy_multi(self.control_weaves,
215
copy_all(self.weave_store, destination.weave_store)
216
copy_all(self.revision_store, destination.revision_store)
220
def has_revision(self, revision_id):
    """Return True if this repository has a copy of the revision.

    This does not necessarily imply the revision is merged
    or on the mainline.

    :param revision_id: The revision id to look for.  ``None`` is always
        reported as present, without consulting the revision store.
    :return: True for ``None``; otherwise whatever
        ``self.revision_store.has_id(revision_id)`` returns.
    """
    if revision_id is None:
        return True
    return self.revision_store.has_id(revision_id)
229
def get_revision_xml_file(self, revision_id):
    """Return XML file object for revision object.

    :param revision_id: A non-empty string revision id.
    :return: Whatever ``self.revision_store.get(revision_id)`` returns.
    :raises InvalidRevisionId: if ``revision_id`` is empty or not a string.
    :raises errors.NoSuchRevision: if the store lookup fails with
        IndexError or KeyError.
    """
    if not revision_id or not isinstance(revision_id, basestring):
        raise InvalidRevisionId(revision_id=revision_id, branch=self)
    try:
        return self.revision_store.get(revision_id)
    except (IndexError, KeyError):
        # Use the module-level ``errors`` alias (as get_ancestry does) rather
        # than relying on a bare ``bzrlib`` name being bound in this module.
        raise errors.NoSuchRevision(self, revision_id)
239
def get_revision_xml(self, revision_id):
    """Return the stored XML of a revision as a single value.

    Reads the file object returned by
    ``get_revision_xml_file(revision_id)`` in full and returns the result of
    its ``read()`` call.  Any exception raised by ``get_revision_xml_file``
    propagates unchanged.
    """
    return self.get_revision_xml_file(revision_id).read()
243
def get_revision(self, revision_id):
244
"""Return the Revision object for a named revision"""
245
xml_file = self.get_revision_xml_file(revision_id)
248
r = bzrlib.xml5.serializer_v5.read_revision(xml_file)
249
except SyntaxError, e:
250
raise bzrlib.errors.BzrError('failed to unpack revision_xml',
254
assert r.revision_id == revision_id
258
def get_revision_sha1(self, revision_id):
    """Hash the stored value of a revision, and return it.

    The hash is whatever ``bzrlib.osutils.sha_file`` returns for the file
    object produced by ``get_revision_xml_file(revision_id)``.
    """
    # In the future, revision entries will be signed. At that
    # point, it is probably best *not* to include the signature
    # in the revision hash. Because that lets you re-sign
    # the revision, (add signatures/remove signatures) and still
    # have all hash pointers stay consistent.
    # But for now, just hash the contents.
    return bzrlib.osutils.sha_file(self.get_revision_xml_file(revision_id))
269
def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
270
self.revision_store.add(StringIO(gpg_strategy.sign(plaintext)),
273
def fileid_involved_between_revs(self, from_revid, to_revid):
    """Find file_id(s) which are involved in the changes between revisions.

    This determines the set of revisions which are involved, and then
    finds all file ids affected by those revisions.

    :param from_revid: Revision id whose inventory-weave inclusions are
        excluded from the result.
    :param to_revid: Revision id whose inventory-weave inclusions are
        considered.
    :return: The result of ``_fileid_involved_by_set`` for the revisions
        included by ``to_revid`` but not by ``from_revid``.
    """
    # TODO: jam 20060119 This code assumes that w.inclusions will
    #       always be correct. But because of the presence of ghosts
    #       it is possible to be wrong.
    #       One specific example from Robert Collins:
    #       Two branches, with revisions ABC, and AD
    #       C is a ghost merge of D.
    #       Inclusions doesn't recognize D as an ancestor.
    #       If D is ever merged in the future, the weave
    #       won't be fixed, because AD never saw revision C
    #       to cause a conflict which would force a reweave.
    w = self.get_inventory_weave()
    from_set = set(w.inclusions([w.lookup(from_revid)]))
    to_set = set(w.inclusions([w.lookup(to_revid)]))
    included = to_set.difference(from_set)
    # Hand over a real set: _fileid_involved_by_set documents its argument
    # as a set and performs a membership test per weave line.
    changed = set(w.idx_to_name(idx) for idx in included)
    return self._fileid_involved_by_set(changed)
296
def fileid_involved(self, last_revid=None):
297
"""Find all file_ids modified in the ancestry of last_revid.
299
:param last_revid: If None, last_revision() will be used.
301
w = self.get_inventory_weave()
303
changed = set(w._names)
305
included = w.inclusions([w.lookup(last_revid)])
306
changed = map(w.idx_to_name, included)
307
return self._fileid_involved_by_set(changed)
309
def fileid_involved_by_set(self, changes):
    """Find all file_ids modified by the set of revisions passed in.

    Public entry point that forwards directly to
    ``_fileid_involved_by_set``.

    :param changes: A set() of revision ids
    :return: Whatever ``_fileid_involved_by_set(changes)`` returns.
    """
    # TODO: jam 20060119 Consider changing _fileid_involved_by_set so
    #       that it takes the inventory weave, rather than
    #       pulling it out by itself.
    return self._fileid_involved_by_set(changes)
320
def _fileid_involved_by_set(self, changes):
321
"""Find the set of file-ids affected by the set of revisions.
323
:param changes: A set() of revision ids.
324
:return: A set() of file ids.
326
This peeks at the Weave, interpreting each line, looking to
327
see if it mentions one of the revisions. And if so, includes
328
the file id mentioned.
329
This expects both the Weave format, and the serialization
330
to have a single line per file/directory, and to have
331
fileid="" and revision="" on that line.
333
assert isinstance(self._format, (RepositoryFormat5,
335
RepositoryFormat7)), \
336
"fileid_involved only supported for branches which store inventory as unnested xml"
338
w = self.get_inventory_weave()
340
for line in w._weave:
342
# it is ugly, but it is due to the weave structure
343
if not isinstance(line, basestring): continue
345
start = line.find('file_id="')+9
346
if start < 9: continue
347
end = line.find('"', start)
349
file_id = xml.sax.saxutils.unescape(line[start:end])
351
# check if file_id is already present
352
if file_id in file_ids: continue
354
start = line.find('revision="')+10
355
if start < 10: continue
356
end = line.find('"', start)
358
revision_id = xml.sax.saxutils.unescape(line[start:end])
360
if revision_id in changes:
361
file_ids.add(file_id)
365
def get_inventory_weave(self):
    """Return the weave named 'inventory' from ``self.control_weaves``.

    The weave is fetched within the transaction returned by
    ``self.get_transaction()``.
    """
    return self.control_weaves.get_weave('inventory',
                                         self.get_transaction())
370
def get_inventory(self, revision_id):
    """Get the Inventory object for a revision id.

    The inventory XML is obtained from ``get_inventory_xml(revision_id)``
    and parsed with ``bzrlib.xml5.serializer_v5.read_inventory_from_string``.
    """
    # Named inventory_xml so the module-level ``xml`` package import is not
    # shadowed inside this method.
    inventory_xml = self.get_inventory_xml(revision_id)
    return bzrlib.xml5.serializer_v5.read_inventory_from_string(inventory_xml)
376
def get_inventory_xml(self, revision_id):
377
"""Get inventory XML as a file object."""
379
assert isinstance(revision_id, basestring), type(revision_id)
380
iw = self.get_inventory_weave()
381
return iw.get_text(iw.lookup(revision_id))
383
raise bzrlib.errors.HistoryMissing(self, 'inventory', revision_id)
386
def get_inventory_sha1(self, revision_id):
    """Return the sha1 hash of the inventory entry.

    The value is read from the ``inventory_sha1`` attribute of the object
    returned by ``get_revision(revision_id)``; it is not recomputed here.
    """
    return self.get_revision(revision_id).inventory_sha1
392
def get_revision_inventory(self, revision_id):
    """Return inventory of a past revision.

    :param revision_id: The revision whose inventory is wanted.
    :return: The result of ``get_inventory(revision_id)``.
    :raises NotImplementedError: if ``revision_id`` is None.
    """
    # TODO: Unify this with get_inventory()
    # bzr 0.0.6 and later imposes the constraint that the inventory_id
    # must be the same as its revision, so this is trivial.
    if revision_id is None:
        # This does not make sense: if there is no revision,
        # then it is the current tree inventory surely ?!
        # and thus get_root_id() is something that looks at the last
        # commit on the branch, and the get_root_id is an inventory check.
        raise NotImplementedError
        # return Inventory(self.get_root_id())
    return self.get_inventory(revision_id)
408
def revision_tree(self, revision_id):
409
"""Return Tree for a revision on this branch.
411
`revision_id` may be None for the null revision, in which case
412
an `EmptyTree` is returned."""
413
# TODO: refactor this to use an existing revision object
414
# so we don't need to read it in twice.
415
if revision_id is None or revision_id == NULL_REVISION:
418
inv = self.get_revision_inventory(revision_id)
419
return RevisionTree(self, inv, revision_id)
422
def get_ancestry(self, revision_id):
423
"""Return a list of revision-ids integrated by a revision.
425
This is topologically sorted.
427
if revision_id is None:
429
if not self.has_revision(revision_id):
430
raise errors.NoSuchRevision(self, revision_id)
431
w = self.get_inventory_weave()
432
return [None] + map(w.idx_to_name,
433
w.inclusions([w.lookup(revision_id)]))
436
def print_file(self, file, revision_id):
437
"""Print `file` to stdout.
439
FIXME RBC 20060125 as John Meinel points out this is a bad api
440
- it writes to stdout, it assumes that that is valid etc. Fix
441
by creating a new more flexible convenience function.
443
tree = self.revision_tree(revision_id)
444
# use inventory as it was in that revision
445
file_id = tree.inventory.path2id(file)
447
raise BzrError("%r is not present in revision %s" % (file, revno))
449
revno = self.revision_id_to_revno(revision_id)
450
except errors.NoSuchRevision:
451
# TODO: This should not be BzrError,
452
# but NoSuchFile doesn't fit either
453
raise BzrError('%r is not present in revision %s'
454
% (file, revision_id))
456
raise BzrError('%r is not present in revision %s'
458
tree.print_file(file_id)
460
def get_transaction(self):
    """Return the current transaction of ``self.control_files``."""
    return self.control_files.get_transaction()
464
def sign_revision(self, revision_id, gpg_strategy):
    """Sign a revision and store the signature.

    The text to sign is the short-form testament built by
    ``Testament.from_revision(self, revision_id).as_short_text()``; it is
    passed with ``gpg_strategy`` to ``store_revision_signature``.

    :param revision_id: Id of the revision to sign.
    :param gpg_strategy: Signing strategy handed through to
        ``store_revision_signature``.
    """
    plaintext = Testament.from_revision(self, revision_id).as_short_text()
    self.store_revision_signature(gpg_strategy, plaintext, revision_id)
469
class RepositoryFormat(object):
470
"""A repository format.
472
Formats provide three things:
473
* An initialization routine to construct repository data on disk.
474
* a format string which is used when the BzrDir supports versioned
476
* an open routine which returns a Repository instance.
478
Formats are placed in a dict by their format string for reference
479
during opening. These should be subclasses of RepositoryFormat
482
Once a format is deprecated, just deprecate the initialize and open
483
methods on the format class. Do not deprecate the object, as the
484
object will be created every system load.
486
Common instance attributes:
487
_matchingbzrdir - the bzrdir format that the repository format was
488
originally written to work with. This can be used if manually
489
constructing a bzrdir and repository, or more commonly for test suite
493
_default_format = None
494
"""The default format used for new repositories."""
497
"""The known formats."""
500
def find_format(klass, a_bzrdir):
501
"""Return the format for the repository object in a_bzrdir."""
503
transport = a_bzrdir.get_repository_transport(None)
504
format_string = transport.get("format").read()
505
return klass._formats[format_string]
506
except errors.NoSuchFile:
507
raise errors.NoRepositoryPresent(a_bzrdir)
509
raise errors.UnknownFormatError(format_string)
512
def get_default_format(klass):
    """Return the current default format.

    This is the value of ``klass._default_format``, which
    ``set_default_format`` assigns.
    """
    return klass._default_format
516
def get_format_string(self):
    """Return the ASCII format string that identifies this format.

    Note that in pre format ?? repositories the format string is
    not permitted nor written to disk.

    This base implementation always raises; formats that have a format
    string override it.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError(self.get_format_string)
524
def initialize(self, a_bzrdir):
525
"""Create a weave repository.
527
TODO: when creating split out bzr branch formats, move this to a common
528
base for Format5, Format6. or something like that.
530
from bzrlib.weavefile import write_weave_v5
531
from bzrlib.weave import Weave
533
# Create an empty weave
535
bzrlib.weavefile.write_weave_v5(Weave(), sio)
536
empty_weave = sio.getvalue()
538
mutter('creating repository in %s.', a_bzrdir.transport.base)
539
dirs = ['revision-store', 'weaves']
540
lock_file = 'branch-lock'
541
files = [('inventory.weave', StringIO(empty_weave)),
544
# FIXME: RBC 20060125 dont peek under the covers
545
# NB: no need to escape relative paths that are url safe.
546
control_files = LockableFiles(a_bzrdir.transport, 'branch-lock')
547
control_files.lock_write()
548
control_files._transport.mkdir_multi(dirs,
549
mode=control_files._dir_mode)
551
for file, content in files:
552
control_files.put(file, content)
554
control_files.unlock()
555
return Repository(None, branch_format=None, _format=self, a_bzrdir=a_bzrdir)
557
def is_supported(self):
558
"""Is this format supported?
560
Supported formats must be initializable and openable.
561
Unsupported formats may not support initialization or committing or
562
some other features depending on the reason for not being supported.
566
def open(self, a_bzrdir, _found=False):
567
"""Return an instance of this format for the bzrdir a_bzrdir.
569
_found is a private parameter, do not use it.
572
# we are being called directly and must probe.
573
raise NotImplementedError
574
return Repository(None, branch_format=None, _format=self, a_bzrdir=a_bzrdir)
577
def register_format(klass, format):
    """Record ``format`` in ``klass._formats`` under its format string.

    The key is ``format.get_format_string()``; an existing entry with the
    same key is replaced.
    """
    klass._formats[format.get_format_string()] = format
581
def set_default_format(klass, format):
    """Make ``format`` the default by storing it in ``klass._default_format``."""
    klass._default_format = format
585
def unregister_format(klass, format):
    """Remove ``format`` from ``klass._formats``.

    :raises KeyError: if nothing is registered under the format's string.
    :raises AssertionError: if a different object is registered under that
        string (only when assertions are enabled).
    """
    # Look the key up once; it is needed for both the check and the removal.
    format_string = format.get_format_string()
    assert klass._formats[format_string] is format
    del klass._formats[format_string]
590
class RepositoryFormat4(RepositoryFormat):
591
"""Bzr repository format 4.
593
This repository format has:
595
- TextStores for texts, inventories,revisions.
597
This format is deprecated: it indexes texts using a text id which is
598
removed in format 5; initialization and write support for this format
603
super(RepositoryFormat4, self).__init__()
604
self._matchingbzrdir = bzrdir.BzrDirFormat4()
606
def initialize(self, url):
    """Format 4 repositories cannot be created.

    :param url: Ignored.
    :raises errors.UninitializableFormat: always.
    """
    raise errors.UninitializableFormat(self)
610
def is_supported(self):
611
"""Format 4 is not supported.
613
It is not supported because the model changed from 4 to 5 and the
614
conversion logic is expensive - so doing it on the fly was not
620
class RepositoryFormat5(RepositoryFormat):
621
"""Bzr control format 5.
623
This repository format has:
624
- weaves for file texts and inventory
626
- TextStores for revisions and signatures.
630
super(RepositoryFormat5, self).__init__()
631
self._matchingbzrdir = bzrdir.BzrDirFormat5()
634
class RepositoryFormat6(RepositoryFormat):
635
"""Bzr control format 6.
637
This repository format has:
638
- weaves for file texts and inventory
639
- hash subdirectory based stores.
640
- TextStores for revisions and signatures.
644
super(RepositoryFormat6, self).__init__()
645
self._matchingbzrdir = bzrdir.BzrDirFormat6()
648
class RepositoryFormat7(RepositoryFormat):
651
This repository format has:
652
- weaves for file texts and inventory
653
- hash subdirectory based stores.
654
- TextStores for revisions and signatures.
655
- a format marker of its own
658
def get_format_string(self):
    """See RepositoryFormat.get_format_string().

    :return: The fixed string "Bazaar-NG Repository format 7".
    """
    return "Bazaar-NG Repository format 7"
662
def initialize(self, a_bzrdir):
663
"""Create a weave repository.
665
from bzrlib.weavefile import write_weave_v5
666
from bzrlib.weave import Weave
668
# Create an empty weave
670
bzrlib.weavefile.write_weave_v5(Weave(), sio)
671
empty_weave = sio.getvalue()
673
mutter('creating repository in %s.', a_bzrdir.transport.base)
674
dirs = ['revision-store', 'weaves']
675
files = [('inventory.weave', StringIO(empty_weave)),
677
utf8_files = [('format', self.get_format_string())]
679
# FIXME: RBC 20060125 dont peek under the covers
680
# NB: no need to escape relative paths that are url safe.
682
repository_transport = a_bzrdir.get_repository_transport(self)
683
repository_transport.put(lock_file, StringIO()) # TODO get the file mode from the bzrdir lock files., mode=file_mode)
684
control_files = LockableFiles(repository_transport, 'lock')
685
control_files.lock_write()
686
control_files._transport.mkdir_multi(dirs,
687
mode=control_files._dir_mode)
689
for file, content in files:
690
control_files.put(file, content)
691
for file, content in utf8_files:
692
control_files.put_utf8(file, content)
694
control_files.unlock()
695
return Repository(None, branch_format=None, _format=self, a_bzrdir=a_bzrdir)
698
super(RepositoryFormat7, self).__init__()
699
self._matchingbzrdir = bzrdir.BzrDirMetaFormat1()
702
# formats which have no format string are not discoverable
703
# and not independently creatable, so are not registered.
704
__default_format = RepositoryFormat7()
705
RepositoryFormat.register_format(__default_format)
706
RepositoryFormat.set_default_format(__default_format)
707
_legacy_formats = [RepositoryFormat4(),
712
# TODO: jam 20060108 Create a new branch format, and as part of upgrade
713
# make sure that ancestry.weave is deleted (it is never used, but
714
# used to be created)
716
class RepositoryTestProviderAdapter(object):
717
"""A tool to generate a suite testing multiple repository formats at once.
719
This is done by copying the test once for each transport and injecting
720
the transport_server, transport_readonly_server, and bzrdir_format and
721
repository_format classes into each copy. Each copy is also given a new id()
722
to make it easy to identify.
725
def __init__(self, transport_server, transport_readonly_server, formats):
726
self._transport_server = transport_server
727
self._transport_readonly_server = transport_readonly_server
728
self._formats = formats
730
def adapt(self, test):
732
for repository_format, bzrdir_format in self._formats:
733
new_test = deepcopy(test)
734
new_test.transport_server = self._transport_server
735
new_test.transport_readonly_server = self._transport_readonly_server
736
new_test.bzrdir_format = bzrdir_format
737
new_test.repository_format = repository_format
738
def make_new_test_id():
739
new_id = "%s(%s)" % (new_test.id(), repository_format.__class__.__name__)
740
return lambda: new_id
741
new_test.id = make_new_test_id()
742
result.addTest(new_test)