1
# Copyright (C) 2005 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
from copy import deepcopy
18
from cStringIO import StringIO
19
from unittest import TestSuite
20
import xml.sax.saxutils
23
import bzrlib.bzrdir as bzrdir
24
from bzrlib.decorators import needs_read_lock, needs_write_lock
25
import bzrlib.errors as errors
26
from bzrlib.errors import InvalidRevisionId
27
from bzrlib.lockable_files import LockableFiles
28
from bzrlib.osutils import safe_unicode
29
from bzrlib.revision import NULL_REVISION
30
from bzrlib.store import copy_all
31
from bzrlib.store.weave import WeaveStore
32
from bzrlib.store.text import TextStore
33
from bzrlib.trace import mutter
34
from bzrlib.tree import RevisionTree
35
from bzrlib.testament import Testament
36
from bzrlib.tree import EmptyTree
40
class Repository(object):
41
"""Repository holding history for one or more branches.
43
The repository holds and retrieves historical information including
44
revisions and file history. It's normally accessed only by the Branch,
45
which views a particular line of development through that history.
47
The Repository builds on top of Stores and a Transport, which respectively
48
describe the disk data format and the way of accessing the (possibly
53
def all_revision_ids(self):
54
"""Returns a list of all the revision ids in the repository.
56
It would be nice to have this topologically sorted, but its not yet.
58
possible_ids = self.get_inventory_weave().names()
60
for id in possible_ids:
61
if self.has_revision(id):
67
"""Construct the current default format repository in a_bzrdir."""
68
return RepositoryFormat.get_default_format().initialize(a_bzrdir)
70
def __init__(self, transport, branch_format, _format=None, a_bzrdir=None):
72
if transport is not None:
73
self.control_files = LockableFiles(transport.clone(bzrlib.BZRDIR), 'README')
75
# TODO: clone into repository if needed
76
self.control_files = LockableFiles(a_bzrdir.transport, 'README')
78
dir_mode = self.control_files._dir_mode
79
file_mode = self.control_files._file_mode
80
self._format = _format
81
self.bzrdir = a_bzrdir
83
def get_weave(name, prefixed=False):
85
name = safe_unicode(name)
88
relpath = self.control_files._escape(name)
89
weave_transport = self.control_files._transport.clone(relpath)
90
ws = WeaveStore(weave_transport, prefixed=prefixed,
93
if self.control_files._transport.should_cache():
94
ws.enable_cache = True
98
def get_store(name, compressed=True, prefixed=False):
99
# FIXME: This approach of assuming stores are all entirely compressed
100
# or entirely uncompressed is tidy, but breaks upgrade from
101
# some existing branches where there's a mixture; we probably
102
# still want the option to look for both.
104
name = safe_unicode(name)
107
relpath = self.control_files._escape(name)
108
store = TextStore(self.control_files._transport.clone(relpath),
109
prefixed=prefixed, compressed=compressed,
112
#if self._transport.should_cache():
113
# cache_path = os.path.join(self.cache_root, name)
114
# os.mkdir(cache_path)
115
# store = bzrlib.store.CachedStore(store, cache_path)
118
if branch_format is not None:
119
# circular dependencies:
120
from bzrlib.branch import (BzrBranchFormat4,
124
if isinstance(branch_format, BzrBranchFormat4):
125
self._format = RepositoryFormat4()
126
elif isinstance(branch_format, BzrBranchFormat5):
127
self._format = RepositoryFormat5()
128
elif isinstance(branch_format, BzrBranchFormat6):
129
self._format = RepositoryFormat6()
132
if isinstance(self._format, RepositoryFormat4):
133
self.inventory_store = get_store('inventory-store')
134
self.text_store = get_store('text-store')
135
self.revision_store = get_store('revision-store')
136
elif isinstance(self._format, RepositoryFormat5):
137
self.control_weaves = get_weave('')
138
self.weave_store = get_weave('weaves')
139
self.revision_store = get_store('revision-store', compressed=False)
140
elif isinstance(self._format, RepositoryFormat6):
141
self.control_weaves = get_weave('')
142
self.weave_store = get_weave('weaves', prefixed=True)
143
self.revision_store = get_store('revision-store', compressed=False,
145
self.revision_store.register_suffix('sig')
147
def lock_write(self):
    """Take the write lock on this repository.

    The lock is delegated to ``self.control_files``; nothing is returned.
    """
    self.control_files.lock_write()
151
self.control_files.lock_read()
155
"""Open the repository rooted at base.
157
For instance, if the repository is at URL/.bzr/repository,
158
Repository.open(URL) -> a Repository instance.
160
control = bzrdir.BzrDir.open(base)
161
return control.open_repository()
163
def push_stores(self, to, revision=NULL_REVISION):
164
"""FIXME: document and find a consistent name with other classes."""
165
if (not isinstance(self._format, RepositoryFormat4) or
166
self._format != to._format):
167
from bzrlib.fetch import RepoFetcher
168
mutter("Using fetch logic to push between %s(%s) and %s(%s)",
169
self, self._format, to, to._format)
170
RepoFetcher(to_repository=to, from_repository=self,
171
last_revision=revision)
174
# format 4 to format 4 logic only.
175
store_pairs = ((self.text_store, to.text_store),
176
(self.inventory_store, to.inventory_store),
177
(self.revision_store, to.revision_store))
179
for from_store, to_store in store_pairs:
180
copy_all(from_store, to_store)
181
except UnlistableStore:
182
raise UnlistableBranch(from_store)
185
self.control_files.unlock()
188
def clone(self, a_bzrdir):
189
"""Clone this repository into a_bzrdir using the current format.
191
Currently no check is made that the format of this repository and
192
the bzrdir format are compatible. FIXME RBC 20060201.
194
result = self._format.initialize(a_bzrdir)
199
def copy(self, destination):
200
destination.lock_write()
202
destination.control_weaves.copy_multi(self.control_weaves,
204
copy_all(self.weave_store, destination.weave_store)
205
copy_all(self.revision_store, destination.revision_store)
209
def has_revision(self, revision_id):
    """Return true if this repository has a copy of the revision.

    This does not necessarily imply the revision is merged
    or on the mainline.

    :param revision_id: Revision id to look for. ``None`` is always
        reported as present without consulting the revision store.
    :return: True for ``None``, otherwise whatever
        ``self.revision_store.has_id(revision_id)`` returns.
    """
    if revision_id is None:
        return True
    return self.revision_store.has_id(revision_id)
218
def get_revision_xml_file(self, revision_id):
    """Return XML file object for revision object.

    :param revision_id: Id of the revision to fetch; must be a non-empty
        string.
    :return: Whatever ``self.revision_store.get(revision_id)`` returns.
    :raises InvalidRevisionId: if revision_id is empty or not a string.
    :raises NoSuchRevision: if the revision store lookup fails with
        IndexError or KeyError.
    """
    if not revision_id or not isinstance(revision_id, basestring):
        raise InvalidRevisionId(revision_id=revision_id, branch=self)
    try:
        return self.revision_store.get(revision_id)
    except (IndexError, KeyError):
        # Use the ``errors`` alias imported at the top of the file, as
        # get_ancestry does, rather than the bare ``bzrlib`` name.
        raise errors.NoSuchRevision(self, revision_id)
228
def get_revision_xml(self, revision_id):
    """Return the stored XML text for a revision.

    :param revision_id: Id of the revision to fetch.
    :return: The result of ``read()`` on the object returned by
        ``get_revision_xml_file(revision_id)``.
    """
    return self.get_revision_xml_file(revision_id).read()
232
def get_revision(self, revision_id):
233
"""Return the Revision object for a named revision"""
234
xml_file = self.get_revision_xml_file(revision_id)
237
r = bzrlib.xml5.serializer_v5.read_revision(xml_file)
238
except SyntaxError, e:
239
raise bzrlib.errors.BzrError('failed to unpack revision_xml',
243
assert r.revision_id == revision_id
247
def get_revision_sha1(self, revision_id):
    """Hash the stored value of a revision, and return it.

    :param revision_id: Id of the revision whose stored XML is hashed.
    :return: The result of ``sha_file`` applied to the revision's XML
        file object.
    """
    # In the future, revision entries will be signed. At that
    # point, it is probably best *not* to include the signature
    # in the revision hash. Because that lets you re-sign
    # the revision, (add signatures/remove signatures) and still
    # have all hash pointers stay consistent.
    # But for now, just hash the contents.

    # Imported locally: the imports visible at the top of this file only
    # pull individual names out of bzrlib.osutils and do not bind the bare
    # ``bzrlib`` name this method previously relied on.
    from bzrlib.osutils import sha_file
    return sha_file(self.get_revision_xml_file(revision_id))
258
def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
259
self.revision_store.add(StringIO(gpg_strategy.sign(plaintext)),
262
def fileid_involved_between_revs(self, from_revid, to_revid):
    """Find file_id(s) which are involved in the changes between revisions.

    This determines the set of revisions which are involved, and then
    finds all file ids affected by those revisions.

    :param from_revid: Revision id whose inclusions are excluded.
    :param to_revid: Revision id whose inclusions are considered.
    :return: The result of ``_fileid_involved_by_set`` for the revisions
        included by to_revid but not by from_revid.
    """
    # TODO: jam 20060119 This code assumes that w.inclusions will
    #       always be correct. But because of the presence of ghosts
    #       it is possible to be wrong.
    #       One specific example from Robert Collins:
    #       Two branches, with revisions ABC, and AD
    #       C is a ghost merge of D.
    #       Inclusions doesn't recognize D as an ancestor.
    #       If D is ever merged in the future, the weave
    #       won't be fixed, because AD never saw revision C
    #       to cause a conflict which would force a reweave.
    w = self.get_inventory_weave()
    from_set = set(w.inclusions([w.lookup(from_revid)]))
    to_set = set(w.inclusions([w.lookup(to_revid)]))
    included = to_set.difference(from_set)
    # Translate weave indexes back into revision ids.
    changed = [w.idx_to_name(idx) for idx in included]
    return self._fileid_involved_by_set(changed)
285
def fileid_involved(self, last_revid=None):
286
"""Find all file_ids modified in the ancestry of last_revid.
288
:param last_revid: If None, last_revision() will be used.
290
w = self.get_inventory_weave()
292
changed = set(w._names)
294
included = w.inclusions([w.lookup(last_revid)])
295
changed = map(w.idx_to_name, included)
296
return self._fileid_involved_by_set(changed)
298
def fileid_involved_by_set(self, changes):
    """Find all file_ids modified by the set of revisions passed in.

    :param changes: A set() of revision ids
    :return: The result of ``_fileid_involved_by_set(changes)``.
    """
    # TODO: jam 20060119 This line does *nothing*, remove it.
    #       or better yet, change _fileid_involved_by_set so
    #       that it takes the inventory weave, rather than
    #       pulling it out by itself.
    return self._fileid_involved_by_set(changes)
309
def _fileid_involved_by_set(self, changes):
310
"""Find the set of file-ids affected by the set of revisions.
312
:param changes: A set() of revision ids.
313
:return: A set() of file ids.
315
This peaks at the Weave, interpreting each line, looking to
316
see if it mentions one of the revisions. And if so, includes
317
the file id mentioned.
318
This expects both the Weave format, and the serialization
319
to have a single line per file/directory, and to have
320
fileid="" and revision="" on that line.
322
assert (isinstance(self._format, RepositoryFormat5) or
323
isinstance(self._format, RepositoryFormat6)), \
324
"fileid_involved only supported for branches which store inventory as xml"
326
w = self.get_inventory_weave()
328
for line in w._weave:
330
# it is ugly, but it is due to the weave structure
331
if not isinstance(line, basestring): continue
333
start = line.find('file_id="')+9
334
if start < 9: continue
335
end = line.find('"', start)
337
file_id = xml.sax.saxutils.unescape(line[start:end])
339
# check if file_id is already present
340
if file_id in file_ids: continue
342
start = line.find('revision="')+10
343
if start < 10: continue
344
end = line.find('"', start)
346
revision_id = xml.sax.saxutils.unescape(line[start:end])
348
if revision_id in changes:
349
file_ids.add(file_id)
353
def get_inventory_weave(self):
    """Return the weave named 'inventory' from the control weaves.

    The lookup is performed under the transaction returned by
    ``self.get_transaction()``.
    """
    transaction = self.get_transaction()
    return self.control_weaves.get_weave('inventory', transaction)
358
def get_inventory(self, revision_id):
    """Get Inventory object by hash.

    :param revision_id: Id passed through to ``get_inventory_xml``.
    :return: The result of parsing that XML with the v5 serializer's
        ``read_inventory_from_string``.
    """
    # Imported locally so the ``bzrlib.xml5`` attribute chain resolves;
    # the imports visible at the top of this file do not bind ``bzrlib``.
    import bzrlib.xml5
    # Named inv_xml rather than xml to avoid shadowing the stdlib ``xml``
    # package imported at module level.
    inv_xml = self.get_inventory_xml(revision_id)
    return bzrlib.xml5.serializer_v5.read_inventory_from_string(inv_xml)
364
def get_inventory_xml(self, revision_id):
365
"""Get inventory XML as a file object."""
367
assert isinstance(revision_id, basestring), type(revision_id)
368
iw = self.get_inventory_weave()
369
return iw.get_text(iw.lookup(revision_id))
371
raise bzrlib.errors.HistoryMissing(self, 'inventory', revision_id)
374
def get_inventory_sha1(self, revision_id):
    """Return the sha1 hash of the inventory entry.

    :param revision_id: Id of the revision to look up.
    :return: The ``inventory_sha1`` attribute of the object returned by
        ``get_revision(revision_id)``.
    """
    return self.get_revision(revision_id).inventory_sha1
380
def get_revision_inventory(self, revision_id):
    """Return inventory of a past revision.

    :param revision_id: Id of the revision whose inventory is wanted.
    :return: The result of ``get_inventory(revision_id)``.
    :raises NotImplementedError: if revision_id is None.
    """
    # TODO: Unify this with get_inventory()
    # bzr 0.0.6 and later imposes the constraint that the inventory_id
    # must be the same as its revision, so this is trivial.
    if revision_id is None:
        # This does not make sense: if there is no revision,
        # then it is the current tree inventory surely ?!
        # and thus get_root_id() is something that looks at the last
        # commit on the branch, and the get_root_id is an inventory check.
        raise NotImplementedError
        # return Inventory(self.get_root_id())
    return self.get_inventory(revision_id)
396
def revision_tree(self, revision_id):
397
"""Return Tree for a revision on this branch.
399
`revision_id` may be None for the null revision, in which case
400
an `EmptyTree` is returned."""
401
# TODO: refactor this to use an existing revision object
402
# so we don't need to read it in twice.
403
if revision_id is None or revision_id == NULL_REVISION:
406
inv = self.get_revision_inventory(revision_id)
407
return RevisionTree(self, inv, revision_id)
410
def get_ancestry(self, revision_id):
411
"""Return a list of revision-ids integrated by a revision.
413
This is topologically sorted.
415
if revision_id is None:
417
if not self.has_revision(revision_id):
418
raise errors.NoSuchRevision(self, revision_id)
419
w = self.get_inventory_weave()
420
return [None] + map(w.idx_to_name,
421
w.inclusions([w.lookup(revision_id)]))
424
def print_file(self, file, revision_id):
425
"""Print `file` to stdout.
427
FIXME RBC 20060125 as John Meinel points out this is a bad api
428
- it writes to stdout, it assumes that that is valid etc. Fix
429
by creating a new more flexible convenience function.
431
tree = self.revision_tree(revision_id)
432
# use inventory as it was in that revision
433
file_id = tree.inventory.path2id(file)
435
raise BzrError("%r is not present in revision %s" % (file, revno))
437
revno = self.revision_id_to_revno(revision_id)
438
except errors.NoSuchRevision:
439
# TODO: This should not be BzrError,
440
# but NoSuchFile doesn't fit either
441
raise BzrError('%r is not present in revision %s'
442
% (file, revision_id))
444
raise BzrError('%r is not present in revision %s'
446
tree.print_file(file_id)
448
def get_transaction(self):
    """Return the transaction object supplied by ``self.control_files``."""
    return self.control_files.get_transaction()
452
def sign_revision(self, revision_id, gpg_strategy):
    """Sign a revision and store the signature.

    The text that gets signed is the short-text form of the Testament
    built from this repository and revision_id.

    :param revision_id: Id of the revision to sign.
    :param gpg_strategy: Signing strategy, handed on unchanged to
        ``store_revision_signature``.
    """
    testament = Testament.from_revision(self, revision_id)
    plaintext = testament.as_short_text()
    self.store_revision_signature(gpg_strategy, plaintext, revision_id)
457
class RepositoryFormat(object):
458
"""A repository format.
460
Formats provide three things:
461
* An initialization routine to construct repository data on disk.
462
* a format string which is used when the BzrDir supports versioned
464
* an open routine which returns a Repository instance.
466
Formats are placed in an dict by their format string for reference
467
during opening. These should be subclasses of RepositoryFormat
470
Once a format is deprecated, just deprecate the initialize and open
471
methods on the format class. Do not deprecate the object, as the
472
object will be created every system load.
474
Common instance attributes:
475
_matchingbzrdir - the bzrdir format that the repository format was
476
originally written to work with. This can be used if manually
477
constructing a bzrdir and repository, or more commonly for test suite
481
_default_format = None
482
"""The default format used for new repositories."""
485
"""The known formats."""
488
@classmethod
def get_default_format(klass):
    """Return the current default format."""
    # Bound as a classmethod because it is invoked on the class itself
    # (``RepositoryFormat.get_default_format()``) with no instance.
    return klass._default_format
492
def get_format_string(self):
    """Return the ASCII format string that identifies this format.

    Note that in pre format ?? repositories the format string is
    not permitted nor written to disk.

    :raises NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError(self.get_format_string)
500
def initialize(self, a_bzrdir):
501
"""Create a weave repository.
503
TODO: when creating split out bzr branch formats, move this to a common
504
base for Format5, Format6. or something like that.
506
from bzrlib.weavefile import write_weave_v5
507
from bzrlib.weave import Weave
509
# Create an empty weave
511
bzrlib.weavefile.write_weave_v5(Weave(), sio)
512
empty_weave = sio.getvalue()
514
mutter('creating repository in %s.', a_bzrdir.transport.base)
515
dirs = ['revision-store', 'weaves']
516
lock_file = 'branch-lock'
517
files = [('inventory.weave', StringIO(empty_weave)),
520
# FIXME: RBC 20060125 dont peek under the covers
521
# NB: no need to escape relative paths that are url safe.
522
control_files = LockableFiles(a_bzrdir.transport, 'branch-lock')
523
control_files.lock_write()
524
control_files._transport.mkdir_multi(dirs,
525
mode=control_files._dir_mode)
527
for file, content in files:
528
control_files.put(file, content)
530
control_files.unlock()
531
return Repository(None, branch_format=None, _format=self, a_bzrdir=a_bzrdir)
533
def is_supported(self):
534
"""Is this format supported?
536
Supported formats must be initializable and openable.
537
Unsupported formats may not support initialization or committing or
538
some other features depending on the reason for not being supported.
542
def open(self, a_bzrdir, _found=False):
543
"""Return an instance of this format for the bzrdir a_bzrdir.
545
_found is a private parameter, do not use it.
548
# we are being called directly and must probe.
549
raise NotImplementedError
550
return Repository(None, branch_format=None, _format=self, a_bzrdir=a_bzrdir)
553
@classmethod
def register_format(klass, format):
    """Record format in the class-level registry.

    :param format: Format object; it is stored in ``klass._formats`` under
        the key returned by its ``get_format_string()``.
    """
    klass._formats[format.get_format_string()] = format
557
@classmethod
def set_default_format(klass, format):
    """Make format the value stored in ``klass._default_format``.

    :param format: The format object to store.
    """
    klass._default_format = format
561
@classmethod
def unregister_format(klass, format):
    """Remove format from the class-level registry.

    :param format: A format object currently stored in ``klass._formats``
        under its own ``get_format_string()`` key.
    :raises KeyError: if no format is registered under that key.
    :raises AssertionError: if a different object is registered there.
    """
    format_string = format.get_format_string()
    # Only the exact object that was registered may be removed.
    assert klass._formats[format_string] is format
    del klass._formats[format_string]
566
class RepositoryFormat4(RepositoryFormat):
567
"""Bzr repository format 4.
569
This repository format has:
571
- TextStores for texts, inventories,revisions.
573
This format is deprecated: it indexes texts using a text id which is
574
removed in format 5; initializationa and write support for this format
579
super(RepositoryFormat4, self).__init__()
580
self._matchingbzrdir = bzrdir.BzrDirFormat4()
582
def initialize(self, url):
583
"""Format 4 branches cannot be created."""
584
raise errors.UninitializableFormat(self)
586
def is_supported(self):
587
"""Format 4 is not supported.
589
It is not supported because the model changed from 4 to 5 and the
590
conversion logic is expensive - so doing it on the fly was not
596
class RepositoryFormat5(RepositoryFormat):
597
"""Bzr control format 5.
599
This repository format has:
600
- weaves for file texts and inventory
602
- TextStores for revisions and signatures.
606
super(RepositoryFormat5, self).__init__()
607
self._matchingbzrdir = bzrdir.BzrDirFormat5()
610
class RepositoryFormat6(RepositoryFormat):
611
"""Bzr control format 6.
613
This repository format has:
614
- weaves for file texts and inventory
615
- hash subdirectory based stores.
616
- TextStores for revisions and signatures.
620
super(RepositoryFormat6, self).__init__()
621
self._matchingbzrdir = bzrdir.BzrDirFormat6()
623
# formats which have no format string are not discoverable
624
# and not independently creatable, so are not registered.
625
# __default_format = RepositoryFormatXXX()
626
# RepositoryFormat.register_format(__default_format)
627
# RepositoryFormat.set_default_format(__default_format)
628
_legacy_formats = [RepositoryFormat4(),
633
# TODO: jam 20060108 Create a new branch format, and as part of upgrade
634
# make sure that ancestry.weave is deleted (it is never used, but
635
# used to be created)
637
class RepositoryTestProviderAdapter(object):
    """A tool to generate a suite testing multiple repository formats at once.

    This is done by copying the test once for each transport and injecting
    the transport_server, transport_readonly_server, and bzrdir_format and
    repository_format classes into each copy. Each copy is also given a new id()
    to make it easy to identify.
    """

    def __init__(self, transport_server, transport_readonly_server, formats):
        """Remember the servers and formats to inject into adapted tests.

        :param transport_server: Stored and later set on each test copy.
        :param transport_readonly_server: Stored and later set on each
            test copy.
        :param formats: Iterable of (repository_format, bzrdir_format)
            pairs; adapt() produces one test copy per pair.
        """
        self._transport_server = transport_server
        self._transport_readonly_server = transport_readonly_server
        self._formats = formats

    def adapt(self, test):
        """Return a TestSuite holding one copy of test per format pair.

        :param test: The test to deep-copy; the original is not added to
            the returned suite.
        """
        result = TestSuite()
        for repository_format, bzrdir_format in self._formats:
            new_test = deepcopy(test)
            new_test.transport_server = self._transport_server
            new_test.transport_readonly_server = self._transport_readonly_server
            new_test.bzrdir_format = bzrdir_format
            new_test.repository_format = repository_format

            def make_new_test_id():
                # Compute the id string now, so the lambda returns a fixed
                # value instead of re-reading the loop variables later.
                new_id = "%s(%s)" % (new_test.id(),
                                     repository_format.__class__.__name__)
                return lambda: new_id

            new_test.id = make_new_test_id()
            result.addTest(new_test)
        return result