1
# Copyright (C) 2005-2010 Canonical Ltd
1
# Copyright (C) 2005, 2006, 2007, 2008, 2009 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
52
50
from bzrlib.testament import Testament
59
53
from bzrlib.decorators import needs_read_lock, needs_write_lock, only_raises
60
54
from bzrlib.inter import InterObject
61
55
from bzrlib.inventory import (
67
from bzrlib.lock import _RelockDebugMixin, LogicalLockResult
61
from bzrlib.lock import _RelockDebugMixin
62
from bzrlib import registry
68
63
from bzrlib.trace import (
69
64
log_exception_quietly, note, mutter, mutter_callsite, warning)
73
68
_deprecation_warning_done = False
76
class IsInWriteGroupError(errors.InternalBzrError):
78
_fmt = "May not refresh_data of repo %(repo)s while in a write group."
80
def __init__(self, repo):
81
errors.InternalBzrError.__init__(self, repo=repo)
84
71
class CommitBuilder(object):
85
72
"""Provides an interface to build up a commit.
870
857
# versioned roots do not change unless the tree found a change.
873
class RepositoryWriteLockResult(LogicalLockResult):
874
"""The result of write locking a repository.
876
:ivar repository_token: The token obtained from the underlying lock, or
878
:ivar unlock: A callable which will unlock the lock.
881
def __init__(self, unlock, repository_token):
882
LogicalLockResult.__init__(self, unlock)
883
self.repository_token = repository_token
886
return "RepositoryWriteLockResult(%s, %s)" % (self.repository_token,
890
860
######################################################################
894
class Repository(_RelockDebugMixin, bzrdir.ControlComponent):
864
class Repository(_RelockDebugMixin):
895
865
"""Repository holding history for one or more branches.
897
867
The repository holds and retrieves historical information including
1056
1026
:seealso: add_inventory, for the contract.
1058
inv_lines = self._serializer.write_inventory_to_lines(inv)
1028
inv_lines = self._serialise_inventory_to_lines(inv)
1059
1029
return self._inventory_add_lines(revision_id, parents,
1060
1030
inv_lines, check_content=False)
1268
1238
"""Check a single text from this repository."""
1269
1239
if kind == 'inventories':
1270
1240
rev_id = record.key[0]
1271
inv = self._deserialise_inventory(rev_id,
1241
inv = self.deserialise_inventory(rev_id,
1272
1242
record.get_bytes_as('fulltext'))
1273
1243
if last_object is not None:
1274
1244
delta = inv._make_delta(last_object)
1319
1289
:param _format: The format of the repository on disk.
1320
1290
:param a_bzrdir: The BzrDir of the repository.
1292
In the future we will have a single api for all stores for
1293
getting file texts, inventories and revisions, then
1294
this construct will accept instances of those things.
1322
# In the future we will have a single api for all stores for
1323
# getting file texts, inventories and revisions, then
1324
# this construct will accept instances of those things.
1325
1296
super(Repository, self).__init__()
1326
1297
self._format = _format
1327
1298
# the following are part of the public API for Repository:
1333
1304
self._reconcile_does_inventory_gc = True
1334
1305
self._reconcile_fixes_text_parents = False
1335
1306
self._reconcile_backsup_inventory = True
1307
# not right yet - should be more semantically clear ?
1309
# TODO: make sure to construct the right store classes, etc, depending
1310
# on whether escaping is required.
1311
self._warn_if_deprecated()
1336
1312
self._write_group = None
1337
1313
# Additional places to query for data.
1338
1314
self._fallback_repositories = []
1339
1315
# An InventoryEntry cache, used during deserialization
1340
1316
self._inventory_entry_cache = fifo_cache.FIFOCache(10*1024)
1341
# Is it safe to return inventory entries directly from the entry cache,
1342
# rather than copying them?
1343
self._safe_to_return_from_cache = False
1346
def user_transport(self):
1347
return self.bzrdir.user_transport
1350
def control_transport(self):
1351
return self._transport
1353
1318
def __repr__(self):
1354
1319
if self._fallback_repositories:
1403
1368
data during reads, and allows a 'write_group' to be obtained. Write
1404
1369
groups must be used for actual data insertion.
1406
A token should be passed in if you know that you have locked the object
1407
some other way, and need to synchronise this object's state with that
1410
XXX: this docstring is duplicated in many places, e.g. lockable_files.py
1412
1371
:param token: if this is already locked, then lock_write will fail
1413
1372
unless the token matches the existing lock.
1414
1373
:returns: a token if this instance supports tokens, otherwise None.
1417
1376
:raises MismatchedToken: if the specified token doesn't match the token
1418
1377
of the existing lock.
1419
1378
:seealso: start_write_group.
1420
:return: A RepositoryWriteLockResult.
1380
A token should be passed in if you know that you have locked the object
1381
some other way, and need to synchronise this object's state with that
1384
XXX: this docstring is duplicated in many places, e.g. lockable_files.py
1422
1386
locked = self.is_locked()
1423
token = self.control_files.lock_write(token=token)
1387
result = self.control_files.lock_write(token=token)
1425
self._warn_if_deprecated()
1426
1389
self._note_lock('w')
1427
1390
for repo in self._fallback_repositories:
1428
1391
# Writes don't affect fallback repos
1429
1392
repo.lock_read()
1430
1393
self._refresh_data()
1431
return RepositoryWriteLockResult(self.unlock, token)
1433
1396
def lock_read(self):
1434
"""Lock the repository for read operations.
1436
:return: An object with an unlock method which will release the lock
1439
1397
locked = self.is_locked()
1440
1398
self.control_files.lock_read()
1442
self._warn_if_deprecated()
1443
1400
self._note_lock('r')
1444
1401
for repo in self._fallback_repositories:
1445
1402
repo.lock_read()
1446
1403
self._refresh_data()
1447
return LogicalLockResult(self.unlock)
1449
1405
def get_physical_lock_status(self):
1450
1406
return self.control_files.get_physical_lock_status()
1511
1467
# now gather global repository information
1512
1468
# XXX: This is available for many repos regardless of listability.
1513
if self.user_transport.listable():
1469
if self.bzrdir.root_transport.listable():
1514
1470
# XXX: do we want to define __len__() ?
1515
1471
# Maybe the versionedfiles object should provide a different
1516
1472
# method to get the number of keys.
1526
1482
:param using: If True, list only branches using this repository.
1528
1484
if using and not self.is_shared():
1529
return self.bzrdir.list_branches()
1486
return [self.bzrdir.open_branch()]
1487
except errors.NotBranchError:
1530
1489
class Evaluator(object):
1532
1491
def __init__(self):
1541
1500
except errors.NoRepositoryPresent:
1544
return False, ([], repository)
1503
return False, (None, repository)
1545
1504
self.first_call = False
1546
value = (bzrdir.list_branches(), None)
1506
value = (bzrdir.open_branch(), None)
1507
except errors.NotBranchError:
1508
value = (None, None)
1547
1509
return True, value
1550
for branches, repository in bzrdir.BzrDir.find_bzrdirs(
1551
self.user_transport, evaluate=Evaluator()):
1552
if branches is not None:
1553
ret.extend(branches)
1512
for branch, repository in bzrdir.BzrDir.find_bzrdirs(
1513
self.bzrdir.root_transport, evaluate=Evaluator()):
1514
if branch is not None:
1515
branches.append(branch)
1554
1516
if not using and repository is not None:
1555
ret.extend(repository.find_branches())
1517
branches.extend(repository.find_branches())
1558
1520
@needs_read_lock
1559
1521
def search_missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
1668
1630
return missing_keys
1670
1632
def refresh_data(self):
1671
"""Re-read any data needed to synchronise with disk.
1633
"""Re-read any data needed to synchronise with disk.
1673
1635
This method is intended to be called after another repository instance
1674
1636
(such as one used by a smart server) has inserted data into the
1675
repository. On all repositories this will work outside of write groups.
1676
Some repository formats (pack and newer for bzrlib native formats)
1677
support refresh_data inside write groups. If called inside a write
1678
group on a repository that does not support refreshing in a write group
1679
IsInWriteGroupError will be raised.
1637
repository. It may not be called during a write group, but may be
1638
called at any other time.
1640
if self.is_in_write_group():
1641
raise errors.InternalBzrError(
1642
"May not refresh_data while in a write group.")
1681
1643
self._refresh_data()
1683
1645
def resume_write_group(self, tokens):
1937
1899
rev = self._serializer.read_revision_from_string(text)
1938
1900
yield (revid, rev)
1903
def get_revision_xml(self, revision_id):
1904
# TODO: jam 20070210 This shouldn't be necessary since get_revision
1905
# would have already done it.
1906
# TODO: jam 20070210 Just use _serializer.write_revision_to_string()
1907
# TODO: this can't just be replaced by:
1908
# return self._serializer.write_revision_to_string(
1909
# self.get_revision(revision_id))
1910
# as cStringIO preserves the encoding unlike write_revision_to_string
1911
# or some other call down the path.
1912
rev = self.get_revision(revision_id)
1913
rev_tmp = cStringIO.StringIO()
1914
# the current serializer..
1915
self._serializer.write_revision(rev, rev_tmp)
1917
return rev_tmp.getvalue()
1940
1919
def get_deltas_for_revisions(self, revisions, specific_fileids=None):
1941
1920
"""Produce a generator of revision deltas.
2185
2164
selected_keys = set((revid,) for revid in revision_ids)
2186
2165
w = _inv_weave or self.inventories
2187
return self._find_file_ids_from_xml_inventory_lines(
2188
w.iter_lines_added_or_present_in_keys(
2189
selected_keys, pb=None),
2166
pb = ui.ui_factory.nested_progress_bar()
2168
return self._find_file_ids_from_xml_inventory_lines(
2169
w.iter_lines_added_or_present_in_keys(
2170
selected_keys, pb=pb),
2192
2175
def iter_files_bytes(self, desired_files):
2193
2176
"""Iterate through file versions.
2403
2386
"""single-document based inventory iteration."""
2404
2387
inv_xmls = self._iter_inventory_xmls(revision_ids, ordering)
2405
2388
for text, revision_id in inv_xmls:
2406
yield self._deserialise_inventory(revision_id, text)
2389
yield self.deserialise_inventory(revision_id, text)
2408
2391
def _iter_inventory_xmls(self, revision_ids, ordering):
2409
2392
if ordering is None:
2441
2424
next_key = None
2444
def _deserialise_inventory(self, revision_id, xml):
2427
def deserialise_inventory(self, revision_id, xml):
2445
2428
"""Transform the xml into an inventory object.
2447
2430
:param revision_id: The expected revision id of the inventory.
2448
2431
:param xml: A serialised inventory.
2450
2433
result = self._serializer.read_inventory_from_string(xml, revision_id,
2451
entry_cache=self._inventory_entry_cache,
2452
return_from_cache=self._safe_to_return_from_cache)
2434
entry_cache=self._inventory_entry_cache)
2453
2435
if result.revision_id != revision_id:
2454
2436
raise AssertionError('revision id mismatch %s != %s' % (
2455
2437
result.revision_id, revision_id))
2440
def serialise_inventory(self, inv):
2441
return self._serializer.write_inventory_to_string(inv)
2443
def _serialise_inventory_to_lines(self, inv):
2444
return self._serializer.write_inventory_to_lines(inv)
2458
2446
def get_serializer_format(self):
2459
2447
return self._serializer.format_num
2461
2449
@needs_read_lock
2462
def _get_inventory_xml(self, revision_id):
2463
"""Get serialized inventory as a string."""
2450
def get_inventory_xml(self, revision_id):
2451
"""Get inventory XML as a file object."""
2464
2452
texts = self._iter_inventory_xmls([revision_id], 'unordered')
2466
2454
text, revision_id = texts.next()
2468
2456
raise errors.HistoryMissing(self, 'inventory', revision_id)
2460
def get_inventory_sha1(self, revision_id):
2461
"""Return the sha1 hash of the inventory entry
2463
return self.get_revision(revision_id).inventory_sha1
2471
2465
def get_rev_id_for_revno(self, revno, known_pair):
2472
2466
"""Return the revision id of a revno, given a later (revno, revid)
2473
2467
pair in the same history.
2525
2519
next_id = parents[0]
2522
def get_revision_inventory(self, revision_id):
2523
"""Return inventory of a past revision."""
2524
# TODO: Unify this with get_inventory()
2525
# bzr 0.0.6 and later imposes the constraint that the inventory_id
2526
# must be the same as its revision, so this is trivial.
2527
if revision_id is None:
2528
# This does not make sense: if there is no revision,
2529
# then it is the current tree inventory surely ?!
2530
# and thus get_root_id() is something that looks at the last
2531
# commit on the branch, and the get_root_id is an inventory check.
2532
raise NotImplementedError
2533
# return Inventory(self.get_root_id())
2535
return self.get_inventory(revision_id)
2527
2537
def is_shared(self):
2528
2538
"""Return True if this repository is flagged as a shared repository."""
2529
2539
raise NotImplementedError(self.is_shared)
2563
2573
return RevisionTree(self, Inventory(root_id=None),
2564
2574
_mod_revision.NULL_REVISION)
2566
inv = self.get_inventory(revision_id)
2576
inv = self.get_revision_inventory(revision_id)
2567
2577
return RevisionTree(self, inv, revision_id)
2569
2579
def revision_trees(self, revision_ids):
2622
2632
keys = tsort.topo_sort(parent_map)
2623
2633
return [None] + list(keys)
2625
def pack(self, hint=None, clean_obsolete_packs=False):
2635
def pack(self, hint=None):
2626
2636
"""Compress the data within the repository.
2628
2638
This operation only makes sense for some repository types. For other
2638
2648
obtained from the result of commit_write_group(). Out of
2639
2649
date hints are simply ignored, because concurrent operations
2640
2650
can obsolete them rapidly.
2642
:param clean_obsolete_packs: Clean obsolete packs immediately after
2646
2653
def get_transaction(self):
2671
2678
def _make_parents_provider(self):
2675
def get_known_graph_ancestry(self, revision_ids):
2676
"""Return the known graph for a set of revision ids and their ancestors.
2678
st = static_tuple.StaticTuple
2679
revision_keys = [st(r_id).intern() for r_id in revision_ids]
2680
known_graph = self.revisions.get_known_graph_ancestry(revision_keys)
2681
return graph.GraphThunkIdsToKeys(known_graph)
2683
2681
def get_graph(self, other_repository=None):
2684
2682
"""Return the graph walker for this repository format"""
2685
2683
parents_provider = self._make_parents_provider()
2780
2778
result.check(callback_refs)
2783
def _warn_if_deprecated(self, branch=None):
2781
def _warn_if_deprecated(self):
2784
2782
global _deprecation_warning_done
2785
2783
if _deprecation_warning_done:
2789
conf = config.GlobalConfig()
2791
conf = branch.get_config()
2792
if conf.suppress_warning('format_deprecation'):
2794
warning("Format %s for %s is deprecated -"
2795
" please use 'bzr upgrade' to get better performance"
2796
% (self._format, self.bzrdir.transport.base))
2798
_deprecation_warning_done = True
2785
_deprecation_warning_done = True
2786
warning("Format %s for %s is deprecated - please use 'bzr upgrade' to get better performance"
2787
% (self._format, self.bzrdir.transport.base))
2800
2789
def supports_rich_root(self):
2801
2790
return self._format.rich_root_data
3084
3073
pack_compresses = False
3085
3074
# Does the repository inventory storage understand references to trees?
3086
3075
supports_tree_reference = None
3087
# Is the format experimental ?
3088
experimental = False
3091
return "%s()" % self.__class__.__name__
3078
return "<%s>" % self.__class__.__name__
3093
3080
def __eq__(self, other):
3094
3081
# format objects are generally stateless
3109
3096
transport = a_bzrdir.get_repository_transport(None)
3110
format_string = transport.get_bytes("format")
3097
format_string = transport.get("format").read()
3111
3098
return format_registry.get(format_string)
3112
3099
except errors.NoSuchFile:
3113
3100
raise errors.NoRepositoryPresent(a_bzrdir)
3213
3200
raise NotImplementedError(self.open)
3215
def _run_post_repo_init_hooks(self, repository, a_bzrdir, shared):
3216
from bzrlib.bzrdir import BzrDir, RepoInitHookParams
3217
hooks = BzrDir.hooks['post_repo_init']
3220
params = RepoInitHookParams(repository, self, a_bzrdir, shared)
3225
3203
class MetaDirRepositoryFormat(RepositoryFormat):
3226
3204
"""Common base class for the new repositories using the metadir layout."""
3432
3410
:param revision_id: if None all content is copied, if NULL_REVISION no
3433
3411
content is copied.
3412
:param pb: optional progress bar to use for progress reports. If not
3413
provided a default one will be created.
3437
ui.ui_factory.warn_experimental_format_fetch(self)
3438
from bzrlib.fetch import RepoFetcher
3439
# See <https://launchpad.net/bugs/456077> asking for a warning here
3440
if self.source._format.network_name() != self.target._format.network_name():
3441
ui.ui_factory.show_user_warning('cross_format_fetch',
3442
from_format=self.source._format,
3443
to_format=self.target._format)
3444
f = RepoFetcher(to_repository=self.target,
3416
f = _mod_fetch.RepoFetcher(to_repository=self.target,
3445
3417
from_repository=self.source,
3446
3418
last_revision=revision_id,
3447
3419
fetch_spec=fetch_spec,
3448
find_ghosts=find_ghosts)
3420
pb=pb, find_ghosts=find_ghosts)
3450
3422
def _walk_to_common_revisions(self, revision_ids):
3451
3423
"""Walk out from revision_ids in source to revisions target has.
3868
3840
pending_revisions = []
3869
3841
parent_map = self.source.get_parent_map(revision_ids)
3870
3842
self._fetch_parent_invs_for_stacking(parent_map, cache)
3871
self.source._safe_to_return_from_cache = True
3872
3843
for tree in self.source.revision_trees(revision_ids):
3873
3844
# Find an inventory delta for this revision.
3874
3845
# Find text entries that need to be copied, too.
3922
3893
pending_revisions.append(revision)
3923
3894
cache[current_revision_id] = tree
3924
3895
basis_id = current_revision_id
3925
self.source._safe_to_return_from_cache = False
3926
3896
# Copy file texts
3927
3897
from_texts = self.source.texts
3928
3898
to_texts = self.target.texts
4007
3977
basis_id = self._fetch_batch(batch, basis_id, cache,
4008
3978
a_graph=a_graph)
4010
self.source._safe_to_return_from_cache = False
4011
3980
self.target.abort_write_group()
4025
3994
"""See InterRepository.fetch()."""
4026
3995
if fetch_spec is not None:
4027
3996
raise AssertionError("Not implemented yet...")
4028
ui.ui_factory.warn_experimental_format_fetch(self)
4029
3997
if (not self.source.supports_rich_root()
4030
3998
and self.target.supports_rich_root()):
4031
3999
self._converting_to_rich_root = True
4032
4000
self._revision_id_to_root_id = {}
4034
4002
self._converting_to_rich_root = False
4035
# See <https://launchpad.net/bugs/456077> asking for a warning here
4036
if self.source._format.network_name() != self.target._format.network_name():
4037
ui.ui_factory.show_user_warning('cross_format_fetch',
4038
from_format=self.source._format,
4039
to_format=self.target._format)
4040
4003
revision_ids = self.target.search_missing_revision_ids(self.source,
4041
4004
revision_id, find_ghosts=find_ghosts).get_keys()
4042
4005
if not revision_ids:
4111
4074
:param to_convert: The disk object to convert.
4112
4075
:param pb: a progress bar to use for progress information.
4114
pb = ui.ui_factory.nested_progress_bar()
4117
4080
# this is only useful with metadir layouts - separated repo content.
4118
4081
# trigger an assertion if not such
4119
4082
repo._format.get_format_string()
4120
4083
self.repo_dir = repo.bzrdir
4121
pb.update('Moving repository to repository.backup')
4084
self.step('Moving repository to repository.backup')
4122
4085
self.repo_dir.transport.move('repository', 'repository.backup')
4123
4086
backup_transport = self.repo_dir.transport.clone('repository.backup')
4124
4087
repo._format.check_conversion_target(self.target_format)
4125
4088
self.source_repo = repo._format.open(self.repo_dir,
4127
4090
_override_transport=backup_transport)
4128
pb.update('Creating new repository')
4091
self.step('Creating new repository')
4129
4092
converted = self.target_format.initialize(self.repo_dir,
4130
4093
self.source_repo.is_shared())
4131
4094
converted.lock_write()
4133
pb.update('Copying content')
4096
self.step('Copying content')
4134
4097
self.source_repo.copy_content_into(converted)
4136
4099
converted.unlock()
4137
pb.update('Deleting old repository content')
4100
self.step('Deleting old repository content')
4138
4101
self.repo_dir.transport.delete_tree('repository.backup')
4139
4102
ui.ui_factory.note('repository converted')
4104
def step(self, message):
4105
"""Update the pb by a step."""
4107
self.pb.update(message, self.count, self.total)
4143
4110
_unescape_map = {
4636
4603
def _get_convertable_inventory_stream(self, revision_ids,
4637
4604
delta_versus_null=False):
4638
# The two formats are sufficiently different that there is no fast
4639
# path, so we need to send just inventorydeltas, which any
4640
# sufficiently modern client can insert into any repository.
4641
# The StreamSink code expects to be able to
4605
# The source is using CHKs, but the target either doesn't or it has a
4606
# different serializer. The StreamSink code expects to be able to
4642
4607
# convert on the target, so we need to put bytes-on-the-wire that can
4643
4608
# be converted. That means inventory deltas (if the remote is <1.19,
4644
4609
# RemoteStreamSink will fallback to VFS to insert the deltas).