1
# Copyright (C) 2005-2010 Canonical Ltd
1
# Copyright (C) 2005, 2006, 2007, 2008, 2009 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
52
49
from bzrlib.testament import Testament
59
52
from bzrlib.decorators import needs_read_lock, needs_write_lock, only_raises
60
53
from bzrlib.inter import InterObject
61
54
from bzrlib.inventory import (
67
from bzrlib.lock import _RelockDebugMixin, LogicalLockResult
60
from bzrlib.lock import _RelockDebugMixin
61
from bzrlib import registry
68
62
from bzrlib.trace import (
69
63
log_exception_quietly, note, mutter, mutter_callsite, warning)
73
67
_deprecation_warning_done = False
76
class IsInWriteGroupError(errors.InternalBzrError):
78
_fmt = "May not refresh_data of repo %(repo)s while in a write group."
80
def __init__(self, repo):
81
errors.InternalBzrError.__init__(self, repo=repo)
84
70
class CommitBuilder(object):
85
71
"""Provides an interface to build up a commit.
870
856
# versioned roots do not change unless the tree found a change.
873
class RepositoryWriteLockResult(LogicalLockResult):
874
"""The result of write locking a repository.
876
:ivar repository_token: The token obtained from the underlying lock, or
878
:ivar unlock: A callable which will unlock the lock.
881
def __init__(self, unlock, repository_token):
882
LogicalLockResult.__init__(self, unlock)
883
self.repository_token = repository_token
886
return "RepositoryWriteLockResult(%s, %s)" % (self.repository_token,
890
859
######################################################################
894
class Repository(_RelockDebugMixin, bzrdir.ControlComponent):
863
class Repository(_RelockDebugMixin):
895
864
"""Repository holding history for one or more branches.
897
866
The repository holds and retrieves historical information including
1056
1025
:seealso: add_inventory, for the contract.
1058
inv_lines = self._serializer.write_inventory_to_lines(inv)
1027
inv_lines = self._serialise_inventory_to_lines(inv)
1059
1028
return self._inventory_add_lines(revision_id, parents,
1060
1029
inv_lines, check_content=False)
1268
1237
"""Check a single text from this repository."""
1269
1238
if kind == 'inventories':
1270
1239
rev_id = record.key[0]
1271
inv = self._deserialise_inventory(rev_id,
1240
inv = self.deserialise_inventory(rev_id,
1272
1241
record.get_bytes_as('fulltext'))
1273
1242
if last_object is not None:
1274
1243
delta = inv._make_delta(last_object)
1319
1288
:param _format: The format of the repository on disk.
1320
1289
:param a_bzrdir: The BzrDir of the repository.
1291
In the future we will have a single api for all stores for
1292
getting file texts, inventories and revisions, then
1293
this construct will accept instances of those things.
1322
# In the future we will have a single api for all stores for
1323
# getting file texts, inventories and revisions, then
1324
# this construct will accept instances of those things.
1325
1295
super(Repository, self).__init__()
1326
1296
self._format = _format
1327
1297
# the following are part of the public API for Repository:
1333
1303
self._reconcile_does_inventory_gc = True
1334
1304
self._reconcile_fixes_text_parents = False
1335
1305
self._reconcile_backsup_inventory = True
1306
# not right yet - should be more semantically clear ?
1308
# TODO: make sure to construct the right store classes, etc, depending
1309
# on whether escaping is required.
1310
self._warn_if_deprecated()
1336
1311
self._write_group = None
1337
1312
# Additional places to query for data.
1338
1313
self._fallback_repositories = []
1339
1314
# An InventoryEntry cache, used during deserialization
1340
1315
self._inventory_entry_cache = fifo_cache.FIFOCache(10*1024)
1341
# Is it safe to return inventory entries directly from the entry cache,
1342
# rather than copying them?
1343
self._safe_to_return_from_cache = False
1346
def user_transport(self):
1347
return self.bzrdir.user_transport
1350
def control_transport(self):
1351
return self._transport
1353
1317
def __repr__(self):
1354
1318
if self._fallback_repositories:
1403
1367
data during reads, and allows a 'write_group' to be obtained. Write
1404
1368
groups must be used for actual data insertion.
1406
A token should be passed in if you know that you have locked the object
1407
some other way, and need to synchronise this object's state with that
1410
XXX: this docstring is duplicated in many places, e.g. lockable_files.py
1412
1370
:param token: if this is already locked, then lock_write will fail
1413
1371
unless the token matches the existing lock.
1414
1372
:returns: a token if this instance supports tokens, otherwise None.
1417
1375
:raises MismatchedToken: if the specified token doesn't match the token
1418
1376
of the existing lock.
1419
1377
:seealso: start_write_group.
1420
:return: A RepositoryWriteLockResult.
1379
A token should be passed in if you know that you have locked the object
1380
some other way, and need to synchronise this object's state with that
1383
XXX: this docstring is duplicated in many places, e.g. lockable_files.py
1422
1385
locked = self.is_locked()
1423
token = self.control_files.lock_write(token=token)
1386
result = self.control_files.lock_write(token=token)
1425
self._warn_if_deprecated()
1426
1388
self._note_lock('w')
1427
1389
for repo in self._fallback_repositories:
1428
1390
# Writes don't affect fallback repos
1429
1391
repo.lock_read()
1430
1392
self._refresh_data()
1431
return RepositoryWriteLockResult(self.unlock, token)
1433
1395
def lock_read(self):
1434
"""Lock the repository for read operations.
1436
:return: An object with an unlock method which will release the lock
1439
1396
locked = self.is_locked()
1440
1397
self.control_files.lock_read()
1442
self._warn_if_deprecated()
1443
1399
self._note_lock('r')
1444
1400
for repo in self._fallback_repositories:
1445
1401
repo.lock_read()
1446
1402
self._refresh_data()
1447
return LogicalLockResult(self.unlock)
1449
1404
def get_physical_lock_status(self):
1450
1405
return self.control_files.get_physical_lock_status()
1511
1466
# now gather global repository information
1512
1467
# XXX: This is available for many repos regardless of listability.
1513
if self.user_transport.listable():
1468
if self.bzrdir.root_transport.listable():
1514
1469
# XXX: do we want to define __len__() ?
1515
1470
# Maybe the versionedfiles object should provide a different
1516
1471
# method to get the number of keys.
1526
1481
:param using: If True, list only branches using this repository.
1528
1483
if using and not self.is_shared():
1529
return self.bzrdir.list_branches()
1485
return [self.bzrdir.open_branch()]
1486
except errors.NotBranchError:
1530
1488
class Evaluator(object):
1532
1490
def __init__(self):
1541
1499
except errors.NoRepositoryPresent:
1544
return False, ([], repository)
1502
return False, (None, repository)
1545
1503
self.first_call = False
1546
value = (bzrdir.list_branches(), None)
1505
value = (bzrdir.open_branch(), None)
1506
except errors.NotBranchError:
1507
value = (None, None)
1547
1508
return True, value
1550
for branches, repository in bzrdir.BzrDir.find_bzrdirs(
1551
self.user_transport, evaluate=Evaluator()):
1552
if branches is not None:
1553
ret.extend(branches)
1511
for branch, repository in bzrdir.BzrDir.find_bzrdirs(
1512
self.bzrdir.root_transport, evaluate=Evaluator()):
1513
if branch is not None:
1514
branches.append(branch)
1554
1515
if not using and repository is not None:
1555
ret.extend(repository.find_branches())
1516
branches.extend(repository.find_branches())
1558
1519
@needs_read_lock
1559
1520
def search_missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
1668
1629
return missing_keys
1670
1631
def refresh_data(self):
1671
"""Re-read any data needed to synchronise with disk.
1632
"""Re-read any data needed to synchronise with disk.
1673
1634
This method is intended to be called after another repository instance
1674
1635
(such as one used by a smart server) has inserted data into the
1675
repository. On all repositories this will work outside of write groups.
1676
Some repository formats (pack and newer for bzrlib native formats)
1677
support refresh_data inside write groups. If called inside a write
1678
group on a repository that does not support refreshing in a write group
1679
IsInWriteGroupError will be raised.
1636
repository. It may not be called during a write group, but may be
1637
called at any other time.
1639
if self.is_in_write_group():
1640
raise errors.InternalBzrError(
1641
"May not refresh_data while in a write group.")
1681
1642
self._refresh_data()
1683
1644
def resume_write_group(self, tokens):
1937
1898
rev = self._serializer.read_revision_from_string(text)
1938
1899
yield (revid, rev)
1902
def get_revision_xml(self, revision_id):
1903
# TODO: jam 20070210 This shouldn't be necessary since get_revision
1904
# would have already done it.
1905
# TODO: jam 20070210 Just use _serializer.write_revision_to_string()
1906
# TODO: this can't just be replaced by:
1907
# return self._serializer.write_revision_to_string(
1908
# self.get_revision(revision_id))
1909
# as cStringIO preserves the encoding unlike write_revision_to_string
1910
# or some other call down the path.
1911
rev = self.get_revision(revision_id)
1912
rev_tmp = cStringIO.StringIO()
1913
# the current serializer..
1914
self._serializer.write_revision(rev, rev_tmp)
1916
return rev_tmp.getvalue()
1940
1918
def get_deltas_for_revisions(self, revisions, specific_fileids=None):
1941
1919
"""Produce a generator of revision deltas.
2185
2163
selected_keys = set((revid,) for revid in revision_ids)
2186
2164
w = _inv_weave or self.inventories
2187
return self._find_file_ids_from_xml_inventory_lines(
2188
w.iter_lines_added_or_present_in_keys(
2189
selected_keys, pb=None),
2165
pb = ui.ui_factory.nested_progress_bar()
2167
return self._find_file_ids_from_xml_inventory_lines(
2168
w.iter_lines_added_or_present_in_keys(
2169
selected_keys, pb=pb),
2192
2174
def iter_files_bytes(self, desired_files):
2193
2175
"""Iterate through file versions.
2403
2385
"""single-document based inventory iteration."""
2404
2386
inv_xmls = self._iter_inventory_xmls(revision_ids, ordering)
2405
2387
for text, revision_id in inv_xmls:
2406
yield self._deserialise_inventory(revision_id, text)
2388
yield self.deserialise_inventory(revision_id, text)
2408
2390
def _iter_inventory_xmls(self, revision_ids, ordering):
2409
2391
if ordering is None:
2441
2423
next_key = None
2444
def _deserialise_inventory(self, revision_id, xml):
2426
def deserialise_inventory(self, revision_id, xml):
2445
2427
"""Transform the xml into an inventory object.
2447
2429
:param revision_id: The expected revision id of the inventory.
2448
2430
:param xml: A serialised inventory.
2450
2432
result = self._serializer.read_inventory_from_string(xml, revision_id,
2451
entry_cache=self._inventory_entry_cache,
2452
return_from_cache=self._safe_to_return_from_cache)
2433
entry_cache=self._inventory_entry_cache)
2453
2434
if result.revision_id != revision_id:
2454
2435
raise AssertionError('revision id mismatch %s != %s' % (
2455
2436
result.revision_id, revision_id))
2439
def serialise_inventory(self, inv):
2440
return self._serializer.write_inventory_to_string(inv)
2442
def _serialise_inventory_to_lines(self, inv):
2443
return self._serializer.write_inventory_to_lines(inv)
2458
2445
def get_serializer_format(self):
2459
2446
return self._serializer.format_num
2461
2448
@needs_read_lock
2462
def _get_inventory_xml(self, revision_id):
2463
"""Get serialized inventory as a string."""
2449
def get_inventory_xml(self, revision_id):
2450
"""Get inventory XML as a file object."""
2464
2451
texts = self._iter_inventory_xmls([revision_id], 'unordered')
2466
2453
text, revision_id = texts.next()
2468
2455
raise errors.HistoryMissing(self, 'inventory', revision_id)
2459
def get_inventory_sha1(self, revision_id):
2460
"""Return the sha1 hash of the inventory entry
2462
return self.get_revision(revision_id).inventory_sha1
2471
2464
def get_rev_id_for_revno(self, revno, known_pair):
2472
2465
"""Return the revision id of a revno, given a later (revno, revid)
2473
2466
pair in the same history.
2525
2518
next_id = parents[0]
2521
def get_revision_inventory(self, revision_id):
2522
"""Return inventory of a past revision."""
2523
# TODO: Unify this with get_inventory()
2524
# bzr 0.0.6 and later imposes the constraint that the inventory_id
2525
# must be the same as its revision, so this is trivial.
2526
if revision_id is None:
2527
# This does not make sense: if there is no revision,
2528
# then it is the current tree inventory surely ?!
2529
# and thus get_root_id() is something that looks at the last
2530
# commit on the branch, and the get_root_id is an inventory check.
2531
raise NotImplementedError
2532
# return Inventory(self.get_root_id())
2534
return self.get_inventory(revision_id)
2527
2536
def is_shared(self):
2528
2537
"""Return True if this repository is flagged as a shared repository."""
2529
2538
raise NotImplementedError(self.is_shared)
2563
2572
return RevisionTree(self, Inventory(root_id=None),
2564
2573
_mod_revision.NULL_REVISION)
2566
inv = self.get_inventory(revision_id)
2575
inv = self.get_revision_inventory(revision_id)
2567
2576
return RevisionTree(self, inv, revision_id)
2569
2578
def revision_trees(self, revision_ids):
2622
2631
keys = tsort.topo_sort(parent_map)
2623
2632
return [None] + list(keys)
2625
def pack(self, hint=None, clean_obsolete_packs=False):
2634
def pack(self, hint=None):
2626
2635
"""Compress the data within the repository.
2628
2637
This operation only makes sense for some repository types. For other
2638
2647
obtained from the result of commit_write_group(). Out of
2639
2648
date hints are simply ignored, because concurrent operations
2640
2649
can obsolete them rapidly.
2642
:param clean_obsolete_packs: Clean obsolete packs immediately after
2646
2652
def get_transaction(self):
2662
2668
for ((revision_id,), parent_keys) in \
2663
2669
self.revisions.get_parent_map(query_keys).iteritems():
2664
2670
if parent_keys:
2665
result[revision_id] = tuple([parent_revid
2666
for (parent_revid,) in parent_keys])
2671
result[revision_id] = tuple(parent_revid
2672
for (parent_revid,) in parent_keys)
2668
2674
result[revision_id] = (_mod_revision.NULL_REVISION,)
2671
2677
def _make_parents_provider(self):
2675
def get_known_graph_ancestry(self, revision_ids):
2676
"""Return the known graph for a set of revision ids and their ancestors.
2678
st = static_tuple.StaticTuple
2679
revision_keys = [st(r_id).intern() for r_id in revision_ids]
2680
known_graph = self.revisions.get_known_graph_ancestry(revision_keys)
2681
return graph.GraphThunkIdsToKeys(known_graph)
2683
2680
def get_graph(self, other_repository=None):
2684
2681
"""Return the graph walker for this repository format"""
2685
2682
parents_provider = self._make_parents_provider()
2780
2777
result.check(callback_refs)
2783
def _warn_if_deprecated(self, branch=None):
2780
def _warn_if_deprecated(self):
2784
2781
global _deprecation_warning_done
2785
2782
if _deprecation_warning_done:
2789
conf = config.GlobalConfig()
2791
conf = branch.get_config()
2792
if conf.suppress_warning('format_deprecation'):
2794
warning("Format %s for %s is deprecated -"
2795
" please use 'bzr upgrade' to get better performance"
2796
% (self._format, self.bzrdir.transport.base))
2798
_deprecation_warning_done = True
2784
_deprecation_warning_done = True
2785
warning("Format %s for %s is deprecated - please use 'bzr upgrade' to get better performance"
2786
% (self._format, self.bzrdir.transport.base))
2800
2788
def supports_rich_root(self):
2801
2789
return self._format.rich_root_data
3084
3072
pack_compresses = False
3085
3073
# Does the repository inventory storage understand references to trees?
3086
3074
supports_tree_reference = None
3087
# Is the format experimental ?
3088
experimental = False
3091
return "%s()" % self.__class__.__name__
3077
return "<%s>" % self.__class__.__name__
3093
3079
def __eq__(self, other):
3094
3080
# format objects are generally stateless
3109
3095
transport = a_bzrdir.get_repository_transport(None)
3110
format_string = transport.get_bytes("format")
3096
format_string = transport.get("format").read()
3111
3097
return format_registry.get(format_string)
3112
3098
except errors.NoSuchFile:
3113
3099
raise errors.NoRepositoryPresent(a_bzrdir)
3213
3199
raise NotImplementedError(self.open)
3215
def _run_post_repo_init_hooks(self, repository, a_bzrdir, shared):
3216
from bzrlib.bzrdir import BzrDir, RepoInitHookParams
3217
hooks = BzrDir.hooks['post_repo_init']
3220
params = RepoInitHookParams(repository, self, a_bzrdir, shared)
3225
3202
class MetaDirRepositoryFormat(RepositoryFormat):
3226
3203
"""Common base class for the new repositories using the metadir layout."""
3432
3409
:param revision_id: if None all content is copied, if NULL_REVISION no
3433
3410
content is copied.
3411
:param pb: optional progress bar to use for progress reports. If not
3412
provided a default one will be created.
3437
ui.ui_factory.warn_experimental_format_fetch(self)
3438
3415
from bzrlib.fetch import RepoFetcher
3439
# See <https://launchpad.net/bugs/456077> asking for a warning here
3440
if self.source._format.network_name() != self.target._format.network_name():
3441
ui.ui_factory.show_user_warning('cross_format_fetch',
3442
from_format=self.source._format,
3443
to_format=self.target._format)
3444
3416
f = RepoFetcher(to_repository=self.target,
3445
3417
from_repository=self.source,
3446
3418
last_revision=revision_id,
3447
3419
fetch_spec=fetch_spec,
3448
find_ghosts=find_ghosts)
3420
pb=pb, find_ghosts=find_ghosts)
3450
3422
def _walk_to_common_revisions(self, revision_ids):
3451
3423
"""Walk out from revision_ids in source to revisions target has.
3847
3819
basis_id, delta, current_revision_id, parents_parents)
3848
3820
cache[current_revision_id] = parent_tree
3850
def _fetch_batch(self, revision_ids, basis_id, cache, a_graph=None):
3822
def _fetch_batch(self, revision_ids, basis_id, cache):
3851
3823
"""Fetch across a few revisions.
3853
3825
:param revision_ids: The revisions to copy
3854
3826
:param basis_id: The revision_id of a tree that must be in cache, used
3855
3827
as a basis for delta when no other base is available
3856
3828
:param cache: A cache of RevisionTrees that we can use.
3857
:param a_graph: A Graph object to determine the heads() of the
3858
rich-root data stream.
3859
3829
:return: The revision_id of the last converted tree. The RevisionTree
3860
3830
for it will be in cache
3868
3838
pending_revisions = []
3869
3839
parent_map = self.source.get_parent_map(revision_ids)
3870
3840
self._fetch_parent_invs_for_stacking(parent_map, cache)
3871
self.source._safe_to_return_from_cache = True
3872
3841
for tree in self.source.revision_trees(revision_ids):
3873
3842
# Find an inventory delta for this revision.
3874
3843
# Find text entries that need to be copied, too.
3922
3891
pending_revisions.append(revision)
3923
3892
cache[current_revision_id] = tree
3924
3893
basis_id = current_revision_id
3925
self.source._safe_to_return_from_cache = False
3926
3894
# Copy file texts
3927
3895
from_texts = self.source.texts
3928
3896
to_texts = self.target.texts
3929
3897
if root_keys_to_create:
3930
root_stream = _mod_fetch._new_root_data_stream(
3898
from bzrlib.fetch import _new_root_data_stream
3899
root_stream = _new_root_data_stream(
3931
3900
root_keys_to_create, self._revision_id_to_root_id, parent_map,
3932
self.source, graph=a_graph)
3933
3902
to_texts.insert_record_stream(root_stream)
3934
3903
to_texts.insert_record_stream(from_texts.get_record_stream(
3935
3904
text_keys, self.target._format._fetch_order,
3992
3961
cache[basis_id] = basis_tree
3993
3962
del basis_tree # We don't want to hang on to it here
3995
if self._converting_to_rich_root and len(revision_ids) > 100:
3996
a_graph = _mod_fetch._get_rich_root_heads_graph(self.source,
4001
3964
for offset in range(0, len(revision_ids), batch_size):
4002
3965
self.target.start_write_group()
4004
3967
pb.update('Transferring revisions', offset,
4005
3968
len(revision_ids))
4006
3969
batch = revision_ids[offset:offset+batch_size]
4007
basis_id = self._fetch_batch(batch, basis_id, cache,
3970
basis_id = self._fetch_batch(batch, basis_id, cache)
4010
self.source._safe_to_return_from_cache = False
4011
3972
self.target.abort_write_group()
4025
3986
"""See InterRepository.fetch()."""
4026
3987
if fetch_spec is not None:
4027
3988
raise AssertionError("Not implemented yet...")
4028
ui.ui_factory.warn_experimental_format_fetch(self)
4029
3989
if (not self.source.supports_rich_root()
4030
3990
and self.target.supports_rich_root()):
4031
3991
self._converting_to_rich_root = True
4032
3992
self._revision_id_to_root_id = {}
4034
3994
self._converting_to_rich_root = False
4035
# See <https://launchpad.net/bugs/456077> asking for a warning here
4036
if self.source._format.network_name() != self.target._format.network_name():
4037
ui.ui_factory.show_user_warning('cross_format_fetch',
4038
from_format=self.source._format,
4039
to_format=self.target._format)
4040
3995
revision_ids = self.target.search_missing_revision_ids(self.source,
4041
3996
revision_id, find_ghosts=find_ghosts).get_keys()
4042
3997
if not revision_ids:
4111
4066
:param to_convert: The disk object to convert.
4112
4067
:param pb: a progress bar to use for progress information.
4114
pb = ui.ui_factory.nested_progress_bar()
4117
4072
# this is only useful with metadir layouts - separated repo content.
4118
4073
# trigger an assertion if not such
4119
4074
repo._format.get_format_string()
4120
4075
self.repo_dir = repo.bzrdir
4121
pb.update('Moving repository to repository.backup')
4076
self.step('Moving repository to repository.backup')
4122
4077
self.repo_dir.transport.move('repository', 'repository.backup')
4123
4078
backup_transport = self.repo_dir.transport.clone('repository.backup')
4124
4079
repo._format.check_conversion_target(self.target_format)
4125
4080
self.source_repo = repo._format.open(self.repo_dir,
4127
4082
_override_transport=backup_transport)
4128
pb.update('Creating new repository')
4083
self.step('Creating new repository')
4129
4084
converted = self.target_format.initialize(self.repo_dir,
4130
4085
self.source_repo.is_shared())
4131
4086
converted.lock_write()
4133
pb.update('Copying content')
4088
self.step('Copying content')
4134
4089
self.source_repo.copy_content_into(converted)
4136
4091
converted.unlock()
4137
pb.update('Deleting old repository content')
4092
self.step('Deleting old repository content')
4138
4093
self.repo_dir.transport.delete_tree('repository.backup')
4139
4094
ui.ui_factory.note('repository converted')
4096
def step(self, message):
4097
"""Update the pb by a step."""
4099
self.pb.update(message, self.count, self.total)
4143
4102
_unescape_map = {
4487
4446
fetching the inventory weave.
4489
4448
if self._rich_root_upgrade():
4490
return _mod_fetch.Inter1and2Helper(
4450
return bzrlib.fetch.Inter1and2Helper(
4491
4451
self.from_repository).generate_root_texts(revs)
4636
4596
def _get_convertable_inventory_stream(self, revision_ids,
4637
4597
delta_versus_null=False):
4638
# The two formats are sufficiently different that there is no fast
4639
# path, so we need to send just inventorydeltas, which any
4640
# sufficiently modern client can insert into any repository.
4641
# The StreamSink code expects to be able to
4598
# The source is using CHKs, but the target either doesn't or it has a
4599
# different serializer. The StreamSink code expects to be able to
4642
4600
# convert on the target, so we need to put bytes-on-the-wire that can
4643
4601
# be converted. That means inventory deltas (if the remote is <1.19,
4644
4602
# RemoteStreamSink will fallback to VFS to insert the deltas).