1
# Copyright (C) 2005-2010 Canonical Ltd
1
# Copyright (C) 2005, 2006, 2007, 2008, 2009 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
52
49
from bzrlib.testament import Testament
59
52
from bzrlib.decorators import needs_read_lock, needs_write_lock, only_raises
60
53
from bzrlib.inter import InterObject
61
54
from bzrlib.inventory import (
67
from bzrlib.lock import _RelockDebugMixin, LogicalLockResult
60
from bzrlib.lock import _RelockDebugMixin
61
from bzrlib import registry
68
62
from bzrlib.trace import (
69
63
log_exception_quietly, note, mutter, mutter_callsite, warning)
73
67
_deprecation_warning_done = False
76
# Internal error type whose message reports that refresh_data may not be used
# on the given repository while a write group is open.
class IsInWriteGroupError(errors.InternalBzrError):
78
# Message template; %(repo)s is presumably filled from the ``repo`` keyword
# passed to the base-class constructor below -- confirm in errors.py.
_fmt = "May not refresh_data of repo %(repo)s while in a write group."
80
# :param repo: the repository the refresh was attempted on; it is forwarded
#     unchanged to InternalBzrError.__init__ as the ``repo`` keyword.
def __init__(self, repo):
81
errors.InternalBzrError.__init__(self, repo=repo)
84
70
class CommitBuilder(object):
85
71
"""Provides an interface to build up a commit.
220
206
# an inventory delta was accumulated without creating a new
222
208
basis_id = self.basis_delta_revision
223
# We ignore the 'inventory' returned by add_inventory_by_delta
224
# because self.new_inventory is used to hint to the rest of the
225
# system what code path was taken
226
self.inv_sha1, _ = self.repository.add_inventory_by_delta(
209
self.inv_sha1 = self.repository.add_inventory_by_delta(
227
210
basis_id, self._basis_delta, self._new_revision_id,
870
853
# versioned roots do not change unless the tree found a change.
873
class RepositoryWriteLockResult(LogicalLockResult):
874
"""The result of write locking a repository.
876
:ivar repository_token: The token obtained from the underlying lock, or
878
:ivar unlock: A callable which will unlock the lock.
881
def __init__(self, unlock, repository_token):
882
LogicalLockResult.__init__(self, unlock)
883
self.repository_token = repository_token
886
return "RepositoryWriteLockResult(%s, %s)" % (self.repository_token,
890
856
######################################################################
894
class Repository(_RelockDebugMixin, bzrdir.ControlComponent):
860
class Repository(_RelockDebugMixin):
895
861
"""Repository holding history for one or more branches.
897
863
The repository holds and retrieves historical information including
1056
1022
:seealso: add_inventory, for the contract.
1058
inv_lines = self._serializer.write_inventory_to_lines(inv)
1024
inv_lines = self._serialise_inventory_to_lines(inv)
1059
1025
return self._inventory_add_lines(revision_id, parents,
1060
1026
inv_lines, check_content=False)
1268
1234
"""Check a single text from this repository."""
1269
1235
if kind == 'inventories':
1270
1236
rev_id = record.key[0]
1271
inv = self._deserialise_inventory(rev_id,
1237
inv = self.deserialise_inventory(rev_id,
1272
1238
record.get_bytes_as('fulltext'))
1273
1239
if last_object is not None:
1274
1240
delta = inv._make_delta(last_object)
1319
1285
:param _format: The format of the repository on disk.
1320
1286
:param a_bzrdir: The BzrDir of the repository.
1288
In the future we will have a single api for all stores for
1289
getting file texts, inventories and revisions, then
1290
this construct will accept instances of those things.
1322
# In the future we will have a single api for all stores for
1323
# getting file texts, inventories and revisions, then
1324
# this construct will accept instances of those things.
1325
1292
super(Repository, self).__init__()
1326
1293
self._format = _format
1327
1294
# the following are part of the public API for Repository:
1333
1300
self._reconcile_does_inventory_gc = True
1334
1301
self._reconcile_fixes_text_parents = False
1335
1302
self._reconcile_backsup_inventory = True
1303
# not right yet - should be more semantically clear ?
1305
# TODO: make sure to construct the right store classes, etc, depending
1306
# on whether escaping is required.
1307
self._warn_if_deprecated()
1336
1308
self._write_group = None
1337
1309
# Additional places to query for data.
1338
1310
self._fallback_repositories = []
1339
1311
# An InventoryEntry cache, used during deserialization
1340
1312
self._inventory_entry_cache = fifo_cache.FIFOCache(10*1024)
1341
# Is it safe to return inventory entries directly from the entry cache,
1342
# rather than copying them?
1343
self._safe_to_return_from_cache = False
1346
# Accessor: hands back the user_transport attribute of self.bzrdir.
# NOTE(review): any decorator on this def is not visible in this view.
def user_transport(self):
1347
return self.bzrdir.user_transport
1350
# Accessor: hands back this object's own _transport attribute.
# NOTE(review): any decorator on this def is not visible in this view.
def control_transport(self):
1351
return self._transport
1353
1314
def __repr__(self):
1354
1315
if self._fallback_repositories:
1403
1364
data during reads, and allows a 'write_group' to be obtained. Write
1404
1365
groups must be used for actual data insertion.
1406
A token should be passed in if you know that you have locked the object
1407
some other way, and need to synchronise this object's state with that
1410
XXX: this docstring is duplicated in many places, e.g. lockable_files.py
1412
1367
:param token: if this is already locked, then lock_write will fail
1413
1368
unless the token matches the existing lock.
1414
1369
:returns: a token if this instance supports tokens, otherwise None.
1417
1372
:raises MismatchedToken: if the specified token doesn't match the token
1418
1373
of the existing lock.
1419
1374
:seealso: start_write_group.
1420
:return: A RepositoryWriteLockResult.
1376
A token should be passed in if you know that you have locked the object
1377
some other way, and need to synchronise this object's state with that
1380
XXX: this docstring is duplicated in many places, e.g. lockable_files.py
1422
1382
locked = self.is_locked()
1423
token = self.control_files.lock_write(token=token)
1383
result = self.control_files.lock_write(token=token)
1425
self._warn_if_deprecated()
1426
1385
self._note_lock('w')
1427
1386
for repo in self._fallback_repositories:
1428
1387
# Writes don't affect fallback repos
1429
1388
repo.lock_read()
1430
1389
self._refresh_data()
1431
return RepositoryWriteLockResult(self.unlock, token)
1433
1392
def lock_read(self):
1434
"""Lock the repository for read operations.
1436
:return: An object with an unlock method which will release the lock
1439
1393
locked = self.is_locked()
1440
1394
self.control_files.lock_read()
1442
self._warn_if_deprecated()
1443
1396
self._note_lock('r')
1444
1397
for repo in self._fallback_repositories:
1445
1398
repo.lock_read()
1446
1399
self._refresh_data()
1447
return LogicalLockResult(self.unlock)
1449
1401
def get_physical_lock_status(self):
1450
1402
return self.control_files.get_physical_lock_status()
1511
1463
# now gather global repository information
1512
1464
# XXX: This is available for many repos regardless of listability.
1513
if self.user_transport.listable():
1465
if self.bzrdir.root_transport.listable():
1514
1466
# XXX: do we want to define __len__() ?
1515
1467
# Maybe the versionedfiles object should provide a different
1516
1468
# method to get the number of keys.
1526
1478
:param using: If True, list only branches using this repository.
1528
1480
if using and not self.is_shared():
1529
return self.bzrdir.list_branches()
1482
return [self.bzrdir.open_branch()]
1483
except errors.NotBranchError:
1530
1485
class Evaluator(object):
1532
1487
def __init__(self):
1541
1496
except errors.NoRepositoryPresent:
1544
return False, ([], repository)
1499
return False, (None, repository)
1545
1500
self.first_call = False
1546
value = (bzrdir.list_branches(), None)
1502
value = (bzrdir.open_branch(), None)
1503
except errors.NotBranchError:
1504
value = (None, None)
1547
1505
return True, value
1550
for branches, repository in bzrdir.BzrDir.find_bzrdirs(
1551
self.user_transport, evaluate=Evaluator()):
1552
if branches is not None:
1553
ret.extend(branches)
1508
for branch, repository in bzrdir.BzrDir.find_bzrdirs(
1509
self.bzrdir.root_transport, evaluate=Evaluator()):
1510
if branch is not None:
1511
branches.append(branch)
1554
1512
if not using and repository is not None:
1555
ret.extend(repository.find_branches())
1513
branches.extend(repository.find_branches())
1558
1516
@needs_read_lock
1559
1517
def search_missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
1668
1626
return missing_keys
1670
1628
def refresh_data(self):
1671
"""Re-read any data needed to synchronise with disk.
1629
"""Re-read any data needed to synchronise with disk.
1673
1631
This method is intended to be called after another repository instance
1674
1632
(such as one used by a smart server) has inserted data into the
1675
repository. On all repositories this will work outside of write groups.
1676
Some repository formats (pack and newer for bzrlib native formats)
1677
support refresh_data inside write groups. If called inside a write
1678
group on a repository that does not support refreshing in a write group
1679
IsInWriteGroupError will be raised.
1633
repository. It may not be called during a write group, but may be
1634
called at any other time.
1636
if self.is_in_write_group():
1637
raise errors.InternalBzrError(
1638
"May not refresh_data while in a write group.")
1681
1639
self._refresh_data()
1683
1641
def resume_write_group(self, tokens):
1937
1895
rev = self._serializer.read_revision_from_string(text)
1938
1896
yield (revid, rev)
1899
# Return the serialised form of the revision identified by revision_id.
# The revision is loaded with get_revision(), written into an in-memory
# cStringIO buffer by self._serializer.write_revision(), and the buffer's
# contents are returned.
# NOTE(review): this view of the file may be missing lines from the body.
def get_revision_xml(self, revision_id):
1900
# TODO: jam 20070210 This shouldn't be necessary since get_revision
1901
#       would have already done it.
1902
# TODO: jam 20070210 Just use _serializer.write_revision_to_string()
1903
# TODO: this can't just be replaced by:
1904
# return self._serializer.write_revision_to_string(
1905
#     self.get_revision(revision_id))
1906
# as cStringIO preserves the encoding unlike write_revision_to_string
1907
# or some other call down the path.
1908
rev = self.get_revision(revision_id)
1909
# In-memory buffer that receives the serialiser's output.
rev_tmp = cStringIO.StringIO()
1910
# the current serializer..
1911
self._serializer.write_revision(rev, rev_tmp)
1913
return rev_tmp.getvalue()
1940
1915
def get_deltas_for_revisions(self, revisions, specific_fileids=None):
1941
1916
"""Produce a generator of revision deltas.
2185
2160
selected_keys = set((revid,) for revid in revision_ids)
2186
2161
w = _inv_weave or self.inventories
2187
return self._find_file_ids_from_xml_inventory_lines(
2188
w.iter_lines_added_or_present_in_keys(
2189
selected_keys, pb=None),
2162
pb = ui.ui_factory.nested_progress_bar()
2164
return self._find_file_ids_from_xml_inventory_lines(
2165
w.iter_lines_added_or_present_in_keys(
2166
selected_keys, pb=pb),
2192
2171
def iter_files_bytes(self, desired_files):
2193
2172
"""Iterate through file versions.
2403
2382
"""single-document based inventory iteration."""
2404
2383
inv_xmls = self._iter_inventory_xmls(revision_ids, ordering)
2405
2384
for text, revision_id in inv_xmls:
2406
yield self._deserialise_inventory(revision_id, text)
2385
yield self.deserialise_inventory(revision_id, text)
2408
2387
def _iter_inventory_xmls(self, revision_ids, ordering):
2409
2388
if ordering is None:
2441
2420
next_key = None
2444
def _deserialise_inventory(self, revision_id, xml):
2423
def deserialise_inventory(self, revision_id, xml):
2445
2424
"""Transform the xml into an inventory object.
2447
2426
:param revision_id: The expected revision id of the inventory.
2448
2427
:param xml: A serialised inventory.
2450
2429
result = self._serializer.read_inventory_from_string(xml, revision_id,
2451
entry_cache=self._inventory_entry_cache,
2452
return_from_cache=self._safe_to_return_from_cache)
2430
entry_cache=self._inventory_entry_cache)
2453
2431
if result.revision_id != revision_id:
2454
2432
raise AssertionError('revision id mismatch %s != %s' % (
2455
2433
result.revision_id, revision_id))
2436
# Serialise the inventory ``inv`` with self._serializer and return whatever
# write_inventory_to_string() produces for it.
def serialise_inventory(self, inv):
2437
return self._serializer.write_inventory_to_string(inv)
2439
# Line-oriented counterpart of serialise_inventory: returns whatever
# self._serializer.write_inventory_to_lines() produces for ``inv``.
def _serialise_inventory_to_lines(self, inv):
2440
return self._serializer.write_inventory_to_lines(inv)
2458
2442
def get_serializer_format(self):
2459
2443
return self._serializer.format_num
2461
2445
@needs_read_lock
2462
def _get_inventory_xml(self, revision_id):
2463
"""Get serialized inventory as a string."""
2446
def get_inventory_xml(self, revision_id):
2447
"""Get inventory XML as a file object."""
2464
2448
texts = self._iter_inventory_xmls([revision_id], 'unordered')
2466
2450
text, revision_id = texts.next()
2468
2452
raise errors.HistoryMissing(self, 'inventory', revision_id)
2456
def get_inventory_sha1(self, revision_id):
2457
"""Return the sha1 hash of the inventory entry
2459
return self.get_revision(revision_id).inventory_sha1
2471
2461
def get_rev_id_for_revno(self, revno, known_pair):
2472
2462
"""Return the revision id of a revno, given a later (revno, revid)
2473
2463
pair in the same history.
2525
2515
next_id = parents[0]
2518
# Fetch the inventory recorded for revision_id by delegating to
# get_inventory(). A revision_id of None is not supported and raises
# NotImplementedError.
# NOTE(review): indentation and possibly an ``else:`` line are lost in this
# view; the raise appears to belong to the ``revision_id is None`` branch.
def get_revision_inventory(self, revision_id):
2519
"""Return inventory of a past revision."""
2520
# TODO: Unify this with get_inventory()
2521
# bzr 0.0.6 and later imposes the constraint that the inventory_id
2522
# must be the same as its revision, so this is trivial.
2523
if revision_id is None:
2524
# This does not make sense: if there is no revision,
2525
# then it is the current tree inventory surely ?!
2526
# and thus get_root_id() is something that looks at the last
2527
# commit on the branch, and the get_root_id is an inventory check.
2528
raise NotImplementedError
2529
# return Inventory(self.get_root_id())
2531
# Any real revision id: the inventory id equals the revision id (see the
# comment above), so a plain get_inventory() lookup is enough.
return self.get_inventory(revision_id)
2527
2533
def is_shared(self):
2528
2534
"""Return True if this repository is flagged as a shared repository."""
2529
2535
raise NotImplementedError(self.is_shared)
2563
2569
return RevisionTree(self, Inventory(root_id=None),
2564
2570
_mod_revision.NULL_REVISION)
2566
inv = self.get_inventory(revision_id)
2572
inv = self.get_revision_inventory(revision_id)
2567
2573
return RevisionTree(self, inv, revision_id)
2569
2575
def revision_trees(self, revision_ids):
2622
2628
keys = tsort.topo_sort(parent_map)
2623
2629
return [None] + list(keys)
2625
def pack(self, hint=None, clean_obsolete_packs=False):
2631
def pack(self, hint=None):
2626
2632
"""Compress the data within the repository.
2628
2634
This operation only makes sense for some repository types. For other
2638
2644
obtained from the result of commit_write_group(). Out of
2639
2645
date hints are simply ignored, because concurrent operations
2640
2646
can obsolete them rapidly.
2642
:param clean_obsolete_packs: Clean obsolete packs immediately after
2646
2649
def get_transaction(self):
2662
2665
for ((revision_id,), parent_keys) in \
2663
2666
self.revisions.get_parent_map(query_keys).iteritems():
2664
2667
if parent_keys:
2665
result[revision_id] = tuple([parent_revid
2666
for (parent_revid,) in parent_keys])
2668
result[revision_id] = tuple(parent_revid
2669
for (parent_revid,) in parent_keys)
2668
2671
result[revision_id] = (_mod_revision.NULL_REVISION,)
2671
2674
def _make_parents_provider(self):
2675
def get_known_graph_ancestry(self, revision_ids):
2676
"""Return the known graph for a set of revision ids and their ancestors.
2678
st = static_tuple.StaticTuple
2679
revision_keys = [st(r_id).intern() for r_id in revision_ids]
2680
known_graph = self.revisions.get_known_graph_ancestry(revision_keys)
2681
return graph.GraphThunkIdsToKeys(known_graph)
2683
2677
def get_graph(self, other_repository=None):
2684
2678
"""Return the graph walker for this repository format"""
2685
2679
parents_provider = self._make_parents_provider()
2780
2774
result.check(callback_refs)
2783
def _warn_if_deprecated(self, branch=None):
2777
def _warn_if_deprecated(self):
2784
2778
global _deprecation_warning_done
2785
2779
if _deprecation_warning_done:
2789
conf = config.GlobalConfig()
2791
conf = branch.get_config()
2792
if conf.suppress_warning('format_deprecation'):
2794
warning("Format %s for %s is deprecated -"
2795
" please use 'bzr upgrade' to get better performance"
2796
% (self._format, self.bzrdir.transport.base))
2798
_deprecation_warning_done = True
2781
_deprecation_warning_done = True
2782
warning("Format %s for %s is deprecated - please use 'bzr upgrade' to get better performance"
2783
% (self._format, self.bzrdir.transport.base))
2800
2785
def supports_rich_root(self):
2801
2786
return self._format.rich_root_data
3084
3069
pack_compresses = False
3085
3070
# Does the repository inventory storage understand references to trees?
3086
3071
supports_tree_reference = None
3087
# Is the format experimental ?
3088
experimental = False
3091
return "%s()" % self.__class__.__name__
3074
return "<%s>" % self.__class__.__name__
3093
3076
def __eq__(self, other):
3094
3077
# format objects are generally stateless
3109
3092
transport = a_bzrdir.get_repository_transport(None)
3110
format_string = transport.get_bytes("format")
3093
format_string = transport.get("format").read()
3111
3094
return format_registry.get(format_string)
3112
3095
except errors.NoSuchFile:
3113
3096
raise errors.NoRepositoryPresent(a_bzrdir)
3213
3196
raise NotImplementedError(self.open)
3215
def _run_post_repo_init_hooks(self, repository, a_bzrdir, shared):
3216
from bzrlib.bzrdir import BzrDir, RepoInitHookParams
3217
hooks = BzrDir.hooks['post_repo_init']
3220
params = RepoInitHookParams(repository, self, a_bzrdir, shared)
3225
3199
class MetaDirRepositoryFormat(RepositoryFormat):
3226
3200
"""Common base class for the new repositories using the metadir layout."""
3432
3406
:param revision_id: if None all content is copied, if NULL_REVISION no
3433
3407
content is copied.
3408
:param pb: optional progress bar to use for progress reports. If not
3409
provided a default one will be created.
3437
ui.ui_factory.warn_experimental_format_fetch(self)
3438
3412
from bzrlib.fetch import RepoFetcher
3439
# See <https://launchpad.net/bugs/456077> asking for a warning here
3440
if self.source._format.network_name() != self.target._format.network_name():
3441
ui.ui_factory.show_user_warning('cross_format_fetch',
3442
from_format=self.source._format,
3443
to_format=self.target._format)
3444
3413
f = RepoFetcher(to_repository=self.target,
3445
3414
from_repository=self.source,
3446
3415
last_revision=revision_id,
3447
3416
fetch_spec=fetch_spec,
3448
find_ghosts=find_ghosts)
3417
pb=pb, find_ghosts=find_ghosts)
3450
3419
def _walk_to_common_revisions(self, revision_ids):
3451
3420
"""Walk out from revision_ids in source to revisions target has.
3847
3816
basis_id, delta, current_revision_id, parents_parents)
3848
3817
cache[current_revision_id] = parent_tree
3850
def _fetch_batch(self, revision_ids, basis_id, cache, a_graph=None):
3819
def _fetch_batch(self, revision_ids, basis_id, cache):
3851
3820
"""Fetch across a few revisions.
3853
3822
:param revision_ids: The revisions to copy
3854
3823
:param basis_id: The revision_id of a tree that must be in cache, used
3855
3824
as a basis for delta when no other base is available
3856
3825
:param cache: A cache of RevisionTrees that we can use.
3857
:param a_graph: A Graph object to determine the heads() of the
3858
rich-root data stream.
3859
3826
:return: The revision_id of the last converted tree. The RevisionTree
3860
3827
for it will be in cache
3868
3835
pending_revisions = []
3869
3836
parent_map = self.source.get_parent_map(revision_ids)
3870
3837
self._fetch_parent_invs_for_stacking(parent_map, cache)
3871
self.source._safe_to_return_from_cache = True
3872
3838
for tree in self.source.revision_trees(revision_ids):
3873
3839
# Find an inventory delta for this revision.
3874
3840
# Find text entries that need to be copied, too.
3922
3888
pending_revisions.append(revision)
3923
3889
cache[current_revision_id] = tree
3924
3890
basis_id = current_revision_id
3925
self.source._safe_to_return_from_cache = False
3926
3891
# Copy file texts
3927
3892
from_texts = self.source.texts
3928
3893
to_texts = self.target.texts
3929
3894
if root_keys_to_create:
3930
root_stream = _mod_fetch._new_root_data_stream(
3895
from bzrlib.fetch import _new_root_data_stream
3896
root_stream = _new_root_data_stream(
3931
3897
root_keys_to_create, self._revision_id_to_root_id, parent_map,
3932
self.source, graph=a_graph)
3933
3899
to_texts.insert_record_stream(root_stream)
3934
3900
to_texts.insert_record_stream(from_texts.get_record_stream(
3935
3901
text_keys, self.target._format._fetch_order,
3992
3958
cache[basis_id] = basis_tree
3993
3959
del basis_tree # We don't want to hang on to it here
3995
if self._converting_to_rich_root and len(revision_ids) > 100:
3996
a_graph = _mod_fetch._get_rich_root_heads_graph(self.source,
4001
3961
for offset in range(0, len(revision_ids), batch_size):
4002
3962
self.target.start_write_group()
4004
3964
pb.update('Transferring revisions', offset,
4005
3965
len(revision_ids))
4006
3966
batch = revision_ids[offset:offset+batch_size]
4007
basis_id = self._fetch_batch(batch, basis_id, cache,
3967
basis_id = self._fetch_batch(batch, basis_id, cache)
4010
self.source._safe_to_return_from_cache = False
4011
3969
self.target.abort_write_group()
4025
3983
"""See InterRepository.fetch()."""
4026
3984
if fetch_spec is not None:
4027
3985
raise AssertionError("Not implemented yet...")
4028
ui.ui_factory.warn_experimental_format_fetch(self)
4029
3986
if (not self.source.supports_rich_root()
4030
3987
and self.target.supports_rich_root()):
4031
3988
self._converting_to_rich_root = True
4032
3989
self._revision_id_to_root_id = {}
4034
3991
self._converting_to_rich_root = False
4035
# See <https://launchpad.net/bugs/456077> asking for a warning here
4036
if self.source._format.network_name() != self.target._format.network_name():
4037
ui.ui_factory.show_user_warning('cross_format_fetch',
4038
from_format=self.source._format,
4039
to_format=self.target._format)
4040
3992
revision_ids = self.target.search_missing_revision_ids(self.source,
4041
3993
revision_id, find_ghosts=find_ghosts).get_keys()
4042
3994
if not revision_ids:
4111
4063
:param to_convert: The disk object to convert.
4112
4064
:param pb: a progress bar to use for progress information.
4114
pb = ui.ui_factory.nested_progress_bar()
4117
4069
# this is only useful with metadir layouts - separated repo content.
4118
4070
# trigger an assertion if not such
4119
4071
repo._format.get_format_string()
4120
4072
self.repo_dir = repo.bzrdir
4121
pb.update('Moving repository to repository.backup')
4073
self.step('Moving repository to repository.backup')
4122
4074
self.repo_dir.transport.move('repository', 'repository.backup')
4123
4075
backup_transport = self.repo_dir.transport.clone('repository.backup')
4124
4076
repo._format.check_conversion_target(self.target_format)
4125
4077
self.source_repo = repo._format.open(self.repo_dir,
4127
4079
_override_transport=backup_transport)
4128
pb.update('Creating new repository')
4080
self.step('Creating new repository')
4129
4081
converted = self.target_format.initialize(self.repo_dir,
4130
4082
self.source_repo.is_shared())
4131
4083
converted.lock_write()
4133
pb.update('Copying content')
4085
self.step('Copying content')
4134
4086
self.source_repo.copy_content_into(converted)
4136
4088
converted.unlock()
4137
pb.update('Deleting old repository content')
4089
self.step('Deleting old repository content')
4138
4090
self.repo_dir.transport.delete_tree('repository.backup')
4139
4091
ui.ui_factory.note('repository converted')
4093
def step(self, message):
4094
"""Update the pb by a step."""
4096
self.pb.update(message, self.count, self.total)
4143
4099
_unescape_map = {
4487
4443
fetching the inventory weave.
4489
4445
if self._rich_root_upgrade():
4490
return _mod_fetch.Inter1and2Helper(
4447
return bzrlib.fetch.Inter1and2Helper(
4491
4448
self.from_repository).generate_root_texts(revs)
4636
4593
def _get_convertable_inventory_stream(self, revision_ids,
4637
4594
delta_versus_null=False):
4638
# The two formats are sufficiently different that there is no fast
4639
# path, so we need to send just inventorydeltas, which any
4640
# sufficiently modern client can insert into any repository.
4641
# The StreamSink code expects to be able to
4595
# The source is using CHKs, but the target either doesn't or it has a
4596
# different serializer. The StreamSink code expects to be able to
4642
4597
# convert on the target, so we need to put bytes-on-the-wire that can
4643
4598
# be converted. That means inventory deltas (if the remote is <1.19,
4644
4599
# RemoteStreamSink will fallback to VFS to insert the deltas).