        result['size'] = t
        return result

    def find_branches(self, using=False):
        """Find branches underneath this repository.

        This will include branches inside other branches.

        :param using: If True, list only branches using this repository.
        """
        if using and not self.is_shared():
            try:
                return [self.bzrdir.open_branch()]
            except errors.NotBranchError:
                return []
        class Evaluator(object):

            def __init__(self):
                self.first_call = True

            def __call__(self, bzrdir):
                # On the first call, the parameter is always the bzrdir
                # containing the current repo.
                if not self.first_call:
                    try:
                        repository = bzrdir.open_repository()
                    except errors.NoRepositoryPresent:
                        pass
                    else:
                        return False, (None, repository)
                self.first_call = False
                try:
                    value = (bzrdir.open_branch(), None)
                except errors.NotBranchError:
                    value = (None, None)
                return True, value

        branches = []
        for branch, repository in bzrdir.BzrDir.find_bzrdirs(
                self.bzrdir.root_transport, evaluate=Evaluator()):
            if branch is not None:
                branches.append(branch)
            if not using and repository is not None:
                branches.extend(repository.find_branches())
        return branches
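
    # Illustrative usage sketch (not part of the original source): on a
    # shared repository, find_branches() walks every bzrdir under the
    # repository's root transport. The path below is hypothetical:
    #
    #   repo = bzrdir.BzrDir.open('/srv/shared-repo').open_repository()
    #   for branch in repo.find_branches(using=False):
    #       print branch.base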

    def get_data_stream(self, revision_ids):
        raise NotImplementedError(self.get_data_stream)

    def get_data_stream_for_search(self, search_result):
        """Get a data stream that can be inserted to a repository.

        :param search_result: A bzrlib.graph.SearchResult selecting the
            revisions to get.
        :return: A data stream that can be inserted into a repository using
            insert_data_stream.
        """
        raise NotImplementedError(self.get_data_stream_for_search)

    def insert_data_stream(self, stream):
        """XXX What does this really do?

        Is it a substitute for fetch?
        Should it manage its own write group?
        """
        for item_key, bytes in stream:
            if item_key[0] == 'file':
                (file_id,) = item_key[1:]
                knit = self.weave_store.get_weave_or_empty(
                    file_id, self.get_transaction())
            elif item_key == ('inventory',):
                knit = self.get_inventory_weave()
            elif item_key == ('revisions',):
                knit = self._revision_store.get_revision_file(
                    self.get_transaction())
            elif item_key == ('signatures',):
                knit = self._revision_store.get_signature_file(
                    self.get_transaction())
            else:
                raise errors.RepositoryDataStreamError(
                    "Unrecognised data stream key '%s'" % (item_key,))
            decoded_list = bencode.bdecode(bytes)
            format = decoded_list.pop(0)
            data_list = []
            knit_bytes = ''
            for version, options, parents, some_bytes in decoded_list:
                data_list.append((version, options, len(some_bytes), parents))
                knit_bytes += some_bytes
            buffer = StringIO(knit_bytes)
            def reader_func(count):
                if count is None:
                    return buffer.read()
                else:
                    return buffer.read(count)
            knit.insert_data_stream(
                (format, data_list, reader_func))
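
    # Illustrative sketch (not part of the original source): each tuple handed
    # to knit.insert_data_stream() above is (format, data_list, reader), where
    # data_list entries are (version, options, length, parents) and the reader
    # callable must behave like reader_func. Hypothetical records follow (the
    # 'knit-plain' format name is assumed, not taken from this file):
    #
    #   data_list = [('rev-1', ['fulltext'], 10, []),
    #                ('rev-2', ['line-delta'], 7, ['rev-1'])]
    #   payload = StringIO('0123456789abcdefg')  # 10 + 7 concatenated bytes
    #   knit.insert_data_stream(('knit-plain', data_list, payload.read))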

    def search_missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
        """Return the revision ids that other has that this does not.

        These are returned in topological order.

        revision_id: only return revision ids included by revision_id.
        """
        return InterRepository.get(other, self).search_missing_revision_ids(
            revision_id, find_ghosts)

    @deprecated_method(symbol_versioning.one_two)
    @needs_read_lock
    def missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
        """Return the revision ids that other has that this does not.

        These are returned in topological order.

        revision_id: only return revision ids included by revision_id.
        """
        keys = self.search_missing_revision_ids(
            other, revision_id, find_ghosts).get_keys()
        other.lock_read()
        try:
            parents = other.get_graph().get_parent_map(keys)
        finally:
            other.unlock()
        return tsort.topo_sort(parents)
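
    # Illustrative sketch (not part of the original source): tsort.topo_sort
    # takes a {revision_id: parents} mapping such as get_parent_map returns
    # and orders parents before children:
    #
    #   parents = {'rev-1': (), 'rev-2': ('rev-1',), 'rev-3': ('rev-2',)}
    #   tsort.topo_sort(parents)  # -> ['rev-1', 'rev-2', 'rev-3']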

    @needs_write_lock
    def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
        signature = gpg_strategy.sign(plaintext)
        self.add_signature_text(revision_id, signature)

    @needs_write_lock
    def add_signature_text(self, revision_id, signature):
        self._revision_store.add_revision_signature_text(revision_id,
                                                         signature,
                                                         self.get_transaction())

    def find_text_key_references(self):
        """Find the text key references within the repository.

        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. The inventory texts from all present
            revision ids are assessed to generate this report.
        """
        revision_ids = self.all_revision_ids()
        w = self.get_inventory_weave()
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._find_text_key_references_from_xml_inventory_lines(
                w.iter_lines_added_or_present_in_versions(revision_ids, pb=pb))
        finally:
            pb.finished()
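
    # Illustrative sketch (not part of the original source): the returned dict
    # answers "was this text version referenced by the inventory of its own
    # revision?", e.g. {('file-id-1', 'rev-1'): True}. In a healthy repository
    # every key maps to True; False values indicate dangling text versions:
    #
    #   refs = repo.find_text_key_references()
    #   dangling = [key for key, used in refs.iteritems() if not used]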

    def _find_text_key_references_from_xml_inventory_lines(self,
        line_iterator):
        """Core routine for extracting references to texts from inventories.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. Note that if that revision_id was
            not part of the line_iterator's output then False will be given -
            even though it may actually refer to that key.
        """
        if not self._serializer.support_altered_by_hack:
            raise AssertionError(
                "_find_text_key_references_from_xml_inventory_lines only "
                "supported for branches which store inventory as unnested xml"
                ", not on %r" % self)
        result = {}
        # this code needs to read every new line in every inventory for the
        # inventories being processed; seeing a line twice is ok.
        unescape_revid_cache = {}
        unescape_fileid_cache = {}
        # Move several functions to be local variables, since this is a long
        # running loop.
        search = self._file_ids_altered_regex.search
        unescape = _unescape_xml
        setdefault = result.setdefault
        for line, version_id in line_iterator:
            match = search(line)
            if match is None:
                continue
            file_id, revision_id = match.group('file_id', 'revision_id')
            try:
                revision_id = unescape_revid_cache[revision_id]
            except KeyError:
                unescaped = unescape(revision_id)
                unescape_revid_cache[revision_id] = unescaped
                revision_id = unescaped

            # Note that unconditionally unescaping means that we deserialise
            # every fileid, which for general 'pull' is not great, but we don't
            # really want to have so many fulltexts that this matters anyway.
            try:
                file_id = unescape_fileid_cache[file_id]
            except KeyError:
                unescaped = unescape(file_id)
                unescape_fileid_cache[file_id] = unescaped
                file_id = unescaped

            key = (file_id, revision_id)
            setdefault(key, False)
            if revision_id == version_id:
                result[key] = True
        return result

    def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
        revision_ids):
        """Helper routine for fileids_altered_by_revision_ids.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of lines, origin_version_id
        :param revision_ids: The revision ids to filter for. This should be a
            set or other type which supports efficient __contains__ lookups, as
            the revision id from each parsed line will be looked up in the
            revision_ids filter.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids
            that altered it listed explicitly.
        """
        result = {}
        setdefault = result.setdefault
        for file_id, revision_id in \
            self._find_text_key_references_from_xml_inventory_lines(
                line_iterator).iterkeys():
            # once data is all ensured-consistent; then this is
            # if revision_id == version_id
            if revision_id in revision_ids:
                setdefault(file_id, set()).add(revision_id)
        return result
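
    # Illustrative sketch (not part of the original source): with a filter of
    # revision_ids = set(['rev-2']), text key references naming 'file-a' in
    # both 'rev-1' and 'rev-2' are reduced to the filtered revisions only:
    #
    #   {'file-a': set(['rev-2'])}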

                raise errors.NoSuchIdInRepository(self, file_id)
            yield callable_data, weave.get_lines(revision_id)

    def _generate_text_key_index(self, text_key_references=None,
        ancestors=None):
        """Generate a new text key index for the repository.

        This is an expensive function that will take considerable time to run.

        :return: A dict mapping text keys ((file_id, revision_id) tuples) to a
            list of parents, also text keys. When a given key has no parents,
            the parents list will be [NULL_REVISION].
        """
        # All revisions, to find inventory parents.
        if ancestors is None:
            revision_graph = self.get_revision_graph_with_ghosts()
            ancestors = revision_graph.get_ancestors()
        if text_key_references is None:
            text_key_references = self.find_text_key_references()
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_generate_text_key_index(ancestors,
                text_key_references, pb)
        finally:
            pb.finished()

    def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
        """Helper for _generate_text_key_index to avoid deep nesting."""
        revision_order = tsort.topo_sort(ancestors)
        invalid_keys = set()
        revision_keys = {}
        for revision_id in revision_order:
            revision_keys[revision_id] = set()
        text_count = len(text_key_references)
        # a cache of the text keys to allow reuse; costs a dict of all the
        # keys, but saves a 2-tuple for every child of a given key.
        text_key_cache = {}
        for text_key, valid in text_key_references.iteritems():
            if not valid:
                invalid_keys.add(text_key)
            else:
                revision_keys[text_key[1]].add(text_key)
            text_key_cache[text_key] = text_key
        del text_key_references
        text_index = {}
        text_graph = graph.Graph(graph.DictParentsProvider(text_index))
        NULL_REVISION = _mod_revision.NULL_REVISION
        # Set a cache with a size of 10 - this suffices for bzr.dev but may be
        # too small for large or very branchy trees. However, for 55K path
        # trees, it would be easy to use too much memory trivially. Ideally we
        # could gauge this by looking at available real memory etc, but this is
        # always a tricky proposition.
        inventory_cache = lru_cache.LRUCache(10)
        batch_size = 10 # should be ~150MB on a 55K path tree
        batch_count = len(revision_order) / batch_size + 1
        processed_texts = 0
        pb.update("Calculating text parents.", processed_texts, text_count)
        for offset in xrange(batch_count):
            to_query = revision_order[offset * batch_size:(offset + 1) *
                batch_size]
            for rev_tree in self.revision_trees(to_query):
                revision_id = rev_tree.get_revision_id()
                parent_ids = ancestors[revision_id]
                for text_key in revision_keys[revision_id]:
                    pb.update("Calculating text parents.", processed_texts)
                    processed_texts += 1
                    candidate_parents = []
                    for parent_id in parent_ids:
                        parent_text_key = (text_key[0], parent_id)
                        try:
                            check_parent = parent_text_key not in \
                                revision_keys[parent_id]
                        except KeyError:
                            # the parent parent_id is a ghost:
                            check_parent = False
                            # truncate the derived graph against this ghost.
                            parent_text_key = None
                        if check_parent:
                            # look at the parent commit details inventories to
                            # determine possible candidates in the per file graph.
                            try:
                                inv = inventory_cache[parent_id]
                            except KeyError:
                                inv = self.revision_tree(parent_id).inventory
                                inventory_cache[parent_id] = inv
                            parent_entry = inv._byid.get(text_key[0], None)
                            if parent_entry is not None:
                                parent_text_key = (
                                    text_key[0], parent_entry.revision)
                            else:
                                parent_text_key = None
                        if parent_text_key is not None:
                            candidate_parents.append(
                                text_key_cache[parent_text_key])
                    parent_heads = text_graph.heads(candidate_parents)
                    new_parents = list(parent_heads)
                    new_parents.sort(key=lambda x: candidate_parents.index(x))
                    if new_parents == []:
                        new_parents = [NULL_REVISION]
                    text_index[text_key] = new_parents
        for text_key in invalid_keys:
            text_index[text_key] = [NULL_REVISION]
        return text_index
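
    # Illustrative sketch (not part of the original source): for a file 'f-id'
    # changed in 'rev-1', changed independently in 'rev-2a' and 'rev-2b', and
    # merged in 'rev-3', the resulting index would look like:
    #
    #   {('f-id', 'rev-1'): ['null:'],
    #    ('f-id', 'rev-2a'): [('f-id', 'rev-1')],
    #    ('f-id', 'rev-2b'): [('f-id', 'rev-1')],
    #    ('f-id', 'rev-3'): [('f-id', 'rev-2a'), ('f-id', 'rev-2b')]}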

    def item_keys_introduced_by(self, revision_ids, _files_pb=None):
        """Get an iterable listing the keys of all the data introduced by a set
        of revision IDs.
        """

    @needs_read_lock
    def get_inventory(self, revision_id):
        """Get Inventory object by revision id."""
        return self.iter_inventories([revision_id]).next()

    def iter_inventories(self, revision_ids):
        """Get many inventories by revision_ids.

        This will buffer some or all of the texts used in constructing the
        inventories in memory, but will only parse a single inventory at a
        time.

        :return: An iterator of inventories.
        """
        assert None not in revision_ids
        assert _mod_revision.NULL_REVISION not in revision_ids
        return self._iter_inventories(revision_ids)

    def _iter_inventories(self, revision_ids):
        """single-document based inventory iteration."""
        texts = self.get_inventory_weave().get_texts(revision_ids)
        for text, revision_id in zip(texts, revision_ids):
            yield self.deserialise_inventory(revision_id, text)

    def deserialise_inventory(self, revision_id, xml):
        """Transform the xml into an inventory object.

        :param revision_id: The expected revision id of the inventory.
        :param xml: A serialised inventory.
        """

    def revision_trees(self, revision_ids):
        """Return Tree for a revision on this branch.

        `revision_id` may not be None or 'null:'"""
        inventories = self.iter_inventories(revision_ids)
        for inv in inventories:
            yield RevisionTree(self, inv, inv.revision_id)

    @needs_read_lock
    def get_ancestry(self, revision_id, topo_sorted=True):

    def revision_parents(self, revision_id):
        return self.get_inventory_weave().parent_names(revision_id)

    @deprecated_method(symbol_versioning.one_one)
    def get_parents(self, revision_ids):
        """See StackedParentsProvider.get_parents"""
        parent_map = self.get_parent_map(revision_ids)
        return [parent_map.get(r, None) for r in revision_ids]

    def get_parent_map(self, keys):
        """See graph._StackedParentsProvider.get_parent_map"""
        parent_map = {}
        for revision_id in keys:
            if revision_id == _mod_revision.NULL_REVISION:
                parent_map[revision_id] = ()
            else:
                try:
                    parent_id_list = self.get_revision(revision_id).parent_ids
                except errors.NoSuchRevision:
                    pass
                else:
                    if len(parent_id_list) == 0:
                        parent_ids = (_mod_revision.NULL_REVISION,)
                    else:
                        parent_ids = tuple(parent_id_list)
                    parent_map[revision_id] = parent_ids
        return parent_map
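
    # Illustrative sketch (not part of the original source): get_parent_map
    # omits unknown keys instead of raising, and maps an initial commit to
    # (NULL_REVISION,):
    #
    #   repo.get_parent_map(['rev-1', 'ghost-rev'])
    #   # -> {'rev-1': ('null:',)} if 'rev-1' is a first commit and
    #   #    'ghost-rev' is absent from the repository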

    def _make_parents_provider(self):
        return self

    def get_graph(self, other_repository=None):
        """Return the graph walker for this repository format"""
        parents_provider = self._make_parents_provider()
        if (other_repository is not None and
            not self.has_same_location(other_repository)):
            parents_provider = graph._StackedParentsProvider(
                [parents_provider, other_repository._make_parents_provider()])
        return graph.Graph(parents_provider)

    def _get_versioned_file_checker(self):
        """Return an object suitable for checking versioned files."""
        return _VersionedFileChecker(self)

    def revision_ids_to_search_result(self, result_set):
        """Convert a set of revision ids to a graph SearchResult."""
        result_parents = set()
        for parents in self.get_graph().get_parent_map(
            result_set).itervalues():
            result_parents.update(parents)
        included_keys = result_set.intersection(result_parents)
        start_keys = result_set.difference(included_keys)
        exclude_keys = result_parents.difference(result_set)
        result = graph.SearchResult(start_keys, exclude_keys,
            len(result_set), result_set)
        return result
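
    # Illustrative sketch (not part of the original source): for the ancestry
    # A <- B <- C with result_set = set(['B', 'C']), the parents seen are
    # {'A', 'B'}, so start_keys = {'C'} (the heads the search starts from) and
    # exclude_keys = {'A'} (the frontier just outside the result).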

    @needs_write_lock
    def set_make_working_trees(self, new_value):


def install_revision(repository, rev, revision_tree):
    """Install all revision data into a repository."""
    install_revisions(repository, [(rev, revision_tree, None)])


def install_revisions(repository, iterable, num_revisions=None, pb=None):
    """Install all revision data into a repository.

    Accepts an iterable of revision, tree, signature tuples.  The signature
    may be None.
    """
    repository.start_write_group()
    try:
        for n, (revision, revision_tree, signature) in enumerate(iterable):
            _install_revision(repository, revision, revision_tree, signature)
            if pb is not None:
                pb.update('Transferring revisions', n + 1, num_revisions)
    except:
        repository.abort_write_group()
        raise
    else:
        repository.commit_write_group()
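
# Illustrative sketch (not part of the original source): callers are expected
# to hold a write lock; the write group makes the copy transactional, being
# committed on success and aborted on error. With hypothetical 'repo', 'rev'
# and 'tree' objects:
#
#   repo.lock_write()
#   try:
#       install_revision(repo, rev, tree)
#   finally:
#       repo.unlock()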

        (copied, failures).
        """
        raise NotImplementedError(self.fetch)

    def _walk_to_common_revisions(self, revision_ids):
        """Walk out from revision_ids in source to revisions target has.

        :param revision_ids: The start point for the search.
        :return: A set of revision ids.
        """
        graph = self.source.get_graph()
        missing_revs = set()
        # ensure we don't pay silly lookup costs.
        revision_ids = frozenset(revision_ids)
        searcher = graph._make_breadth_first_searcher(revision_ids)
        null_set = frozenset([_mod_revision.NULL_REVISION])
        while True:
            try:
                next_revs, ghosts = searcher.next_with_ghosts()
            except StopIteration:
                break
            if revision_ids.intersection(ghosts):
                absent_ids = set(revision_ids.intersection(ghosts))
                # If all absent_ids are present in target, no error is needed.
                absent_ids.difference_update(
                    self.target.has_revisions(absent_ids))
                if absent_ids:
                    raise errors.NoSuchRevision(self.source, absent_ids.pop())
            # we don't care about other ghosts as we can't fetch them and
            # haven't been asked to.
            next_revs = set(next_revs)
            # we always have NULL_REVISION present.
            have_revs = self.target.has_revisions(next_revs).union(null_set)
            missing_revs.update(next_revs - have_revs)
            searcher.stop_searching_any(have_revs)
        return searcher.get_result()
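
    # Illustrative sketch (not part of the original source): each loop above
    # expands the search frontier by one generation. For source ancestry
    # A <- B <- C with only A present in the target and revision_ids = ['C']:
    # round 1 visits {C} (missing), round 2 visits {B} (missing), round 3
    # visits {A} (present, so searching stops); the result describes {B, C}.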

    @deprecated_method(symbol_versioning.one_two)
    @needs_read_lock
    def missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """Return the revision ids that source has that target does not.

        These are returned in topological order.

        :param revision_id: only return revision ids included by this
            revision_id.
        :param find_ghosts: If True find missing revisions in deep history
            rather than just finding the surface difference.
        """
        return list(self.search_missing_revision_ids(
            revision_id, find_ghosts).get_keys())

    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """Return the revision ids that source has that target does not.

        :param revision_id: only return revision ids included by this
            revision_id.
        :param find_ghosts: If True find missing revisions in deep history
            rather than just finding the surface difference.
        :return: A bzrlib.graph.SearchResult.
        """
        # stop searching at found target revisions.
        if not find_ghosts and revision_id is not None:
            return self._walk_to_common_revisions([revision_id])
        # generic, possibly worst case, slow code path.
        target_ids = set(self.target.all_revision_ids())
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            assert source_ids[0] is None
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        result_set = set(source_ids).difference(target_ids)
        return self.source.revision_ids_to_search_result(result_set)

        # we do not have a revision as that would be pointless.
        target_ids = set(self.target._all_possible_ids())
        possibly_present_revisions = target_ids.intersection(source_ids_set)
        actually_present_revisions = set(
            self.target._eliminate_revisions_not_present(possibly_present_revisions))
        required_revisions = source_ids_set.difference(actually_present_revisions)
        if revision_id is not None:
            # we used get_ancestry to determine source_ids, so we are assured
            # all revisions referenced are present as they are installed in
            # topological order, and the tip revision was validated by
            # get_ancestry.
            result_set = required_revisions
        else:
            # if we just grabbed the possibly available ids, then
            # we only have an estimate of what's available and need to validate
            # that against the revision records.
            result_set = set(
                self.source._eliminate_revisions_not_present(required_revisions))
        return self.source.revision_ids_to_search_result(result_set)


class InterKnitRepo(InterSameDataRepository):

        # we do not have a revision as that would be pointless.
        target_ids = set(self.target.all_revision_ids())
        possibly_present_revisions = target_ids.intersection(source_ids_set)
        actually_present_revisions = set(
            self.target._eliminate_revisions_not_present(possibly_present_revisions))
        required_revisions = source_ids_set.difference(actually_present_revisions)
        if revision_id is not None:
            # we used get_ancestry to determine source_ids, so we are assured
            # all revisions referenced are present as they are installed in
            # topological order, and the tip revision was validated by
            # get_ancestry.
            result_set = required_revisions
        else:
            # if we just grabbed the possibly available ids, then
            # we only have an estimate of what's available and need to validate
            # that against the revision records.
            result_set = set(
                self.source._eliminate_revisions_not_present(required_revisions))
        return self.source.revision_ids_to_search_result(result_set)


class InterPackRepo(InterSameDataRepository):

        # a pack creation, but for now it is simpler to think about as
        # 'upload data, then repack if needed'.
        self.target._pack_collection.autopack()
        return (pack.get_revision_count(), [])

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.search_missing_revision_ids().

        :param find_ghosts: Find ghosts throughout the ancestry of
            revision_id.
        """
        if not find_ghosts and revision_id is not None:
            return self._walk_to_common_revisions([revision_id])
        elif revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            assert source_ids[0] is None
            source_ids.pop(0)

        f = Knit1to2Fetcher(to_repository=self.target,
                            from_repository=self.source,
                            last_revision=revision_id,
                            pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions


class InterDifferingSerializer(InterKnitRepo):

    @classmethod
    def _get_repo_format_to_test(self):
        return None

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with Knit2 source and Knit3 target"""
        if source.supports_rich_root() != target.supports_rich_root():
            return False
        # Ideally, we'd support fetching if the source had no tree references
        # even if it supported them...
        if (getattr(source._format, 'supports_tree_reference', False) and
            not getattr(target._format, 'supports_tree_reference', False)):
            return False
        return True

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch()."""
        revision_ids = self.target.search_missing_revision_ids(self.source,
            revision_id, find_ghosts=find_ghosts).get_keys()
        revision_ids = tsort.topo_sort(
            self.source.get_graph().get_parent_map(revision_ids))
        def revisions_iterator():
            for current_revision_id in revision_ids:
                revision = self.source.get_revision(current_revision_id)
                tree = self.source.revision_tree(current_revision_id)
                try:
                    signature = self.source.get_signature_text(
                        current_revision_id)
                except errors.NoSuchRevision:
                    signature = None
                yield revision, tree, signature
        if pb is None:
            my_pb = ui.ui_factory.nested_progress_bar()
            pb = my_pb
        else:
            my_pb = None
        try:
            install_revisions(self.target, revisions_iterator(),
                              len(revision_ids), pb)
        finally:
            if my_pb is not None:
                my_pb.finished()
        return len(revision_ids), 0


class InterRemoteToOther(InterRepository):

    def __init__(self, source, target):
        InterRepository.__init__(self, source, target)
        self._real_inter = None

    @staticmethod
    def is_compatible(source, target):
        if not isinstance(source, remote.RemoteRepository):
            return False
        # Is source's model compatible with target's model?
        source._ensure_real()
        real_source = source._real_repository
        assert not isinstance(real_source, remote.RemoteRepository), (
            "We don't support remote repos backed by remote repos yet.")
        return InterRepository._same_model(real_source, target)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):

    return _unescape_re.sub(_unescaper, data)


class _VersionedFileChecker(object):

    def __init__(self, repository):
        self.repository = repository
        self.text_index = self.repository._generate_text_key_index()

    def calculate_file_version_parents(self, revision_id, file_id):
        """Calculate the correct parents for a file version according to
        the inventories.
        """
        parent_keys = self.text_index[(file_id, revision_id)]
        if parent_keys == [_mod_revision.NULL_REVISION]:
            return ()
        # strip the file_id, for the weave api
        return tuple([revision_id for file_id, revision_id in parent_keys])
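
    # Illustrative sketch (not part of the original source): given a
    # text_index entry {('f-id', 'rev-2'): [('f-id', 'rev-1')]}, calling
    # calculate_file_version_parents('rev-2', 'f-id') strips the file_id and
    # returns ('rev-1',), while an entry of [NULL_REVISION] yields ().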

    def check_file_version_parents(self, weave, file_id):
        """Check the parents stored in a versioned file are correct.

        It also detects file versions that are not referenced by their
        corresponding revision's inventory.

        :returns: A tuple of (wrong_parents, unused_versions). wrong_parents
            is a dict mapping {revision_id: (stored_parents, correct_parents)}
            for each revision_id where the stored parents are not correct.
            unused_versions is a set of versions present in this versioned
            file, but not used by the corresponding inventory.
        """
        wrong_parents = {}
        unused_versions = set()
        for num, revision_id in enumerate(weave.versions()):
            try:
                correct_parents = self.calculate_file_version_parents(
                    revision_id, file_id)
            except KeyError:
                # The version is not part of the used keys.
                unused_versions.add(revision_id)
            else:
                try:
                    knit_parents = tuple(weave.get_parents(revision_id))
                except errors.RevisionNotPresent:
                    knit_parents = None
                if correct_parents != knit_parents:
                    wrong_parents[revision_id] = (knit_parents, correct_parents)
        return wrong_parents, unused_versions
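
    # Illustrative usage sketch (not part of the original source): a checker
    # would run this once per file, with 'repo' and 'file_id' hypothetical:
    #
    #   checker = _VersionedFileChecker(repo)
    #   weave = repo.weave_store.get_weave(file_id, repo.get_transaction())
    #   wrong_parents, unused_versions = checker.check_file_version_parents(
    #       weave, file_id)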