self.get_transaction())
    def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
        revision_ids):
"""Helper routine for fileids_altered_by_revision_ids.
1056
This performs the translation of xml lines to revision ids.
1058
:param line_iterator: An iterator of lines
1059
:param revision_ids: The revision ids to filter for.
1050
1060
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids
            that altered it listed explicitly.
        """
        result = {}
        # this code needs to read every new line in every inventory for the
        # inventories [revision_ids]. Seeing a line twice is ok. Seeing a
        # line not present in one of those inventories is unnecessary but
        # not harmful because we are filtering by the revision id marker in
        # the inventory lines: we only select file ids altered in one of
        # those revisions.
        unescape_revid_cache = {}
        unescape_fileid_cache = {}
        search = self._file_ids_altered_regex.search
        unescape = _unescape_xml
        setdefault = result.setdefault
        for line in line_iterator:
            match = search(line)
            if match is None:
                continue
            # One call to match.group() returning multiple items is quite a
            # bit faster than 2 calls to match.group() each returning 1
            file_id, revision_id = match.group('file_id', 'revision_id')

            # Inlining the cache lookups helps a lot when you make 170,000
            # lines and 350k ids, versus 8.4 unique ids.
            # Using a cache helps in 2 ways:
            #   1) Avoids unnecessary decoding calls
            #   2) Re-uses cached strings, which helps in future set and
            #      equality checks.
            # (2) is enough that removing encoding entirely along with
            # the cache (so we are using plain strings) results in no
            # performance improvement.
            try:
                revision_id = unescape_revid_cache[revision_id]
            except KeyError:
                unescaped = unescape(revision_id)
                unescape_revid_cache[revision_id] = unescaped
                revision_id = unescaped

            if revision_id in revision_ids:
                try:
                    file_id = unescape_fileid_cache[file_id]
                except KeyError:
                    unescaped = unescape(file_id)
                    unescape_fileid_cache[file_id] = unescaped
                    file_id = unescaped
                setdefault(file_id, set()).add(revision_id)
        return result

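    # Illustrative note (an assumption inferred from the group names used
    # above, not quoted from bzrlib): _file_ids_altered_regex is expected to
    # match inventory XML entry lines of roughly this shape,
    #   <file file_id="foo-id" ... revision="rev-1" .../>
    # so that match.group('file_id', 'revision_id') yields
    # ('foo-id', 'rev-1').
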
    def fileids_altered_by_revision_ids(self, revision_ids):
        """Find the file ids and versions affected by revisions.

        :param revision_ids: an iterable containing revision ids.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids
            that altered it listed explicitly.
        """
        assert self._serializer.support_altered_by_hack, \
            ("fileids_altered_by_revision_ids only supported for branches "
             "which store inventory as unnested xml, not on %r" % self)
        selected_revision_ids = set(revision_ids)
        w = self.get_inventory_weave()
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._find_file_ids_from_xml_inventory_lines(
                w.iter_lines_added_or_present_in_versions(
                    selected_revision_ids, pb=pb),
                selected_revision_ids)
        finally:
            pb.finished()
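    # Usage sketch (illustrative, not from bzrlib documentation): given a
    # read-locked repository `repo`, the result maps each altered file id to
    # exactly the queried revisions that touched it, e.g.
    #   altered = repo.fileids_altered_by_revision_ids(['rev-1', 'rev-2'])
    #   # e.g. {'foo-id': set(['rev-1']), 'bar-id': set(['rev-1', 'rev-2'])}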
def iter_files_bytes(self, desired_files):
"""Iterate through file versions.


def install_revision(repository, rev, revision_tree):
"""Install all revision data into a repository."""
    repository.start_write_group()
    try:
        _install_revision(repository, rev, revision_tree)
    except:
        repository.abort_write_group()
        raise
    else:
        repository.commit_write_group()

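# Hedged usage sketch (the names `repo`, `rev` and `tree` are placeholders,
# not from this module): the write group opened above makes the insertion
# transactional; callers supply their own lock, e.g.
#   repo.lock_write()
#   try:
#       install_revision(repo, rev, tree)
#   finally:
#       repo.unlock()
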
def _install_revision(repository, rev, revision_tree):
    """Install all revision data into a repository."""
    present_parents = []
    parent_trees = {}
for p_id in rev.parent_ids:

    'RepositoryFormatKnit3',
    )

# Experimental formats. These make no guarantee about data stability.
# There is one format for pre-subtrees, and one for post-subtrees to
# allow ease of testing.
format_registry.register_lazy(
    'Bazaar Experimental no-subtrees\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack1',
    )
format_registry.register_lazy(
    'Bazaar Experimental subtrees\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack3',
    )

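# A minimal sketch of the lazy-registration pattern used above, with
# hypothetical names (nothing below is a real bzrlib format): the
# implementing module is imported only when the format key is looked up.
#
#   format_registry.register_lazy(
#       'My Custom Format v1\n',   # on-disk format string, used as the key
#       'mypackage.myrepo',        # module imported on first lookup
#       'MyRepositoryFormat',      # attribute resolved in that module
#       )
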
class InterRepository(InterObject):
"""This class represents operations taking place between two repositories.
return self.source._eliminate_revisions_not_present(required_topo_revisions)


class InterPackRepo(InterSameDataRepository):
    """Optimised code paths between Pack based repositories."""

    @classmethod
def _get_repo_format_to_test(self):
from bzrlib.repofmt import pack_repo
return pack_repo.RepositoryFormatKnitPack1()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Pack formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.
        """
        from bzrlib.repofmt.pack_repo import RepositoryFormatPack
        try:
            are_packs = (isinstance(source._format, RepositoryFormatPack) and
                isinstance(target._format, RepositoryFormatPack))
        except AttributeError:
            return False
        return are_packs and InterRepository._same_model(source, target)
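
    # Design note (an observation about the code above, not bzrlib doc):
    # accessing source._format/target._format inside the try means objects
    # with no _format attribute are reported incompatible rather than
    # raising, so InterRepository.get() can probe arbitrary pairs safely.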

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None):
        """See InterRepository.fetch()."""
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        self.count_copied = 0
if revision_id is None:
            # TODO:
            # everything to do - use pack logic
            # to fetch from all packs to one without
            # inventory parsing etc, IFF nothing to be copied is in the target.
            # till then:
revision_ids = self.source.all_revision_ids()
            # implementing the TODO will involve:
            # - detecting when all of a pack is selected
            # - avoiding as much as possible pre-selection, so the
            #   more-core routines such as create_pack_from_packs can filter
            #   in a just-in-time fashion. (though having a HEADS list on a
            #   repository might make this a lot easier, because we could
            #   sensibly detect 'new revisions' without doing a full index
            #   scan.)
        elif _mod_revision.is_null(revision_id):
            # nothing to do:
            return (0, [])
        else:
            try:
                revision_ids = self.missing_revision_ids(revision_id)
            except errors.NoSuchRevision:
                raise errors.InstallFailed([revision_id])
        packs = self.source._pack_collection.all_packs()
        pack = self.target._pack_collection.create_pack_from_packs(
            packs, '.fetch', revision_ids,
            )
        if pack is not None:
            self.target._pack_collection._save_pack_names()
            # Trigger an autopack. This may duplicate effort as we've just done
            # a pack creation, but for now it is simpler to think about as
            # 'upload data, then repack if needed'.
            self.target._pack_collection.autopack()
            return pack.get_revision_count()
        else:
            return 0
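
    # Hedged usage sketch (illustrative): for two pack repositories sharing
    # a model, InterRepository.get() selects this optimised implementation:
    #   inter = InterRepository.get(source_repo, target_repo)
    #   inter.fetch(revision_id='some-rev-id')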

    @needs_read_lock
    def missing_revision_ids(self, revision_id=None):
        """See InterRepository.missing_revision_ids()."""
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            assert source_ids[0] is None
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target.all_revision_ids())
return [r for r in source_ids if (r not in target_ids)]
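        # Illustrative example (not from bzrlib): the comprehension above is
        # an order-preserving set difference, e.g.
        #   source_ids = ['rev-a', 'rev-b', 'rev-c']
        #   target_ids = set(['rev-b'])
        #   result     -> ['rev-a', 'rev-c']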


class InterModel1and2(InterRepository):
        """Be compatible with Knit1 source and Knit3 target"""
        try:
            from bzrlib.repofmt.knitrepo import (RepositoryFormatKnit1,
                RepositoryFormatKnit3)
            from bzrlib.repofmt.pack_repo import (RepositoryFormatKnitPack1,
                RepositoryFormatKnitPack3)
            return (isinstance(source._format,
                    (RepositoryFormatKnit1, RepositoryFormatKnitPack1)) and
                isinstance(target._format,
                    (RepositoryFormatKnit3, RepositoryFormatKnitPack3))
                )
        except AttributeError:
            return False