/brz/remove-bazaar : revision 2592.6.20

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/repository.py

Committer: Robert Collins
Date: 2007-11-27 20:35:02 UTC
mfrom: (3036 +trunk)
mto: This revision was merged to the branch mainline in revision 3038.
Revision ID: robertc@robertcollins.net-20071127203502-280mxe1zvdlagrpe

Merge with bzr.dev.

files added:
bzrlib/switch.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/repository_implementations/test__generate_text_key_index.py

bzrlib/tests/repository_implementations/test_find_text_key_references.py

bzrlib/tests/test_switch.py

files renamed:
doc/developers/knitpack.txt => doc/developers/packrepo.txt

files modified:
NEWS

bzrlib/annotate.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lru_cache.py

bzrlib/msgeditor.py

bzrlib/osutils.py

bzrlib/reconcile.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/sign_my_commits.py

bzrlib/smart/branch.py

bzrlib/smart/medium.py

bzrlib/smart/repository.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/ftp_server.py

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_check.py

bzrlib/tests/repository_implementations/test_check_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_info.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/http/__init__.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/remote.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

doc/en/user-reference/hooks.txt

Show diffs side-by-side

added added

removed removed

bzrlib/repository.py

lazy_regex,

lockable_files,

lockdir,

lru_cache,

osutils,

registry,

remote,

revision as _mod_revision,

symbol_versioning,

transactions,

tsort,

ui,

)

from bzrlib.bundle import serializer

792

794

(format, data_list, StringIO(knit_bytes).read))

793

795

794

796

@needs_read_lock

795

def missing_revision_ids(self, other, revision_id=None):

797

def missing_revision_ids(self, other, revision_id=None, find_ghosts=True):

796

798

"""Return the revision ids that other has that this does not.

797

799

798

800

These are returned in topological order.

799

801

800

802

revision_id: only return revision ids included by revision_id.

801

803

"""

802

return InterRepository.get(other, self).missing_revision_ids(revision_id)

804

return InterRepository.get(other, self).missing_revision_ids(

805

revision_id, find_ghosts)

803

806

804

807

@staticmethod

805

808

def open(base):

1051

1054

@needs_write_lock

1052

1055

def store_revision_signature(self, gpg_strategy, plaintext, revision_id):

1053

1056

signature = gpg_strategy.sign(plaintext)

1057

self.add_signature_text(revision_id, signature)

1058

1059

@needs_write_lock

1060

def add_signature_text(self, revision_id, signature):

1054

1061

self._revision_store.add_revision_signature_text(revision_id,

1055

1062

signature,

1056

1063

self.get_transaction())

1057

1064

1058

def _find_file_ids_from_xml_inventory_lines(self, line_iterator,

1059

revision_ids):

1060

"""Helper routine for fileids_altered_by_revision_ids.

1065

def find_text_key_references(self):

1066

"""Find the text key references within the repository.

1067

1068

:return: a dictionary mapping (file_id, revision_id) tuples to altered file-ids to an iterable of

1069

revision_ids. Each altered file-ids has the exact revision_ids that

1070

altered it listed explicitly.

1071

:return: A dictionary mapping text keys ((fileid, revision_id) tuples)

1072

to whether they were referred to by the inventory of the

1073

revision_id that they contain. The inventory texts from all present

1074

revision ids are assessed to generate this report.

1075

"""

1076

revision_ids = self.all_revision_ids()

1077

w = self.get_inventory_weave()

1078

pb = ui.ui_factory.nested_progress_bar()

1079

try:

1080

return self._find_text_key_references_from_xml_inventory_lines(

1081

w.iter_lines_added_or_present_in_versions(revision_ids, pb=pb))

1082

finally:

1083

pb.finished()

1084

1085

def _find_text_key_references_from_xml_inventory_lines(self,

1086

line_iterator):

1087

"""Core routine for extracting references to texts from inventories.

1061

1088

1062

1089

This performs the translation of xml lines to revision ids.

1063

1090

1064

1091

:param line_iterator: An iterator of lines, origin_version_id

1065

:param revision_ids: The revision ids to filter for. This should be a

1066

set or other type which supports efficient __contains__ lookups, as

1067

the revision id from each parsed line will be looked up in the

1068

revision_ids filter.

1069

:return: a dictionary mapping altered file-ids to an iterable of

1070

revision_ids. Each altered file-ids has the exact revision_ids that

1071

altered it listed explicitly.

1092

:return: A dictionary mapping text keys ((fileid, revision_id) tuples)

1093

to whether they were referred to by the inventory of the

1094

revision_id that they contain. Note that if that revision_id was

1095

not part of the line_iterator's output then False will be given -

1096

even though it may actually refer to that key.

1072

1097

"""

1098

if not self._serializer.support_altered_by_hack:

1099

raise AssertionError(

1100

"_find_text_key_references_from_xml_inventory_lines only "

1101

"supported for branches which store inventory as unnested xml"

1102

", not on %r" % self)

1073

1103

result = {}

1074

1104

1075

1105

# this code needs to read every new line in every inventory for the

1115

1145

unescape_revid_cache[revision_id] = unescaped

1116

1146

revision_id = unescaped

1117

1147

1148

# Note that unconditionally unescaping means that we deserialise

1149

# every fileid, which for general 'pull' is not great, but we don't

1150

# really want to have so many fulltexts that this matters anyway.

1151

# RBC 20071114.

1152

try:

1153

file_id = unescape_fileid_cache[file_id]

1154

except KeyError:

1155

unescaped = unescape(file_id)

1156

unescape_fileid_cache[file_id] = unescaped

1157

file_id = unescaped

1158

1159

key = (file_id, revision_id)

1160

setdefault(key, False)

1161

if revision_id == version_id:

1162

result[key] = True

1163

return result

1164

1165

def _find_file_ids_from_xml_inventory_lines(self, line_iterator,

1166

revision_ids):

1167

"""Helper routine for fileids_altered_by_revision_ids.

1168

1169

This performs the translation of xml lines to revision ids.

1170

1171

:param line_iterator: An iterator of lines, origin_version_id

1172

:param revision_ids: The revision ids to filter for. This should be a

1173

set or other type which supports efficient __contains__ lookups, as

1174

the revision id from each parsed line will be looked up in the

1175

revision_ids filter.

1176

:return: a dictionary mapping altered file-ids to an iterable of

1177

revision_ids. Each altered file-ids has the exact revision_ids that

1178

altered it listed explicitly.

1179

"""

1180

result = {}

1181

setdefault = result.setdefault

1182

for file_id, revision_id in \

1183

self._find_text_key_references_from_xml_inventory_lines(

1184

line_iterator).iterkeys():

1118

1185

# once data is all ensured-consistent; then this is

1119

1186

# if revision_id == version_id

1120

1187

if revision_id in revision_ids:

1121

try:

1122

file_id = unescape_fileid_cache[file_id]

1123

except KeyError:

1124

unescaped = unescape(file_id)

1125

unescape_fileid_cache[file_id] = unescaped

1126

file_id = unescaped

1127

1188

setdefault(file_id, set()).add(revision_id)

1128

1189

return result

1129

1190

1135

1196

revision_ids. Each altered file-ids has the exact revision_ids that

1136

1197

altered it listed explicitly.

1137

1198

"""

1138

assert self._serializer.support_altered_by_hack, \

1139

("fileids_altered_by_revision_ids only supported for branches "

1140

"which store inventory as unnested xml, not on %r" % self)

1141

1199

selected_revision_ids = set(revision_ids)

1142

1200

w = self.get_inventory_weave()

1143

1201

pb = ui.ui_factory.nested_progress_bar()

1176

1234

raise errors.NoSuchIdInRepository(self, file_id)

1177

1235

yield callable_data, weave.get_lines(revision_id)

1178

1236

1237

def _generate_text_key_index(self):

1238

"""Generate a new text key index for the repository.

1239

1240

This is an expensive function that will take considerable time to run.

1241

1242

:return: A dict mapping text keys ((file_id, revision_id) tuples) to a

1243

list of parents, also text keys. When a given key has no parents,

1244

the parents list will be [NULL_REVISION].

1245

"""

1246

# All revisions, to find inventory parents.

1247

revision_graph = self.get_revision_graph_with_ghosts()

1248

ancestors = revision_graph.get_ancestors()

1249

text_key_references = self.find_text_key_references()

1250

pb = ui.ui_factory.nested_progress_bar()

1251

try:

1252

return self._do_generate_text_key_index(ancestors,

1253

text_key_references, pb)

1254

finally:

1255

pb.finished()

1256

1257

def _do_generate_text_key_index(self, ancestors, text_key_references, pb):

1258

"""Helper for _generate_text_key_index to avoid deep nesting."""

1259

revision_order = tsort.topo_sort(ancestors)

1260

invalid_keys = set()

1261

revision_keys = {}

1262

for revision_id in revision_order:

1263

revision_keys[revision_id] = set()

1264

text_count = len(text_key_references)

1265

# a cache of the text keys to allow reuse; costs a dict of all the

1266

# keys, but saves a 2-tuple for every child of a given key.

1267

text_key_cache = {}

1268

for text_key, valid in text_key_references.iteritems():

1269

if not valid:

1270

invalid_keys.add(text_key)

1271

else:

1272

revision_keys[text_key[1]].add(text_key)

1273

text_key_cache[text_key] = text_key

1274

del text_key_references

1275

text_index = {}

1276

text_graph = graph.Graph(graph.DictParentsProvider(text_index))

1277

NULL_REVISION = _mod_revision.NULL_REVISION

1278

# Set a cache with a size of 10 - this suffices for bzr.dev but may be

1279

# too small for large or very branchy trees. However, for 55K path

1280

# trees, it would be easy to use too much memory trivially. Ideally we

1281

# could gauge this by looking at available real memory etc, but this is

1282

# always a tricky proposition.

1283

inventory_cache = lru_cache.LRUCache(10)

1284

batch_size = 10 # should be ~150MB on a 55K path tree

1285

batch_count = len(revision_order) / batch_size + 1

1286

processed_texts = 0

1287

pb.update("Calculating text parents.", processed_texts, text_count)

1288

for offset in xrange(batch_count):

1289

to_query = revision_order[offset * batch_size:(offset + 1) *

1290

batch_size]

1291

if not to_query:

1292

break

1293

for rev_tree in self.revision_trees(to_query):

1294

revision_id = rev_tree.get_revision_id()

1295

parent_ids = ancestors[revision_id]

1296

for text_key in revision_keys[revision_id]:

1297

pb.update("Calculating text parents.", processed_texts)

1298

processed_texts += 1

1299

candidate_parents = []

1300

for parent_id in parent_ids:

1301

parent_text_key = (text_key[0], parent_id)

1302

try:

1303

check_parent = parent_text_key not in \

1304

revision_keys[parent_id]

1305

except KeyError:

1306

# the parent parent_id is a ghost:

1307

check_parent = False

1308

# truncate the derived graph against this ghost.

1309

parent_text_key = None

1310

if check_parent:

1311

# look at the parent commit details inventories to

1312

# determine possible candidates in the per file graph.

1313

# TODO: cache here.

1314

try:

1315

inv = inventory_cache[parent_id]

1316

except KeyError:

1317

inv = self.revision_tree(parent_id).inventory

1318

inventory_cache[parent_id] = inv

1319

parent_entry = inv._byid.get(text_key[0], None)

1320

if parent_entry is not None:

1321

parent_text_key = (

1322

text_key[0], parent_entry.revision)

1323

else:

1324

parent_text_key = None

1325

if parent_text_key is not None:

1326

candidate_parents.append(

1327

text_key_cache[parent_text_key])

1328

parent_heads = text_graph.heads(candidate_parents)

1329

new_parents = list(parent_heads)

1330

new_parents.sort(key=lambda x:candidate_parents.index(x))

1331

if new_parents == []:

1332

new_parents = [NULL_REVISION]

1333

text_index[text_key] = new_parents

1334

1335

for text_key in invalid_keys:

1336

text_index[text_key] = [NULL_REVISION]

1337

return text_index

1338

1179

1339

def item_keys_introduced_by(self, revision_ids, _files_pb=None):

1180

1340

"""Get an iterable listing the keys of all the data introduced by a set

1181

1341

of revision IDs.

1502

1662

[parents_provider, other_repository._make_parents_provider()])

1503

1663

return graph.Graph(parents_provider)

1504

1664

1505

def get_versioned_file_checker(self, revisions, revision_versions_cache):

1506

return VersionedFileChecker(revisions, revision_versions_cache, self)

1665

def get_versioned_file_checker(self):

1666

"""Return an object suitable for checking versioned files."""

1667

return VersionedFileChecker(self)

1507

1668

1508

1669

@needs_write_lock

1509

1670

def set_make_working_trees(self, new_value):

1628

1789

1629

1790

def install_revision(repository, rev, revision_tree):

1630

1791

"""Install all revision data into a repository."""

1792

install_revisions(repository, [(rev, revision_tree, None)])

1793

1794

1795

def install_revisions(repository, iterable):

1796

"""Install all revision data into a repository.

1797

1798

Accepts an iterable of revision, tree, signature tuples. The signature

1799

may be None.

1800

"""

1631

1801

repository.start_write_group()

1632

1802

try:

1633

_install_revision(repository, rev, revision_tree)

1803

for revision, revision_tree, signature in iterable:

1804

_install_revision(repository, revision, revision_tree, signature)

1634

1805

except:

1635

1806

repository.abort_write_group()

1636

1807

raise

1638

1809

repository.commit_write_group()

1639

1810

1640

1811

1641

def _install_revision(repository, rev, revision_tree):

1812

def _install_revision(repository, rev, revision_tree, signature):

1642

1813

"""Install all revision data into a repository."""

1643

1814

present_parents = []

1644

1815

parent_trees = {}

1683

1854

repository.add_inventory(rev.revision_id, inv, present_parents)

1684

1855

except errors.RevisionAlreadyPresent:

1685

1856

pass

1857

if signature is not None:

1858

repository.add_signature_text(rev.revision_id, signature)

1686

1859

repository.add_revision(rev.revision_id, rev, inv)

1687

1860

1688

1861

1983

2156

'RepositoryFormatKnit3',

1984

2157

)

1985

2158

2159

format_registry.register_lazy(

2160

'Bazaar Knit Repository Format 4 (bzr 1.0)\n',

2161

'bzrlib.repofmt.knitrepo',

2162

'RepositoryFormatKnit4',

2163

)

2164

1986

2165

# Pack-based formats. There is one format for pre-subtrees, and one for

1987

2166

# post-subtrees to allow ease of testing.

1988

2167

# NOTE: These are experimental in 0.92.

1996

2175

'bzrlib.repofmt.pack_repo',

1997

2176

'RepositoryFormatKnitPack3',

1998

2177

)

2178

format_registry.register_lazy(

2179

'Bazaar pack repository format 1 with rich root (needs bzr 1.0)\n',

2180

'bzrlib.repofmt.pack_repo',

2181

'RepositoryFormatKnitPack4',

2182

)

1999

2183

2000

2184

2001

2185

class InterRepository(InterObject):

2032

2216

raise NotImplementedError(self.fetch)

2033

2217

2034

2218

@needs_read_lock

2035

def missing_revision_ids(self, revision_id=None):

2219

def missing_revision_ids(self, revision_id=None, find_ghosts=True):

2036

2220

"""Return the revision ids that source has that target does not.

2037

2221

2038

2222

These are returned in topological order.

2199

2383

return f.count_copied, f.failed_revisions

2200

2384

2201

2385

@needs_read_lock

2202

def missing_revision_ids(self, revision_id=None):

2386

def missing_revision_ids(self, revision_id=None, find_ghosts=True):

2203

2387

"""See InterRepository.missing_revision_ids()."""

2204

2388

# we want all revisions to satisfy revision_id in source.

2205

2389

# but we don't want to stat every file here and there.

2277

2461

return f.count_copied, f.failed_revisions

2278

2462

2279

2463

@needs_read_lock

2280

def missing_revision_ids(self, revision_id=None):

2464

def missing_revision_ids(self, revision_id=None, find_ghosts=True):

2281

2465

"""See InterRepository.missing_revision_ids()."""

2282

2466

if revision_id is not None:

2283

2467

source_ids = self.source.get_ancestry(revision_id)

2354

2538

# sensibly detect 'new revisions' without doing a full index scan.

2355

2539

elif _mod_revision.is_null(revision_id):

2356

2540

# nothing to do:

2357

return

2541

return (0, [])

2358

2542

else:

2359

2543

try:

2360

2544

revision_ids = self.missing_revision_ids(revision_id,

2370

2554

# a pack creation, but for now it is simpler to think about as

2371

2555

# 'upload data, then repack if needed'.

2372

2556

self.target._pack_collection.autopack()

2373

return pack.get_revision_count()

2557

return (pack.get_revision_count(), [])

2374

2558

else:

2375

return 0

2559

return (0, [])

2376

2560

2377

2561

@needs_read_lock

2378

2562

def missing_revision_ids(self, revision_id=None, find_ghosts=True):

2399

2583

target_index.iter_entries(target_keys))

2400

2584

missing_revs.update(next_revs - have_revs)

2401

2585

searcher.stop_searching_any(have_revs)

2586

if next_revs - have_revs == set([revision_id]):

2587

# we saw the start rev itself, but no parents from it (or

2588

# next_revs would have been updated to e.g. set()). We remove

2589

# have_revs because if we found revision_id locally we

2590

# stop_searching at the first time around.

2591

raise errors.NoSuchRevision(self.source, revision_id)

2402

2592

return missing_revs

2403

2593

elif revision_id is not None:

2404

2594

source_ids = self.source.get_ancestry(revision_id)

2495

2685

return f.count_copied, f.failed_revisions

2496

2686

2497

2687

2688

class InterDifferingSerializer(InterKnitRepo):

2689

2690

@classmethod

2691

def _get_repo_format_to_test(self):

2692

return None

2693

2694

@staticmethod

2695

def is_compatible(source, target):

2696

"""Be compatible with Knit2 source and Knit3 target"""

2697

if source.supports_rich_root() != target.supports_rich_root():

2698

return False

2699

# Ideally, we'd support fetching if the source had no tree references

2700

# even if it supported them...

2701

if (getattr(source, '_format.supports_tree_reference', False) and

2702

not getattr(target, '_format.supports_tree_reference', False)):

2703

return False

2704

return True

2705

2706

@needs_write_lock

2707

def fetch(self, revision_id=None, pb=None, find_ghosts=False):

2708

"""See InterRepository.fetch()."""

2709

revision_ids = self.target.missing_revision_ids(self.source,

2710

revision_id)

2711

def revisions_iterator():

2712

for current_revision_id in revision_ids:

2713

revision = self.source.get_revision(current_revision_id)

2714

tree = self.source.revision_tree(current_revision_id)

2715

try:

2716

signature = self.source.get_signature_text(

2717

current_revision_id)

2718

except errors.NoSuchRevision:

2719

signature = None

2720

yield revision, tree, signature

2721

install_revisions(self.target, revisions_iterator())

2722

return len(revision_ids), 0

2723

2724

2498

2725

class InterRemoteToOther(InterRepository):

2499

2726

2500

2727

def __init__(self, source, target):

2564

2791

return None

2565

2792

2566

2793

2794

InterRepository.register_optimiser(InterDifferingSerializer)

2567

2795

InterRepository.register_optimiser(InterSameDataRepository)

2568

2796

InterRepository.register_optimiser(InterWeaveRepo)

2569

2797

InterRepository.register_optimiser(InterKnitRepo)

2656

2884

return _unescape_re.sub(_unescaper, data)

2657

2885

2658

2886

2659

class _RevisionTextVersionCache(object):

2660

"""A cache of the versionedfile versions for revision and file-id."""

2887

class VersionedFileChecker(object):

2661

2888

2662

2889

def __init__(self, repository):

2663

2890

self.repository = repository

2664

self.revision_versions = {}

2665

self.revision_parents = {}

2666

self.repo_graph = self.repository.get_graph()

2667

# XXX: RBC: I haven't tracked down what uses this, but it would be

2668

# better to use the headscache directly I think.

2669

self.heads = graph.HeadsCache(self.repo_graph).heads

2670

2671

def add_revision_text_versions(self, tree):

2672

"""Cache text version data from the supplied revision tree"""

2673

inv_revisions = {}

2674

for path, entry in tree.iter_entries_by_dir():

2675

inv_revisions[entry.file_id] = entry.revision

2676

self.revision_versions[tree.get_revision_id()] = inv_revisions

2677

return inv_revisions

2678

2679

def get_text_version(self, file_id, revision_id):

2680

"""Determine the text version for a given file-id and revision-id"""

2681

try:

2682

inv_revisions = self.revision_versions[revision_id]

2683

except KeyError:

2684

try:

2685

tree = self.repository.revision_tree(revision_id)

2686

except errors.RevisionNotPresent:

2687

self.revision_versions[revision_id] = inv_revisions = {}

2688

else:

2689

inv_revisions = self.add_revision_text_versions(tree)

2690

return inv_revisions.get(file_id)

2691

2692

def prepopulate_revs(self, revision_ids):

2693

# Filter out versions that we don't have an inventory for, so that the

2694

# revision_trees() call won't fail.

2695

inv_weave = self.repository.get_inventory_weave()

2696

revs = [r for r in revision_ids if inv_weave.has_version(r)]

2697

# XXX: this loop is very similar to

2698

# bzrlib.fetch.Inter1and2Helper.iter_rev_trees.

2699

while revs:

2700

mutter('%d revisions left to prepopulate', len(revs))

2701

for tree in self.repository.revision_trees(revs[:100]):

2702

if tree.inventory.revision_id is None:

2703

tree.inventory.revision_id = tree.get_revision_id()

2704

self.add_revision_text_versions(tree)

2705

revs = revs[100:]

2706

2707

def get_parents(self, revision_id):

2708

try:

2709

return self.revision_parents[revision_id]

2710

except KeyError:

2711

parents = self.repository.get_parents([revision_id])[0]

2712

self.revision_parents[revision_id] = parents

2713

return parents

2714

2715

def used_file_versions(self):

2716

"""Return a set of (revision_id, file_id) pairs for each file version

2717

referenced by any inventory cached by this _RevisionTextVersionCache.

2718

2719

If the entire repository has been cached, this can be used to find all

2720

file versions that are actually referenced by inventories. Thus any

2721

other file version is completely unused and can be removed safely.

2722

"""

2723

result = set()

2724

for inventory_summary in self.revision_versions.itervalues():

2725

result.update(inventory_summary.items())

2726

return result

2727

2728

2729

class VersionedFileChecker(object):

2730

2731

def __init__(self, planned_revisions, revision_versions, repository):

2732

self.planned_revisions = planned_revisions

2733

self.revision_versions = revision_versions

2734

self.repository = repository

2891

self.text_index = self.repository._generate_text_key_index()

2735

2892

2736

2893

def calculate_file_version_parents(self, revision_id, file_id):

2737

2894

"""Calculate the correct parents for a file version according to

2738

2895

the inventories.

2739

2896

"""

2740

text_revision = self.revision_versions.get_text_version(

2741

file_id, revision_id)

2742

if text_revision is None:

2743

return None

2744

parents_of_text_revision = self.revision_versions.get_parents(

2745

text_revision)

2746

parents_from_inventories = []

2747

for parent in parents_of_text_revision:

2748

if parent == _mod_revision.NULL_REVISION:

2749

continue

2750

introduced_in = self.revision_versions.get_text_version(file_id,

2751

parent)

2752

if introduced_in is not None:

2753

parents_from_inventories.append(introduced_in)

2754

heads = set(self.revision_versions.heads(parents_from_inventories))

2755

new_parents = []

2756

for parent in parents_from_inventories:

2757

if parent in heads and parent not in new_parents:

2758

new_parents.append(parent)

2759

return tuple(new_parents)

2897

parent_keys = self.text_index[(file_id, revision_id)]

2898

if parent_keys == [_mod_revision.NULL_REVISION]:

2899

return ()

2900

# strip the file_id, for the weave api

2901

return tuple([revision_id for file_id, revision_id in parent_keys])

2760

2902

2761

def check_file_version_parents(self, weave, file_id):

2903

def check_file_version_parents(self, weave, file_id, planned_revisions):

2762

2904

"""Check the parents stored in a versioned file are correct.

2763

2905

2764

2906

It also detects file versions that are not referenced by their

2772

2914

file, but not used by the corresponding inventory.

2773

2915

"""

2774

2916

wrong_parents = {}

2775

dangling_file_versions = set()

2776

for num, revision_id in enumerate(self.planned_revisions):

2777

correct_parents = self.calculate_file_version_parents(

2778

revision_id, file_id)

2779

if correct_parents is None:

2780

continue

2781

text_revision = self.revision_versions.get_text_version(

2782

file_id, revision_id)

2917

unused_versions = set()

2918

for num, revision_id in enumerate(planned_revisions):

2783

2919

try:

2784

knit_parents = tuple(weave.get_parents(revision_id))

2785

except errors.RevisionNotPresent:

2786

knit_parents = None

2787

if text_revision != revision_id:

2788

# This file version is not referenced by its corresponding

2789

# inventory!

2790

dangling_file_versions.add((file_id, revision_id))

2791

if correct_parents != knit_parents:

2792

wrong_parents[revision_id] = (knit_parents, correct_parents)

2793

return wrong_parents, dangling_file_versions

2920

correct_parents = self.calculate_file_version_parents(

2921

revision_id, file_id)

2922

except KeyError:

2923

# we were asked to investigate a non-existent version.

2924

unused_versions.add(revision_id)

2925

else:

2926

try:

2927

knit_parents = tuple(weave.get_parents(revision_id))

2928

except errors.RevisionNotPresent:

2929

knit_parents = None

2930

if correct_parents != knit_parents:

2931

wrong_parents[revision_id] = (knit_parents, correct_parents)

2932

return wrong_parents, unused_versions

Older »