1
# Copyright (C) 2007 Canonical Ltd
1
# Copyright (C) 2007, 2008 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
27
27
from bisect import bisect_right
28
28
from cStringIO import StringIO
31
32
from bzrlib.lazy_import import lazy_import
32
33
lazy_import(globals(), """
52
53
_newline_null_re = re.compile('[\n\0]')
56
def _has_key_from_parent_map(self, key):
57
"""Check if this index has one key.
59
If it's possible to check for multiple keys at once through
60
calling get_parent_map that should be faster.
62
return (key in self.get_parent_map([key]))
65
def _missing_keys_from_parent_map(self, keys):
66
return set(keys) - set(self.get_parent_map(keys))
55
69
class GraphIndexBuilder(object):
56
70
"""A builder that can build a GraphIndex.
85
99
self._nodes_by_key = None
86
100
self._key_length = key_elements
101
self._optimize_for_size = False
88
103
def _check_key(self, key):
89
104
"""Raise BadIndexKey if key is not a valid key for this index."""
95
110
if not element or _whitespace_re.search(element) is not None:
96
111
raise errors.BadIndexKey(element)
113
def _external_references(self):
114
"""Return references that are not present in this index.
118
# TODO: JAM 2008-11-21 This makes an assumption about how the reference
119
# lists are used. It is currently correct for pack-0.92 through
120
# 1.9, which use the node references (3rd column) second
121
# reference list as the compression parent. Perhaps this should
122
# be moved into something higher up the stack, since it
123
# makes assumptions about how the index is used.
124
if self.reference_lists > 1:
125
for node in self.iter_all_entries():
127
refs.update(node[3][1])
130
# If reference_lists == 0 there can be no external references, and
131
# if reference_lists == 1, then there isn't a place to store the
98
135
def _get_nodes_by_key(self):
99
136
if self._nodes_by_key is None:
100
137
nodes_by_key = {}
278
315
(len(result.getvalue()), expected_bytes))
318
def set_optimize(self, for_size=True):
319
"""Change how the builder tries to optimize the result.
321
:param for_size: Tell the builder to try and make the index as small as
325
# GraphIndexBuilder itself doesn't pay attention to the flag yet, but
327
self._optimize_for_size = for_size
282
330
class GraphIndex(object):
283
331
"""An index for data with embedded graphs.
1106
1154
in the index list.
1109
def __init__(self, indices):
1157
def __init__(self, indices, reload_func=None):
1110
1158
"""Create a CombinedGraphIndex backed by indices.
1112
1160
:param indices: An ordered list of indices to query for data.
1161
:param reload_func: A function to call if we find we are missing an
1162
index. Should have the form reload_func() => True/False to indicate
1163
if reloading actually changed anything.
1114
1165
self._indices = indices
1166
self._reload_func = reload_func
1116
1168
def __repr__(self):
1117
1169
return "%s(%s)" % (
1150
1202
found_parents[key] = parents
1151
1203
return found_parents
1205
has_key = _has_key_from_parent_map
1153
1207
def insert_index(self, pos, index):
1154
1208
"""Insert a new index in the list of indices to query.
1169
1223
the most efficient order for the index.
1171
1225
seen_keys = set()
1172
for index in self._indices:
1173
for node in index.iter_all_entries():
1174
if node[1] not in seen_keys:
1176
seen_keys.add(node[1])
1228
for index in self._indices:
1229
for node in index.iter_all_entries():
1230
if node[1] not in seen_keys:
1232
seen_keys.add(node[1])
1234
except errors.NoSuchFile:
1235
self._reload_or_raise()
1178
1237
def iter_entries(self, keys):
1179
1238
"""Iterate over keys within the index.
1187
1246
efficient order for the index.
1189
1248
keys = set(keys)
1190
for index in self._indices:
1251
for index in self._indices:
1254
for node in index.iter_entries(keys):
1255
keys.remove(node[1])
1193
for node in index.iter_entries(keys):
1194
keys.remove(node[1])
1258
except errors.NoSuchFile:
1259
self._reload_or_raise()
1197
1261
def iter_entries_prefix(self, keys):
1198
1262
"""Iterate over keys within the index using prefix matching.
1220
1284
seen_keys = set()
1221
for index in self._indices:
1222
for node in index.iter_entries_prefix(keys):
1223
if node[1] in seen_keys:
1225
seen_keys.add(node[1])
1287
for index in self._indices:
1288
for node in index.iter_entries_prefix(keys):
1289
if node[1] in seen_keys:
1291
seen_keys.add(node[1])
1294
except errors.NoSuchFile:
1295
self._reload_or_raise()
1228
1297
def key_count(self):
1229
1298
"""Return an estimate of the number of keys in this index.
1231
1300
For CombinedGraphIndex this is approximated by the sum of the keys of
1232
1301
the child indices. As child indices may have duplicate keys this can
1233
1302
have a maximum error of the number of child indices * largest number of
1234
1303
keys in any index.
1236
return sum((index.key_count() for index in self._indices), 0)
1307
return sum((index.key_count() for index in self._indices), 0)
1308
except errors.NoSuchFile:
1309
self._reload_or_raise()
1311
missing_keys = _missing_keys_from_parent_map
1313
def _reload_or_raise(self):
1314
"""We just got a NoSuchFile exception.
1316
Try to reload the indices, if it fails, just raise the current
1319
if self._reload_func is None:
1321
exc_type, exc_value, exc_traceback = sys.exc_info()
1322
trace.mutter('Trying to reload after getting exception: %s',
1324
if not self._reload_func():
1325
# We tried to reload, but nothing changed, so we fail anyway
1326
trace.mutter('_reload_func indicated nothing has changed.'
1327
' Raising original exception.')
1328
raise exc_type, exc_value, exc_traceback
1238
1330
def validate(self):
1239
1331
"""Validate that everything in the index can be accessed."""
1240
for index in self._indices:
1334
for index in self._indices:
1337
except errors.NoSuchFile:
1338
self._reload_or_raise()
1244
1341
class InMemoryGraphIndex(GraphIndexBuilder):