1
# Copyright (C) 2007-2010 Canonical Ltd
1
# Copyright (C) 2007-2011 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
27
29
from bisect import bisect_right
28
from cStringIO import StringIO
32
from bzrlib.lazy_import import lazy_import
33
from .lazy_import import lazy_import
33
34
lazy_import(globals(), """
34
from bzrlib import trace
35
from bzrlib.bisect_multi import bisect_multi_bytes
36
from bzrlib.revision import NULL_REVISION
37
from bzrlib.trace import mutter
37
revision as _mod_revision,
43
from bzrlib.static_tuple import StaticTuple
49
from .static_tuple import StaticTuple
45
51
_HEADER_READV = (0, 200)
46
52
_OPTION_KEY_ELEMENTS = "key_elements="
69
75
class GraphIndexBuilder(object):
70
76
"""A builder that can build a GraphIndex.
72
The resulting graph has the structure:
78
The resulting graph has the structure::
74
_SIGNATURE OPTIONS NODES NEWLINE
75
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
76
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
78
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
79
KEY := Not-whitespace-utf8
81
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
82
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
83
REFERENCE := DIGITS ; digits is the byte offset in the index of the
85
VALUE := no-newline-no-null-bytes
80
_SIGNATURE OPTIONS NODES NEWLINE
81
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
82
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
84
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
85
KEY := Not-whitespace-utf8
87
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
88
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
89
REFERENCE := DIGITS ; digits is the byte offset in the index of the
91
VALUE := no-newline-no-null-bytes
88
94
def __init__(self, reference_lists=0, key_elements=1):
184
190
:param value: The value associate with this key. Must not contain
185
191
newlines or null characters.
186
192
:return: (node_refs, absent_references)
187
node_refs basically a packed form of 'references' where all
189
absent_references reference keys that are not in self._nodes.
190
This may contain duplicates if the same key is
191
referenced in multiple lists.
194
* node_refs: basically a packed form of 'references' where all
196
* absent_references: reference keys that are not in self._nodes.
197
This may contain duplicates if the same key is referenced in
193
200
as_st = StaticTuple.from_sequence
194
201
self._check_key(key)
219
226
:param references: An iterable of iterables of keys. Each is a
220
227
reference to another key.
221
228
:param value: The value to associate with the key. It may be any
222
bytes as long as it does not contain \0 or \n.
229
bytes as long as it does not contain \\0 or \\n.
225
232
absent_references) = self._check_key_ref_value(key, references, value)
245
252
def finish(self):
255
:returns: cBytesIO holding the full context of the index as it
256
should be written to disk.
246
258
lines = [_SIGNATURE]
247
259
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
248
260
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
322
334
lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
323
335
'\t'.join(flattened_references), value))
324
336
lines.append('\n')
325
result = StringIO(''.join(lines))
337
result = BytesIO(''.join(lines))
326
338
if expected_bytes and len(result.getvalue()) != expected_bytes:
327
339
raise errors.BzrError('Failed index creation. Internal error:'
328
340
' mismatched output length and expected length: %d %d' %
385
397
def __init__(self, transport, name, size, unlimited_cache=False, offset=0):
386
398
"""Open an index called name on transport.
388
:param transport: A bzrlib.transport.Transport.
400
:param transport: A breezy.transport.Transport.
389
401
:param name: A path to provide to transport API calls.
390
402
:param size: The size of the index in bytes. This is used for bisection
391
403
logic to perform partial index reads. While the size could be
423
435
def __eq__(self, other):
424
436
"""Equal when self and other were created with the same parameters."""
426
type(self) == type(other) and
438
isinstance(self, type(other)) and
427
439
self._transport == other._transport and
428
440
self._name == other._name and
429
441
self._size == other._size)
444
456
# We already did this
446
458
if 'index' in debug.debug_flags:
447
mutter('Reading entire index %s', self._transport.abspath(self._name))
459
trace.mutter('Reading entire index %s',
460
self._transport.abspath(self._name))
448
461
if stream is None:
449
462
stream = self._transport.get(self._name)
450
463
if self._base_offset != 0:
451
464
# This is wasteful, but it is better than dealing with
452
465
# adjusting all the offsets, etc.
453
stream = StringIO(stream.read()[self._base_offset:])
466
stream = BytesIO(stream.read()[self._base_offset:])
454
467
self._read_prefix(stream)
455
468
self._expected_elements = 3 + self._key_length
463
476
pos = stream.tell()
464
477
lines = stream.read().split('\n')
478
# GZ 2009-09-20: Should really use a try/finally block to ensure close
467
481
_, _, _, trailers = self._parse_lines(lines, pos)
670
684
if self._nodes is not None:
671
685
return self._iter_entries_from_total_buffer(keys)
673
return (result[1] for result in bisect_multi_bytes(
687
return (result[1] for result in bisect_multi.bisect_multi_bytes(
674
688
self._lookup_keys_via_location, self._size, keys))
676
690
def iter_entries_prefix(self, keys):
703
717
self._buffer_all()
704
718
if self._key_length == 1:
708
raise errors.BadIndexKey(key)
709
if len(key) != self._key_length:
710
raise errors.BadIndexKey(key)
720
_sanity_check_key(self, key)
711
721
if self.node_ref_lists:
712
722
value, node_refs = self._nodes[key]
713
723
yield self, key, value, node_refs
715
725
yield self, key, self._nodes[key]
717
727
nodes_by_key = self._get_nodes_by_key()
721
raise errors.BadIndexKey(key)
722
if len(key) != self._key_length:
723
raise errors.BadIndexKey(key)
724
# find what it refers to:
725
key_dict = nodes_by_key
727
# find the subdict whose contents should be returned.
729
while len(elements) and elements[0] is not None:
730
key_dict = key_dict[elements[0]]
733
# a non-existant lookup.
738
key_dict = dicts.pop(-1)
739
# can't be empty or would not exist
740
item, value = key_dict.iteritems().next()
741
if type(value) == dict:
743
dicts.extend(key_dict.itervalues())
746
for value in key_dict.itervalues():
747
# each value is the key:value:node refs tuple
749
yield (self, ) + value
751
# the last thing looked up was a terminal element
752
yield (self, ) + key_dict
728
for entry in _iter_entries_prefix(self, nodes_by_key, keys):
754
731
def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
755
732
"""See BTreeIndex._find_ancestors."""
788
765
:param location_keys: A list of location(byte offset), key tuples.
789
766
:return: A list of (location_key, result) tuples as expected by
790
bzrlib.bisect_multi.bisect_multi_bytes.
767
breezy.bisect_multi.bisect_multi_bytes.
792
769
# Possible improvements:
793
770
# - only bisect lookup each key once
1217
1194
# We read the whole range, most likely because the
1218
1195
# Transport upcast our readv ranges into one long request
1219
1196
# for enough total data to grab the whole index.
1220
self._buffer_all(StringIO(data))
1197
self._buffer_all(BytesIO(data))
1222
1199
if self._bisect_nodes is None:
1223
1200
# this must be the start
1287
1264
def get_parent_map(self, keys):
1288
1265
"""See graph.StackedParentsProvider.get_parent_map"""
1289
1266
search_keys = set(keys)
1290
if NULL_REVISION in search_keys:
1291
search_keys.discard(NULL_REVISION)
1292
found_parents = {NULL_REVISION:[]}
1267
if _mod_revision.NULL_REVISION in search_keys:
1268
search_keys.discard(_mod_revision.NULL_REVISION)
1269
found_parents = {_mod_revision.NULL_REVISION:[]}
1294
1271
found_parents = {}
1295
1272
for index, key, value, refs in self.iter_entries(search_keys):
1296
1273
parents = refs[0]
1297
1274
if not parents:
1298
parents = (NULL_REVISION,)
1275
parents = (_mod_revision.NULL_REVISION,)
1299
1276
found_parents[key] = parents
1300
1277
return found_parents
1302
has_key = _has_key_from_parent_map
1279
__contains__ = _has_key_from_parent_map
1304
1281
def insert_index(self, pos, index, name=None):
1305
1282
"""Insert a new index in the list of indices to query.
1333
1310
seen_keys.add(node[1])
1335
except errors.NoSuchFile:
1336
self._reload_or_raise()
1312
except errors.NoSuchFile as e:
1313
if not self._try_reload(e):
1338
1316
def iter_entries(self, keys):
1339
1317
"""Iterate over keys within the index.
1362
1340
hit_indices.append(index)
1364
except errors.NoSuchFile:
1365
self._reload_or_raise()
1342
except errors.NoSuchFile as e:
1343
if not self._try_reload(e):
1366
1345
self._move_to_front(hit_indices)
1368
1347
def iter_entries_prefix(self, keys):
1404
1383
hit_indices.append(index)
1406
except errors.NoSuchFile:
1407
self._reload_or_raise()
1385
except errors.NoSuchFile as e:
1386
if not self._try_reload(e):
1408
1388
self._move_to_front(hit_indices)
1410
1390
def _move_to_front(self, hit_indices):
1434
1414
indices_info = zip(self._index_names, self._indices)
1435
1415
if 'index' in debug.debug_flags:
1436
mutter('CombinedGraphIndex reordering: currently %r, promoting %r',
1437
indices_info, hit_indices)
1416
indices_info = list(indices_info)
1417
trace.mutter('CombinedGraphIndex reordering: currently %r, '
1418
'promoting %r', indices_info, hit_indices)
1439
1420
unhit_names = []
1440
1421
new_hit_indices = []
1457
1438
self._indices = new_hit_indices + unhit_indices
1458
1439
self._index_names = hit_names + unhit_names
1459
1440
if 'index' in debug.debug_flags:
1460
mutter('CombinedGraphIndex reordered: %r', self._indices)
1441
trace.mutter('CombinedGraphIndex reordered: %r', self._indices)
1461
1442
return hit_names
1463
1444
def _move_to_front_by_name(self, hit_names):
1550
1531
return sum((index.key_count() for index in self._indices), 0)
1551
except errors.NoSuchFile:
1552
self._reload_or_raise()
1532
except errors.NoSuchFile as e:
1533
if not self._try_reload(e):
1554
1536
missing_keys = _missing_keys_from_parent_map
1556
def _reload_or_raise(self):
1538
def _try_reload(self, error):
1557
1539
"""We just got a NoSuchFile exception.
1559
1541
Try to reload the indices, if it fails, just raise the current
1562
1544
if self._reload_func is None:
1564
exc_type, exc_value, exc_traceback = sys.exc_info()
1565
trace.mutter('Trying to reload after getting exception: %s',
1546
trace.mutter('Trying to reload after getting exception: %s', error)
1567
1547
if not self._reload_func():
1568
1548
# We tried to reload, but nothing changed, so we fail anyway
1569
1549
trace.mutter('_reload_func indicated nothing has changed.'
1570
1550
' Raising original exception.')
1571
raise exc_type, exc_value, exc_traceback
1573
1554
def set_sibling_indices(self, sibling_combined_graph_indices):
1574
1555
"""Set the CombinedGraphIndex objects to reorder after reordering self.
1582
1563
for index in self._indices:
1583
1564
index.validate()
1585
except errors.NoSuchFile:
1586
self._reload_or_raise()
1566
except errors.NoSuchFile as e:
1567
if not self._try_reload(e):
1589
1571
class InMemoryGraphIndex(GraphIndexBuilder):
1665
1647
will be returned, and every match that is in the index will be
1668
# XXX: To much duplication with the GraphIndex class; consider finding
1669
# a good place to pull out the actual common logic.
1670
1650
keys = set(keys)
1673
1653
if self._key_length == 1:
1674
1654
for key in keys:
1677
raise errors.BadIndexKey(key)
1678
if len(key) != self._key_length:
1679
raise errors.BadIndexKey(key)
1655
_sanity_check_key(self, key)
1680
1656
node = self._nodes[key]
1686
1662
yield self, key, node[2]
1688
1664
nodes_by_key = self._get_nodes_by_key()
1692
raise errors.BadIndexKey(key)
1693
if len(key) != self._key_length:
1694
raise errors.BadIndexKey(key)
1695
# find what it refers to:
1696
key_dict = nodes_by_key
1697
elements = list(key)
1698
# find the subdict to return
1700
while len(elements) and elements[0] is not None:
1701
key_dict = key_dict[elements[0]]
1704
# a non-existant lookup.
1709
key_dict = dicts.pop(-1)
1710
# can't be empty or would not exist
1711
item, value = key_dict.iteritems().next()
1712
if type(value) == dict:
1714
dicts.extend(key_dict.itervalues())
1717
for value in key_dict.itervalues():
1718
yield (self, ) + value
1720
yield (self, ) + key_dict
1665
for entry in _iter_entries_prefix(self, nodes_by_key, keys):
1722
1668
def key_count(self):
1723
1669
"""Return an estimate of the number of keys in this index.
1855
1801
def validate(self):
1856
1802
"""Call the adapted's validate."""
1857
1803
self.adapted.validate()
1806
def _sanity_check_key(index_or_builder, key):
1807
"""Raise BadIndexKey if key cannot be used for prefix matching."""
1809
raise errors.BadIndexKey(key)
1810
if len(key) != index_or_builder._key_length:
1811
raise errors.BadIndexKey(key)
1814
def _iter_entries_prefix(index_or_builder, nodes_by_key, keys):
1815
"""Helper for implementing prefix matching iterators."""
1817
_sanity_check_key(index_or_builder, key)
1818
# find what it refers to:
1819
key_dict = nodes_by_key
1820
elements = list(key)
1821
# find the subdict whose contents should be returned.
1823
while len(elements) and elements[0] is not None:
1824
key_dict = key_dict[elements[0]]
1827
# a non-existant lookup.
1832
values_view = viewvalues(dicts.pop())
1833
# can't be empty or would not exist
1834
value = next(iter(values_view))
1835
if isinstance(value, dict):
1836
# still descending, push values
1837
dicts.extend(values_view)
1839
# at leaf tuples, yield values
1840
for value in values_view:
1841
# each value is the key:value:node refs tuple
1843
yield (index_or_builder, ) + value
1845
# the last thing looked up was a terminal element
1846
yield (index_or_builder, ) + key_dict