1
# Copyright (C) 2007-2010 Canonical Ltd
1
# Copyright (C) 2007-2011 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
27
29
from bisect import bisect_right
28
from cStringIO import StringIO
32
from bzrlib.lazy_import import lazy_import
33
from .lazy_import import lazy_import
33
34
lazy_import(globals(), """
34
from bzrlib import trace
35
from bzrlib.bisect_multi import bisect_multi_bytes
36
from bzrlib.revision import NULL_REVISION
37
from bzrlib.trace import mutter
37
revision as _mod_revision,
43
from bzrlib.static_tuple import StaticTuple
50
from .static_tuple import StaticTuple
45
52
_HEADER_READV = (0, 200)
46
53
_OPTION_KEY_ELEMENTS = "key_elements="
69
76
class GraphIndexBuilder(object):
70
77
"""A builder that can build a GraphIndex.
72
The resulting graph has the structure:
79
The resulting graph has the structure::
74
_SIGNATURE OPTIONS NODES NEWLINE
75
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
76
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
78
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
79
KEY := Not-whitespace-utf8
81
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
82
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
83
REFERENCE := DIGITS ; digits is the byte offset in the index of the
85
VALUE := no-newline-no-null-bytes
81
_SIGNATURE OPTIONS NODES NEWLINE
82
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
83
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
85
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
86
KEY := Not-whitespace-utf8
88
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
89
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
90
REFERENCE := DIGITS ; digits is the byte offset in the index of the
92
VALUE := no-newline-no-null-bytes
88
95
def __init__(self, reference_lists=0, key_elements=1):
138
145
if self._nodes_by_key is None:
139
146
nodes_by_key = {}
140
147
if self.reference_lists:
141
for key, (absent, references, value) in self._nodes.iteritems():
148
for key, (absent, references, value) in viewitems(self._nodes):
144
151
key_dict = nodes_by_key
146
153
key_dict = key_dict.setdefault(subkey, {})
147
154
key_dict[key[-1]] = key, value, references
149
for key, (absent, references, value) in self._nodes.iteritems():
156
for key, (absent, references, value) in viewitems(self._nodes):
152
159
key_dict = nodes_by_key
184
191
:param value: The value associate with this key. Must not contain
185
192
newlines or null characters.
186
193
:return: (node_refs, absent_references)
187
node_refs basically a packed form of 'references' where all
189
absent_references reference keys that are not in self._nodes.
190
This may contain duplicates if the same key is
191
referenced in multiple lists.
195
* node_refs: basically a packed form of 'references' where all
197
* absent_references: reference keys that are not in self._nodes.
198
This may contain duplicates if the same key is referenced in
193
201
as_st = StaticTuple.from_sequence
194
202
self._check_key(key)
219
227
:param references: An iterable of iterables of keys. Each is a
220
228
reference to another key.
221
229
:param value: The value to associate with the key. It may be any
222
bytes as long as it does not contain \0 or \n.
230
bytes as long as it does not contain \\0 or \\n.
225
233
absent_references) = self._check_key_ref_value(key, references, value)
245
253
def finish(self):
256
:returns: cBytesIO holding the full context of the index as it
257
should be written to disk.
246
259
lines = [_SIGNATURE]
247
260
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
248
261
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
264
277
# forward sorted by key. In future we may consider topological sorting,
265
278
# at the cost of table scans for direct lookup, or a second index for
267
nodes = sorted(self._nodes.items())
280
nodes = sorted(viewitems(self._nodes))
268
281
# if we do not prepass, we don't know how long it will be up front.
269
282
expected_bytes = None
270
283
# we only need to pre-pass if we have reference lists at all.
322
335
lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
323
336
'\t'.join(flattened_references), value))
324
337
lines.append('\n')
325
result = StringIO(''.join(lines))
338
result = BytesIO(''.join(lines))
326
339
if expected_bytes and len(result.getvalue()) != expected_bytes:
327
340
raise errors.BzrError('Failed index creation. Internal error:'
328
341
' mismatched output length and expected length: %d %d' %
385
398
def __init__(self, transport, name, size, unlimited_cache=False, offset=0):
386
399
"""Open an index called name on transport.
388
:param transport: A bzrlib.transport.Transport.
401
:param transport: A breezy.transport.Transport.
389
402
:param name: A path to provide to transport API calls.
390
403
:param size: The size of the index in bytes. This is used for bisection
391
404
logic to perform partial index reads. While the size could be
423
436
def __eq__(self, other):
424
437
"""Equal when self and other were created with the same parameters."""
426
type(self) == type(other) and
439
isinstance(self, type(other)) and
427
440
self._transport == other._transport and
428
441
self._name == other._name and
429
442
self._size == other._size)
444
457
# We already did this
446
459
if 'index' in debug.debug_flags:
447
mutter('Reading entire index %s', self._transport.abspath(self._name))
460
trace.mutter('Reading entire index %s',
461
self._transport.abspath(self._name))
448
462
if stream is None:
449
463
stream = self._transport.get(self._name)
450
464
if self._base_offset != 0:
451
465
# This is wasteful, but it is better than dealing with
452
466
# adjusting all the offsets, etc.
453
stream = StringIO(stream.read()[self._base_offset:])
467
stream = BytesIO(stream.read()[self._base_offset:])
454
468
self._read_prefix(stream)
455
469
self._expected_elements = 3 + self._key_length
463
477
pos = stream.tell()
464
478
lines = stream.read().split('\n')
479
# GZ 2009-09-20: Should really use a try/finally block to ensure close
467
482
_, _, _, trailers = self._parse_lines(lines, pos)
468
for key, absent, references, value in self._keys_by_offset.itervalues():
483
for key, absent, references, value in viewvalues(self._keys_by_offset):
471
486
# resolve references:
496
511
% (ref_list_num, self.node_ref_lists))
498
513
nodes = self._nodes
499
for key, (value, ref_lists) in nodes.iteritems():
514
for key, (value, ref_lists) in viewitems(nodes):
500
515
ref_list = ref_lists[ref_list_num]
501
516
refs.update([ref for ref in ref_list if ref not in nodes])
505
520
if self._nodes_by_key is None:
506
521
nodes_by_key = {}
507
522
if self.node_ref_lists:
508
for key, (value, references) in self._nodes.iteritems():
523
for key, (value, references) in viewitems(self._nodes):
509
524
key_dict = nodes_by_key
510
525
for subkey in key[:-1]:
511
526
key_dict = key_dict.setdefault(subkey, {})
512
527
key_dict[key[-1]] = key, value, references
514
for key, value in self._nodes.iteritems():
529
for key, value in viewitems(self._nodes):
515
530
key_dict = nodes_by_key
516
531
for subkey in key[:-1]:
517
532
key_dict = key_dict.setdefault(subkey, {})
534
549
if self._nodes is None:
535
550
self._buffer_all()
536
551
if self.node_ref_lists:
537
for key, (value, node_ref_lists) in self._nodes.iteritems():
552
for key, (value, node_ref_lists) in viewitems(self._nodes):
538
553
yield self, key, value, node_ref_lists
540
for key, value in self._nodes.iteritems():
555
for key, value in viewitems(self._nodes):
541
556
yield self, key, value
543
558
def _read_prefix(self, stream):
670
685
if self._nodes is not None:
671
686
return self._iter_entries_from_total_buffer(keys)
673
return (result[1] for result in bisect_multi_bytes(
688
return (result[1] for result in bisect_multi.bisect_multi_bytes(
674
689
self._lookup_keys_via_location, self._size, keys))
676
691
def iter_entries_prefix(self, keys):
703
718
self._buffer_all()
704
719
if self._key_length == 1:
708
raise errors.BadIndexKey(key)
709
if len(key) != self._key_length:
710
raise errors.BadIndexKey(key)
721
_sanity_check_key(self, key)
711
722
if self.node_ref_lists:
712
723
value, node_refs = self._nodes[key]
713
724
yield self, key, value, node_refs
715
726
yield self, key, self._nodes[key]
717
728
nodes_by_key = self._get_nodes_by_key()
721
raise errors.BadIndexKey(key)
722
if len(key) != self._key_length:
723
raise errors.BadIndexKey(key)
724
# find what it refers to:
725
key_dict = nodes_by_key
727
# find the subdict whose contents should be returned.
729
while len(elements) and elements[0] is not None:
730
key_dict = key_dict[elements[0]]
733
# a non-existant lookup.
738
key_dict = dicts.pop(-1)
739
# can't be empty or would not exist
740
item, value = key_dict.iteritems().next()
741
if type(value) == dict:
743
dicts.extend(key_dict.itervalues())
746
for value in key_dict.itervalues():
747
# each value is the key:value:node refs tuple
749
yield (self, ) + value
751
# the last thing looked up was a terminal element
752
yield (self, ) + key_dict
729
for entry in _iter_entries_prefix(self, nodes_by_key, keys):
754
732
def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
755
733
"""See BTreeIndex._find_ancestors."""
788
766
:param location_keys: A list of location(byte offset), key tuples.
789
767
:return: A list of (location_key, result) tuples as expected by
790
bzrlib.bisect_multi.bisect_multi_bytes.
768
breezy.bisect_multi.bisect_multi_bytes.
792
770
# Possible improvements:
793
771
# - only bisect lookup each key once
1217
1195
# We read the whole range, most likely because the
1218
1196
# Transport upcast our readv ranges into one long request
1219
1197
# for enough total data to grab the whole index.
1220
self._buffer_all(StringIO(data))
1198
self._buffer_all(BytesIO(data))
1222
1200
if self._bisect_nodes is None:
1223
1201
# this must be the start
1287
1265
def get_parent_map(self, keys):
1288
1266
"""See graph.StackedParentsProvider.get_parent_map"""
1289
1267
search_keys = set(keys)
1290
if NULL_REVISION in search_keys:
1291
search_keys.discard(NULL_REVISION)
1292
found_parents = {NULL_REVISION:[]}
1268
if _mod_revision.NULL_REVISION in search_keys:
1269
search_keys.discard(_mod_revision.NULL_REVISION)
1270
found_parents = {_mod_revision.NULL_REVISION:[]}
1294
1272
found_parents = {}
1295
1273
for index, key, value, refs in self.iter_entries(search_keys):
1296
1274
parents = refs[0]
1297
1275
if not parents:
1298
parents = (NULL_REVISION,)
1276
parents = (_mod_revision.NULL_REVISION,)
1299
1277
found_parents[key] = parents
1300
1278
return found_parents
1302
has_key = _has_key_from_parent_map
1280
__contains__ = _has_key_from_parent_map
1304
1282
def insert_index(self, pos, index, name=None):
1305
1283
"""Insert a new index in the list of indices to query.
1333
1311
seen_keys.add(node[1])
1335
except errors.NoSuchFile:
1336
self._reload_or_raise()
1313
except errors.NoSuchFile as e:
1314
if not self._try_reload(e):
1338
1317
def iter_entries(self, keys):
1339
1318
"""Iterate over keys within the index.
1362
1341
hit_indices.append(index)
1364
except errors.NoSuchFile:
1365
self._reload_or_raise()
1343
except errors.NoSuchFile as e:
1344
if not self._try_reload(e):
1366
1346
self._move_to_front(hit_indices)
1368
1348
def iter_entries_prefix(self, keys):
1404
1384
hit_indices.append(index)
1406
except errors.NoSuchFile:
1407
self._reload_or_raise()
1386
except errors.NoSuchFile as e:
1387
if not self._try_reload(e):
1408
1389
self._move_to_front(hit_indices)
1410
1391
def _move_to_front(self, hit_indices):
1434
1415
indices_info = zip(self._index_names, self._indices)
1435
1416
if 'index' in debug.debug_flags:
1436
mutter('CombinedGraphIndex reordering: currently %r, promoting %r',
1437
indices_info, hit_indices)
1417
indices_info = list(indices_info)
1418
trace.mutter('CombinedGraphIndex reordering: currently %r, '
1419
'promoting %r', indices_info, hit_indices)
1439
1421
unhit_names = []
1440
1422
new_hit_indices = []
1457
1439
self._indices = new_hit_indices + unhit_indices
1458
1440
self._index_names = hit_names + unhit_names
1459
1441
if 'index' in debug.debug_flags:
1460
mutter('CombinedGraphIndex reordered: %r', self._indices)
1442
trace.mutter('CombinedGraphIndex reordered: %r', self._indices)
1461
1443
return hit_names
1463
1445
def _move_to_front_by_name(self, hit_names):
1550
1532
return sum((index.key_count() for index in self._indices), 0)
1551
except errors.NoSuchFile:
1552
self._reload_or_raise()
1533
except errors.NoSuchFile as e:
1534
if not self._try_reload(e):
1554
1537
missing_keys = _missing_keys_from_parent_map
1556
def _reload_or_raise(self):
1539
def _try_reload(self, error):
1557
1540
"""We just got a NoSuchFile exception.
1559
1542
Try to reload the indices, if it fails, just raise the current
1562
1545
if self._reload_func is None:
1564
exc_type, exc_value, exc_traceback = sys.exc_info()
1565
trace.mutter('Trying to reload after getting exception: %s',
1547
trace.mutter('Trying to reload after getting exception: %s', error)
1567
1548
if not self._reload_func():
1568
1549
# We tried to reload, but nothing changed, so we fail anyway
1569
1550
trace.mutter('_reload_func indicated nothing has changed.'
1570
1551
' Raising original exception.')
1571
raise exc_type, exc_value, exc_traceback
1573
1555
def set_sibling_indices(self, sibling_combined_graph_indices):
1574
1556
"""Set the CombinedGraphIndex objects to reorder after reordering self.
1582
1564
for index in self._indices:
1583
1565
index.validate()
1585
except errors.NoSuchFile:
1586
self._reload_or_raise()
1567
except errors.NoSuchFile as e:
1568
if not self._try_reload(e):
1589
1572
class InMemoryGraphIndex(GraphIndexBuilder):
1617
1600
trace.mutter_callsite(3,
1618
1601
"iter_all_entries scales with size of history.")
1619
1602
if self.reference_lists:
1620
for key, (absent, references, value) in self._nodes.iteritems():
1603
for key, (absent, references, value) in viewitems(self._nodes):
1622
1605
yield self, key, value, references
1624
for key, (absent, references, value) in self._nodes.iteritems():
1607
for key, (absent, references, value) in viewitems(self._nodes):
1626
1609
yield self, key, value
1665
1648
will be returned, and every match that is in the index will be
1668
# XXX: To much duplication with the GraphIndex class; consider finding
1669
# a good place to pull out the actual common logic.
1670
1651
keys = set(keys)
1673
1654
if self._key_length == 1:
1674
1655
for key in keys:
1677
raise errors.BadIndexKey(key)
1678
if len(key) != self._key_length:
1679
raise errors.BadIndexKey(key)
1656
_sanity_check_key(self, key)
1680
1657
node = self._nodes[key]
1686
1663
yield self, key, node[2]
1688
1665
nodes_by_key = self._get_nodes_by_key()
1692
raise errors.BadIndexKey(key)
1693
if len(key) != self._key_length:
1694
raise errors.BadIndexKey(key)
1695
# find what it refers to:
1696
key_dict = nodes_by_key
1697
elements = list(key)
1698
# find the subdict to return
1700
while len(elements) and elements[0] is not None:
1701
key_dict = key_dict[elements[0]]
1704
# a non-existant lookup.
1709
key_dict = dicts.pop(-1)
1710
# can't be empty or would not exist
1711
item, value = key_dict.iteritems().next()
1712
if type(value) == dict:
1714
dicts.extend(key_dict.itervalues())
1717
for value in key_dict.itervalues():
1718
yield (self, ) + value
1720
yield (self, ) + key_dict
1666
for entry in _iter_entries_prefix(self, nodes_by_key, keys):
1722
1669
def key_count(self):
1723
1670
"""Return an estimate of the number of keys in this index.
1855
1802
def validate(self):
1856
1803
"""Call the adapted's validate."""
1857
1804
self.adapted.validate()
1807
def _sanity_check_key(index_or_builder, key):
1808
"""Raise BadIndexKey if key cannot be used for prefix matching."""
1810
raise errors.BadIndexKey(key)
1811
if len(key) != index_or_builder._key_length:
1812
raise errors.BadIndexKey(key)
1815
def _iter_entries_prefix(index_or_builder, nodes_by_key, keys):
1816
"""Helper for implementing prefix matching iterators."""
1818
_sanity_check_key(index_or_builder, key)
1819
# find what it refers to:
1820
key_dict = nodes_by_key
1821
elements = list(key)
1822
# find the subdict whose contents should be returned.
1824
while len(elements) and elements[0] is not None:
1825
key_dict = key_dict[elements[0]]
1828
# a non-existant lookup.
1833
values_view = viewvalues(dicts.pop())
1834
# can't be empty or would not exist
1835
value = next(iter(values_view))
1836
if isinstance(value, dict):
1837
# still descending, push values
1838
dicts.extend(values_view)
1840
# at leaf tuples, yield values
1841
for value in values_view:
1842
# each value is the key:value:node refs tuple
1844
yield (index_or_builder, ) + value
1846
# the last thing looked up was a terminal element
1847
yield (index_or_builder, ) + key_dict