from bisect import bisect_right
import re

from ..lazy_import import lazy_import
lazy_import(globals(), """
from breezy import (
    revision as _mod_revision,
    )
""")
from .. import (
    debug,
    errors,
    )
from ..sixish import (
    BytesIO,
    viewitems,
    viewvalues,
    )
from ..static_tuple import StaticTuple


_HEADER_READV = (0, 200)
_OPTION_KEY_ELEMENTS = b"key_elements="
_OPTION_NODE_REFS = b"node_ref_lists="
_SIGNATURE = b"Bazaar Graph Index 1\n"
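
# Illustrative sketch (not part of the original module): the constants above
# make up the plain-text header that GraphIndexBuilder.finish() writes and
# GraphIndex._read_prefix() parses.  For an empty index with one reference
# list and single-element keys the serialised header would be:
_EXAMPLE_HEADER = (
    b"Bazaar Graph Index 1\n"
    b"node_ref_lists=1\n"
    b"key_elements=1\n"
    b"len=0\n"
)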

class BadIndexFormatSignature(errors.BzrError):

    _fmt = "%(value)s is not an index of type %(_type)s."

    def __init__(self, value, _type):
        errors.BzrError.__init__(self)
        self.value = value
        self._type = _type


class BadIndexData(errors.BzrError):

    _fmt = "Error in data for index %(value)s."

    def __init__(self, value):
        errors.BzrError.__init__(self)
        self.value = value


class BadIndexDuplicateKey(errors.BzrError):

    _fmt = "The key '%(key)s' is already in index '%(index)s'."

    def __init__(self, key, index):
        errors.BzrError.__init__(self)
        self.key = key
        self.index = index


class BadIndexKey(errors.BzrError):

    _fmt = "The key '%(key)s' is not a valid key."

    def __init__(self, key):
        errors.BzrError.__init__(self)
        self.key = key


class BadIndexOptions(errors.BzrError):

    _fmt = "Could not parse options for index %(value)s."

    def __init__(self, value):
        errors.BzrError.__init__(self)
        self.value = value


class BadIndexValue(errors.BzrError):

    _fmt = "The value '%(value)s' is not a valid value."

    def __init__(self, value):
        errors.BzrError.__init__(self)
        self.value = value


_whitespace_re = re.compile(b'[\t\n\x0b\x0c\r\x00 ]')
_newline_null_re = re.compile(b'[\n\0]')

def _has_key_from_parent_map(self, key):

    def _check_key(self, key):
        """Raise BadIndexKey if key is not a valid key for this index."""
        if type(key) not in (tuple, StaticTuple):
            raise BadIndexKey(key)
        if self._key_length != len(key):
            raise BadIndexKey(key)
        for element in key:
            if not element or type(element) != bytes or _whitespace_re.search(element) is not None:
                raise BadIndexKey(key)
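
    # Illustrative sketch (not part of the original module): with
    # key_elements=2, _check_key() accepts tuples of non-empty byte strings
    # and rejects everything else, e.g.
    #
    #     builder = GraphIndexBuilder(reference_lists=0, key_elements=2)
    #     builder._check_key((b'file-id', b'rev-id'))    # passes silently
    #     builder._check_key((b'file-id',))              # BadIndexKey: wrong length
    #     builder._check_key((b'file id', b'rev-id'))    # BadIndexKey: whitespace
    #     builder._check_key((u'file-id', u'rev-id'))    # BadIndexKey: not bytes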

    def _external_references(self):
        """Return references that are not present in this index.

                        key_dict = key_dict.setdefault(subkey, {})
                    key_dict[key[-1]] = key, value, references
            else:
                for key, (absent, references, value) in viewitems(self._nodes):
                    key_dict = nodes_by_key

        as_st = StaticTuple.from_sequence
        self._check_key(key)
        if _newline_null_re.search(value) is not None:
            raise BadIndexValue(value)
        if len(references) != self.reference_lists:
            raise BadIndexValue(references)

        absent_references = []
        for reference_list in references:

         absent_references) = self._check_key_ref_value(key, references, value)
        if key in self._nodes and self._nodes[key][0] != 'a':
            raise BadIndexDuplicateKey(key, self)
        for reference in absent_references:
            # There may be duplicates, but I don't think it is worth worrying

    def finish(self):
        """Finish the index.

        :returns: BytesIO holding the full contents of the index as it
            should be written to disk.
        """
        lines = [_SIGNATURE]
        lines.append(b'%s%d\n' % (_OPTION_NODE_REFS, self.reference_lists))
        lines.append(b'%s%d\n' % (_OPTION_KEY_ELEMENTS, self._key_length))
        key_count = len(self._nodes) - len(self._absent_keys)
        lines.append(b'%s%d\n' % (_OPTION_LEN, key_count))
        prefix_length = sum(len(x) for x in lines)
        # references are byte offsets. To avoid having to do nasty
        # polynomial work to resolve offsets (references to later in the

        # forward sorted by key. In future we may consider topological sorting,
        # at the cost of table scans for direct lookup, or a second index for
        nodes = sorted(viewitems(self._nodes))
        # if we do not prepass, we don't know how long it will be up front.
        expected_bytes = None
        # we only need to pre-pass if we have reference lists at all.

        for key, non_ref_bytes, total_references in key_offset_info:
            key_addresses[key] = non_ref_bytes + total_references*digits

        format_string = b'%%0%dd' % digits
        for key, (absent, references, value) in nodes:
            flattened_references = []
            for ref_list in references:
                ref_addresses = []
                for reference in ref_list:
                    ref_addresses.append(format_string % key_addresses[reference])
                flattened_references.append(b'\r'.join(ref_addresses))
            string_key = b'\x00'.join(key)
            lines.append(b"%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
                b'\t'.join(flattened_references), value))
        result = BytesIO(b''.join(lines))
        if expected_bytes and len(result.getvalue()) != expected_bytes:
            raise errors.BzrError('Failed index creation. Internal error:'
                ' mismatched output length and expected length: %d %d' %
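
    # Illustrative usage sketch (not part of the original module): build a
    # tiny index with one reference list and write the serialised bytes to
    # disk (the file name is hypothetical):
    #
    #     builder = GraphIndexBuilder(reference_lists=1)
    #     builder.add_node((b'parent',), b'data-for-parent', ([],))
    #     builder.add_node((b'child',), b'data-for-child', ([(b'parent',)],))
    #     with open('example.cix', 'wb') as f:
    #         f.write(builder.finish().read())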

    def __init__(self, transport, name, size, unlimited_cache=False, offset=0):
        """Open an index called name on transport.

        :param transport: A breezy.transport.Transport.
        :param name: A path to provide to transport API calls.
        :param size: The size of the index in bytes. This is used for bisection
            logic to perform partial index reads. While the size could be

    def __eq__(self, other):
        """Equal when self and other were created with the same parameters."""
        return (
            isinstance(self, type(other)) and
            self._transport == other._transport and
            self._name == other._name and
            self._size == other._size)
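
    # Illustrative sketch (not part of the original module): opening an
    # existing index file over a transport (the path and file name are
    # hypothetical):
    #
    #     from breezy.transport import get_transport
    #     t = get_transport('/path/to/.bzr/repository/indices')
    #     idx = GraphIndex(t, 'example.cix', t.stat('example.cix').st_size)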

            if self._base_offset != 0:
                # This is wasteful, but it is better than dealing with
                # adjusting all the offsets, etc.
                stream = BytesIO(stream.read()[self._base_offset:])
        self._read_prefix(stream)
        self._expected_elements = 3 + self._key_length

        self._nodes_by_key = None

        pos = stream.tell()
        lines = stream.read().split(b'\n')
        # GZ 2009-09-20: Should really use a try/finally block to ensure close

        _, _, _, trailers = self._parse_lines(lines, pos)
        for key, absent, references, value in viewvalues(self._keys_by_offset):

            # resolve references:

                % (ref_list_num, self.node_ref_lists))
        nodes = self._nodes
        for key, (value, ref_lists) in viewitems(nodes):
            ref_list = ref_lists[ref_list_num]
            refs.update([ref for ref in ref_list if ref not in nodes])

        if self._nodes_by_key is None:
            nodes_by_key = {}
            if self.node_ref_lists:
                for key, (value, references) in viewitems(self._nodes):
                    key_dict = nodes_by_key
                    for subkey in key[:-1]:
                        key_dict = key_dict.setdefault(subkey, {})
                    key_dict[key[-1]] = key, value, references
            else:
                for key, value in viewitems(self._nodes):
                    key_dict = nodes_by_key
                    for subkey in key[:-1]:
                        key_dict = key_dict.setdefault(subkey, {})

        if self._nodes is None:
            self._buffer_all()
        if self.node_ref_lists:
            for key, (value, node_ref_lists) in viewitems(self._nodes):
                yield self, key, value, node_ref_lists
        else:
            for key, value in viewitems(self._nodes):
                yield self, key, value
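
        # Illustrative sketch (not part of the original module): with one
        # reference list configured, each yielded entry looks like
        #
        #     (index, (b'child',), b'data-for-child', (((b'parent',),),))
        #
        # i.e. (index, key, value, ref_lists); with no reference lists the
        # trailing element is omitted and entries are (index, key, value).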

    def _read_prefix(self, stream):
        signature = stream.read(len(self._signature()))
        if not signature == self._signature():
            raise BadIndexFormatSignature(self._name, GraphIndex)
        options_line = stream.readline()
        if not options_line.startswith(_OPTION_NODE_REFS):
            raise BadIndexOptions(self)
        try:
            self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = stream.readline()
        if not options_line.startswith(_OPTION_KEY_ELEMENTS):
            raise BadIndexOptions(self)
        try:
            self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = stream.readline()
        if not options_line.startswith(_OPTION_LEN):
            raise BadIndexOptions(self)
        try:
            self._key_count = int(options_line[len(_OPTION_LEN):-1])
        except ValueError:
            raise BadIndexOptions(self)
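
        # Illustrative sketch (not part of the original module): feeding the
        # _EXAMPLE_HEADER shown near the top of the file through _read_prefix()
        # would leave node_ref_lists == 1, _key_length == 1 and _key_count == 0;
        # a malformed option line raises BadIndexOptions and a wrong signature
        # raises BadIndexFormatSignature.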

    def _resolve_references(self, references):
        """Return the resolved key references for references.

            self._buffer_all()
        if self._key_length == 1:
            for key in keys:
                _sanity_check_key(self, key)
                if self.node_ref_lists:
                    value, node_refs = self._nodes[key]
                    yield self, key, value, node_refs
                else:
                    yield self, key, self._nodes[key]
        nodes_by_key = self._get_nodes_by_key()
        for entry in _iter_entries_prefix(self, nodes_by_key, keys):
            yield entry

    def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
        """See BTreeIndex._find_ancestors."""

        :param location_keys: A list of location(byte offset), key tuples.
        :return: A list of (location_key, result) tuples as expected by
            breezy.bisect_multi.bisect_multi_bytes.
        """
        # Possible improvements:
        # - only bisect lookup each key once

        signature = bytes[0:len(self._signature())]
        if not signature == self._signature():
            raise BadIndexFormatSignature(self._name, GraphIndex)
        lines = bytes[len(self._signature()):].splitlines()
        options_line = lines[0]
        if not options_line.startswith(_OPTION_NODE_REFS):
            raise BadIndexOptions(self)
        try:
            self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = lines[1]
        if not options_line.startswith(_OPTION_KEY_ELEMENTS):
            raise BadIndexOptions(self)
        try:
            self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = lines[2]
        if not options_line.startswith(_OPTION_LEN):
            raise BadIndexOptions(self)
        try:
            self._key_count = int(options_line[len(_OPTION_LEN):])
        except ValueError:
            raise BadIndexOptions(self)
        # calculate the bytes we have processed
        header_end = (len(signature) + len(lines[0]) + len(lines[1]) +
            len(lines[2]) + 3)

                        raise AssertionError("%s %s" % (self._size, pos))

            elements = line.split(b'\0')
            if len(elements) != self._expected_elements:
                raise BadIndexData(self)
            # keys are tuples. Each element is a string that may occur many
            # times, so we intern them to save space. AB, RC, 200807
            key = tuple([intern(element) for element in elements[:self._key_length]])

                    # We read the whole range, most likely because the
                    # Transport upcast our readv ranges into one long request
                    # for enough total data to grab the whole index.
                    self._buffer_all(BytesIO(data))

        if self._bisect_nodes is None:
            # this must be the start

            found_parents[key] = parents
        return found_parents

    __contains__ = _has_key_from_parent_map

    def insert_index(self, pos, index, name=None):
        """Insert a new index in the list of indices to query.

                        hit_indices.append(index)
                break
            except errors.NoSuchFile as e:
                if not self._try_reload(e):
                    raise
        self._move_to_front(hit_indices)

    def iter_entries_prefix(self, keys):

                        hit_indices.append(index)
                break
            except errors.NoSuchFile as e:
                if not self._try_reload(e):
                    raise
        self._move_to_front(hit_indices)

    def _move_to_front(self, hit_indices):

        indices_info = zip(self._index_names, self._indices)
        if 'index' in debug.debug_flags:
            indices_info = list(indices_info)
            trace.mutter('CombinedGraphIndex reordering: currently %r, '
                         'promoting %r', indices_info, hit_indices)

                return sum((index.key_count() for index in self._indices), 0)
            except errors.NoSuchFile as e:
                if not self._try_reload(e):
                    raise

    missing_keys = _missing_keys_from_parent_map

    def _try_reload(self, error):
        """We just got a NoSuchFile exception.

        Try to reload the indices, if it fails, just raise the current
        exception.
        """
        if self._reload_func is None:
            return False
        trace.mutter('Trying to reload after getting exception: %s', error)
        if not self._reload_func():
            # We tried to reload, but nothing changed, so we fail anyway
            trace.mutter('_reload_func indicated nothing has changed.'
                         ' Raising original exception.')
            return False
        return True
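
    # Illustrative sketch (not part of the original module): _try_reload() only
    # helps when the combined index was given a reload callback, e.g.
    # (hypothetical callback name):
    #
    #     combined = CombinedGraphIndex([idx1, idx2], reload_func=reopen_pack_indices)
    #
    # When a constituent index vanishes mid-query (NoSuchFile), the callback is
    # invoked; if it reports that nothing changed, the original exception is
    # re-raised by the caller.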

    def set_sibling_indices(self, sibling_combined_graph_indices):
        """Set the CombinedGraphIndex objects to reorder after reordering self.

        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(3,
                "iter_all_entries scales with size of history.")
        if self.reference_lists:
            for key, (absent, references, value) in viewitems(self._nodes):
                if not absent:
                    yield self, key, value, references
        else:
            for key, (absent, references, value) in viewitems(self._nodes):
                if not absent:
                    yield self, key, value

        will be returned, and every match that is in the index will be

        keys = set(keys)

        if self._key_length == 1:
            for key in keys:
                _sanity_check_key(self, key)

                node = self._nodes[key]
                if node[0]:
                    continue
                if self.reference_lists:
                    yield self, key, node[2], node[1]
                else:
                    yield self, key, node[2]
            return
        nodes_by_key = self._get_nodes_by_key()
        for entry in _iter_entries_prefix(self, nodes_by_key, keys):
            yield entry

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        for node in an_iter:
            if node[1][:self.prefix_len] != self.prefix:
                raise BadIndexData(self)
            for ref_list in node[3]:
                for ref_node in ref_list:
                    if ref_node[:self.prefix_len] != self.prefix:
                        raise BadIndexData(self)
            yield node[0], node[1][self.prefix_len:], node[2], (
                tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
                for ref_list in node[3]))

    def validate(self):
        """Call the adapted's validate."""
        self.adapted.validate()


def _sanity_check_key(index_or_builder, key):
    """Raise BadIndexKey if key cannot be used for prefix matching."""
    if key[0] is None:
        raise BadIndexKey(key)
    if len(key) != index_or_builder._key_length:
        raise BadIndexKey(key)


def _iter_entries_prefix(index_or_builder, nodes_by_key, keys):
    """Helper for implementing prefix matching iterators."""
    for key in keys:
        _sanity_check_key(index_or_builder, key)
        # find what it refers to:
        key_dict = nodes_by_key
        elements = list(key)
        # find the subdict whose contents should be returned.
        try:
            while len(elements) and elements[0] is not None:
                key_dict = key_dict[elements[0]]
                elements.pop(0)
        except KeyError:
            # a non-existent lookup.
            continue
        if len(elements):
            dicts = [key_dict]
            while dicts:
                values_view = viewvalues(dicts.pop())
                # can't be empty or would not exist
                value = next(iter(values_view))
                if isinstance(value, dict):
                    # still descending, push values
                    dicts.extend(values_view)
                else:
                    # at leaf tuples, yield values
                    for value in values_view:
                        # each value is the key:value:node refs tuple
                        yield (index_or_builder, ) + value
        else:
            # the last thing looked up was a terminal element
            yield (index_or_builder, ) + key_dict
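

def _example_iter_prefix():
    # Illustrative sketch, not part of the original module: prefix matching
    # over two-element keys.  A trailing None acts as a wildcard, so the query
    # below yields both b'file-1' entries but not the b'file-2' one.
    idx = InMemoryGraphIndex(reference_lists=0, key_elements=2)
    idx.add_node((b'file-1', b'rev-a'), b'value-1')
    idx.add_node((b'file-1', b'rev-b'), b'value-2')
    idx.add_node((b'file-2', b'rev-a'), b'value-3')
    return sorted(key for _, key, _ in
                  idx.iter_entries_prefix([(b'file-1', None)]))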