27
27
from bisect import bisect_right
28
from cStringIO import StringIO
28
from io import BytesIO
32
from bzrlib.lazy_import import lazy_import
31
from ..lazy_import import lazy_import
33
32
lazy_import(globals(), """
34
from bzrlib import trace
35
from bzrlib.bisect_multi import bisect_multi_bytes
36
from bzrlib.revision import NULL_REVISION
37
from bzrlib.trace import mutter
35
revision as _mod_revision,
43
from bzrlib.static_tuple import StaticTuple
43
from ..static_tuple import StaticTuple
45
45
_HEADER_READV = (0, 200)
46
_OPTION_KEY_ELEMENTS = "key_elements="
48
_OPTION_NODE_REFS = "node_ref_lists="
49
_SIGNATURE = "Bazaar Graph Index 1\n"
52
_whitespace_re = re.compile('[\t\n\x0b\x0c\r\x00 ]')
53
_newline_null_re = re.compile('[\n\0]')
46
_OPTION_KEY_ELEMENTS = b"key_elements="
48
_OPTION_NODE_REFS = b"node_ref_lists="
49
_SIGNATURE = b"Bazaar Graph Index 1\n"
52
class BadIndexFormatSignature(errors.BzrError):
54
_fmt = "%(value)s is not an index of type %(_type)s."
56
def __init__(self, value, _type):
57
errors.BzrError.__init__(self)
62
class BadIndexData(errors.BzrError):
64
_fmt = "Error in data for index %(value)s."
66
def __init__(self, value):
67
errors.BzrError.__init__(self)
71
class BadIndexDuplicateKey(errors.BzrError):
73
_fmt = "The key '%(key)s' is already in index '%(index)s'."
75
def __init__(self, key, index):
76
errors.BzrError.__init__(self)
81
class BadIndexKey(errors.BzrError):
83
_fmt = "The key '%(key)s' is not a valid key."
85
def __init__(self, key):
86
errors.BzrError.__init__(self)
90
class BadIndexOptions(errors.BzrError):
92
_fmt = "Could not parse options for index %(value)s."
94
def __init__(self, value):
95
errors.BzrError.__init__(self)
99
class BadIndexValue(errors.BzrError):
101
_fmt = "The value '%(value)s' is not a valid value."
103
def __init__(self, value):
104
errors.BzrError.__init__(self)
108
_whitespace_re = re.compile(b'[\t\n\x0b\x0c\r\x00 ]')
109
_newline_null_re = re.compile(b'[\n\0]')
56
112
def _has_key_from_parent_map(self, key):
69
125
class GraphIndexBuilder(object):
70
126
"""A builder that can build a GraphIndex.
72
The resulting graph has the structure:
128
The resulting graph has the structure::
74
_SIGNATURE OPTIONS NODES NEWLINE
75
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
76
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
78
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
79
KEY := Not-whitespace-utf8
81
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
82
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
83
REFERENCE := DIGITS ; digits is the byte offset in the index of the
85
VALUE := no-newline-no-null-bytes
130
_SIGNATURE OPTIONS NODES NEWLINE
131
_SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
132
OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
134
NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
135
KEY := Not-whitespace-utf8
137
REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
138
REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
139
REFERENCE := DIGITS ; digits is the byte offset in the index of the
141
VALUE := no-newline-no-null-bytes
88
144
def __init__(self, reference_lists=0, key_elements=1):
105
161
def _check_key(self, key):
106
162
"""Raise BadIndexKey if key is not a valid key for this index."""
107
163
if type(key) not in (tuple, StaticTuple):
108
raise errors.BadIndexKey(key)
164
raise BadIndexKey(key)
109
165
if self._key_length != len(key):
110
raise errors.BadIndexKey(key)
166
raise BadIndexKey(key)
111
167
for element in key:
112
if not element or _whitespace_re.search(element) is not None:
113
raise errors.BadIndexKey(element)
168
if not element or type(element) != bytes or _whitespace_re.search(element) is not None:
169
raise BadIndexKey(key)
115
171
def _external_references(self):
116
172
"""Return references that are not present in this index.
146
202
key_dict = key_dict.setdefault(subkey, {})
147
203
key_dict[key[-1]] = key, value, references
149
for key, (absent, references, value) in self._nodes.iteritems():
205
for key, (absent, references, value) in self._nodes.items():
152
208
key_dict = nodes_by_key
184
240
:param value: The value associate with this key. Must not contain
185
241
newlines or null characters.
186
242
:return: (node_refs, absent_references)
187
node_refs basically a packed form of 'references' where all
189
absent_references reference keys that are not in self._nodes.
190
This may contain duplicates if the same key is
191
referenced in multiple lists.
244
* node_refs: basically a packed form of 'references' where all
246
* absent_references: reference keys that are not in self._nodes.
247
This may contain duplicates if the same key is referenced in
193
250
as_st = StaticTuple.from_sequence
194
251
self._check_key(key)
195
252
if _newline_null_re.search(value) is not None:
196
raise errors.BadIndexValue(value)
253
raise BadIndexValue(value)
197
254
if len(references) != self.reference_lists:
198
raise errors.BadIndexValue(references)
255
raise BadIndexValue(references)
200
257
absent_references = []
201
258
for reference_list in references:
219
276
:param references: An iterable of iterables of keys. Each is a
220
277
reference to another key.
221
278
:param value: The value to associate with the key. It may be any
222
bytes as long as it does not contain \0 or \n.
279
bytes as long as it does not contain \\0 or \\n.
225
282
absent_references) = self._check_key_ref_value(key, references, value)
226
if key in self._nodes and self._nodes[key][0] != 'a':
227
raise errors.BadIndexDuplicateKey(key, self)
283
if key in self._nodes and self._nodes[key][0] != b'a':
284
raise BadIndexDuplicateKey(key, self)
228
285
for reference in absent_references:
229
286
# There may be duplicates, but I don't think it is worth worrying
231
self._nodes[reference] = ('a', (), '')
288
self._nodes[reference] = (b'a', (), b'')
232
289
self._absent_keys.update(absent_references)
233
290
self._absent_keys.discard(key)
234
self._nodes[key] = ('', node_refs, value)
291
self._nodes[key] = (b'', node_refs, value)
235
292
if self._nodes_by_key is not None and self._key_length > 1:
236
293
self._update_nodes_by_key(key, value, node_refs)
241
298
This is a no-op, but we need the api to conform to a generic 'Index'
245
302
def finish(self):
305
:returns: cBytesIO holding the full context of the index as it
306
should be written to disk.
246
308
lines = [_SIGNATURE]
247
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
248
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
309
lines.append(b'%s%d\n' % (_OPTION_NODE_REFS, self.reference_lists))
310
lines.append(b'%s%d\n' % (_OPTION_KEY_ELEMENTS, self._key_length))
249
311
key_count = len(self._nodes) - len(self._absent_keys)
250
lines.append(_OPTION_LEN + str(key_count) + '\n')
312
lines.append(b'%s%d\n' % (_OPTION_LEN, key_count))
251
313
prefix_length = sum(len(x) for x in lines)
252
314
# references are byte offsets. To avoid having to do nasty
253
315
# polynomial work to resolve offsets (references to later in the
300
362
non_ref_bytes += len(ref_list) - 1
301
363
# how many digits are needed to represent the total byte count?
303
possible_total_bytes = non_ref_bytes + total_references*digits
365
possible_total_bytes = non_ref_bytes + total_references * digits
304
366
while 10 ** digits < possible_total_bytes:
306
possible_total_bytes = non_ref_bytes + total_references*digits
307
expected_bytes = possible_total_bytes + 1 # terminating newline
368
possible_total_bytes = non_ref_bytes + total_references * digits
369
expected_bytes = possible_total_bytes + 1 # terminating newline
308
370
# resolve key addresses.
309
371
key_addresses = {}
310
372
for key, non_ref_bytes, total_references in key_offset_info:
311
key_addresses[key] = non_ref_bytes + total_references*digits
373
key_addresses[key] = non_ref_bytes + total_references * digits
313
format_string = '%%0%sd' % digits
375
format_string = b'%%0%dd' % digits
314
376
for key, (absent, references, value) in nodes:
315
377
flattened_references = []
316
378
for ref_list in references:
317
379
ref_addresses = []
318
380
for reference in ref_list:
319
ref_addresses.append(format_string % key_addresses[reference])
320
flattened_references.append('\r'.join(ref_addresses))
321
string_key = '\x00'.join(key)
322
lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
323
'\t'.join(flattened_references), value))
325
result = StringIO(''.join(lines))
381
ref_addresses.append(format_string %
382
key_addresses[reference])
383
flattened_references.append(b'\r'.join(ref_addresses))
384
string_key = b'\x00'.join(key)
385
lines.append(b"%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
386
b'\t'.join(flattened_references), value))
388
result = BytesIO(b''.join(lines))
326
389
if expected_bytes and len(result.getvalue()) != expected_bytes:
327
390
raise errors.BzrError('Failed index creation. Internal error:'
328
' mismatched output length and expected length: %d %d' %
329
(len(result.getvalue()), expected_bytes))
391
' mismatched output length and expected length: %d %d' %
392
(len(result.getvalue()), expected_bytes))
332
395
def set_optimize(self, for_size=None, combine_backing_indices=None):
385
448
def __init__(self, transport, name, size, unlimited_cache=False, offset=0):
386
449
"""Open an index called name on transport.
388
:param transport: A bzrlib.transport.Transport.
451
:param transport: A breezy.transport.Transport.
389
452
:param name: A path to provide to transport API calls.
390
453
:param size: The size of the index in bytes. This is used for bisection
391
454
logic to perform partial index reads. While the size could be
431
494
def __ne__(self, other):
432
495
return not self.__eq__(other)
497
def __lt__(self, other):
498
# We don't really care about the order, just that there is an order.
499
if (not isinstance(other, GraphIndex) and
500
not isinstance(other, InMemoryGraphIndex)):
501
raise TypeError(other)
502
return hash(self) < hash(other)
505
return hash((type(self), self._transport, self._name, self._size))
434
507
def __repr__(self):
435
508
return "%s(%r)" % (self.__class__.__name__,
436
self._transport.abspath(self._name))
509
self._transport.abspath(self._name))
438
511
def _buffer_all(self, stream=None):
439
512
"""Buffer all the index data.
444
517
# We already did this
446
519
if 'index' in debug.debug_flags:
447
mutter('Reading entire index %s', self._transport.abspath(self._name))
520
trace.mutter('Reading entire index %s',
521
self._transport.abspath(self._name))
448
522
if stream is None:
449
523
stream = self._transport.get(self._name)
450
524
if self._base_offset != 0:
451
525
# This is wasteful, but it is better than dealing with
452
526
# adjusting all the offsets, etc.
453
stream = StringIO(stream.read()[self._base_offset:])
454
self._read_prefix(stream)
455
self._expected_elements = 3 + self._key_length
457
# raw data keyed by offset
458
self._keys_by_offset = {}
459
# ready-to-return key:value or key:value, node_ref_lists
461
self._nodes_by_key = None
464
lines = stream.read().split('\n')
527
stream = BytesIO(stream.read()[self._base_offset:])
529
self._read_prefix(stream)
530
self._expected_elements = 3 + self._key_length
532
# raw data keyed by offset
533
self._keys_by_offset = {}
534
# ready-to-return key:value or key:value, node_ref_lists
536
self._nodes_by_key = None
539
lines = stream.read().split(b'\n')
467
543
_, _, _, trailers = self._parse_lines(lines, pos)
468
for key, absent, references, value in self._keys_by_offset.itervalues():
544
for key, absent, references, value in self._keys_by_offset.values():
471
547
# resolve references:
493
569
self._buffer_all()
494
570
if ref_list_num + 1 > self.node_ref_lists:
495
571
raise ValueError('No ref list %d, index has %d ref lists'
496
% (ref_list_num, self.node_ref_lists))
572
% (ref_list_num, self.node_ref_lists))
498
574
nodes = self._nodes
499
for key, (value, ref_lists) in nodes.iteritems():
575
for key, (value, ref_lists) in nodes.items():
500
576
ref_list = ref_lists[ref_list_num]
501
577
refs.update([ref for ref in ref_list if ref not in nodes])
505
581
if self._nodes_by_key is None:
506
582
nodes_by_key = {}
507
583
if self.node_ref_lists:
508
for key, (value, references) in self._nodes.iteritems():
584
for key, (value, references) in self._nodes.items():
509
585
key_dict = nodes_by_key
510
586
for subkey in key[:-1]:
511
587
key_dict = key_dict.setdefault(subkey, {})
512
588
key_dict[key[-1]] = key, value, references
514
for key, value in self._nodes.iteritems():
590
for key, value in self._nodes.items():
515
591
key_dict = nodes_by_key
516
592
for subkey in key[:-1]:
517
593
key_dict = key_dict.setdefault(subkey, {})
531
607
if 'evil' in debug.debug_flags:
532
608
trace.mutter_callsite(3,
533
"iter_all_entries scales with size of history.")
609
"iter_all_entries scales with size of history.")
534
610
if self._nodes is None:
535
611
self._buffer_all()
536
612
if self.node_ref_lists:
537
for key, (value, node_ref_lists) in self._nodes.iteritems():
613
for key, (value, node_ref_lists) in self._nodes.items():
538
614
yield self, key, value, node_ref_lists
540
for key, value in self._nodes.iteritems():
616
for key, value in self._nodes.items():
541
617
yield self, key, value
543
619
def _read_prefix(self, stream):
544
620
signature = stream.read(len(self._signature()))
545
621
if not signature == self._signature():
546
raise errors.BadIndexFormatSignature(self._name, GraphIndex)
622
raise BadIndexFormatSignature(self._name, GraphIndex)
547
623
options_line = stream.readline()
548
624
if not options_line.startswith(_OPTION_NODE_REFS):
549
raise errors.BadIndexOptions(self)
625
raise BadIndexOptions(self)
551
627
self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1])
552
628
except ValueError:
553
raise errors.BadIndexOptions(self)
629
raise BadIndexOptions(self)
554
630
options_line = stream.readline()
555
631
if not options_line.startswith(_OPTION_KEY_ELEMENTS):
556
raise errors.BadIndexOptions(self)
632
raise BadIndexOptions(self)
558
634
self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1])
559
635
except ValueError:
560
raise errors.BadIndexOptions(self)
636
raise BadIndexOptions(self)
561
637
options_line = stream.readline()
562
638
if not options_line.startswith(_OPTION_LEN):
563
raise errors.BadIndexOptions(self)
639
raise BadIndexOptions(self)
565
641
self._key_count = int(options_line[len(_OPTION_LEN):-1])
566
642
except ValueError:
567
raise errors.BadIndexOptions(self)
643
raise BadIndexOptions(self)
569
645
def _resolve_references(self, references):
570
646
"""Return the resolved key references for references.
580
656
for ref_list in references:
581
node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
658
tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
582
659
return tuple(node_refs)
584
def _find_index(self, range_map, key):
662
def _find_index(range_map, key):
585
663
"""Helper for the _parsed_*_index calls.
587
665
Given a range map - [(start, end), ...], finds the index of the range
670
748
if self._nodes is not None:
671
749
return self._iter_entries_from_total_buffer(keys)
673
return (result[1] for result in bisect_multi_bytes(
751
return (result[1] for result in bisect_multi.bisect_multi_bytes(
674
752
self._lookup_keys_via_location, self._size, keys))
676
754
def iter_entries_prefix(self, keys):
703
781
self._buffer_all()
704
782
if self._key_length == 1:
708
raise errors.BadIndexKey(key)
709
if len(key) != self._key_length:
710
raise errors.BadIndexKey(key)
784
_sanity_check_key(self, key)
711
785
if self.node_ref_lists:
712
786
value, node_refs = self._nodes[key]
713
787
yield self, key, value, node_refs
715
789
yield self, key, self._nodes[key]
717
791
nodes_by_key = self._get_nodes_by_key()
721
raise errors.BadIndexKey(key)
722
if len(key) != self._key_length:
723
raise errors.BadIndexKey(key)
724
# find what it refers to:
725
key_dict = nodes_by_key
727
# find the subdict whose contents should be returned.
729
while len(elements) and elements[0] is not None:
730
key_dict = key_dict[elements[0]]
733
# a non-existant lookup.
738
key_dict = dicts.pop(-1)
739
# can't be empty or would not exist
740
item, value = key_dict.iteritems().next()
741
if type(value) == dict:
743
dicts.extend(key_dict.itervalues())
746
for value in key_dict.itervalues():
747
# each value is the key:value:node refs tuple
749
yield (self, ) + value
751
# the last thing looked up was a terminal element
752
yield (self, ) + key_dict
792
for entry in _iter_entries_prefix(self, nodes_by_key, keys):
754
795
def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
755
796
"""See BTreeIndex._find_ancestors."""
819
860
index = self._parsed_byte_index(location)
820
861
if (len(self._parsed_byte_map) and
821
862
self._parsed_byte_map[index][0] <= location and
822
self._parsed_byte_map[index][1] > location):
863
self._parsed_byte_map[index][1] > location):
823
864
# the byte region has been parsed, so no read is needed.
837
878
# _read_and_parse triggered a _buffer_all because we requested the
838
879
# whole data range
839
880
for location, key in location_keys:
840
if key not in self._nodes: # not present
881
if key not in self._nodes: # not present
841
882
result.append(((location, key), False))
842
883
elif self.node_ref_lists:
843
884
value, refs = self._nodes[key]
844
885
result.append(((location, key),
845
(self, key, value, refs)))
886
(self, key, value, refs)))
847
888
result.append(((location, key),
848
(self, key, self._nodes[key])))
889
(self, key, self._nodes[key])))
850
891
# generate results:
851
892
# - figure out <, >, missing, present
870
911
pending_references.append((location, key))
872
913
result.append(((location, key), (self, key,
873
value, self._resolve_references(refs))))
914
value, self._resolve_references(refs))))
875
916
result.append(((location, key),
876
(self, key, self._bisect_nodes[key])))
917
(self, key, self._bisect_nodes[key])))
879
920
# has the region the key should be in, been parsed?
916
957
# answer key references we had to look-up-late.
917
958
value, refs = self._bisect_nodes[key]
918
959
result.append(((location, key), (self, key,
919
value, self._resolve_references(refs))))
960
value, self._resolve_references(refs))))
922
963
def _parse_header_from_bytes(self, bytes):
929
970
signature = bytes[0:len(self._signature())]
930
971
if not signature == self._signature():
931
raise errors.BadIndexFormatSignature(self._name, GraphIndex)
972
raise BadIndexFormatSignature(self._name, GraphIndex)
932
973
lines = bytes[len(self._signature()):].splitlines()
933
974
options_line = lines[0]
934
975
if not options_line.startswith(_OPTION_NODE_REFS):
935
raise errors.BadIndexOptions(self)
976
raise BadIndexOptions(self)
937
978
self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):])
938
979
except ValueError:
939
raise errors.BadIndexOptions(self)
980
raise BadIndexOptions(self)
940
981
options_line = lines[1]
941
982
if not options_line.startswith(_OPTION_KEY_ELEMENTS):
942
raise errors.BadIndexOptions(self)
983
raise BadIndexOptions(self)
944
985
self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):])
945
986
except ValueError:
946
raise errors.BadIndexOptions(self)
987
raise BadIndexOptions(self)
947
988
options_line = lines[2]
948
989
if not options_line.startswith(_OPTION_LEN):
949
raise errors.BadIndexOptions(self)
990
raise BadIndexOptions(self)
951
992
self._key_count = int(options_line[len(_OPTION_LEN):])
952
993
except ValueError:
953
raise errors.BadIndexOptions(self)
994
raise BadIndexOptions(self)
954
995
# calculate the bytes we have processed
955
996
header_end = (len(signature) + len(lines[0]) + len(lines[1]) +
957
self._parsed_bytes(0, None, header_end, None)
998
self._parsed_bytes(0, (), header_end, ())
958
999
# setup parsing state
959
1000
self._expected_elements = 3 + self._key_length
960
1001
# raw data keyed by offset
1060
1101
if not start_adjacent:
1061
1102
# work around python bug in rfind
1062
1103
if trim_start is None:
1063
trim_start = data.find('\n') + 1
1104
trim_start = data.find(b'\n') + 1
1065
trim_start = data.find('\n', trim_start) + 1
1106
trim_start = data.find(b'\n', trim_start) + 1
1066
1107
if not (trim_start != 0):
1067
1108
raise AssertionError('no \n was present')
1068
1109
# print 'removing start', offset, trim_start, repr(data[:trim_start])
1069
1110
if not end_adjacent:
1070
1111
# work around python bug in rfind
1071
1112
if trim_end is None:
1072
trim_end = data.rfind('\n') + 1
1113
trim_end = data.rfind(b'\n') + 1
1074
trim_end = data.rfind('\n', None, trim_end) + 1
1115
trim_end = data.rfind(b'\n', None, trim_end) + 1
1075
1116
if not (trim_end != 0):
1076
1117
raise AssertionError('no \n was present')
1077
1118
# print 'removing end', offset, trim_end, repr(data[trim_end:])
1079
1120
trimmed_data = data[trim_start:trim_end]
1080
1121
if not (trimmed_data):
1081
1122
raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'
1082
% (trim_start, trim_end, offset, offset + len(data)))
1123
% (trim_start, trim_end, offset, offset + len(data)))
1084
1125
offset += trim_start
1085
1126
# print "parsing", repr(trimmed_data)
1086
1127
# splitlines mangles the \r delimiters.. don't use it.
1087
lines = trimmed_data.split('\n')
1128
lines = trimmed_data.split(b'\n')
1090
1131
first_key, last_key, nodes, _ = self._parse_lines(lines, pos)
1091
1132
for key, value in nodes:
1092
1133
self._bisect_nodes[key] = value
1093
1134
self._parsed_bytes(offset, first_key,
1094
offset + len(trimmed_data), last_key)
1135
offset + len(trimmed_data), last_key)
1095
1136
return offset + len(trimmed_data), last_segment
1097
1138
def _parse_lines(self, lines, pos):
1102
1143
for line in lines:
1104
1145
# must be at the end
1106
1147
if not (self._size == pos + 1):
1107
1148
raise AssertionError("%s %s" % (self._size, pos))
1110
elements = line.split('\0')
1151
elements = line.split(b'\0')
1111
1152
if len(elements) != self._expected_elements:
1112
raise errors.BadIndexData(self)
1153
raise BadIndexData(self)
1113
1154
# keys are tuples. Each element is a string that may occur many
1114
1155
# times, so we intern them to save space. AB, RC, 200807
1115
key = tuple([intern(element) for element in elements[:self._key_length]])
1156
key = tuple([element for element in elements[:self._key_length]])
1116
1157
if first_key is None:
1117
1158
first_key = key
1118
1159
absent, references, value = elements[-3:]
1120
for ref_string in references.split('\t'):
1161
for ref_string in references.split(b'\t'):
1121
1162
ref_lists.append(tuple([
1122
int(ref) for ref in ref_string.split('\r') if ref
1163
int(ref) for ref in ref_string.split(b'\r') if ref
1124
1165
ref_lists = tuple(ref_lists)
1125
1166
self._keys_by_offset[pos] = (key, absent, ref_lists, value)
1126
pos += len(line) + 1 # +1 for the \n
1167
pos += len(line) + 1 # +1 for the \n
1129
1170
if self.node_ref_lists:
1158
1199
# combine two regions
1159
1200
if (index + 1 < len(self._parsed_byte_map) and
1160
1201
self._parsed_byte_map[index][1] == start and
1161
self._parsed_byte_map[index + 1][0] == end):
1202
self._parsed_byte_map[index + 1][0] == end):
1162
1203
# combine two regions
1163
1204
self._parsed_byte_map[index] = (self._parsed_byte_map[index][0],
1164
self._parsed_byte_map[index + 1][1])
1205
self._parsed_byte_map[index + 1][1])
1165
1206
self._parsed_key_map[index] = (self._parsed_key_map[index][0],
1166
self._parsed_key_map[index + 1][1])
1207
self._parsed_key_map[index + 1][1])
1167
1208
del self._parsed_byte_map[index + 1]
1168
1209
del self._parsed_key_map[index + 1]
1169
1210
elif self._parsed_byte_map[index][1] == start:
1173
1214
self._parsed_key_map[index] = (
1174
1215
self._parsed_key_map[index][0], end_key)
1175
1216
elif (index + 1 < len(self._parsed_byte_map) and
1176
self._parsed_byte_map[index + 1][0] == end):
1217
self._parsed_byte_map[index + 1][0] == end):
1177
1218
# extend the higher entry
1178
1219
self._parsed_byte_map[index + 1] = (
1179
1220
start, self._parsed_byte_map[index + 1][1])
1200
1241
base_offset = self._base_offset
1201
1242
if base_offset != 0:
1202
1243
# Rewrite the ranges for the offset
1203
readv_ranges = [(start+base_offset, size)
1244
readv_ranges = [(start + base_offset, size)
1204
1245
for start, size in readv_ranges]
1205
1246
readv_data = self._transport.readv(self._name, readv_ranges, True,
1206
self._size + self._base_offset)
1247
self._size + self._base_offset)
1208
1249
for offset, data in readv_data:
1209
1250
offset -= base_offset
1217
1258
# We read the whole range, most likely because the
1218
1259
# Transport upcast our readv ranges into one long request
1219
1260
# for enough total data to grab the whole index.
1220
self._buffer_all(StringIO(data))
1261
self._buffer_all(BytesIO(data))
1222
1263
if self._bisect_nodes is None:
1223
1264
# this must be the start
1249
1290
performance significantly. For example, if one index is on local disk and a
1250
1291
second on a remote server, the local disk index should be before the other
1251
1292
in the index list.
1253
1294
Also, queries tend to need results from the same indices as previous
1254
1295
queries. So the indices will be reordered after every query to put the
1255
1296
indices that had the result(s) of that query first (while otherwise
1287
1328
def get_parent_map(self, keys):
1288
1329
"""See graph.StackedParentsProvider.get_parent_map"""
1289
1330
search_keys = set(keys)
1290
if NULL_REVISION in search_keys:
1291
search_keys.discard(NULL_REVISION)
1292
found_parents = {NULL_REVISION:[]}
1331
if _mod_revision.NULL_REVISION in search_keys:
1332
search_keys.discard(_mod_revision.NULL_REVISION)
1333
found_parents = {_mod_revision.NULL_REVISION: []}
1294
1335
found_parents = {}
1295
1336
for index, key, value, refs in self.iter_entries(search_keys):
1296
1337
parents = refs[0]
1297
1338
if not parents:
1298
parents = (NULL_REVISION,)
1339
parents = (_mod_revision.NULL_REVISION,)
1299
1340
found_parents[key] = parents
1300
1341
return found_parents
1302
has_key = _has_key_from_parent_map
1343
__contains__ = _has_key_from_parent_map
1304
1345
def insert_index(self, pos, index, name=None):
1305
1346
"""Insert a new index in the list of indices to query.
1429
1473
def _move_to_front_by_index(self, hit_indices):
1430
1474
"""Core logic for _move_to_front.
1432
1476
Returns a list of names corresponding to the hit_indices param.
1434
1478
indices_info = zip(self._index_names, self._indices)
1435
1479
if 'index' in debug.debug_flags:
1436
mutter('CombinedGraphIndex reordering: currently %r, promoting %r',
1437
indices_info, hit_indices)
1480
indices_info = list(indices_info)
1481
trace.mutter('CombinedGraphIndex reordering: currently %r, '
1482
'promoting %r', indices_info, hit_indices)
1439
1484
unhit_names = []
1440
1485
new_hit_indices = []
1447
1492
if len(new_hit_indices) == len(hit_indices):
1448
1493
# We've found all of the hit entries, everything else is
1450
unhit_names.extend(self._index_names[offset+1:])
1451
unhit_indices.extend(self._indices[offset+1:])
1495
unhit_names.extend(self._index_names[offset + 1:])
1496
unhit_indices.extend(self._indices[offset + 1:])
1454
1499
unhit_names.append(name)
1457
1502
self._indices = new_hit_indices + unhit_indices
1458
1503
self._index_names = hit_names + unhit_names
1459
1504
if 'index' in debug.debug_flags:
1460
mutter('CombinedGraphIndex reordered: %r', self._indices)
1505
trace.mutter('CombinedGraphIndex reordered: %r', self._indices)
1461
1506
return hit_names
1463
1508
def _move_to_front_by_name(self, hit_names):
1516
1561
# CombinedGraphIndex does not know what the ref lists
1518
1563
search_keys = index._find_ancestors(search_keys,
1519
ref_list_num, parent_map, index_missing_keys)
1564
ref_list_num, parent_map, index_missing_keys)
1520
1565
# print ' \t \t%2d\t%4d\t%5d\t%5d' % (
1521
1566
# sub_generation, len(search_keys),
1522
1567
# len(parent_map), len(index_missing_keys))
1550
1595
return sum((index.key_count() for index in self._indices), 0)
1551
except errors.NoSuchFile:
1552
self._reload_or_raise()
1596
except errors.NoSuchFile as e:
1597
if not self._try_reload(e):
1554
1600
missing_keys = _missing_keys_from_parent_map
1556
def _reload_or_raise(self):
1602
def _try_reload(self, error):
1557
1603
"""We just got a NoSuchFile exception.
1559
1605
Try to reload the indices, if it fails, just raise the current
1562
1608
if self._reload_func is None:
1564
exc_type, exc_value, exc_traceback = sys.exc_info()
1565
trace.mutter('Trying to reload after getting exception: %s',
1611
'Trying to reload after getting exception: %s', str(error))
1567
1612
if not self._reload_func():
1568
1613
# We tried to reload, but nothing changed, so we fail anyway
1569
1614
trace.mutter('_reload_func indicated nothing has changed.'
1570
1615
' Raising original exception.')
1571
raise exc_type, exc_value, exc_traceback
1573
1619
def set_sibling_indices(self, sibling_combined_graph_indices):
1574
1620
"""Set the CombinedGraphIndex objects to reorder after reordering self.
1616
1663
if 'evil' in debug.debug_flags:
1617
1664
trace.mutter_callsite(3,
1618
"iter_all_entries scales with size of history.")
1665
"iter_all_entries scales with size of history.")
1619
1666
if self.reference_lists:
1620
for key, (absent, references, value) in self._nodes.iteritems():
1667
for key, (absent, references, value) in self._nodes.items():
1622
1669
yield self, key, value, references
1624
for key, (absent, references, value) in self._nodes.iteritems():
1671
for key, (absent, references, value) in self._nodes.items():
1626
1673
yield self, key, value
1665
1712
will be returned, and every match that is in the index will be
1668
# XXX: To much duplication with the GraphIndex class; consider finding
1669
# a good place to pull out the actual common logic.
1670
1715
keys = set(keys)
1673
1718
if self._key_length == 1:
1674
1719
for key in keys:
1677
raise errors.BadIndexKey(key)
1678
if len(key) != self._key_length:
1679
raise errors.BadIndexKey(key)
1720
_sanity_check_key(self, key)
1680
1721
node = self._nodes[key]
1686
1727
yield self, key, node[2]
1688
1729
nodes_by_key = self._get_nodes_by_key()
1692
raise errors.BadIndexKey(key)
1693
if len(key) != self._key_length:
1694
raise errors.BadIndexKey(key)
1695
# find what it refers to:
1696
key_dict = nodes_by_key
1697
elements = list(key)
1698
# find the subdict to return
1700
while len(elements) and elements[0] is not None:
1701
key_dict = key_dict[elements[0]]
1704
# a non-existant lookup.
1709
key_dict = dicts.pop(-1)
1710
# can't be empty or would not exist
1711
item, value = key_dict.iteritems().next()
1712
if type(value) == dict:
1714
dicts.extend(key_dict.itervalues())
1717
for value in key_dict.itervalues():
1718
yield (self, ) + value
1720
yield (self, ) + key_dict
1730
for entry in _iter_entries_prefix(self, nodes_by_key, keys):
1722
1733
def key_count(self):
1723
1734
"""Return an estimate of the number of keys in this index.
1729
1740
def validate(self):
1730
1741
"""In memory index's have no known corruption at the moment."""
1743
def __lt__(self, other):
1744
# We don't really care about the order, just that there is an order.
1745
if (not isinstance(other, GraphIndex) and
1746
not isinstance(other, InMemoryGraphIndex)):
1747
raise TypeError(other)
1748
return hash(self) < hash(other)
1733
1751
class GraphIndexPrefixAdapter(object):
1734
1752
"""An adapter between GraphIndex with different key lengths.
1743
1761
def __init__(self, adapted, prefix, missing_key_length,
1744
add_nodes_callback=None):
1762
add_nodes_callback=None):
1745
1763
"""Construct an adapter against adapted with prefix."""
1746
1764
self.adapted = adapted
1747
self.prefix_key = prefix + (None,)*missing_key_length
1765
self.prefix_key = prefix + (None,) * missing_key_length
1748
1766
self.prefix = prefix
1749
1767
self.prefix_len = len(prefix)
1750
1768
self.add_nodes_callback = add_nodes_callback
1763
1781
for (key, value, node_refs) in nodes:
1764
1782
adjusted_references = (
1765
1783
tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
1766
for ref_list in node_refs))
1784
for ref_list in node_refs))
1767
1785
translated_nodes.append((self.prefix + key, value,
1768
adjusted_references))
1786
adjusted_references))
1769
1787
except ValueError:
1770
1788
# XXX: TODO add an explicit interface for getting the reference list
1771
1789
# status, to handle this bit of user-friendliness in the API more
1792
1810
for node in an_iter:
1794
1812
if node[1][:self.prefix_len] != self.prefix:
1795
raise errors.BadIndexData(self)
1813
raise BadIndexData(self)
1796
1814
for ref_list in node[3]:
1797
1815
for ref_node in ref_list:
1798
1816
if ref_node[:self.prefix_len] != self.prefix:
1799
raise errors.BadIndexData(self)
1817
raise BadIndexData(self)
1800
1818
yield node[0], node[1][self.prefix_len:], node[2], (
1801
1819
tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
1802
for ref_list in node[3]))
1820
for ref_list in node[3]))
1804
1822
def iter_all_entries(self):
1805
1823
"""Iterate over all keys within the index
1855
1873
def validate(self):
1856
1874
"""Call the adapted's validate."""
1857
1875
self.adapted.validate()
1878
def _sanity_check_key(index_or_builder, key):
1879
"""Raise BadIndexKey if key cannot be used for prefix matching."""
1881
raise BadIndexKey(key)
1882
if len(key) != index_or_builder._key_length:
1883
raise BadIndexKey(key)
1886
def _iter_entries_prefix(index_or_builder, nodes_by_key, keys):
1887
"""Helper for implementing prefix matching iterators."""
1889
_sanity_check_key(index_or_builder, key)
1890
# find what it refers to:
1891
key_dict = nodes_by_key
1892
elements = list(key)
1893
# find the subdict whose contents should be returned.
1895
while len(elements) and elements[0] is not None:
1896
key_dict = key_dict[elements[0]]
1899
# a non-existant lookup.
1904
values_view = dicts.pop().values()
1905
# can't be empty or would not exist
1906
value = next(iter(values_view))
1907
if isinstance(value, dict):
1908
# still descending, push values
1909
dicts.extend(values_view)
1911
# at leaf tuples, yield values
1912
for value in values_view:
1913
# each value is the key:value:node refs tuple
1915
yield (index_or_builder, ) + value
1917
# the last thing looked up was a terminal element
1918
yield (index_or_builder, ) + key_dict