from bisect import bisect_right
from io import BytesIO
import re

from ..lazy_import import lazy_import
lazy_import(globals(), """
from breezy import (
    bisect_multi,
    revision as _mod_revision,
    trace,
    )
""")
from .. import (
    debug,
    errors,
    )
from ..static_tuple import StaticTuple

_HEADER_READV = (0, 200)
_OPTION_KEY_ELEMENTS = b"key_elements="
_OPTION_LEN = b"len="
_OPTION_NODE_REFS = b"node_ref_lists="
_SIGNATURE = b"Bazaar Graph Index 1\n"
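
# Illustrative sketch (not normative; values assumed): a two-element-key
# index with one reference list, as produced by GraphIndexBuilder.finish()
# below, looks roughly like:
#
#   Bazaar Graph Index 1
#   node_ref_lists=1
#   key_elements=2
#   len=1
#   dir\x00file\x00\x00\x00value\n
#
# Each node row is the key elements joined by NUL, then the absent marker,
# then the tab-separated reference lists (zero-padded byte offsets joined
# by \r), then the value, all NUL-separated and newline-terminated.
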
class BadIndexFormatSignature(errors.BzrError):

    _fmt = "%(value)s is not an index of type %(_type)s."

    def __init__(self, value, _type):
        errors.BzrError.__init__(self)
        self.value = value
        self._type = _type


class BadIndexData(errors.BzrError):

    _fmt = "Error in data for index %(value)s."

    def __init__(self, value):
        errors.BzrError.__init__(self)
        self.value = value


class BadIndexDuplicateKey(errors.BzrError):

    _fmt = "The key '%(key)s' is already in index '%(index)s'."

    def __init__(self, key, index):
        errors.BzrError.__init__(self)
        self.key = key
        self.index = index


class BadIndexKey(errors.BzrError):

    _fmt = "The key '%(key)s' is not a valid key."

    def __init__(self, key):
        errors.BzrError.__init__(self)
        self.key = key


class BadIndexOptions(errors.BzrError):

    _fmt = "Could not parse options for index %(value)s."

    def __init__(self, value):
        errors.BzrError.__init__(self)
        self.value = value


class BadIndexValue(errors.BzrError):

    _fmt = "The value '%(value)s' is not a valid value."

    def __init__(self, value):
        errors.BzrError.__init__(self)
        self.value = value


_whitespace_re = re.compile(b'[\t\n\x0b\x0c\r\x00 ]')
_newline_null_re = re.compile(b'[\n\0]')
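# NUL, CR, LF and TAB act as field and row separators in the serialised
# index, which is why _check_key below rejects key elements containing any
# whitespace or NUL bytes, and why values may not contain newlines or NULs.
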
def _has_key_from_parent_map(self, key):

    def _check_key(self, key):
        """Raise BadIndexKey if key is not a valid key for this index."""
        if type(key) not in (tuple, StaticTuple):
            raise BadIndexKey(key)
        if self._key_length != len(key):
            raise BadIndexKey(key)
        for element in key:
            if (not element or type(element) != bytes
                    or _whitespace_re.search(element) is not None):
                raise BadIndexKey(key)
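
    # For example (illustrative values): with _key_length == 2,
    # (b'dir', b'file') is valid, while ('dir', 'file') (str, not bytes),
    # (b'dir',) (wrong length) and (b'a b', b'c') (embedded space) all
    # raise BadIndexKey.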

    def _external_references(self):
        """Return references that are not present in this index.
                        key_dict = key_dict.setdefault(subkey, {})
                    key_dict[key[-1]] = key, value, references
            else:
                for key, (absent, references, value) in self._nodes.items():
                    key_dict = nodes_by_key
        as_st = StaticTuple.from_sequence
        self._check_key(key)
        if _newline_null_re.search(value) is not None:
            raise BadIndexValue(value)
        if len(references) != self.reference_lists:
            raise BadIndexValue(references)
        absent_references = []
        for reference_list in references:

        (node_refs,
         absent_references) = self._check_key_ref_value(key, references, value)
        if key in self._nodes and self._nodes[key][0] != b'a':
            raise BadIndexDuplicateKey(key, self)
        for reference in absent_references:
            # There may be duplicates, but I don't think it is worth worrying
            # about. b'a' marks the referenced key as absent: a placeholder
            # until (and unless) the real node is added.
            self._nodes[reference] = (b'a', (), b'')
        self._absent_keys.update(absent_references)
        self._absent_keys.discard(key)
        self._nodes[key] = (b'', node_refs, value)
        if self._nodes_by_key is not None and self._key_length > 1:
            self._update_nodes_by_key(key, value, node_refs)
        This is a no-op, but we need the api to conform to a generic 'Index'

    def finish(self):
        """Finish the index.

        :returns: BytesIO holding the full content of the index as it
            should be written to disk.
        """
        lines = [_SIGNATURE]
        lines.append(b'%s%d\n' % (_OPTION_NODE_REFS, self.reference_lists))
        lines.append(b'%s%d\n' % (_OPTION_KEY_ELEMENTS, self._key_length))
        key_count = len(self._nodes) - len(self._absent_keys)
        lines.append(b'%s%d\n' % (_OPTION_LEN, key_count))
        prefix_length = sum(len(x) for x in lines)
        # references are byte offsets. To avoid having to do nasty
        # polynomial work to resolve offsets (references to later in the

                    non_ref_bytes += len(ref_list) - 1
        # how many digits are needed to represent the total byte count?
        digits = 1
        possible_total_bytes = non_ref_bytes + total_references * digits
        while 10 ** digits < possible_total_bytes:
            digits += 1
            possible_total_bytes = non_ref_bytes + total_references * digits
        expected_bytes = possible_total_bytes + 1  # terminating newline
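        # Worked example (illustrative numbers): with 950 non-reference
        # bytes and 10 references, digits=1 gives 960 total bytes (> 10**1),
        # digits=2 gives 970 (> 10**2), digits=3 gives 980 (<= 10**3), so
        # every reference is serialised as a zero-padded 3-digit offset.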
        # resolve key addresses.
        key_addresses = {}
        for key, non_ref_bytes, total_references in key_offset_info:
            key_addresses[key] = non_ref_bytes + total_references * digits
        # serialise: offsets are zero-padded to exactly `digits` characters
        # so every reference occupies a fixed width.
        format_string = b'%%0%dd' % digits
        for key, (absent, references, value) in nodes:
            flattened_references = []
            for ref_list in references:
                ref_addresses = []
                for reference in ref_list:
                    ref_addresses.append(format_string %
                                         key_addresses[reference])
                flattened_references.append(b'\r'.join(ref_addresses))
            string_key = b'\x00'.join(key)
            lines.append(b"%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
                         b'\t'.join(flattened_references), value))
        result = BytesIO(b''.join(lines))
        if expected_bytes and len(result.getvalue()) != expected_bytes:
            raise errors.BzrError('Failed index creation. Internal error:'
                                  ' mismatched output length and expected length: %d %d' %
                                  (len(result.getvalue()), expected_bytes))
        return result
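
    # Illustrative usage sketch (names and values assumed, not taken from
    # this file's tests):
    #
    #   builder = GraphIndexBuilder(reference_lists=1, key_elements=1)
    #   builder.add_node((b'parent',), b'value', ([],))
    #   builder.add_node((b'child',), b'value', ([(b'parent',)],))
    #   data = builder.finish().read()  # bytes ready to write to a transport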

    def set_optimize(self, for_size=None, combine_backing_indices=None):

    def __init__(self, transport, name, size, unlimited_cache=False, offset=0):
        """Open an index called name on transport.

        :param transport: A breezy.transport.Transport.
        :param name: A path to provide to transport API calls.
        :param size: The size of the index in bytes. This is used for bisection
            logic to perform partial index reads. While the size could be

    def __eq__(self, other):
        """Equal when self and other were created with the same parameters."""
        return (
            isinstance(self, type(other)) and
            self._transport == other._transport and
            self._name == other._name and
            self._size == other._size)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __lt__(self, other):
        # We don't really care about the order, just that there is an order.
        if (not isinstance(other, GraphIndex) and
                not isinstance(other, InMemoryGraphIndex)):
            raise TypeError(other)
        return hash(self) < hash(other)

    def __hash__(self):
        return hash((type(self), self._transport, self._name, self._size))

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__,
                           self._transport.abspath(self._name))

    def _buffer_all(self, stream=None):
        """Buffer all the index data.

        if 'index' in debug.debug_flags:
            trace.mutter('Reading entire index %s',
                         self._transport.abspath(self._name))
        if stream is None:
            stream = self._transport.get(self._name)
            if self._base_offset != 0:
                # This is wasteful, but it is better than dealing with
                # adjusting all the offsets, etc.
                stream = BytesIO(stream.read()[self._base_offset:])
        self._read_prefix(stream)
        # each row carries the key elements plus the absent marker, the
        # references and the value, all NUL-separated.
        self._expected_elements = 3 + self._key_length
        # raw data keyed by offset
        self._keys_by_offset = {}
        # ready-to-return key:value or key:value, node_ref_lists
        self._nodes_by_key = None
        lines = stream.read().split(b'\n')
        # GZ 2009-09-20: Should really use a try/finally block to ensure close
        stream.close()
        _, _, _, trailers = self._parse_lines(lines, pos)
        for key, absent, references, value in self._keys_by_offset.values():
            if absent:
                continue
            # resolve references:
        self._buffer_all()
        if ref_list_num + 1 > self.node_ref_lists:
            raise ValueError('No ref list %d, index has %d ref lists'
                             % (ref_list_num, self.node_ref_lists))
        refs = set()
        nodes = self._nodes
        for key, (value, ref_lists) in nodes.items():
            ref_list = ref_lists[ref_list_num]
            refs.update([ref for ref in ref_list if ref not in nodes])
        if self._nodes_by_key is None:
            nodes_by_key = {}
            if self.node_ref_lists:
                for key, (value, references) in self._nodes.items():
                    key_dict = nodes_by_key
                    for subkey in key[:-1]:
                        key_dict = key_dict.setdefault(subkey, {})
                    key_dict[key[-1]] = key, value, references
            else:
                for key, value in self._nodes.items():
                    key_dict = nodes_by_key
                    for subkey in key[:-1]:
                        key_dict = key_dict.setdefault(subkey, {})
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(3,
                                  "iter_all_entries scales with size of history.")
        if self._nodes is None:
            self._buffer_all()
        if self.node_ref_lists:
            for key, (value, node_ref_lists) in self._nodes.items():
                yield self, key, value, node_ref_lists
        else:
            for key, value in self._nodes.items():
                yield self, key, value

    def _read_prefix(self, stream):
        signature = stream.read(len(self._signature()))
        if not signature == self._signature():
            raise BadIndexFormatSignature(self._name, GraphIndex)
        options_line = stream.readline()
        if not options_line.startswith(_OPTION_NODE_REFS):
            raise BadIndexOptions(self)
        try:
            self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = stream.readline()
        if not options_line.startswith(_OPTION_KEY_ELEMENTS):
            raise BadIndexOptions(self)
        try:
            self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = stream.readline()
        if not options_line.startswith(_OPTION_LEN):
            raise BadIndexOptions(self)
        try:
            self._key_count = int(options_line[len(_OPTION_LEN):-1])
        except ValueError:
            raise BadIndexOptions(self)

    def _resolve_references(self, references):
        """Return the resolved key references for references.

        node_refs = []
        for ref_list in references:
            node_refs.append(
                tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
        return tuple(node_refs)

    @staticmethod
    def _find_index(range_map, key):
        """Helper for the _parsed_*_index calls.

        Given a range map - [(start, end), ...], finds the index of the range
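        in the map for key, or the range immediately preceding it when the
        key is not present. Illustrative example (values assumed): in a
        range_map of [(0, 200), (300, 400)], a location of 250 resolves to
        slot 0 while 300 resolves to slot 1; conceptually this is a
        bisect_right over the sorted range starts, costing O(log n).
        """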
        if self._nodes is None:
            self._buffer_all()
        if self._key_length == 1:
            for key in keys:
                _sanity_check_key(self, key)
                if self.node_ref_lists:
                    value, node_refs = self._nodes[key]
                    yield self, key, value, node_refs
                else:
                    yield self, key, self._nodes[key]
            return
        nodes_by_key = self._get_nodes_by_key()
        for entry in _iter_entries_prefix(self, nodes_by_key, keys):
            yield entry

    def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
        """See BTreeIndex._find_ancestors."""

        :param location_keys: A list of location (byte offset), key tuples.
        :return: A list of (location_key, result) tuples as expected by
            breezy.bisect_multi.bisect_multi_bytes.
        # Possible improvements:
        # - only bisect lookup each key once

            index = self._parsed_byte_index(location)
            if (len(self._parsed_byte_map) and
                self._parsed_byte_map[index][0] <= location and
                    self._parsed_byte_map[index][1] > location):
                # the byte region has been parsed, so no read is needed.
            # _read_and_parse triggered a _buffer_all because we requested the
            # whole data range
            for location, key in location_keys:
                if key not in self._nodes:  # not present
                    result.append(((location, key), False))
                elif self.node_ref_lists:
                    value, refs = self._nodes[key]
                    result.append(((location, key),
                                   (self, key, value, refs)))
                else:
                    result.append(((location, key),
                                   (self, key, self._nodes[key])))
            return result
        # generate results:
        # - figure out <, >, missing, present
                        pending_references.append((location, key))
                        continue
                    result.append(((location, key), (self, key,
                                   value, self._resolve_references(refs))))
                else:
                    result.append(((location, key),
                                   (self, key, self._bisect_nodes[key])))

            # has the region the key should be in, been parsed?

        # answer key references we had to look-up-late.
        for location, key in pending_references:
            value, refs = self._bisect_nodes[key]
            result.append(((location, key), (self, key,
                           value, self._resolve_references(refs))))
        return result

    def _parse_header_from_bytes(self, bytes):
        signature = bytes[0:len(self._signature())]
        if not signature == self._signature():
            raise BadIndexFormatSignature(self._name, GraphIndex)
        lines = bytes[len(self._signature()):].splitlines()
        options_line = lines[0]
        if not options_line.startswith(_OPTION_NODE_REFS):
            raise BadIndexOptions(self)
        try:
            self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = lines[1]
        if not options_line.startswith(_OPTION_KEY_ELEMENTS):
            raise BadIndexOptions(self)
        try:
            self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = lines[2]
        if not options_line.startswith(_OPTION_LEN):
            raise BadIndexOptions(self)
        try:
            self._key_count = int(options_line[len(_OPTION_LEN):])
        except ValueError:
            raise BadIndexOptions(self)
        # calculate the bytes we have processed
        header_end = (len(signature) + len(lines[0]) + len(lines[1]) +
                      len(lines[2]) + 3)
        self._parsed_bytes(0, (), header_end, ())
        # setup parsing state
        self._expected_elements = 3 + self._key_length
        # raw data keyed by offset
        if not start_adjacent:
            # work around python bug in rfind
            if trim_start is None:
                trim_start = data.find(b'\n') + 1
            else:
                trim_start = data.find(b'\n', trim_start) + 1
            if not (trim_start != 0):
                raise AssertionError('no \n was present')
            # print 'removing start', offset, trim_start, repr(data[:trim_start])
        if not end_adjacent:
            # work around python bug in rfind
            if trim_end is None:
                trim_end = data.rfind(b'\n') + 1
            else:
                trim_end = data.rfind(b'\n', None, trim_end) + 1
            if not (trim_end != 0):
                raise AssertionError('no \n was present')
            # print 'removing end', offset, trim_end, repr(data[trim_end:])
        trimmed_data = data[trim_start:trim_end]
        if not (trimmed_data):
            raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'
                                 % (trim_start, trim_end, offset, offset + len(data)))
        offset += trim_start
        # print "parsing", repr(trimmed_data)
        # splitlines mangles the \r delimiters.. don't use it.
        lines = trimmed_data.split(b'\n')
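        # Descriptive note: a bisected readv may start and end in the middle
        # of a serialised row, so the partial row at each edge is trimmed to
        # the surrounding newlines; those edge bytes get (re)parsed when an
        # adjacent region is read.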
        first_key, last_key, nodes, _ = self._parse_lines(lines, pos)
        for key, value in nodes:
            self._bisect_nodes[key] = value
        self._parsed_bytes(offset, first_key,
                           offset + len(trimmed_data), last_key)
        return offset + len(trimmed_data), last_segment

    def _parse_lines(self, lines, pos):
        for line in lines:
            if line == b'':
                # must be at the end
                if not (self._size == pos + 1):
                    raise AssertionError("%s %s" % (self._size, pos))
                continue
            elements = line.split(b'\0')
            if len(elements) != self._expected_elements:
                raise BadIndexData(self)
            # keys are tuples. Each element is a bytestring that may occur
            # many times; they used to be intern()ed to save space, but
            # intern() only accepts str on Python 3. AB, RC, 200807
            key = tuple([element for element in elements[:self._key_length]])
            if first_key is None:
                first_key = key
            absent, references, value = elements[-3:]
            ref_lists = []
            for ref_string in references.split(b'\t'):
                ref_lists.append(tuple([
                    int(ref) for ref in ref_string.split(b'\r') if ref
                    ]))
            ref_lists = tuple(ref_lists)
            self._keys_by_offset[pos] = (key, absent, ref_lists, value)
            pos += len(line) + 1  # +1 for the \n
            if self.node_ref_lists:
        # combine two regions
        if (index + 1 < len(self._parsed_byte_map) and
            self._parsed_byte_map[index][1] == start and
                self._parsed_byte_map[index + 1][0] == end):
            # combine two regions
            self._parsed_byte_map[index] = (self._parsed_byte_map[index][0],
                                            self._parsed_byte_map[index + 1][1])
            self._parsed_key_map[index] = (self._parsed_key_map[index][0],
                                           self._parsed_key_map[index + 1][1])
            del self._parsed_byte_map[index + 1]
            del self._parsed_key_map[index + 1]
        elif self._parsed_byte_map[index][1] == start:
            # extend the lower entry
            self._parsed_byte_map[index] = (
                self._parsed_byte_map[index][0], end)
            self._parsed_key_map[index] = (
                self._parsed_key_map[index][0], end_key)
        elif (index + 1 < len(self._parsed_byte_map) and
                self._parsed_byte_map[index + 1][0] == end):
            # extend the higher entry
            self._parsed_byte_map[index + 1] = (
                start, self._parsed_byte_map[index + 1][1])
        base_offset = self._base_offset
        if base_offset != 0:
            # Rewrite the ranges for the offset
            readv_ranges = [(start + base_offset, size)
                            for start, size in readv_ranges]
        readv_data = self._transport.readv(self._name, readv_ranges, True,
                                           self._size + self._base_offset)
        for offset, data in readv_data:
            offset -= base_offset

                # We read the whole range, most likely because the
                # Transport upcast our readv ranges into one long request
                # for enough total data to grab the whole index.
                self._buffer_all(BytesIO(data))
1233
if self._bisect_nodes is None:
1264
1234
# this must be the start
1290
1260
performance significantly. For example, if one index is on local disk and a
1291
1261
second on a remote server, the local disk index should be before the other
1292
1262
in the index list.
1294
1264
Also, queries tend to need results from the same indices as previous
1295
1265
queries. So the indices will be reordered after every query to put the
1296
1266
indices that had the result(s) of that query first (while otherwise
        search_keys = set(keys)
        if _mod_revision.NULL_REVISION in search_keys:
            search_keys.discard(_mod_revision.NULL_REVISION)
            found_parents = {_mod_revision.NULL_REVISION: []}
        else:
            found_parents = {}
        for index, key, value, refs in self.iter_entries(search_keys):
            parents = refs[0]
            if not parents:
                parents = (_mod_revision.NULL_REVISION,)
            found_parents[key] = parents
        return found_parents

    __contains__ = _has_key_from_parent_map

    def insert_index(self, pos, index, name=None):
        """Insert a new index in the list of indices to query.

                        hit_indices.append(index)
            except errors.NoSuchFile as e:
                if not self._try_reload(e):
                    raise
        self._move_to_front(hit_indices)

    def iter_entries_prefix(self, keys):

                        hit_indices.append(index)
            except errors.NoSuchFile as e:
                if not self._try_reload(e):
                    raise
        self._move_to_front(hit_indices)

    def _move_to_front(self, hit_indices):

    def _move_to_front_by_index(self, hit_indices):
        """Core logic for _move_to_front.

        Returns a list of names corresponding to the hit_indices param.
        """
        indices_info = zip(self._index_names, self._indices)
        if 'index' in debug.debug_flags:
            # zip() returns a one-shot iterator on Python 3; materialise it
            # so logging here does not consume the pairs needed below.
            indices_info = list(indices_info)
            trace.mutter('CombinedGraphIndex reordering: currently %r, '
                         'promoting %r', indices_info, hit_indices)

                if len(new_hit_indices) == len(hit_indices):
                    # We've found all of the hit entries, everything else is
                    # unhit
                    unhit_names.extend(self._index_names[offset + 1:])
                    unhit_indices.extend(self._indices[offset + 1:])

                unhit_names.append(name)
                # CombinedGraphIndex does not know what the ref lists

                search_keys = index._find_ancestors(search_keys,
                                                    ref_list_num, parent_map, index_missing_keys)
                # print ' \t \t%2d\t%4d\t%5d\t%5d' % (
                #     sub_generation, len(search_keys),
                #     len(parent_map), len(index_missing_keys))
                return sum((index.key_count() for index in self._indices), 0)
            except errors.NoSuchFile as e:
                if not self._try_reload(e):
                    raise

    missing_keys = _missing_keys_from_parent_map
1602
def _try_reload(self, error):
1567
def _reload_or_raise(self):
1603
1568
"""We just got a NoSuchFile exception.
1605
1570
Try to reload the indices, if it fails, just raise the current
1608
1573
if self._reload_func is None:
1611
'Trying to reload after getting exception: %s', str(error))
1575
exc_type, exc_value, exc_traceback = sys.exc_info()
1576
trace.mutter('Trying to reload after getting exception: %s',
1612
1578
if not self._reload_func():
1613
1579
# We tried to reload, but nothing changed, so we fail anyway
1614
1580
trace.mutter('_reload_func indicated nothing has changed.'
1615
1581
' Raising original exception.')
1582
raise exc_type, exc_value, exc_traceback

    def set_sibling_indices(self, sibling_combined_graph_indices):
        """Set the CombinedGraphIndex objects to reorder after reordering self.
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(3,
                                  "iter_all_entries scales with size of history.")
        if self.reference_lists:
            for key, (absent, references, value) in self._nodes.items():
                if not absent:
                    yield self, key, value, references
        else:
            for key, (absent, references, value) in self._nodes.items():
                if not absent:
                    yield self, key, value
        will be returned, and every match that is in the index will be
        returned.
        """
        keys = set(keys)
        if self._key_length == 1:
            for key in keys:
                _sanity_check_key(self, key)
                node = self._nodes[key]
                yield self, key, node[2]
            return
        nodes_by_key = self._get_nodes_by_key()
        for entry in _iter_entries_prefix(self, nodes_by_key, keys):
            yield entry

    def key_count(self):
        """Return an estimate of the number of keys in this index.

    def validate(self):
        """In-memory indexes have no known corruption at the moment."""

    def __lt__(self, other):
        # We don't really care about the order, just that there is an order.
        if (not isinstance(other, GraphIndex) and
                not isinstance(other, InMemoryGraphIndex)):
            raise TypeError(other)
        return hash(self) < hash(other)


class GraphIndexPrefixAdapter(object):
    """An adapter between GraphIndex with different key lengths.

    def __init__(self, adapted, prefix, missing_key_length,
                 add_nodes_callback=None):
        """Construct an adapter against adapted with prefix."""
        self.adapted = adapted
        self.prefix_key = prefix + (None,) * missing_key_length
        self.prefix = prefix
        self.prefix_len = len(prefix)
        self.add_nodes_callback = add_nodes_callback
            for (key, value, node_refs) in nodes:
                adjusted_references = (
                    tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
                          for ref_list in node_refs))
                translated_nodes.append((self.prefix + key, value,
                                         adjusted_references))
        except ValueError:
            # XXX: TODO add an explicit interface for getting the reference list
            # status, to handle this bit of user-friendliness in the API more
        for node in an_iter:
            if node[1][:self.prefix_len] != self.prefix:
                raise BadIndexData(self)
            for ref_list in node[3]:
                for ref_node in ref_list:
                    if ref_node[:self.prefix_len] != self.prefix:
                        raise BadIndexData(self)
            yield node[0], node[1][self.prefix_len:], node[2], (
                tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
                      for ref_list in node[3]))
def iter_all_entries(self):
1823
1816
"""Iterate over all keys within the index
1873
1866
def validate(self):
1874
1867
"""Call the adapted's validate."""
1875
1868
self.adapted.validate()
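
    # Illustrative usage sketch (names and data assumed): adapting a
    # two-element index so callers see one-element keys under a fixed prefix:
    #
    #   inner = InMemoryGraphIndex(1, key_elements=2)
    #   inner.add_nodes([((b'prefix', b'name'), b'value', ([],))])
    #   adapter = GraphIndexPrefixAdapter(inner, (b'prefix',), 1)
    #   list(adapter.iter_all_entries())
    #   # -> roughly [(inner, (b'name',), b'value', ((),))]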


def _sanity_check_key(index_or_builder, key):
    """Raise BadIndexKey if key cannot be used for prefix matching."""
    if type(key) not in (tuple, StaticTuple):
        raise BadIndexKey(key)
    if len(key) != index_or_builder._key_length:
        raise BadIndexKey(key)


def _iter_entries_prefix(index_or_builder, nodes_by_key, keys):
    """Helper for implementing prefix matching iterators."""
    for key in keys:
        _sanity_check_key(index_or_builder, key)
        # find what it refers to:
        key_dict = nodes_by_key
        elements = list(key)
        # find the subdict whose contents should be returned.
        try:
            while len(elements) and elements[0] is not None:
                key_dict = key_dict[elements[0]]
                elements.pop(0)
        except KeyError:
            # a non-existent lookup.
            continue
        if len(elements):
            dicts = [key_dict]
            while dicts:
                values_view = dicts.pop().values()
                # can't be empty or would not exist
                value = next(iter(values_view))
                if isinstance(value, dict):
                    # still descending, push values
                    dicts.extend(values_view)
                else:
                    # at leaf tuples, yield values
                    for value in values_view:
                        # each value is the key:value:node refs tuple
                        yield (index_or_builder, ) + value
        else:
            # the last thing looked up was a terminal element
            yield (index_or_builder, ) + key_dict
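

# Illustrative prefix lookup (hypothetical data): with a two-element index
# containing (b'a', b'1') and (b'a', b'2'), iter_entries_prefix([(b'a', None)])
# walks nodes_by_key = {b'a': {b'1': ..., b'2': ...}} and yields both entries,
# while a fully specified key such as (b'a', b'1') yields just its terminal
# entry.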