from bisect import bisect_right
import re

from ..lazy_import import lazy_import
lazy_import(globals(), """
from breezy import (
    bisect_multi,
    revision as _mod_revision,
    trace,
    )
""")
from .. import (
    debug,
    errors,
    )
from ..sixish import (
    BytesIO,
    bytesintern,
    viewitems,
    viewvalues,
    )
from ..static_tuple import StaticTuple

_HEADER_READV = (0, 200)
_OPTION_KEY_ELEMENTS = b"key_elements="
_OPTION_LEN = b"len="
_OPTION_NODE_REFS = b"node_ref_lists="
_SIGNATURE = b"Bazaar Graph Index 1\n"


class BadIndexFormatSignature(errors.BzrError):

    _fmt = "%(value)s is not an index of type %(_type)s."

    def __init__(self, value, _type):
        errors.BzrError.__init__(self)
        self.value = value
        self._type = _type


class BadIndexData(errors.BzrError):

    _fmt = "Error in data for index %(value)s."

    def __init__(self, value):
        errors.BzrError.__init__(self)
        self.value = value


class BadIndexDuplicateKey(errors.BzrError):

    _fmt = "The key '%(key)s' is already in index '%(index)s'."

    def __init__(self, key, index):
        errors.BzrError.__init__(self)
        self.key = key
        self.index = index


class BadIndexKey(errors.BzrError):

    _fmt = "The key '%(key)s' is not a valid key."

    def __init__(self, key):
        errors.BzrError.__init__(self)
        self.key = key


class BadIndexOptions(errors.BzrError):

    _fmt = "Could not parse options for index %(value)s."

    def __init__(self, value):
        errors.BzrError.__init__(self)
        self.value = value


class BadIndexValue(errors.BzrError):

    _fmt = "The value '%(value)s' is not a valid value."

    def __init__(self, value):
        errors.BzrError.__init__(self)
        self.value = value


_whitespace_re = re.compile(b'[\t\n\x0b\x0c\r\x00 ]')
_newline_null_re = re.compile(b'[\n\0]')


def _has_key_from_parent_map(self, key):
    """Check if this index has one key.

    If it's possible to check for multiple keys at once through
    calling get_parent_map that should be faster.
    """
    return (key in self.get_parent_map([key]))


def _missing_keys_from_parent_map(self, keys):
    return set(keys) - set(self.get_parent_map(keys))

class GraphIndexBuilder(object):
    """A builder that can build a GraphIndex.

    The resulting graph has the structure::

      _SIGNATURE OPTIONS NODES NEWLINE
      _SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
      OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
      NODES := NODE*
      NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
      KEY := Not-whitespace-utf8
      ABSENT := 'a'
      REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
      REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
      REFERENCE := DIGITS  ; digits is the byte offset in the index of the
                           ; referenced key.
      VALUE := no-newline-no-null-bytes
    """
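
    # Illustrative sketch (not from the original source): with the default
    # single key element and one reference list, a two-node index would
    # serialise roughly as
    #
    #   Bazaar Graph Index 1\n
    #   node_ref_lists=1\n
    #   key_elements=1\n
    #   len=2\n
    #   child\x00\x00<zero-padded offset of the parent line>\x00value1\n
    #   parent\x00\x00\x00value0\n
    #
    # finish() computes those offsets and pads them to a fixed width.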

    def __init__(self, reference_lists=0, key_elements=1):

    def _check_key(self, key):
        """Raise BadIndexKey if key is not a valid key for this index."""
        if type(key) not in (tuple, StaticTuple):
            raise BadIndexKey(key)
        if self._key_length != len(key):
            raise BadIndexKey(key)
        for element in key:
            if (not element or type(element) != bytes
                    or _whitespace_re.search(element) is not None):
                raise BadIndexKey(key)

    def _external_references(self):
        """Return references that are not present in this index."""
                    key_dict = key_dict.setdefault(subkey, {})
                key_dict[key[-1]] = key, value, references
            for key, (absent, references, value) in viewitems(self._nodes):
                key_dict = nodes_by_key

        :param value: The value associated with this key. Must not contain
            newlines or null characters.
        :return: (node_refs, absent_references)

            * node_refs: basically a packed form of 'references' where all
              iterables are tuples
            * absent_references: reference keys that are not in self._nodes.
              This may contain duplicates if the same key is referenced in
              multiple lists.
        """
        as_st = StaticTuple.from_sequence
        self._check_key(key)
        if _newline_null_re.search(value) is not None:
            raise BadIndexValue(value)
        if len(references) != self.reference_lists:
            raise BadIndexValue(references)
        absent_references = []
        for reference_list in references:

        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        """
        (node_refs,
         absent_references) = self._check_key_ref_value(key, references, value)
        if key in self._nodes and self._nodes[key][0] != b'a':
            raise BadIndexDuplicateKey(key, self)
        for reference in absent_references:
            # There may be duplicates, but I don't think it is worth worrying
            # about.
            self._nodes[reference] = (b'a', (), b'')
        self._absent_keys.update(absent_references)
        self._absent_keys.discard(key)
        self._nodes[key] = (b'', node_refs, value)
        if self._nodes_by_key is not None and self._key_length > 1:
            self._update_nodes_by_key(key, value, node_refs)
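
    # Example usage (an illustrative sketch, not from the original source):
    #
    #   builder = GraphIndexBuilder(reference_lists=1)
    #   builder.add_node((b'parent',), b'value0', ([],))
    #   builder.add_node((b'child',), b'value1', ([(b'parent',)],))
    #   stream = builder.finish()  # BytesIO, ready to be written to disk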

        This is a no-op, but we need the API to conform to a generic 'Index'
        abstraction.
        """

    def finish(self):
        """Finish the index.

        :returns: a BytesIO holding the full content of the index as it
            should be written to disk.
        """
        lines = [_SIGNATURE]
        lines.append(b'%s%d\n' % (_OPTION_NODE_REFS, self.reference_lists))
        lines.append(b'%s%d\n' % (_OPTION_KEY_ELEMENTS, self._key_length))
        key_count = len(self._nodes) - len(self._absent_keys)
        lines.append(b'%s%d\n' % (_OPTION_LEN, key_count))
        prefix_length = sum(len(x) for x in lines)
        # references are byte offsets. To avoid having to do nasty
        # polynomial work to resolve offsets (references to later in the
        # forward sorted by key. In future we may consider topological sorting,
        # at the cost of table scans for direct lookup, or a second index for
        nodes = sorted(viewitems(self._nodes))
        # if we do not prepass, we don't know how long it will be up front.
        expected_bytes = None
        # we only need to pre-pass if we have reference lists at all.
        if self.reference_lists:
                    non_ref_bytes += len(ref_list) - 1
            # how many digits are needed to represent the total byte count?
            digits = 1
            possible_total_bytes = non_ref_bytes + total_references * digits
            while 10 ** digits < possible_total_bytes:
                digits += 1
                possible_total_bytes = non_ref_bytes + total_references * digits
            expected_bytes = possible_total_bytes + 1  # terminating newline
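            # Worked example (illustrative, not from the original source):
            # with non_ref_bytes=100 and total_references=10, digits=1 gives
            # 110 possible bytes; 10**1 < 110, so digits grows to 2 (120
            # bytes) and then to 3 (130 bytes); 10**3 >= 130 stops the loop,
            # and every reference is rendered as a zero-padded 3-digit offset.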
            # resolve key addresses.
            key_addresses = {}
            for key, non_ref_bytes, total_references in key_offset_info:
                key_addresses[key] = non_ref_bytes + total_references * digits
            format_string = b'%%0%dd' % digits
        for key, (absent, references, value) in nodes:
            flattened_references = []
            for ref_list in references:
                ref_addresses = []
                for reference in ref_list:
                    ref_addresses.append(format_string %
                                         key_addresses[reference])
                flattened_references.append(b'\r'.join(ref_addresses))
            string_key = b'\x00'.join(key)
            lines.append(b"%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
                                                      b'\t'.join(flattened_references), value))
        result = BytesIO(b''.join(lines))
        if expected_bytes and len(result.getvalue()) != expected_bytes:
            raise errors.BzrError('Failed index creation. Internal error:'
                                  ' mismatched output length and expected length: %d %d' %
                                  (len(result.getvalue()), expected_bytes))
        return result

    def set_optimize(self, for_size=None, combine_backing_indices=None):


class GraphIndex(object):
    """An index for data with embedded graphs.
    """

    def __init__(self, transport, name, size, unlimited_cache=False, offset=0):
        """Open an index called name on transport.

        :param transport: A breezy.transport.Transport.
        :param name: A path to provide to transport API calls.
        :param size: The size of the index in bytes. This is used for bisection
            logic to perform partial index reads. While the size could be

    def __ne__(self, other):
        return not self.__eq__(other)

    def __lt__(self, other):
        # We don't really care about the order, just that there is an order.
        if (not isinstance(other, GraphIndex) and
                not isinstance(other, InMemoryGraphIndex)):
            raise TypeError(other)
        return hash(self) < hash(other)

    def __hash__(self):
        return hash((type(self), self._transport, self._name, self._size))

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__,
                           self._transport.abspath(self._name))

    def _buffer_all(self, stream=None):
        """Buffer all the index data.

        Mutates self._nodes and self.keys_by_offset.
        """
        if self._nodes is not None:
            # We already did this
            return
        if 'index' in debug.debug_flags:
            trace.mutter('Reading entire index %s',
                         self._transport.abspath(self._name))
        if stream is None:
            stream = self._transport.get(self._name)
        if self._base_offset != 0:
            # This is wasteful, but it is better than dealing with
            # adjusting all the offsets, etc.
            stream = BytesIO(stream.read()[self._base_offset:])
        self._read_prefix(stream)
        self._expected_elements = 3 + self._key_length
        # raw data keyed by offset
        self._keys_by_offset = {}
        # ready-to-return key:value or key:value, node_ref_lists
        self._nodes = {}
        self._nodes_by_key = None
        trailers = 0
        pos = stream.tell()
        lines = stream.read().split(b'\n')
        _, _, _, trailers = self._parse_lines(lines, pos)
        for key, absent, references, value in viewvalues(self._keys_by_offset):
            if absent:
                continue
            # resolve references:
            self._buffer_all()
        if ref_list_num + 1 > self.node_ref_lists:
            raise ValueError('No ref list %d, index has %d ref lists'
                             % (ref_list_num, self.node_ref_lists))
        refs = set()
        nodes = self._nodes
        for key, (value, ref_lists) in viewitems(nodes):
            ref_list = ref_lists[ref_list_num]
            refs.update([ref for ref in ref_list if ref not in nodes])
        return refs

    def _get_nodes_by_key(self):
        if self._nodes_by_key is None:
            nodes_by_key = {}
            if self.node_ref_lists:
                for key, (value, references) in viewitems(self._nodes):
                    key_dict = nodes_by_key
                    for subkey in key[:-1]:
                        key_dict = key_dict.setdefault(subkey, {})
                    key_dict[key[-1]] = key, value, references
            else:
                for key, value in viewitems(self._nodes):
                    key_dict = nodes_by_key
                    for subkey in key[:-1]:
                        key_dict = key_dict.setdefault(subkey, {})
                    key_dict[key[-1]] = key, value
            self._nodes_by_key = nodes_by_key
        return self._nodes_by_key

    def iter_all_entries(self):
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(3,
                                  "iter_all_entries scales with size of history.")
        if self._nodes is None:
            self._buffer_all()
        if self.node_ref_lists:
            for key, (value, node_ref_lists) in viewitems(self._nodes):
                yield self, key, value, node_ref_lists
        else:
            for key, value in viewitems(self._nodes):
                yield self, key, value

    def _read_prefix(self, stream):
        signature = stream.read(len(self._signature()))
        if not signature == self._signature():
            raise BadIndexFormatSignature(self._name, GraphIndex)
        options_line = stream.readline()
        if not options_line.startswith(_OPTION_NODE_REFS):
            raise BadIndexOptions(self)
        try:
            self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = stream.readline()
        if not options_line.startswith(_OPTION_KEY_ELEMENTS):
            raise BadIndexOptions(self)
        try:
            self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = stream.readline()
        if not options_line.startswith(_OPTION_LEN):
            raise BadIndexOptions(self)
        try:
            self._key_count = int(options_line[len(_OPTION_LEN):-1])
        except ValueError:
            raise BadIndexOptions(self)

    def _resolve_references(self, references):
        """Return the resolved key references for references.
        """
        node_refs = []
        for ref_list in references:
            node_refs.append(
                tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
        return tuple(node_refs)

    @staticmethod
    def _find_index(range_map, key):
        """Helper for the _parsed_*_index calls.

        Given a range map - [(start, end), ...], finds the index of the range
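
        For example (an illustrative sketch, not from the original
        docstring): with a range_map of [(0, 200), (300, 500)], a key that
        lies in the second region resolves to index 1.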
        if self._nodes is not None:
            return self._iter_entries_from_total_buffer(keys)
        else:
            return (result[1] for result in bisect_multi.bisect_multi_bytes(
                self._lookup_keys_via_location, self._size, keys))

    def iter_entries_prefix(self, keys):
        if self._nodes is None:
            self._buffer_all()
        if self._key_length == 1:
            for key in keys:
                _sanity_check_key(self, key)
                if self.node_ref_lists:
                    value, node_refs = self._nodes[key]
                    yield self, key, value, node_refs
                else:
                    yield self, key, self._nodes[key]
            return
        nodes_by_key = self._get_nodes_by_key()
        for entry in _iter_entries_prefix(self, nodes_by_key, keys):
            yield entry

    def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
        """See BTreeIndex._find_ancestors."""
        index = self._parsed_byte_index(location)
        if (len(self._parsed_byte_map) and
                self._parsed_byte_map[index][0] <= location and
                self._parsed_byte_map[index][1] > location):
            # the byte region has been parsed, so no read is needed.
            # _read_and_parse triggered a _buffer_all because we requested the
            # whole data range
            for location, key in location_keys:
                if key not in self._nodes:  # not present
                    result.append(((location, key), False))
                elif self.node_ref_lists:
                    value, refs = self._nodes[key]
                    result.append(((location, key),
                                   (self, key, value, refs)))
                else:
                    result.append(((location, key),
                                   (self, key, self._nodes[key])))
            return result
        # generate results:
        # - figure out <, >, missing, present
                        pending_references.append((location, key))
                        continue
                    result.append(((location, key), (self, key,
                                   value, self._resolve_references(refs))))
                else:
                    result.append(((location, key),
                                   (self, key, self._bisect_nodes[key])))
                continue
            else:
                # has the region the key should be in, been parsed?
            # answer key references we had to look-up-late.
            value, refs = self._bisect_nodes[key]
            result.append(((location, key), (self, key,
                           value, self._resolve_references(refs))))
        return result

    def _parse_header_from_bytes(self, bytes):
        signature = bytes[0:len(self._signature())]
        if not signature == self._signature():
            raise BadIndexFormatSignature(self._name, GraphIndex)
        lines = bytes[len(self._signature()):].splitlines()
        options_line = lines[0]
        if not options_line.startswith(_OPTION_NODE_REFS):
            raise BadIndexOptions(self)
        try:
            self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = lines[1]
        if not options_line.startswith(_OPTION_KEY_ELEMENTS):
            raise BadIndexOptions(self)
        try:
            self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = lines[2]
        if not options_line.startswith(_OPTION_LEN):
            raise BadIndexOptions(self)
        try:
            self._key_count = int(options_line[len(_OPTION_LEN):])
        except ValueError:
            raise BadIndexOptions(self)
        # calculate the bytes we have processed
        header_end = (len(signature) + len(lines[0]) + len(lines[1]) +
                      len(lines[2]) + 3)
        self._parsed_bytes(0, (), header_end, ())
        # setup parsing state
        self._expected_elements = 3 + self._key_length
        # raw data keyed by offset
        if not start_adjacent:
            # work around python bug in rfind
            if trim_start is None:
                trim_start = data.find(b'\n') + 1
            else:
                trim_start = data.find(b'\n', trim_start) + 1
            if not (trim_start != 0):
                raise AssertionError('no \n was present')
            # print 'removing start', offset, trim_start, repr(data[:trim_start])
        if not end_adjacent:
            # work around python bug in rfind
            if trim_end is None:
                trim_end = data.rfind(b'\n') + 1
            else:
                trim_end = data.rfind(b'\n', None, trim_end) + 1
            if not (trim_end != 0):
                raise AssertionError('no \n was present')
            # print 'removing end', offset, trim_end, repr(data[trim_end:])
        trimmed_data = data[trim_start:trim_end]
        if not (trimmed_data):
            raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'
                                 % (trim_start, trim_end, offset, offset + len(data)))
        offset += trim_start
        # print "parsing", repr(trimmed_data)
        # splitlines mangles the \r delimiters.. don't use it.
        lines = trimmed_data.split(b'\n')
        first_key, last_key, nodes, _ = self._parse_lines(lines, pos)
        for key, value in nodes:
            self._bisect_nodes[key] = value
        self._parsed_bytes(offset, first_key,
                           offset + len(trimmed_data), last_key)
        return offset + len(trimmed_data), last_segment

    def _parse_lines(self, lines, pos):
        key = None
        first_key = None
        trailers = 0
        nodes = []
        for line in lines:
            if line == b'':
                # must be at the end
                if self._size:
                    if not (self._size == pos + 1):
                        raise AssertionError("%s %s" % (self._size, pos))
                trailers += 1
                continue
            elements = line.split(b'\0')
            if len(elements) != self._expected_elements:
                raise BadIndexData(self)
            # keys are tuples. Each element is a string that may occur many
            # times, so we intern them to save space. AB, RC, 200807
            key = tuple([bytesintern(element)
                         for element in elements[:self._key_length]])
            if first_key is None:
                first_key = key
            absent, references, value = elements[-3:]
            ref_lists = []
            for ref_string in references.split(b'\t'):
                ref_lists.append(tuple([
                    int(ref) for ref in ref_string.split(b'\r') if ref
                    ]))
            ref_lists = tuple(ref_lists)
            self._keys_by_offset[pos] = (key, absent, ref_lists, value)
            pos += len(line) + 1  # +1 for the \n
        # combine two regions
        if (index + 1 < len(self._parsed_byte_map) and
                self._parsed_byte_map[index][1] == start and
                self._parsed_byte_map[index + 1][0] == end):
            # combine two regions
            self._parsed_byte_map[index] = (self._parsed_byte_map[index][0],
                                            self._parsed_byte_map[index + 1][1])
            self._parsed_key_map[index] = (self._parsed_key_map[index][0],
                                           self._parsed_key_map[index + 1][1])
            del self._parsed_byte_map[index + 1]
            del self._parsed_key_map[index + 1]
        elif self._parsed_byte_map[index][1] == start:
            # extend the lower entry
            self._parsed_byte_map[index] = (
                self._parsed_byte_map[index][0], end)
            self._parsed_key_map[index] = (
                self._parsed_key_map[index][0], end_key)
        elif (index + 1 < len(self._parsed_byte_map) and
                self._parsed_byte_map[index + 1][0] == end):
            # extend the higher entry
            self._parsed_byte_map[index + 1] = (
                start, self._parsed_byte_map[index + 1][1])
        base_offset = self._base_offset
        if base_offset != 0:
            # Rewrite the ranges for the offset
            readv_ranges = [(start + base_offset, size)
                            for start, size in readv_ranges]
        readv_data = self._transport.readv(self._name, readv_ranges, True,
                                           self._size + self._base_offset)
        for offset, data in readv_data:
            offset -= base_offset
            if offset == 0 and len(data) == self._size:
                # We read the whole range, most likely because the
                # Transport upcast our readv ranges into one long request
                # for enough total data to grab the whole index.
                self._buffer_all(BytesIO(data))
                return
        if self._bisect_nodes is None:
            # this must be the start
    performance significantly. For example, if one index is on local disk and a
    second on a remote server, the local disk index should be before the other
    in the index list.

    Also, queries tend to need results from the same indices as previous
    queries. So the indices will be reordered after every query to put the
    indices that had the result(s) of that query first (while otherwise
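
    For example (an illustrative sketch, not from the original docstring)::

        index = CombinedGraphIndex([local_index, remote_index])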

    def get_parent_map(self, keys):
        """See graph.StackedParentsProvider.get_parent_map"""
        search_keys = set(keys)
        if _mod_revision.NULL_REVISION in search_keys:
            search_keys.discard(_mod_revision.NULL_REVISION)
            found_parents = {_mod_revision.NULL_REVISION: []}
        else:
            found_parents = {}
        for index, key, value, refs in self.iter_entries(search_keys):
            parents = refs[0]
            if not parents:
                parents = (_mod_revision.NULL_REVISION,)
            found_parents[key] = parents
        return found_parents
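
    # Example (an illustrative sketch): for indices whose first reference
    # list holds parent keys, a call such as
    #
    #   combined.get_parent_map([(b'rev-2',), (b'rev-9',)])
    #
    # returns a dict like {(b'rev-2',): ((b'rev-1',),)}; keys not found in
    # any index (b'rev-9' here) are simply absent, and an empty parent list
    # is reported as (NULL_REVISION,).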

    __contains__ = _has_key_from_parent_map

    def insert_index(self, pos, index, name=None):
        """Insert a new index in the list of indices to query.

    def _move_to_front_by_index(self, hit_indices):
        """Core logic for _move_to_front.

        Returns a list of names corresponding to the hit_indices param.
        """
        indices_info = zip(self._index_names, self._indices)
        if 'index' in debug.debug_flags:
            indices_info = list(indices_info)
            trace.mutter('CombinedGraphIndex reordering: currently %r, '
                         'promoting %r', indices_info, hit_indices)
        hit_names = []
        unhit_names = []
        new_hit_indices = []
        unhit_indices = []
        for offset, (name, idx) in enumerate(indices_info):
            if idx in hit_indices:
                hit_names.append(name)
                new_hit_indices.append(idx)
                if len(new_hit_indices) == len(hit_indices):
                    # We've found all of the hit entries, everything else is
                    # unhit
                    unhit_names.extend(self._index_names[offset + 1:])
                    unhit_indices.extend(self._indices[offset + 1:])
                    break
            else:
                unhit_names.append(name)
                unhit_indices.append(idx)
        self._indices = new_hit_indices + unhit_indices
        self._index_names = hit_names + unhit_names
        if 'index' in debug.debug_flags:
            trace.mutter('CombinedGraphIndex reordered: %r', self._indices)
        return hit_names
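
    # Illustrative: with index names ['a', 'b', 'c'] and a hit on the index
    # named 'c', the reorder yields ['c', 'a', 'b'], so later queries try
    # the recently useful index first.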

    def _move_to_front_by_name(self, hit_names):
                    # CombinedGraphIndex does not know what the ref lists
                    search_keys = index._find_ancestors(search_keys,
                                                        ref_list_num, parent_map, index_missing_keys)
                    # print ' \t \t%2d\t%4d\t%5d\t%5d' % (
                    #     sub_generation, len(search_keys),
                    #     len(parent_map), len(index_missing_keys))
                return sum((index.key_count() for index in self._indices), 0)
            except errors.NoSuchFile as e:
                if not self._try_reload(e):
                    raise

    missing_keys = _missing_keys_from_parent_map

    def _try_reload(self, error):
        """We just got a NoSuchFile exception.

        Try to reload the indices, if it fails, just raise the current
        exception.
        """
        if self._reload_func is None:
            return False
        trace.mutter(
            'Trying to reload after getting exception: %s', str(error))
        if not self._reload_func():
            # We tried to reload, but nothing changed, so we fail anyway
            trace.mutter('_reload_func indicated nothing has changed.'
                         ' Raising original exception.')
            return False
        return True

    def set_sibling_indices(self, sibling_combined_graph_indices):
        """Set the CombinedGraphIndex objects to reorder after reordering self.
        """
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(3,
                                  "iter_all_entries scales with size of history.")
        if self.reference_lists:
            for key, (absent, references, value) in viewitems(self._nodes):
                if not absent:
                    yield self, key, value, references
        else:
            for key, (absent, references, value) in viewitems(self._nodes):
                if not absent:
                    yield self, key, value
        will be returned, and every match that is in the index will be
        returned.
        keys = set(keys)
        if not keys:
            return
        if self._key_length == 1:
            for key in keys:
                _sanity_check_key(self, key)
                node = self._nodes[key]
                if node[0]:
                    continue
                if self.reference_lists:
                    yield self, key, node[2], node[1]
                else:
                    yield self, key, node[2]
            return
        nodes_by_key = self._get_nodes_by_key()
        for entry in _iter_entries_prefix(self, nodes_by_key, keys):
            yield entry

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For InMemoryGraphIndex the estimate is exact.
        """
        return len(self._nodes) - len(self._absent_keys)

    def validate(self):
        """In memory indexes have no known corruption at the moment."""

    def __lt__(self, other):
        # We don't really care about the order, just that there is an order.
        if (not isinstance(other, GraphIndex) and
                not isinstance(other, InMemoryGraphIndex)):
            raise TypeError(other)
        return hash(self) < hash(other)


class GraphIndexPrefixAdapter(object):
    """An adapter between GraphIndex with different key lengths.

    def __init__(self, adapted, prefix, missing_key_length,
                 add_nodes_callback=None):
        """Construct an adapter against adapted with prefix."""
        self.adapted = adapted
        self.prefix_key = prefix + (None,) * missing_key_length
        self.prefix = prefix
        self.prefix_len = len(prefix)
        self.add_nodes_callback = add_nodes_callback
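
    # Example usage (an illustrative sketch): expose a two-element-key index
    # as if it were keyed by a single element, assuming the adapted index was
    # built without reference lists.
    #
    #   adapter = GraphIndexPrefixAdapter(two_key_index, (b'branch-tip',), 1)
    #   for index, key, value in adapter.iter_all_entries():
    #       ...  # keys come back with the (b'branch-tip',) prefix stripped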
            for (key, value, node_refs) in nodes:
                adjusted_references = (
                    tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
                          for ref_list in node_refs))
                translated_nodes.append((self.prefix + key, value,
                                         adjusted_references))
        except ValueError:
            # XXX: TODO add an explicit interface for getting the reference list
            # status, to handle this bit of user-friendliness in the API more
            # explicitly.
        for node in an_iter:
            if node[1][:self.prefix_len] != self.prefix:
                raise BadIndexData(self)
            for ref_list in node[3]:
                for ref_node in ref_list:
                    if ref_node[:self.prefix_len] != self.prefix:
                        raise BadIndexData(self)
            yield node[0], node[1][self.prefix_len:], node[2], (
                tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
                      for ref_list in node[3]))

    def iter_all_entries(self):
        """Iterate over all keys within the index

    def validate(self):
        """Call the adapted's validate."""
        self.adapted.validate()


def _sanity_check_key(index_or_builder, key):
    """Raise BadIndexKey if key cannot be used for prefix matching."""
    if key[0] is None:
        raise BadIndexKey(key)
    if len(key) != index_or_builder._key_length:
        raise BadIndexKey(key)


def _iter_entries_prefix(index_or_builder, nodes_by_key, keys):
    """Helper for implementing prefix matching iterators."""
    for key in keys:
        _sanity_check_key(index_or_builder, key)
        # find what it refers to:
        key_dict = nodes_by_key
        elements = list(key)
        # find the subdict whose contents should be returned.
        try:
            while len(elements) and elements[0] is not None:
                key_dict = key_dict[elements[0]]
                elements = elements[1:]
        except KeyError:
            # a non-existent lookup.
            continue
        if len(elements):
            dicts = [key_dict]
            while dicts:
                values_view = viewvalues(dicts.pop())
                # can't be empty or would not exist
                value = next(iter(values_view))
                if isinstance(value, dict):
                    # still descending, push values
                    dicts.extend(values_view)
                else:
                    # at leaf tuples, yield values
                    for value in values_view:
                        # each value is the key:value:node refs tuple
                        yield (index_or_builder, ) + value
        else:
            # the last thing looked up was a terminal element
            yield (index_or_builder, ) + key_dict
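

# Example (an illustrative sketch): prefix matching with a trailing None
# wildcard. For a two-element-key index containing (b'a', b'1') and
# (b'a', b'2'), a query key of (b'a', None) descends into
# nodes_by_key[b'a'] and yields both stored entries.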