from bisect import bisect_right
import re

from ..lazy_import import lazy_import
lazy_import(globals(), """
from breezy import (
    bisect_multi,
    revision as _mod_revision,
    trace,
    )
""")
from .. import (
    debug,
    errors,
    )
from ..sixish import (
    BytesIO,
    bytesintern,
    viewitems,
    viewvalues,
    )
from ..static_tuple import StaticTuple

_HEADER_READV = (0, 200)
_OPTION_KEY_ELEMENTS = b"key_elements="
_OPTION_LEN = b"len="
_OPTION_NODE_REFS = b"node_ref_lists="
_SIGNATURE = b"Bazaar Graph Index 1\n"
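# Illustrative sketch (not part of the original file): with the defaults, an
# empty index built by GraphIndexBuilder.finish() serialises to exactly these
# bytes, matching the grammar documented on GraphIndexBuilder below:
#
#   b"Bazaar Graph Index 1\n"   # _SIGNATURE
#   b"node_ref_lists=0\n"       # _OPTION_NODE_REFS
#   b"key_elements=1\n"         # _OPTION_KEY_ELEMENTS
#   b"len=0\n"                  # _OPTION_LEN
#   b"\n"                       # terminating newline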
class BadIndexFormatSignature(errors.BzrError):

    _fmt = "%(value)s is not an index of type %(_type)s."

    def __init__(self, value, _type):
        errors.BzrError.__init__(self)
        self.value = value
        self._type = _type


class BadIndexData(errors.BzrError):

    _fmt = "Error in data for index %(value)s."

    def __init__(self, value):
        errors.BzrError.__init__(self)
        self.value = value


class BadIndexDuplicateKey(errors.BzrError):

    _fmt = "The key '%(key)s' is already in index '%(index)s'."

    def __init__(self, key, index):
        errors.BzrError.__init__(self)
        self.key = key
        self.index = index


class BadIndexKey(errors.BzrError):

    _fmt = "The key '%(key)s' is not a valid key."

    def __init__(self, key):
        errors.BzrError.__init__(self)
        self.key = key


class BadIndexOptions(errors.BzrError):

    _fmt = "Could not parse options for index %(value)s."

    def __init__(self, value):
        errors.BzrError.__init__(self)
        self.value = value


class BadIndexValue(errors.BzrError):

    _fmt = "The value '%(value)s' is not a valid value."

    def __init__(self, value):
        errors.BzrError.__init__(self)
        self.value = value


_whitespace_re = re.compile(b'[\t\n\x0b\x0c\r\x00 ]')
_newline_null_re = re.compile(b'[\n\0]')
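# Illustrative examples (not from the original file; plain re semantics):
# _whitespace_re rejects any key element containing whitespace or NUL, so
# _whitespace_re.search(b'a key') matches while _whitespace_re.search(b'a-key')
# returns None; _newline_null_re guards values, e.g. it matches b'bad\nvalue'.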
def _has_key_from_parent_map(self, key):
    """Check if this index has one key (via get_parent_map)."""
    return key in self.get_parent_map([key])


def _missing_keys_from_parent_map(self, keys):
    return set(keys) - set(self.get_parent_map(keys))
class GraphIndexBuilder(object):
    """A builder that can build a GraphIndex.

    The resulting graph has the structure::

      _SIGNATURE OPTIONS NODES NEWLINE
      _SIGNATURE := 'Bazaar Graph Index 1' NEWLINE
      OPTIONS := 'node_ref_lists=' DIGITS NEWLINE
      NODES := NODE*
      NODE := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
      KEY := Not-whitespace-utf8
      ABSENT := 'a'
      REFERENCES := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
      REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
      REFERENCE := DIGITS  ; digits is the byte offset in the index of the
      referenced key.
      VALUE := no-newline-no-null-bytes
    """
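    # A minimal usage sketch (illustrative, not from the original file):
    #
    #   builder = GraphIndexBuilder(reference_lists=1, key_elements=1)
    #   builder.add_node((b'parent',), b'value', ([],))
    #   builder.add_node((b'child',), b'value', ([(b'parent',)],))
    #   data = builder.finish().read()   # the full index as bytes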
    def __init__(self, reference_lists=0, key_elements=1):
        """Create a GraphIndex builder.

        :param reference_lists: The number of node references lists for each
            entry.
        :param key_elements: The number of bytestrings in each key.
        """

    def _check_key(self, key):
        """Raise BadIndexKey if key is not a valid key for this index."""
        if type(key) not in (tuple, StaticTuple):
            raise BadIndexKey(key)
        if self._key_length != len(key):
            raise BadIndexKey(key)
        for element in key:
            if (not element or type(element) != bytes or
                    _whitespace_re.search(element) is not None):
                raise BadIndexKey(key)
    def _external_references(self):
        """Return references that are not present in this index."""

                        key_dict = key_dict.setdefault(subkey, {})
                    key_dict[key[-1]] = key, value, references
            else:
                for key, (absent, references, value) in viewitems(self._nodes):
                    if absent:
                        continue
                    key_dict = nodes_by_key
        :param value: The value associated with this key. Must not contain
            newlines or null characters.
        :return: (node_refs, absent_references)

            * node_refs: basically a packed form of 'references' where all
              the keys are tuples.
            * absent_references: reference keys that are not in self._nodes.
              This may contain duplicates if the same key is referenced in
              multiple lists.
        """
        as_st = StaticTuple.from_sequence
        self._check_key(key)
        if _newline_null_re.search(value) is not None:
            raise BadIndexValue(value)
        if len(references) != self.reference_lists:
            raise BadIndexValue(references)
        node_refs = []
        absent_references = []
        for reference_list in references:
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        """
        (node_refs,
         absent_references) = self._check_key_ref_value(key, references, value)
        if key in self._nodes and self._nodes[key][0] != b'a':
            raise BadIndexDuplicateKey(key, self)
        for reference in absent_references:
            # There may be duplicates, but I don't think it is worth worrying
            # about
            self._nodes[reference] = (b'a', (), b'')
        self._absent_keys.update(absent_references)
        self._absent_keys.discard(key)
        self._nodes[key] = (b'', node_refs, value)
        if self._nodes_by_key is not None and self._key_length > 1:
            self._update_nodes_by_key(key, value, node_refs)
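        # Sketch of the resulting state (illustrative): self._nodes maps
        # key -> (absent, node_refs, value); a key that was only ever
        # referenced is stored as (b'a', (), b'') until add_node supplies it,
        # at which point _absent_keys.discard(key) above reinstates it.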
    def clear_cache(self):
        """See GraphIndex.clear_cache()

        This is a no-op, but we need the api to conform to a generic 'Index'
        abstraction.
        """

    def finish(self):
        """Finish the index.

        :returns: a BytesIO holding the full content of the index as it
            should be written to disk.
        """
        lines = [_SIGNATURE]
        lines.append(b'%s%d\n' % (_OPTION_NODE_REFS, self.reference_lists))
        lines.append(b'%s%d\n' % (_OPTION_KEY_ELEMENTS, self._key_length))
        key_count = len(self._nodes) - len(self._absent_keys)
        lines.append(b'%s%d\n' % (_OPTION_LEN, key_count))
        prefix_length = sum(len(x) for x in lines)
        # references are byte offsets. To avoid having to do nasty
        # polynomial work to resolve offsets (references to later in the
        # forward sorted by key. In future we may consider topological sorting,
        # at the cost of table scans for direct lookup, or a second index for
        # direct lookup
        nodes = sorted(viewitems(self._nodes))
        # if we do not prepass, we don't know how long it will be up front.
        expected_bytes = None
        # we only need to pre-pass if we have reference lists at all.
                            non_ref_bytes += len(ref_list) - 1
            # how many digits are needed to represent the total byte count?
            digits = 1
            possible_total_bytes = non_ref_bytes + total_references * digits
            while 10 ** digits < possible_total_bytes:
                digits += 1
                possible_total_bytes = non_ref_bytes + total_references * digits
            expected_bytes = possible_total_bytes + 1  # terminating newline
            # resolve key addresses.
            key_addresses = {}
            for key, non_ref_bytes, total_references in key_offset_info:
                key_addresses[key] = non_ref_bytes + total_references * digits
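            # Worked example (illustrative): with non_ref_bytes = 995 and
            # total_references = 3, the loop tries digits = 1 (998), 2 (1001),
            # 3 (1004, still >= 10**3) and settles on digits = 4 (1007 < 10**4),
            # so every byte-offset reference is zero-padded to 4 digits.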
            format_string = b'%%0%dd' % digits
        for key, (absent, references, value) in nodes:
            flattened_references = []
            for ref_list in references:
                ref_addresses = []
                for reference in ref_list:
                    ref_addresses.append(format_string %
                                         key_addresses[reference])
                flattened_references.append(b'\r'.join(ref_addresses))
            string_key = b'\x00'.join(key)
            lines.append(b"%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
                                                      b'\t'.join(flattened_references), value))
        lines.append(b'\n')
        result = BytesIO(b''.join(lines))
        if expected_bytes and len(result.getvalue()) != expected_bytes:
            raise errors.BzrError('Failed index creation. Internal error:'
                                  ' mismatched output length and expected length: %d %d' %
                                  (len(result.getvalue()), expected_bytes))
        return result
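    # Round-trip sketch (illustrative; 'transport' is assumed to be any
    # breezy transport):
    #
    #   data = builder.finish().read()
    #   transport.put_bytes('an-index', data)
    #   index = GraphIndex(transport, 'an-index', len(data))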
    def set_optimize(self, for_size=None, combine_backing_indices=None):


class GraphIndex(object):
    """An index for data with embedded graphs.
    """

    def __init__(self, transport, name, size, unlimited_cache=False, offset=0):
        """Open an index called name on transport.

        :param transport: A breezy.transport.Transport.
        :param name: A path to provide to transport API calls.
        :param size: The size of the index in bytes. This is used for bisection
            logic to perform partial index reads. While the size could be
    def __ne__(self, other):
        return not self.__eq__(other)

    def __lt__(self, other):
        # We don't really care about the order, just that there is an order.
        if (not isinstance(other, GraphIndex) and
                not isinstance(other, InMemoryGraphIndex)):
            raise TypeError(other)
        return hash(self) < hash(other)

    def __hash__(self):
        return hash((type(self), self._transport, self._name, self._size))

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__,
                           self._transport.abspath(self._name))
    def _buffer_all(self, stream=None):
        """Buffer all the index data.

        Mutates self._nodes and self.keys_by_offset.
        """
        if self._nodes is not None:
            # We already did this
            return
        if 'index' in debug.debug_flags:
            trace.mutter('Reading entire index %s',
                         self._transport.abspath(self._name))
        if stream is None:
            stream = self._transport.get(self._name)
        if self._base_offset != 0:
            # This is wasteful, but it is better than dealing with
            # adjusting all the offsets, etc.
            stream = BytesIO(stream.read()[self._base_offset:])
        self._read_prefix(stream)
        self._expected_elements = 3 + self._key_length
        # raw data keyed by offset
        self._keys_by_offset = {}
        # ready-to-return key:value or key:value, node_ref_lists
        self._nodes = {}
        self._nodes_by_key = None
        pos = stream.tell()
        lines = stream.read().split(b'\n')
        _, _, _, trailers = self._parse_lines(lines, pos)
        for key, absent, references, value in viewvalues(self._keys_by_offset):
            if absent:
                continue
            # resolve references:
    def external_references(self, ref_list_num):
        """Return references that are not present in this index."""
        self._buffer_all()
        if ref_list_num + 1 > self.node_ref_lists:
            raise ValueError('No ref list %d, index has %d ref lists'
                             % (ref_list_num, self.node_ref_lists))
        refs = set()
        nodes = self._nodes
        for key, (value, ref_lists) in viewitems(nodes):
            ref_list = ref_lists[ref_list_num]
            refs.update([ref for ref in ref_list if ref not in nodes])
        return refs
    def _get_nodes_by_key(self):
        if self._nodes_by_key is None:
            nodes_by_key = {}
            if self.node_ref_lists:
                for key, (value, references) in viewitems(self._nodes):
                    key_dict = nodes_by_key
                    for subkey in key[:-1]:
                        key_dict = key_dict.setdefault(subkey, {})
                    key_dict[key[-1]] = key, value, references
            else:
                for key, value in viewitems(self._nodes):
                    key_dict = nodes_by_key
                    for subkey in key[:-1]:
                        key_dict = key_dict.setdefault(subkey, {})
                    key_dict[key[-1]] = key, value
            self._nodes_by_key = nodes_by_key
        return self._nodes_by_key
    def iter_all_entries(self):
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(3,
                                  "iter_all_entries scales with size of history.")
        if self._nodes is None:
            self._buffer_all()
        if self.node_ref_lists:
            for key, (value, node_ref_lists) in viewitems(self._nodes):
                yield self, key, value, node_ref_lists
        else:
            for key, value in viewitems(self._nodes):
                yield self, key, value
    def _read_prefix(self, stream):
        signature = stream.read(len(self._signature()))
        if not signature == self._signature():
            raise BadIndexFormatSignature(self._name, GraphIndex)
        options_line = stream.readline()
        if not options_line.startswith(_OPTION_NODE_REFS):
            raise BadIndexOptions(self)
        try:
            self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = stream.readline()
        if not options_line.startswith(_OPTION_KEY_ELEMENTS):
            raise BadIndexOptions(self)
        try:
            self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = stream.readline()
        if not options_line.startswith(_OPTION_LEN):
            raise BadIndexOptions(self)
        try:
            self._key_count = int(options_line[len(_OPTION_LEN):-1])
        except ValueError:
            raise BadIndexOptions(self)
    def _resolve_references(self, references):
        """Return the resolved key references for references.

        References are resolved by looking up the location of the key in the
        _keys_by_offset map and substituting the key name, preserving ordering.

        :param references: An iterable of iterables of key locations. e.g.
            [[123, 456], [123]]
        :return: A tuple of tuples of keys.
        """
        node_refs = []
        for ref_list in references:
            node_refs.append(
                tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
        return tuple(node_refs)
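        # Illustrative example (not from the original file): with
        # _keys_by_offset = {77: ((b'a',), b'', (), b'v')},
        # _resolve_references([[77]]) returns (((b'a',),),) -
        # byte offsets in, key tuples out, ordering preserved.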
    @staticmethod
    def _find_index(range_map, key):
        """Helper for the _parsed_*_index calls.

        Given a range map - [(start, end), ...], finds the index of the range
        in the map for key if it is in the map, and if it is not there, the
        immediately preceding range in the map.
        """

        if self._nodes is not None:
            return self._iter_entries_from_total_buffer(keys)
        else:
            return (result[1] for result in bisect_multi.bisect_multi_bytes(
                self._lookup_keys_via_location, self._size, keys))
    def iter_entries_prefix(self, keys):
        if self._nodes is None:
            self._buffer_all()
        if self._key_length == 1:
            for key in keys:
                _sanity_check_key(self, key)
                if self.node_ref_lists:
                    value, node_refs = self._nodes[key]
                    yield self, key, value, node_refs
                else:
                    yield self, key, self._nodes[key]
            return
        nodes_by_key = self._get_nodes_by_key()
        for entry in _iter_entries_prefix(self, nodes_by_key, keys):
            yield entry
    def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
        """See BTreeIndex._find_ancestors."""

            index = self._parsed_byte_index(location)
            if (len(self._parsed_byte_map) and
                self._parsed_byte_map[index][0] <= location and
                    self._parsed_byte_map[index][1] > location):
                # the byte region has been parsed, so no read is needed.
            # _read_and_parse triggered a _buffer_all because we requested the
            # whole data range
            for location, key in location_keys:
                if key not in self._nodes:  # not present
                    result.append(((location, key), False))
                elif self.node_ref_lists:
                    value, refs = self._nodes[key]
                    result.append(((location, key),
                                   (self, key, value, refs)))
                else:
                    result.append(((location, key),
                                   (self, key, self._nodes[key])))
            return result
        # generate results:
        # - figure out <, >, missing, present
                        pending_references.append((location, key))
                    result.append(((location, key), (self, key,
                                                     value, self._resolve_references(refs))))
                else:
                    result.append(((location, key),
                                   (self, key, self._bisect_nodes[key])))
            # has the region the key should be in, been parsed?
        # answer key references we had to look-up-late.
        for location, key in pending_references:
            value, refs = self._bisect_nodes[key]
            result.append(((location, key), (self, key,
                                             value, self._resolve_references(refs))))
        return result
    def _parse_header_from_bytes(self, bytes):
        signature = bytes[0:len(self._signature())]
        if not signature == self._signature():
            raise BadIndexFormatSignature(self._name, GraphIndex)
        lines = bytes[len(self._signature()):].splitlines()
        options_line = lines[0]
        if not options_line.startswith(_OPTION_NODE_REFS):
            raise BadIndexOptions(self)
        try:
            self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = lines[1]
        if not options_line.startswith(_OPTION_KEY_ELEMENTS):
            raise BadIndexOptions(self)
        try:
            self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = lines[2]
        if not options_line.startswith(_OPTION_LEN):
            raise BadIndexOptions(self)
        try:
            self._key_count = int(options_line[len(_OPTION_LEN):])
        except ValueError:
            raise BadIndexOptions(self)
        # calculate the bytes we have processed
        header_end = (len(signature) + len(lines[0]) + len(lines[1]) +
                      len(lines[2]) + 3)
        self._parsed_bytes(0, (), header_end, ())
        # setup parsing state
        self._expected_elements = 3 + self._key_length
        # raw data keyed by offset
        self._keys_by_offset = {}
        if not start_adjacent:
            # work around python bug in rfind
            if trim_start is None:
                trim_start = data.find(b'\n') + 1
            else:
                trim_start = data.find(b'\n', trim_start) + 1
            if not (trim_start != 0):
                raise AssertionError('no \n was present')
            # print 'removing start', offset, trim_start, repr(data[:trim_start])
        if not end_adjacent:
            # work around python bug in rfind
            if trim_end is None:
                trim_end = data.rfind(b'\n') + 1
            else:
                trim_end = data.rfind(b'\n', None, trim_end) + 1
            if not (trim_end != 0):
                raise AssertionError('no \n was present')
            # print 'removing end', offset, trim_end, repr(data[trim_end:])
        trimmed_data = data[trim_start:trim_end]
        if not (trimmed_data):
            raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'
                                 % (trim_start, trim_end, offset, offset + len(data)))
        if trim_start:
            offset += trim_start
        # print "parsing", repr(trimmed_data)
        # splitlines mangles the \r delimiters.. don't use it.
        lines = trimmed_data.split(b'\n')
        first_key, last_key, nodes, _ = self._parse_lines(lines, pos)
        for key, value in nodes:
            self._bisect_nodes[key] = value
        self._parsed_bytes(offset, first_key,
                           offset + len(trimmed_data), last_key)
        return offset + len(trimmed_data), last_segment
    def _parse_lines(self, lines, pos):
        key = None
        first_key = None
        trailers = 0
        nodes = []
        for line in lines:
            if line == b'':
                # must be at the end
                if self._size:
                    if not (self._size == pos + 1):
                        raise AssertionError("%s %s" % (self._size, pos))
                trailers += 1
                continue
            elements = line.split(b'\0')
            if len(elements) != self._expected_elements:
                raise BadIndexData(self)
            # keys are tuples. Each element is a string that may occur many
            # times, so we intern them to save space. AB, RC, 200807
            key = tuple([bytesintern(element)
                         for element in elements[:self._key_length]])
            if first_key is None:
                first_key = key
            absent, references, value = elements[-3:]
            ref_lists = []
            for ref_string in references.split(b'\t'):
                ref_lists.append(tuple([
                    int(ref) for ref in ref_string.split(b'\r') if ref
                    ]))
            ref_lists = tuple(ref_lists)
            self._keys_by_offset[pos] = (key, absent, ref_lists, value)
            pos += len(line) + 1  # +1 for the \n
            if absent:
                continue
            if self.node_ref_lists:
        # combine two regions
        if (index + 1 < len(self._parsed_byte_map) and
            self._parsed_byte_map[index][1] == start and
                self._parsed_byte_map[index + 1][0] == end):
            # combine two regions
            self._parsed_byte_map[index] = (self._parsed_byte_map[index][0],
                                            self._parsed_byte_map[index + 1][1])
            self._parsed_key_map[index] = (self._parsed_key_map[index][0],
                                           self._parsed_key_map[index + 1][1])
            del self._parsed_byte_map[index + 1]
            del self._parsed_key_map[index + 1]
        elif self._parsed_byte_map[index][1] == start:
            # extend the lower entry
            self._parsed_byte_map[index] = (
                self._parsed_byte_map[index][0], end)
            self._parsed_key_map[index] = (
                self._parsed_key_map[index][0], end_key)
        elif (index + 1 < len(self._parsed_byte_map) and
              self._parsed_byte_map[index + 1][0] == end):
            # extend the higher entry
            self._parsed_byte_map[index + 1] = (
                start, self._parsed_byte_map[index + 1][1])
        base_offset = self._base_offset
        if base_offset != 0:
            # Rewrite the ranges for the offset
            readv_ranges = [(start + base_offset, size)
                            for start, size in readv_ranges]
        readv_data = self._transport.readv(self._name, readv_ranges, True,
                                           self._size + self._base_offset)
        # parse
        for offset, data in readv_data:
            offset -= base_offset
            if offset == 0 and len(data) == self._size:
                # We read the whole range, most likely because the
                # Transport upcast our readv ranges into one long request
                # for enough total data to grab the whole index.
                self._buffer_all(BytesIO(data))
                return
            if self._bisect_nodes is None:
                # this must be the start
    performance significantly. For example, if one index is on local disk and a
    second on a remote server, the local disk index should be before the other
    in the index list.

    Also, queries tend to need results from the same indices as previous
    queries. So the indices will be reordered after every query to put the
    indices that had the result(s) of that query first (while otherwise
    preserving the relative ordering).
    """
1337
def get_parent_map(self, keys):
1288
1338
"""See graph.StackedParentsProvider.get_parent_map"""
1289
1339
search_keys = set(keys)
1290
if NULL_REVISION in search_keys:
1291
search_keys.discard(NULL_REVISION)
1292
found_parents = {NULL_REVISION:[]}
1340
if _mod_revision.NULL_REVISION in search_keys:
1341
search_keys.discard(_mod_revision.NULL_REVISION)
1342
found_parents = {_mod_revision.NULL_REVISION: []}
1294
1344
found_parents = {}
1295
1345
for index, key, value, refs in self.iter_entries(search_keys):
1296
1346
parents = refs[0]
1297
1347
if not parents:
1298
parents = (NULL_REVISION,)
1348
parents = (_mod_revision.NULL_REVISION,)
1299
1349
found_parents[key] = parents
1300
1350
return found_parents
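        # Usage sketch (illustrative): for revision indices whose first
        # reference list holds the parent pointers,
        #   self.get_parent_map([(b'rev-2',)])
        # might return {(b'rev-2',): ((b'rev-1',),)}; a present key with no
        # parents maps to (NULL_REVISION,).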
    __contains__ = _has_key_from_parent_map
    def insert_index(self, pos, index, name=None):
        """Insert a new index in the list of indices to query.
    def _move_to_front_by_index(self, hit_indices):
        """Core logic for _move_to_front.

        Returns a list of names corresponding to the hit_indices param.
        """
        indices_info = zip(self._index_names, self._indices)
        if 'index' in debug.debug_flags:
            indices_info = list(indices_info)
            trace.mutter('CombinedGraphIndex reordering: currently %r, '
                         'promoting %r', indices_info, hit_indices)
        hit_names = []
        unhit_names = []
        new_hit_indices = []
        unhit_indices = []
        for offset, (name, idx) in enumerate(indices_info):
            if idx in hit_indices:
                hit_names.append(name)
                new_hit_indices.append(idx)
                if len(new_hit_indices) == len(hit_indices):
                    # We've found all of the hit entries, everything else is
                    # unhit
                    unhit_names.extend(self._index_names[offset + 1:])
                    unhit_indices.extend(self._indices[offset + 1:])
                    break
            else:
                unhit_names.append(name)
                unhit_indices.append(idx)
        self._indices = new_hit_indices + unhit_indices
        self._index_names = hit_names + unhit_names
        if 'index' in debug.debug_flags:
            trace.mutter('CombinedGraphIndex reordered: %r', self._indices)
        return hit_names
    def _move_to_front_by_name(self, hit_names):

                # CombinedGraphIndex does not know what the ref lists
                # mean.
                search_keys = index._find_ancestors(search_keys,
                                                    ref_list_num, parent_map, index_missing_keys)
                # print ' \t \t%2d\t%4d\t%5d\t%5d' % (
                #     sub_generation, len(search_keys),
                #     len(parent_map), len(index_missing_keys))
                return sum((index.key_count() for index in self._indices), 0)
            except errors.NoSuchFile as e:
                if not self._try_reload(e):
                    raise

    missing_keys = _missing_keys_from_parent_map
    def _try_reload(self, error):
        """We just got a NoSuchFile exception.

        Try to reload the indices, if it fails, just raise the current
        exception.
        """
        if self._reload_func is None:
            return False
        trace.mutter(
            'Trying to reload after getting exception: %s', str(error))
        if not self._reload_func():
            # We tried to reload, but nothing changed, so we fail anyway
            trace.mutter('_reload_func indicated nothing has changed.'
                         ' Raising original exception.')
            return False
        return True
    def set_sibling_indices(self, sibling_combined_graph_indices):
        """Set the CombinedGraphIndex objects to reorder after reordering self.
        """
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(3,
                                  "iter_all_entries scales with size of history.")
        if self.reference_lists:
            for key, (absent, references, value) in viewitems(self._nodes):
                if not absent:
                    yield self, key, value, references
        else:
            for key, (absent, references, value) in viewitems(self._nodes):
                if not absent:
                    yield self, key, value
        will be returned, and every match that is in the index will be
        returned.
        """
        # XXX: Too much duplication with the GraphIndex class; consider finding
        # a good place to pull out the actual common logic.
        keys = set(keys)
        if not keys:
            return
        if self._key_length == 1:
            for key in keys:
                _sanity_check_key(self, key)
                node = self._nodes[key]
                if node[0]:
                    continue
                if self.reference_lists:
                    yield self, key, node[2], node[1]
                else:
                    yield self, key, node[2]
            return
        nodes_by_key = self._get_nodes_by_key()
        for entry in _iter_entries_prefix(self, nodes_by_key, keys):
            yield entry
    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For InMemoryGraphIndex the estimate is exact.
        """
        return len(self._nodes) - len(self._absent_keys)

    def validate(self):
        """In memory indexes have no known corruption at the moment."""
    def __lt__(self, other):
        # We don't really care about the order, just that there is an order.
        if (not isinstance(other, GraphIndex) and
                not isinstance(other, InMemoryGraphIndex)):
            raise TypeError(other)
        return hash(self) < hash(other)
class GraphIndexPrefixAdapter(object):
    """An adapter between GraphIndex with different key lengths.
    """

    def __init__(self, adapted, prefix, missing_key_length,
                 add_nodes_callback=None):
        """Construct an adapter against adapted with prefix."""
        self.adapted = adapted
        self.prefix_key = prefix + (None,) * missing_key_length
        self.prefix = prefix
        self.prefix_len = len(prefix)
        self.add_nodes_callback = add_nodes_callback
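    # Usage sketch (illustrative, not from the original file): expose the
    # (b'file-id',) subspace of a 2-element-key index as a 1-element-key index:
    #
    #   adapter = GraphIndexPrefixAdapter(index, (b'file-id',), 1)
    #   entries = list(adapter.iter_all_entries())  # keys without the prefix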
            for (key, value, node_refs) in nodes:
                adjusted_references = (
                    tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
                          for ref_list in node_refs))
                translated_nodes.append((self.prefix + key, value,
                                         adjusted_references))
        except ValueError:
            # XXX: TODO add an explicit interface for getting the reference list
            # status, to handle this bit of user-friendliness in the API more
            # explicitly.
            for (key, value) in nodes:
                translated_nodes.append((self.prefix + key, value))

        for node in an_iter:
            # cross checks
            if node[1][:self.prefix_len] != self.prefix:
                raise BadIndexData(self)
            for ref_list in node[3]:
                for ref_node in ref_list:
                    if ref_node[:self.prefix_len] != self.prefix:
                        raise BadIndexData(self)
            yield node[0], node[1][self.prefix_len:], node[2], (
                tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
                      for ref_list in node[3]))
    def iter_all_entries(self):
        """Iterate over all keys within the index

    def validate(self):
        """Call the adapted's validate."""
        self.adapted.validate()
def _sanity_check_key(index_or_builder, key):
    """Raise BadIndexKey if key cannot be used for prefix matching."""
    if key[0] is None:
        raise BadIndexKey(key)
    if len(key) != index_or_builder._key_length:
        raise BadIndexKey(key)
def _iter_entries_prefix(index_or_builder, nodes_by_key, keys):
    """Helper for implementing prefix matching iterators."""
    for key in keys:
        _sanity_check_key(index_or_builder, key)
        # find what it refers to:
        key_dict = nodes_by_key
        elements = list(key)
        # find the subdict whose contents should be returned.
        try:
            while len(elements) and elements[0] is not None:
                key_dict = key_dict[elements[0]]
                elements = elements[1:]
        except KeyError:
            # a non-existent lookup.
            continue
        if len(elements):
            dicts = [key_dict]
            while dicts:
                values_view = viewvalues(dicts.pop())
                # can't be empty or would not exist
                value = next(iter(values_view))
                if isinstance(value, dict):
                    # still descending, push values
                    dicts.extend(values_view)
                else:
                    # at leaf tuples, yield values
                    for value in values_view:
                        # each value is the key:value:node refs tuple
                        yield (index_or_builder, ) + value
        else:
            # the last thing looked up was a terminal element
            yield (index_or_builder, ) + key_dict
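# Illustrative data shape (not from the original file): for a 2-element-key
# index, nodes_by_key nests one dict level per key element, e.g.
#   {b'file-id': {b'rev-1': ((b'file-id', b'rev-1'), b'value', ((),))}}
# so a lookup key like (b'file-id', None) descends one level and then yields
# every leaf tuple beneath it.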