from bisect import bisect_right
from io import BytesIO
import re

from ..lazy_import import lazy_import
lazy_import(globals(), """
from breezy import (
    bisect_multi,
    revision as _mod_revision,
    trace,
    )
""")
from .. import (
    debug,
    errors,
    )
from ..sixish import (
    bytesintern,
    viewitems,
    viewvalues,
    )
from ..static_tuple import StaticTuple
_HEADER_READV = (0, 200)
_OPTION_KEY_ELEMENTS = b"key_elements="
_OPTION_LEN = b"len="
_OPTION_NODE_REFS = b"node_ref_lists="
_SIGNATURE = b"Bazaar Graph Index 1\n"


class BadIndexFormatSignature(errors.BzrError):

    _fmt = "%(value)s is not an index of type %(_type)s."

    def __init__(self, value, _type):
        errors.BzrError.__init__(self)
        self.value = value
        self._type = _type


class BadIndexData(errors.BzrError):

    _fmt = "Error in data for index %(value)s."

    def __init__(self, value):
        errors.BzrError.__init__(self)
        self.value = value


class BadIndexDuplicateKey(errors.BzrError):

    _fmt = "The key '%(key)s' is already in index '%(index)s'."

    def __init__(self, key, index):
        errors.BzrError.__init__(self)
        self.key = key
        self.index = index


class BadIndexKey(errors.BzrError):

    _fmt = "The key '%(key)s' is not a valid key."

    def __init__(self, key):
        errors.BzrError.__init__(self)
        self.key = key


class BadIndexOptions(errors.BzrError):

    _fmt = "Could not parse options for index %(value)s."

    def __init__(self, value):
        errors.BzrError.__init__(self)
        self.value = value


class BadIndexValue(errors.BzrError):

    _fmt = "The value '%(value)s' is not a valid value."

    def __init__(self, value):
        errors.BzrError.__init__(self)
        self.value = value
_whitespace_re = re.compile(b'[\t\n\x0b\x0c\r\x00 ]')
_newline_null_re = re.compile(b'[\n\0]')


def _has_key_from_parent_map(self, key):
    """Check if this index has one key, by querying get_parent_map."""
    return key in self.get_parent_map([key])
class GraphIndexBuilder(object):
"""A builder that can build a GraphIndex.
    The resulting graph has the structure::

      _SIGNATURE OPTIONS NODES NEWLINE
      _SIGNATURE     := 'Bazaar Graph Index 1' NEWLINE
      OPTIONS        := 'node_ref_lists=' DIGITS NEWLINE
      NODES          := NODE*
      NODE           := KEY NULL ABSENT? NULL REFERENCES NULL VALUE NEWLINE
      KEY            := Not-whitespace-utf8
      ABSENT         := 'a'
      REFERENCES     := REFERENCE_LIST (TAB REFERENCE_LIST){node_ref_lists - 1}
      REFERENCE_LIST := (REFERENCE (CR REFERENCE)*)?
      REFERENCE      := DIGITS  ; digits is the byte offset in the index of the
                                ; referenced key.
      VALUE          := no-newline-no-null-bytes
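
    For example (an illustrative sketch of the serialised form, not bytes
    captured from a real index), a one-key index with one reference list
    could look like::

      Bazaar Graph Index 1
      node_ref_lists=1
      key_elements=1
      len=1
      demo\x00\x00\x00a value

    Here ``demo`` is the key, the ABSENT and REFERENCES fields between the
    NUL separators are empty (the node is present and has no references),
    and ``a value`` is the value.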
def __init__(self, reference_lists=0, key_elements=1):
    def _check_key(self, key):
        """Raise BadIndexKey if key is not a valid key for this index."""
        if type(key) not in (tuple, StaticTuple):
            raise BadIndexKey(key)
        if self._key_length != len(key):
            raise BadIndexKey(key)
        for element in key:
            if (not element or type(element) != bytes
                    or _whitespace_re.search(element) is not None):
                raise BadIndexKey(key)
def _external_references(self):
"""Return references that are not present in this index.
                    key_dict = key_dict.setdefault(subkey, {})
                key_dict[key[-1]] = key, value, references
        else:
            for key, (absent, references, value) in viewitems(self._nodes):
                if absent:
                    continue
                key_dict = nodes_by_key
        :param value: The value associated with this key. Must not contain
            newlines or null characters.
        :return: (node_refs, absent_references)

            * node_refs: basically a packed form of 'references' where all
              the keys are tuples.
            * absent_references: reference keys that are not in self._nodes.
              This may contain duplicates if the same key is referenced in
              multiple lists.
        """
        as_st = StaticTuple.from_sequence
        self._check_key(key)
        if _newline_null_re.search(value) is not None:
            raise BadIndexValue(value)
        if len(references) != self.reference_lists:
            raise BadIndexValue(references)
        node_refs = []
        absent_references = []
        for reference_list in references:
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        """
        (node_refs,
         absent_references) = self._check_key_ref_value(key, references, value)
        if key in self._nodes and self._nodes[key][0] != b'a':
            raise BadIndexDuplicateKey(key, self)
        for reference in absent_references:
            # There may be duplicates, but I don't think it is worth worrying
            # about
            self._nodes[reference] = (b'a', (), b'')
        self._absent_keys.update(absent_references)
        self._absent_keys.discard(key)
        self._nodes[key] = (b'', node_refs, value)
        if self._nodes_by_key is not None and self._key_length > 1:
            self._update_nodes_by_key(key, value, node_refs)
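
    # Illustrative usage sketch (not part of the original API docs): building
    # and serialising a small index with one reference list. The keys and
    # values here are hypothetical.
    #
    #   builder = GraphIndexBuilder(reference_lists=1)
    #   builder.add_node((b'parent',), b'a value', ([],))
    #   builder.add_node((b'child',), b'a value', ([(b'parent',)],))
    #   index_bytes = builder.finish().read()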
This is a no-op, but we need the api to conform to a generic 'Index'
def finish(self):
        :returns: BytesIO holding the full content of the index as it
            should be written to disk.
        """
        lines = [_SIGNATURE]
        lines.append(b'%s%d\n' % (_OPTION_NODE_REFS, self.reference_lists))
        lines.append(b'%s%d\n' % (_OPTION_KEY_ELEMENTS, self._key_length))
        key_count = len(self._nodes) - len(self._absent_keys)
        lines.append(b'%s%d\n' % (_OPTION_LEN, key_count))
prefix_length = sum(len(x) for x in lines)
# references are byte offsets. To avoid having to do nasty
# polynomial work to resolve offsets (references to later in the
# forward sorted by key. In future we may consider topological sorting,
# at the cost of table scans for direct lookup, or a second index for
nodes = sorted(viewitems(self._nodes))
# if we do not prepass, we don't know how long it will be up front.
expected_bytes = None
# we only need to pre-pass if we have reference lists at all.
non_ref_bytes += len(ref_list) - 1
# how many digits are needed to represent the total byte count?
            digits = 1
possible_total_bytes = non_ref_bytes + total_references * digits
while 10 ** digits < possible_total_bytes:
                digits += 1
possible_total_bytes = non_ref_bytes + total_references * digits
expected_bytes = possible_total_bytes + 1 # terminating newline
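            # Worked example with illustrative numbers: if non_ref_bytes is
            # 960 and there are 20 references, digits=1 gives 960 + 20*1 = 980
            # and 10**1 < 980, so digits keeps growing; at digits=3 we get
            # 960 + 20*3 = 1020 and 10**3 < 1020 still holds; at digits=4 we
            # get 960 + 20*4 = 1040 and 10**4 >= 1040, so every reference is
            # written as a 4-digit zero-padded byte offset.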
# resolve key addresses.
key_addresses = {}
for key, non_ref_bytes, total_references in key_offset_info:
key_addresses[key] = non_ref_bytes + total_references * digits
format_string = b'%%0%dd' % digits
for key, (absent, references, value) in nodes:
flattened_references = []
for ref_list in references:
ref_addresses = []
for reference in ref_list:
                    ref_addresses.append(format_string %
                                         key_addresses[reference])
                flattened_references.append(b'\r'.join(ref_addresses))
            string_key = b'\x00'.join(key)
            lines.append(b"%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
                b'\t'.join(flattened_references), value))
        lines.append(b'\n')
        result = BytesIO(b''.join(lines))
if expected_bytes and len(result.getvalue()) != expected_bytes:
raise errors.BzrError('Failed index creation. Internal error:'
' mismatched output length and expected length: %d %d' %
(len(result.getvalue()), expected_bytes))
        return result
def set_optimize(self, for_size=None, combine_backing_indices=None):
def __init__(self, transport, name, size, unlimited_cache=False, offset=0):
"""Open an index called name on transport.
        :param transport: A breezy.transport.Transport.
:param name: A path to provide to transport API calls.
:param size: The size of the index in bytes. This is used for bisection
logic to perform partial index reads. While the size could be
def __ne__(self, other):
return not self.__eq__(other)
def __lt__(self, other):
# We don't really care about the order, just that there is an order.
if (not isinstance(other, GraphIndex) and
not isinstance(other, InMemoryGraphIndex)):
raise TypeError(other)
return hash(self) < hash(other)

    def __hash__(self):
return hash((type(self), self._transport, self._name, self._size))
def __repr__(self):
return "%s(%r)" % (self.__class__.__name__,
self._transport.abspath(self._name))
def _buffer_all(self, stream=None):
"""Buffer all the index data.
# We already did this
if 'index' in debug.debug_flags:
trace.mutter('Reading entire index %s',
self._transport.abspath(self._name))
if stream is None:
stream = self._transport.get(self._name)
if self._base_offset != 0:
# This is wasteful, but it is better than dealing with
# adjusting all the offsets, etc.
            stream = BytesIO(stream.read()[self._base_offset:])
        self._read_prefix(stream)
        self._expected_elements = 3 + self._key_length
        # raw data keyed by offset
        self._keys_by_offset = {}
        # ready-to-return key:value or key:value, node_ref_lists
        self._nodes = {}
        self._nodes_by_key = None
        trailers = 0
        pos = stream.tell()
        lines = stream.read().split(b'\n')
        # GZ 2009-09-20: Should really use a try/finally block to ensure close
        stream.close()
        del lines[-1]
        _, _, _, trailers = self._parse_lines(lines, pos)
        for key, absent, references, value in viewvalues(self._keys_by_offset):
# resolve references:
self._buffer_all()
if ref_list_num + 1 > self.node_ref_lists:
raise ValueError('No ref list %d, index has %d ref lists'
% (ref_list_num, self.node_ref_lists))
nodes = self._nodes
for key, (value, ref_lists) in viewitems(nodes):
ref_list = ref_lists[ref_list_num]
refs.update([ref for ref in ref_list if ref not in nodes])
if self._nodes_by_key is None:
nodes_by_key = {}
if self.node_ref_lists:
for key, (value, references) in viewitems(self._nodes):
key_dict = nodes_by_key
for subkey in key[:-1]:
key_dict = key_dict.setdefault(subkey, {})
key_dict[key[-1]] = key, value, references
for key, value in viewitems(self._nodes):
key_dict = nodes_by_key
for subkey in key[:-1]:
key_dict = key_dict.setdefault(subkey, {})
if 'evil' in debug.debug_flags:
trace.mutter_callsite(3,
"iter_all_entries scales with size of history.")
if self._nodes is None:
self._buffer_all()
if self.node_ref_lists:
for key, (value, node_ref_lists) in viewitems(self._nodes):
yield self, key, value, node_ref_lists
for key, value in viewitems(self._nodes):
yield self, key, value
    def _read_prefix(self, stream):
        signature = stream.read(len(self._signature()))
        if not signature == self._signature():
            raise BadIndexFormatSignature(self._name, GraphIndex)
        options_line = stream.readline()
        if not options_line.startswith(_OPTION_NODE_REFS):
            raise BadIndexOptions(self)
        try:
            self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = stream.readline()
        if not options_line.startswith(_OPTION_KEY_ELEMENTS):
            raise BadIndexOptions(self)
        try:
            self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = stream.readline()
        if not options_line.startswith(_OPTION_LEN):
            raise BadIndexOptions(self)
        try:
            self._key_count = int(options_line[len(_OPTION_LEN):-1])
        except ValueError:
            raise BadIndexOptions(self)
def _resolve_references(self, references):
"""Return the resolved key references for references.
for ref_list in references:
            node_refs.append(
                tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
return tuple(node_refs)
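
    # Illustrative example of _resolve_references (hypothetical offsets): if
    # _keys_by_offset is {10: ((b'a',), b'', (), b''), 25: ((b'b',), b'', (), b'')},
    # then references ((10, 25),) resolve to (((b'a',), (b'b',)),).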
    @staticmethod
    def _find_index(range_map, key):
"""Helper for the _parsed_*_index calls.
Given a range map - [(start, end), ...], finds the index of the range
if self._nodes is not None:
return self._iter_entries_from_total_buffer(keys)
        return (result[1] for result in bisect_multi.bisect_multi_bytes(
self._lookup_keys_via_location, self._size, keys))
def iter_entries_prefix(self, keys):
self._buffer_all()
if self._key_length == 1:
            for key in keys:
_sanity_check_key(self, key)
if self.node_ref_lists:
value, node_refs = self._nodes[key]
yield self, key, value, node_refs
                else:
yield self, key, self._nodes[key]
            return
nodes_by_key = self._get_nodes_by_key()
for entry in _iter_entries_prefix(self, nodes_by_key, keys):
            yield entry

def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):
"""See BTreeIndex._find_ancestors."""
index = self._parsed_byte_index(location)
if (len(self._parsed_byte_map) and
self._parsed_byte_map[index][0] <= location and
self._parsed_byte_map[index][1] > location):
# the byte region has been parsed, so no read is needed.
# _read_and_parse triggered a _buffer_all because we requested the
# whole data range
for location, key in location_keys:
if key not in self._nodes: # not present
result.append(((location, key), False))
elif self.node_ref_lists:
value, refs = self._nodes[key]
result.append(((location, key),
(self, key, value, refs)))
                else:
result.append(((location, key),
(self, key, self._nodes[key])))
# generate results:
# - figure out <, >, missing, present
pending_references.append((location, key))
result.append(((location, key), (self, key,
value, self._resolve_references(refs))))
                else:
result.append(((location, key),
(self, key, self._bisect_nodes[key])))
# has the region the key should be in, been parsed?
# answer key references we had to look-up-late.
value, refs = self._bisect_nodes[key]
result.append(((location, key), (self, key,
value, self._resolve_references(refs))))
    def _parse_header_from_bytes(self, bytes):
        signature = bytes[0:len(self._signature())]
        if not signature == self._signature():
            raise BadIndexFormatSignature(self._name, GraphIndex)
        lines = bytes[len(self._signature()):].splitlines()
        options_line = lines[0]
        if not options_line.startswith(_OPTION_NODE_REFS):
            raise BadIndexOptions(self)
        try:
            self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = lines[1]
        if not options_line.startswith(_OPTION_KEY_ELEMENTS):
            raise BadIndexOptions(self)
        try:
            self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):])
        except ValueError:
            raise BadIndexOptions(self)
        options_line = lines[2]
        if not options_line.startswith(_OPTION_LEN):
            raise BadIndexOptions(self)
        try:
            self._key_count = int(options_line[len(_OPTION_LEN):])
        except ValueError:
            raise BadIndexOptions(self)
        # calculate the bytes we have processed
        header_end = (len(signature) + len(lines[0]) + len(lines[1]) +
                      len(lines[2]) + 3)
        self._parsed_bytes(0, (), header_end, ())
# setup parsing state
self._expected_elements = 3 + self._key_length
# raw data keyed by offset
if not start_adjacent:
# work around python bug in rfind
if trim_start is None:
trim_start = data.find(b'\n') + 1
            else:
trim_start = data.find(b'\n', trim_start) + 1
if not (trim_start != 0):
raise AssertionError('no \n was present')
# print 'removing start', offset, trim_start, repr(data[:trim_start])
if not end_adjacent:
# work around python bug in rfind
if trim_end is None:
trim_end = data.rfind(b'\n') + 1
            else:
trim_end = data.rfind(b'\n', None, trim_end) + 1
if not (trim_end != 0):
raise AssertionError('no \n was present')
# print 'removing end', offset, trim_end, repr(data[trim_end:])
trimmed_data = data[trim_start:trim_end]
if not (trimmed_data):
raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'
% (trim_start, trim_end, offset, offset + len(data)))
offset += trim_start
# print "parsing", repr(trimmed_data)
# splitlines mangles the \r delimiters.. don't use it.
lines = trimmed_data.split(b'\n')
first_key, last_key, nodes, _ = self._parse_lines(lines, pos)
for key, value in nodes:
self._bisect_nodes[key] = value
self._parsed_bytes(offset, first_key,
offset + len(trimmed_data), last_key)
return offset + len(trimmed_data), last_segment
def _parse_lines(self, lines, pos):
for line in lines:
# must be at the end
if not (self._size == pos + 1):
raise AssertionError("%s %s" % (self._size, pos))
elements = line.split(b'\0')
if len(elements) != self._expected_elements:
raise BadIndexData(self)
# keys are tuples. Each element is a string that may occur many
# times, so we intern them to save space. AB, RC, 200807
key = tuple([bytesintern(element)
for element in elements[:self._key_length]])
if first_key is None:
first_key = key
absent, references, value = elements[-3:]
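            # Illustrative example (hypothetical data): with _key_length 1 and
            # one reference list, the line b'demo\x00\x00\x00a value' splits
            # into [b'demo', b'', b'', b'a value']: the key, an empty ABSENT
            # marker (the node is present), an empty reference list, and the
            # value.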
            ref_lists = []
for ref_string in references.split(b'\t'):
ref_lists.append(tuple([
int(ref) for ref in ref_string.split(b'\r') if ref
                    ]))
ref_lists = tuple(ref_lists)
self._keys_by_offset[pos] = (key, absent, ref_lists, value)
pos += len(line) + 1 # +1 for the \n
if self.node_ref_lists:
# combine two regions
if (index + 1 < len(self._parsed_byte_map) and
self._parsed_byte_map[index][1] == start and
self._parsed_byte_map[index + 1][0] == end):
# combine two regions
self._parsed_byte_map[index] = (self._parsed_byte_map[index][0],
self._parsed_byte_map[index + 1][1])
self._parsed_key_map[index] = (self._parsed_key_map[index][0],
self._parsed_key_map[index + 1][1])
del self._parsed_byte_map[index + 1]
del self._parsed_key_map[index + 1]
elif self._parsed_byte_map[index][1] == start:
self._parsed_key_map[index] = (
self._parsed_key_map[index][0], end_key)
elif (index + 1 < len(self._parsed_byte_map) and
self._parsed_byte_map[index + 1][0] == end):
# extend the higher entry
self._parsed_byte_map[index + 1] = (
start, self._parsed_byte_map[index + 1][1])
base_offset = self._base_offset
if base_offset != 0:
# Rewrite the ranges for the offset
readv_ranges = [(start + base_offset, size)
for start, size in readv_ranges]
readv_data = self._transport.readv(self._name, readv_ranges, True,
self._size + self._base_offset)
for offset, data in readv_data:
offset -= base_offset
# We read the whole range, most likely because the
# Transport upcast our readv ranges into one long request
# for enough total data to grab the whole index.
self._buffer_all(BytesIO(data))
if self._bisect_nodes is None:
# this must be the start
performance significantly. For example, if one index is on local disk and a
second on a remote server, the local disk index should be before the other
in the index list.
Also, queries tend to need results from the same indices as previous
queries. So the indices will be reordered after every query to put the
indices that had the result(s) of that query first (while otherwise
def get_parent_map(self, keys):
"""See graph.StackedParentsProvider.get_parent_map"""
search_keys = set(keys)
if _mod_revision.NULL_REVISION in search_keys:
search_keys.discard(_mod_revision.NULL_REVISION)
found_parents = {_mod_revision.NULL_REVISION: []}
        else:
found_parents = {}
for index, key, value, refs in self.iter_entries(search_keys):
parents = refs[0]
if not parents:
parents = (_mod_revision.NULL_REVISION,)
found_parents[key] = parents
return found_parents
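
    # Illustrative sketch (hypothetical keys): with revision parents stored in
    # the first reference list,
    #
    #   parent_map = combined_index.get_parent_map({(b'rev-2',)})
    #
    # could return {(b'rev-2',): ((b'rev-1',),)}; a key present with no
    # parents maps to (_mod_revision.NULL_REVISION,).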
__contains__ = _has_key_from_parent_map
def insert_index(self, pos, index, name=None):
"""Insert a new index in the list of indices to query.
def _move_to_front_by_index(self, hit_indices):
"""Core logic for _move_to_front.
Returns a list of names corresponding to the hit_indices param.
indices_info = zip(self._index_names, self._indices)
if 'index' in debug.debug_flags:
indices_info = list(indices_info)
trace.mutter('CombinedGraphIndex reordering: currently %r, '
'promoting %r', indices_info, hit_indices)
unhit_names = []
new_hit_indices = []
if len(new_hit_indices) == len(hit_indices):
# We've found all of the hit entries, everything else is
                    # unhit
unhit_names.extend(self._index_names[offset + 1:])
unhit_indices.extend(self._indices[offset + 1:])
                    break
            else:
unhit_names.append(name)
self._indices = new_hit_indices + unhit_indices
self._index_names = hit_names + unhit_names
if 'index' in debug.debug_flags:
trace.mutter('CombinedGraphIndex reordered: %r', self._indices)
return hit_names
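
    # Illustrative example: if the index names are ['a', 'b', 'c'] and
    # hit_indices holds only the index named 'c', the indices are reordered
    # to ['c', 'a', 'b'] and ['c'] is returned.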
def _move_to_front_by_name(self, hit_names):
# CombinedGraphIndex does not know what the ref lists
search_keys = index._find_ancestors(search_keys,
                ref_list_num, parent_map, index_missing_keys)
# print ' \t \t%2d\t%4d\t%5d\t%5d' % (
# sub_generation, len(search_keys),
# len(parent_map), len(index_missing_keys))
return sum((index.key_count() for index in self._indices), 0)
except errors.NoSuchFile as e:
if not self._try_reload(e):
                    raise
missing_keys = _missing_keys_from_parent_map
    def _try_reload(self, error):
"""We just got a NoSuchFile exception.
        Try to reload the indices; if it fails, just raise the current
if self._reload_func is None:
            return False
        trace.mutter(
'Trying to reload after getting exception: %s', str(error))
if not self._reload_func():
# We tried to reload, but nothing changed, so we fail anyway
trace.mutter('_reload_func indicated nothing has changed.'
' Raising original exception.')
            return False
        return True
def set_sibling_indices(self, sibling_combined_graph_indices):
"""Set the CombinedGraphIndex objects to reorder after reordering self.
if 'evil' in debug.debug_flags:
trace.mutter_callsite(3,
"iter_all_entries scales with size of history.")
if self.reference_lists:
for key, (absent, references, value) in viewitems(self._nodes):
yield self, key, value, references
for key, (absent, references, value) in viewitems(self._nodes):
yield self, key, value
will be returned, and every match that is in the index will be
        # XXX: Too much duplication with the GraphIndex class; consider finding
# a good place to pull out the actual common logic.
keys = set(keys)
if self._key_length == 1:
for key in keys:
_sanity_check_key(self, key)
node = self._nodes[key]
yield self, key, node[2]
            return
nodes_by_key = self._get_nodes_by_key()
for entry in _iter_entries_prefix(self, nodes_by_key, keys):
            yield entry

def key_count(self):
"""Return an estimate of the number of keys in this index.
def validate(self):
"""In memory index's have no known corruption at the moment."""
def __lt__(self, other):
# We don't really care about the order, just that there is an order.
if (not isinstance(other, GraphIndex) and
not isinstance(other, InMemoryGraphIndex)):
raise TypeError(other)
return hash(self) < hash(other)
class GraphIndexPrefixAdapter(object):
"""An adapter between GraphIndex with different key lengths.
def __init__(self, adapted, prefix, missing_key_length,
add_nodes_callback=None):
"""Construct an adapter against adapted with prefix."""
self.adapted = adapted
self.prefix_key = prefix + (None,) * missing_key_length
self.prefix = prefix
self.prefix_len = len(prefix)
self.add_nodes_callback = add_nodes_callback
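
    # Illustrative usage sketch (hypothetical objects): expose the subset of a
    # two-element-key index under the prefix (b'branch-1',) as a
    # one-element-key index.
    #
    #   adapter = GraphIndexPrefixAdapter(wide_index, (b'branch-1',), 1)
    #   for entry in adapter.iter_all_entries():
    #       ...  # keys are yielded with the (b'branch-1',) prefix stripped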
for (key, value, node_refs) in nodes:
adjusted_references = (
tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
for ref_list in node_refs))
translated_nodes.append((self.prefix + key, value,
adjusted_references))
except ValueError:
# XXX: TODO add an explicit interface for getting the reference list
# status, to handle this bit of user-friendliness in the API more
for node in an_iter:
if node[1][:self.prefix_len] != self.prefix:
raise BadIndexData(self)
for ref_list in node[3]:
for ref_node in ref_list:
if ref_node[:self.prefix_len] != self.prefix:
raise BadIndexData(self)
yield node[0], node[1][self.prefix_len:], node[2], (
tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
for ref_list in node[3]))
def iter_all_entries(self):
"""Iterate over all keys within the index
def validate(self):
"""Call the adapted's validate."""
self.adapted.validate()
def _sanity_check_key(index_or_builder, key):
"""Raise BadIndexKey if key cannot be used for prefix matching."""
    if key[0] is None:
raise BadIndexKey(key)
if len(key) != index_or_builder._key_length:
raise BadIndexKey(key)
def _iter_entries_prefix(index_or_builder, nodes_by_key, keys):
"""Helper for implementing prefix matching iterators."""
    for key in keys:
_sanity_check_key(index_or_builder, key)
# find what it refers to:
key_dict = nodes_by_key
elements = list(key)
# find the subdict whose contents should be returned.
        try:
while len(elements) and elements[0] is not None:
key_dict = key_dict[elements[0]]
                elements.pop(0)
        except KeyError:
            # a non-existent lookup.
            continue
        if len(elements):
            dicts = [key_dict]
            while dicts:
values_view = viewvalues(dicts.pop())
# can't be empty or would not exist
value = next(iter(values_view))
if isinstance(value, dict):
# still descending, push values
dicts.extend(values_view)
                else:
# at leaf tuples, yield values
for value in values_view:
# each value is the key:value:node refs tuple
                        # ready to yield.
yield (index_or_builder, ) + value
        else:
# the last thing looked up was a terminal element
yield (index_or_builder, ) + key_dict
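
# Illustrative sketch of the nodes_by_key structure these helpers walk
# (hypothetical data, two-element keys, no reference lists):
#
#   nodes_by_key = {
#       b'branch-1': {
#           b'rev-1': ((b'branch-1', b'rev-1'), b'a value'),
#       },
#   }
#
# A lookup key of (b'branch-1', None) stops descending at the inner dict and
# yields every terminal tuple beneath it, prefixed with the index object.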