        if self._key_length != len(key):
            raise BadIndexKey(key)
        for element in key:
            if not element or type(element) != bytes or _whitespace_re.search(element) is not None:
                raise BadIndexKey(key)
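
        # A minimal illustration (hypothetical keys, added for illustration):
        # every key element must be a non-empty bytestring containing no
        # whitespace.
        #
        #   >>> builder = GraphIndexBuilder(key_elements=1)
        #   >>> builder._check_key((b'rev-1',))   # passes
        #   >>> builder._check_key((b'rev 1',))   # raises BadIndexKey (whitespace)
        #   >>> builder._check_key(('rev-1',))    # raises BadIndexKey (str, not bytes)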

    def _external_references(self):
        """Return references that are not present in this index.

        if self._nodes_by_key is None:
            nodes_by_key = {}
            if self.reference_lists:
                for key, (absent, references, value) in self._nodes.items():
                    key_dict = nodes_by_key
                    for subkey in key[:-1]:
                        key_dict = key_dict.setdefault(subkey, {})
                    key_dict[key[-1]] = key, value, references
            else:
                for key, (absent, references, value) in self._nodes.items():
                    key_dict = nodes_by_key
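
        # A sketch of the structure being built (hypothetical two-element
        # keys, added for illustration): one dict level per key element, with
        # the leaf holding the full (key, value, references) tuple.
        #
        #   >>> nodes_by_key = {}
        #   >>> key, value, references = (b'parent', b'name'), b'val', ((),)
        #   >>> key_dict = nodes_by_key
        #   >>> for subkey in key[:-1]:
        #   ...     key_dict = key_dict.setdefault(subkey, {})
        #   >>> key_dict[key[-1]] = key, value, references
        #   >>> nodes_by_key
        #   {b'parent': {b'name': ((b'parent', b'name'), b'val', ((),))}}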

        :param value: The value associated with this key. Must not contain
            newlines or null characters.
        :return: (node_refs, absent_references)

            * node_refs: basically a packed form of 'references' where all
              iterables are tuples
            * absent_references: reference keys that are not in self._nodes.
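
        # For example (hypothetical references, added for illustration), with
        # two reference lists and one referenced key not yet in self._nodes:
        #
        #   references        = ([(b'parent',)], [])
        #   node_refs         == (((b'parent',),), ())
        #   absent_references == [(b'parent',)]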

        (node_refs,
         absent_references) = self._check_key_ref_value(key, references, value)
        if key in self._nodes and self._nodes[key][0] != b'a':
            raise BadIndexDuplicateKey(key, self)
        for reference in absent_references:
            # There may be duplicates, but it is not worth worrying about.
            self._nodes[reference] = (b'a', (), b'')
        self._absent_keys.update(absent_references)
        self._absent_keys.discard(key)
        self._nodes[key] = (b'', node_refs, value)
        if self._nodes_by_key is not None and self._key_length > 1:
            self._update_nodes_by_key(key, value, node_refs)

        This is a no-op, but we need the API to conform to a generic 'Index'

    def finish(self):
        """Finish the index.

        :returns: BytesIO holding the full content of the index as it
            should be written to disk.
        """
        lines = [_SIGNATURE]

        # forward sorted by key. In future we may consider topological sorting,
        # at the cost of table scans for direct lookup, or a second index for
        # direct lookup.
        nodes = sorted(self._nodes.items())
        # if we do not pre-pass, we don't know how long it will be up front.
        expected_bytes = None
        # we only need to pre-pass if we have reference lists at all.

                        non_ref_bytes += len(ref_list) - 1
            # how many digits are needed to represent the total byte count?
            digits = 1
            possible_total_bytes = non_ref_bytes + total_references * digits
            while 10 ** digits < possible_total_bytes:
                digits += 1
                possible_total_bytes = non_ref_bytes + total_references * digits
            expected_bytes = possible_total_bytes + 1  # terminating newline
            # resolve key addresses.
            key_addresses = {}
            for key, non_ref_bytes, total_references in key_offset_info:
                key_addresses[key] = non_ref_bytes + total_references * digits
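
            # Worked example (hypothetical sizes, added for illustration):
            # with non_ref_bytes == 950 and total_references == 10 the loop
            # settles on the smallest width that can express every offset:
            #
            #   >>> non_ref_bytes, total_references, digits = 950, 10, 1
            #   >>> while 10 ** digits < non_ref_bytes + total_references * digits:
            #   ...     digits += 1
            #   >>> digits
            #   3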

        format_string = b'%%0%dd' % digits
        for key, (absent, references, value) in nodes:
            flattened_references = []
            for ref_list in references:
                ref_addresses = []
                for reference in ref_list:
                    ref_addresses.append(format_string %
                                         key_addresses[reference])
                flattened_references.append(b'\r'.join(ref_addresses))
            string_key = b'\x00'.join(key)
            lines.append(b"%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
                         b'\t'.join(flattened_references), value))
        lines.append(b'\n')
        result = BytesIO(b''.join(lines))
        if expected_bytes and len(result.getvalue()) != expected_bytes:
            raise errors.BzrError('Failed index creation. Internal error:'
                                  ' mismatched output length and expected length: %d %d' %
                                  (len(result.getvalue()), expected_bytes))
        return result
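
    # A sketch of the resulting on-disk row layout (hypothetical key and
    # offsets, added for illustration): key elements are joined by NUL,
    # reference lists by TAB, references within a list by CR, and every row
    # ends with a newline.
    #
    #   >>> string_key, absent, value = b'rev-2', b'', b'data'
    #   >>> flattened_references = [b'042\r057', b'']
    #   >>> b"%s\x00%s\x00%s\x00%s\n" % (string_key, absent,
    #   ...                              b'\t'.join(flattened_references), value)
    #   b'rev-2\x00\x00042\r057\t\x00data\n'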

    def set_optimize(self, for_size=None, combine_backing_indices=None):

    def __ne__(self, other):
        return not self.__eq__(other)

    def __lt__(self, other):
        # We don't really care about the order, just that there is an order.
        if (not isinstance(other, GraphIndex) and
                not isinstance(other, InMemoryGraphIndex)):
            raise TypeError(other)
        return hash(self) < hash(other)

    def __hash__(self):
        return hash((type(self), self._transport, self._name, self._size))
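
    # A note on __lt__ (hypothetical indices, added for illustration): on
    # Python 3 heterogeneous objects are not orderable by default, so code
    # that sorts a mixture of GraphIndex and InMemoryGraphIndex objects needs
    # an explicit, if arbitrary, total order:
    #
    #   >>> sorted([in_memory_index, graph_index])   # ordered by hash values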

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__,
                           self._transport.abspath(self._name))

    def _buffer_all(self, stream=None):
        """Buffer all the index data.

        if 'index' in debug.debug_flags:
            trace.mutter('Reading entire index %s',
                         self._transport.abspath(self._name))
        if stream is None:
            stream = self._transport.get(self._name)
        if self._base_offset != 0:
            # This is wasteful, but it is better than dealing with
            # adjusting all the offsets, etc.
            stream = BytesIO(stream.read()[self._base_offset:])
        self._read_prefix(stream)
        self._expected_elements = 3 + self._key_length
        # raw data keyed by offset
        self._keys_by_offset = {}
        # ready-to-return key:value or key:value, node_ref_lists
        self._nodes = {}
        self._nodes_by_key = None
        lines = stream.read().split(b'\n')
        # GZ 2009-09-20: Should really use a try/finally block to ensure close
        _, _, _, trailers = self._parse_lines(lines, pos)
        for key, absent, references, value in self._keys_by_offset.values():
            # resolve references:

            self._buffer_all()
        if ref_list_num + 1 > self.node_ref_lists:
            raise ValueError('No ref list %d, index has %d ref lists'
                             % (ref_list_num, self.node_ref_lists))

        nodes = self._nodes
        for key, (value, ref_lists) in nodes.items():
            ref_list = ref_lists[ref_list_num]
            refs.update([ref for ref in ref_list if ref not in nodes])

        if self._nodes_by_key is None:
            nodes_by_key = {}
            if self.node_ref_lists:
                for key, (value, references) in self._nodes.items():
                    key_dict = nodes_by_key
                    for subkey in key[:-1]:
                        key_dict = key_dict.setdefault(subkey, {})
                    key_dict[key[-1]] = key, value, references
            else:
                for key, value in self._nodes.items():
                    key_dict = nodes_by_key
                    for subkey in key[:-1]:
                        key_dict = key_dict.setdefault(subkey, {})

        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(3,
                                  "iter_all_entries scales with size of history.")
        if self._nodes is None:
            self._buffer_all()
        if self.node_ref_lists:
            for key, (value, node_ref_lists) in self._nodes.items():
                yield self, key, value, node_ref_lists
        else:
            for key, value in self._nodes.items():
                yield self, key, value

    def _read_prefix(self, stream):

            for ref_list in references:
                node_refs.append(
                    tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
        return tuple(node_refs)

    @staticmethod
    def _find_index(range_map, key):
        """Helper for the _parsed_*_index calls.

        Given a range map - [(start, end), ...], finds the index of the range

        asking for 'b' will return 1
        asking for 'e' will return 1
        """
        search_key = (key, b'')
        return self._find_index(self._parsed_key_map, search_key)
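
    # A simplified stand-in for the range-map search (added for illustration;
    # the real helper also checks a border case on the following entry):
    #
    #   >>> from bisect import bisect_right
    #   >>> range_map = [(0, 10), (10, 20), (20, 30)]
    #   >>> bisect_right(range_map, (15, None)) - 1   # offset 15 is in range 1
    #   1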

    def _is_parsed(self, offset):

        index = self._parsed_byte_index(location)
        if (len(self._parsed_byte_map) and
                self._parsed_byte_map[index][0] <= location and
                self._parsed_byte_map[index][1] > location):
            # the byte region has been parsed, so no read is needed.

            # _read_and_parse triggered a _buffer_all because we requested the
            # whole data range
            for location, key in location_keys:
                if key not in self._nodes:  # not present
                    result.append(((location, key), False))
                elif self.node_ref_lists:
                    value, refs = self._nodes[key]
                    result.append(((location, key),
                                   (self, key, value, refs)))
                else:
                    result.append(((location, key),
                                   (self, key, self._nodes[key])))
            return result
        # generate results:
        # - figure out <, >, missing, present

                        pending_references.append((location, key))
                        continue
                    result.append(((location, key), (self, key,
                                   value, self._resolve_references(refs))))
                else:
                    result.append(((location, key),
                                   (self, key, self._bisect_nodes[key])))

        # has the region the key should be in, been parsed?

            # answer key references we had to look up late.
            value, refs = self._bisect_nodes[key]
            result.append(((location, key), (self, key,
                           value, self._resolve_references(refs))))

    def _parse_header_from_bytes(self, bytes):

            raise BadIndexOptions(self)
        # calculate the bytes we have processed
        header_end = (len(signature) + len(lines[0]) + len(lines[1]) +
                      len(lines[2]) + 3)
        self._parsed_bytes(0, (), header_end, ())
        # setup parsing state
        self._expected_elements = 3 + self._key_length
        # raw data keyed by offset

        if not start_adjacent:
            # work around python bug in rfind
            if trim_start is None:
                trim_start = data.find(b'\n') + 1
            else:
                trim_start = data.find(b'\n', trim_start) + 1
            if not (trim_start != 0):
                raise AssertionError('no \n was present')
            # print 'removing start', offset, trim_start, repr(data[:trim_start])
        if not end_adjacent:
            # work around python bug in rfind
            if trim_end is None:
                trim_end = data.rfind(b'\n') + 1
            else:
                trim_end = data.rfind(b'\n', None, trim_end) + 1
            if not (trim_end != 0):
                raise AssertionError('no \n was present')
            # print 'removing end', offset, trim_end, repr(data[trim_end:])

        trimmed_data = data[trim_start:trim_end]
        if not trimmed_data:
            raise AssertionError('read unneeded data [%d:%d] from [%d:%d]'
                                 % (trim_start, trim_end, offset, offset + len(data)))
        offset += trim_start
        # print "parsing", repr(trimmed_data)
        # splitlines mangles the \r delimiters; don't use it.
        lines = trimmed_data.split(b'\n')

        first_key, last_key, nodes, _ = self._parse_lines(lines, pos)
        for key, value in nodes:
            self._bisect_nodes[key] = value
        self._parsed_bytes(offset, first_key,
                           offset + len(trimmed_data), last_key)
        return offset + len(trimmed_data), last_segment

    def _parse_lines(self, lines, pos):

        for line in lines:
            if line == b'':
                # must be at the end
                if self._size:
                    if not (self._size == pos + 1):
                        raise AssertionError("%s %s" % (self._size, pos))
                trailers += 1
                continue
            elements = line.split(b'\0')
            if len(elements) != self._expected_elements:
                raise BadIndexData(self)
            # keys are tuples. Each element is a bytestring that may occur
            # many times; bytes cannot be interned on Python 3, so the
            # elements are used as-is.
            key = tuple(elements[:self._key_length])
            if first_key is None:
                first_key = key
            absent, references, value = elements[-3:]
            ref_lists = []
            for ref_string in references.split(b'\t'):
                ref_lists.append(tuple([
                    int(ref) for ref in ref_string.split(b'\r') if ref
                    ]))
            ref_lists = tuple(ref_lists)
            self._keys_by_offset[pos] = (key, absent, ref_lists, value)
            pos += len(line) + 1  # +1 for the \n
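
            # A sketch of one serialised row being parsed back (hypothetical
            # row for a one-element-key index with two reference lists, added
            # for illustration):
            #
            #   >>> line = b'rev-2\x00\x00042\r057\t\x00data'
            #   >>> elements = line.split(b'\0')
            #   >>> absent, references, value = elements[-3:]
            #   >>> tuple(tuple(int(ref) for ref in ref_string.split(b'\r') if ref)
            #   ...       for ref_string in references.split(b'\t'))
            #   ((42, 57), ())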

        if self.node_ref_lists:

        # combine two regions
        if (index + 1 < len(self._parsed_byte_map) and
                self._parsed_byte_map[index][1] == start and
                self._parsed_byte_map[index + 1][0] == end):
            # combine two regions
            self._parsed_byte_map[index] = (self._parsed_byte_map[index][0],
                                            self._parsed_byte_map[index + 1][1])
            self._parsed_key_map[index] = (self._parsed_key_map[index][0],
                                           self._parsed_key_map[index + 1][1])
            del self._parsed_byte_map[index + 1]
            del self._parsed_key_map[index + 1]
        elif self._parsed_byte_map[index][1] == start:
            # extend the lower entry
            self._parsed_byte_map[index] = (
                self._parsed_byte_map[index][0], end)
            self._parsed_key_map[index] = (
                self._parsed_key_map[index][0], end_key)
        elif (index + 1 < len(self._parsed_byte_map) and
              self._parsed_byte_map[index + 1][0] == end):
            # extend the higher entry
            self._parsed_byte_map[index + 1] = (
                start, self._parsed_byte_map[index + 1][1])
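
        # A sketch of the bookkeeping (hypothetical offsets, added for
        # illustration): parsing the bytes [100:200) between two already
        # parsed regions collapses the three entries into one.
        #
        #   _parsed_byte_map before: [(0, 100), (200, 300)]
        #   _parsed_byte_map after:  [(0, 300)]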

        base_offset = self._base_offset
        if base_offset != 0:
            # Rewrite the ranges for the offset
            readv_ranges = [(start + base_offset, size)
                            for start, size in readv_ranges]
        readv_data = self._transport.readv(self._name, readv_ranges, True,
                                           self._size + self._base_offset)
        for offset, data in readv_data:
            offset -= base_offset

    performance significantly. For example, if one index is on local disk and a
    second on a remote server, the local disk index should be before the other
    in the index list.

    Also, queries tend to need results from the same indices as previous
    queries. So the indices will be reordered after every query to put the
    indices that had the result(s) of that query first (while otherwise

    def __repr__(self):
        return "%s(%s)" % (
            self.__class__.__name__,
            ', '.join(map(repr, self._indices)))

    def clear_cache(self):
        """See GraphIndex.clear_cache()"""

        search_keys = set(keys)
        if _mod_revision.NULL_REVISION in search_keys:
            search_keys.discard(_mod_revision.NULL_REVISION)
            found_parents = {_mod_revision.NULL_REVISION: []}
        else:
            found_parents = {}
        for index, key, value, refs in self.iter_entries(search_keys):

    def _move_to_front_by_index(self, hit_indices):
        """Core logic for _move_to_front.

        Returns a list of names corresponding to the hit_indices param.
        """
        indices_info = zip(self._index_names, self._indices)

                if len(new_hit_indices) == len(hit_indices):
                    # We've found all of the hit entries, everything else is
                    # unhit.
                    unhit_names.extend(self._index_names[offset + 1:])
                    unhit_indices.extend(self._indices[offset + 1:])
                    break
            else:
                unhit_names.append(name)
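
        # A sketch of the reordering effect (hypothetical index names, added
        # for illustration): indices that answered the last query move to the
        # front, and the relative order of the rest is preserved.
        #
        #   names before: ['local', 'http-1', 'http-2']
        #   hit:          ['http-2']
        #   names after:  ['http-2', 'local', 'http-1']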

                # CombinedGraphIndex does not know what the ref lists
                # mean.
                search_keys = index._find_ancestors(search_keys,
                                                    ref_list_num, parent_map, index_missing_keys)
                # print ' \t \t%2d\t%4d\t%5d\t%5d' % (
                #     sub_generation, len(search_keys),
                #     len(parent_map), len(index_missing_keys))

        if self._reload_func is None:
            raise
        trace.mutter(
            'Trying to reload after getting exception: %s', str(error))
        if not self._reload_func():
            # We tried to reload, but nothing changed, so we fail anyway
            trace.mutter('_reload_func indicated nothing has changed.'

        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(3,
                                  "iter_all_entries scales with size of history.")
        if self.reference_lists:
            for key, (absent, references, value) in self._nodes.items():
                if not absent:
                    yield self, key, value, references
        else:
            for key, (absent, references, value) in self._nodes.items():
                if not absent:
                    yield self, key, value

    def validate(self):
        """In-memory indexes have no known corruption at the moment."""

    def __lt__(self, other):
        # We don't really care about the order, just that there is an order.
        if (not isinstance(other, GraphIndex) and
                not isinstance(other, InMemoryGraphIndex)):
            raise TypeError(other)
        return hash(self) < hash(other)

class GraphIndexPrefixAdapter(object):
    """An adapter between GraphIndex objects with different key lengths.

    def __init__(self, adapted, prefix, missing_key_length,
                 add_nodes_callback=None):
        """Construct an adapter against adapted with prefix."""
        self.adapted = adapted
        self.prefix_key = prefix + (None,) * missing_key_length
        self.prefix = prefix
        self.prefix_len = len(prefix)
        self.add_nodes_callback = add_nodes_callback
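
    # A sketch of the prefix mapping (hypothetical keys, added for
    # illustration): adapting an index whose keys have two elements so that
    # callers can use one-element keys.
    #
    #   >>> prefix, missing_key_length = (b'branch-1',), 1
    #   >>> prefix + (None,) * missing_key_length   # prefix_key, used in lookups
    #   (b'branch-1', None)
    #   >>> prefix + (b'rev-9',)                    # key passed to self.adapted
    #   (b'branch-1', b'rev-9')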

            for (key, value, node_refs) in nodes:
                adjusted_references = (
                    tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
                          for ref_list in node_refs))
                translated_nodes.append((self.prefix + key, value,
                                         adjusted_references))
        except ValueError:
            # XXX: TODO add an explicit interface for getting the reference list
            # status, to handle this bit of user-friendliness in the API more

                raise BadIndexData(self)
            yield node[0], node[1][self.prefix_len:], node[2], (
                tuple(tuple(ref_node[self.prefix_len:] for ref_node in ref_list)
                      for ref_list in node[3]))
def iter_all_entries(self):
1807
1823
"""Iterate over all keys within the index

            if len(elements):
                dicts = [key_dict]
                while dicts:
                    values_view = dicts.pop().values()
                    # can't be empty or would not exist
                    value = next(iter(values_view))
                    if isinstance(value, dict):