1
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
1
# Copyright (C) 2008-2011 Canonical Ltd
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
18
18
"""B+Tree indices"""
21
from bisect import bisect_right
20
from __future__ import absolute_import, division
22
from ..lazy_import import lazy_import
23
lazy_import(globals(), """
37
from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN
38
from bzrlib.transport import get_transport
41
_BTSIGNATURE = "B+Tree Graph Index 2\n"
42
_OPTION_ROW_LENGTHS = "row_lengths="
43
_LEAF_FLAG = "type=leaf\n"
44
_INTERNAL_FLAG = "type=internal\n"
45
_INTERNAL_OFFSET = "offset="
43
from .index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN
44
from ..sixish import (
53
_BTSIGNATURE = b"B+Tree Graph Index 2\n"
54
_OPTION_ROW_LENGTHS = b"row_lengths="
55
_LEAF_FLAG = b"type=leaf\n"
56
_INTERNAL_FLAG = b"type=internal\n"
57
_INTERNAL_OFFSET = b"offset="
47
59
_RESERVED_HEADER_BYTES = 120
62
74
def __init__(self):
63
75
"""Create a _BuilderRow."""
65
self.spool = None# tempfile.TemporaryFile(prefix='bzr-index-row-')
77
self.spool = None # tempfile.TemporaryFile(prefix='bzr-index-row-')
68
80
def finish_node(self, pad=True):
69
81
byte_lines, _, padding = self.writer.finish()
70
82
if self.nodes == 0:
71
self.spool = cStringIO.StringIO()
83
self.spool = BytesIO()
73
self.spool.write("\x00" * _RESERVED_HEADER_BYTES)
85
self.spool.write(b"\x00" * _RESERVED_HEADER_BYTES)
74
86
elif self.nodes == 1:
75
87
# We got bigger than 1 node, switch to a temp file
76
88
spool = tempfile.TemporaryFile(prefix='bzr-index-row-')
137
149
of nodes that BTreeBuilder will hold in memory.
139
151
index.GraphIndexBuilder.__init__(self, reference_lists=reference_lists,
140
key_elements=key_elements)
152
key_elements=key_elements)
141
153
self._spill_at = spill_at
142
154
self._backing_indices = []
143
155
# A map of {key: (node_refs, value)}
158
170
:param references: An iterable of iterables of keys. Each is a
159
171
reference to another key.
160
172
:param value: The value to associate with the key. It may be any
161
bytes as long as it does not contain \0 or \n.
173
bytes as long as it does not contain \\0 or \\n.
163
175
# Ensure that 'key' is a StaticTuple
164
176
key = static_tuple.StaticTuple.from_sequence(key).intern()
165
177
# we don't care about absent_references
166
178
node_refs, _ = self._check_key_ref_value(key, references, value)
167
179
if key in self._nodes:
168
raise errors.BadIndexDuplicateKey(key, self)
180
raise index.BadIndexDuplicateKey(key, self)
169
181
self._nodes[key] = static_tuple.StaticTuple(node_refs, value)
170
182
if self._nodes_by_key is not None and self._key_length > 1:
171
183
self._update_nodes_by_key(key, value, node_refs)
193
205
new_backing_file, size = self._spill_mem_keys_without_combining()
194
206
# Note: The transport here isn't strictly needed, because we will use
195
207
# direct access to the new_backing._file object
196
new_backing = BTreeGraphIndex(get_transport('.'), '<temp>', size)
208
new_backing = BTreeGraphIndex(transport.get_transport_from_path('.'),
197
210
# GC will clean up the file
198
211
new_backing._file = new_backing_file
199
212
if self._combine_backing_indices:
256
269
current_values = []
257
270
for iterator in iterators_to_combine:
259
current_values.append(iterator.next())
272
current_values.append(next(iterator))
260
273
except StopIteration:
261
274
current_values.append(None)
264
277
# Decorate candidates with the value to allow 2.4's min to be used.
265
278
candidates = [(item[1][1], item) for item
266
in enumerate(current_values) if item[1] is not None]
279
in enumerate(current_values) if item[1] is not None]
267
280
if not len(candidates):
269
282
selected = min(candidates)
270
283
# undecorate back to (pos, node)
271
284
selected = selected[1]
272
285
if last == selected[1][1]:
273
raise errors.BadIndexDuplicateKey(last, self)
286
raise index.BadIndexDuplicateKey(last, self)
274
287
last = selected[1][1]
275
288
# Yield, with self as the index
276
289
yield (self,) + selected[1][1:]
277
290
pos = selected[0]
279
current_values[pos] = iterators_to_combine[pos].next()
292
current_values[pos] = next(iterators_to_combine[pos])
280
293
except StopIteration:
281
294
current_values[pos] = None
289
302
flag when writing out. This is used by the _spill_mem_keys_to_disk
292
306
if rows[-1].writer is None:
293
307
# opening a new leaf chunk;
294
309
for pos, internal_row in enumerate(rows[:-1]):
295
310
# flesh out any internal nodes that are needed to
296
311
# preserve the height of the tree
297
312
if internal_row.writer is None:
298
313
length = _PAGE_SIZE
299
314
if internal_row.nodes == 0:
300
length -= _RESERVED_HEADER_BYTES # padded
315
length -= _RESERVED_HEADER_BYTES # padded
301
316
if allow_optimize:
302
317
optimize_for_size = self._optimize_for_size
304
319
optimize_for_size = False
305
internal_row.writer = chunk_writer.ChunkWriter(length, 0,
306
optimize_for_size=optimize_for_size)
320
internal_row.writer = chunk_writer.ChunkWriter(
321
length, 0, optimize_for_size=optimize_for_size)
307
322
internal_row.writer.write(_INTERNAL_FLAG)
308
internal_row.writer.write(_INTERNAL_OFFSET +
309
str(rows[pos + 1].nodes) + "\n")
323
internal_row.writer.write(_INTERNAL_OFFSET
324
+ b"%d\n" % rows[pos + 1].nodes)
311
326
length = _PAGE_SIZE
312
327
if rows[-1].nodes == 0:
313
length -= _RESERVED_HEADER_BYTES # padded
314
rows[-1].writer = chunk_writer.ChunkWriter(length,
315
optimize_for_size=self._optimize_for_size)
328
length -= _RESERVED_HEADER_BYTES # padded
329
rows[-1].writer = chunk_writer.ChunkWriter(
330
length, optimize_for_size=self._optimize_for_size)
316
331
rows[-1].writer.write(_LEAF_FLAG)
317
332
if rows[-1].writer.write(line):
333
# if we failed to write, despite having an empty page to write to,
334
# then line is too big. raising the error avoids infinite recursion
335
# searching for a suitably large page that will not be found.
337
raise index.BadIndexKey(string_key)
318
338
# this key did not fit in the node:
319
339
rows[-1].finish_node()
320
key_line = string_key + "\n"
340
key_line = string_key + b"\n"
322
342
for row in reversed(rows[:-1]):
323
343
# Mark the start of the next node in the node above. If it
345
365
optimize_for_size=self._optimize_for_size)
346
366
new_row.writer.write(_INTERNAL_FLAG)
347
new_row.writer.write(_INTERNAL_OFFSET +
348
str(rows[1].nodes - 1) + "\n")
367
new_row.writer.write(_INTERNAL_OFFSET
368
+ b"%d\n" % (rows[1].nodes - 1))
349
369
new_row.writer.write(key_line)
350
self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
370
self._add_key(string_key, line, rows,
371
allow_optimize=allow_optimize)
352
373
def _write_nodes(self, node_iterator, allow_optimize=True):
353
374
"""Write node_iterator out as a B+Tree.
379
400
# First key triggers the first row
380
401
rows.append(_LeafBuilderRow())
382
string_key, line = _btree_serializer._flatten_node(node,
383
self.reference_lists)
384
self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
403
string_key, line = _btree_serializer._flatten_node(
404
node, self.reference_lists)
405
self._add_key(string_key, line, rows,
406
allow_optimize=allow_optimize)
385
407
for row in reversed(rows):
386
pad = (type(row) != _LeafBuilderRow)
408
pad = (not isinstance(row, _LeafBuilderRow))
387
409
row.finish_node(pad=pad)
388
410
lines = [_BTSIGNATURE]
389
lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')
390
lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')
391
lines.append(_OPTION_LEN + str(key_count) + '\n')
411
lines.append(b'%s%d\n' % (_OPTION_NODE_REFS, self.reference_lists))
412
lines.append(b'%s%d\n' % (_OPTION_KEY_ELEMENTS, self._key_length))
413
lines.append(b'%s%d\n' % (_OPTION_LEN, key_count))
392
414
row_lengths = [row.nodes for row in rows]
393
lines.append(_OPTION_ROW_LENGTHS + ','.join(map(str, row_lengths)) + '\n')
415
lines.append(_OPTION_ROW_LENGTHS + ','.join(
416
map(str, row_lengths)).encode('ascii') + b'\n')
394
417
if row_lengths and row_lengths[-1] > 1:
395
418
result = tempfile.NamedTemporaryFile(prefix='bzr-index-')
397
result = cStringIO.StringIO()
398
421
result.writelines(lines)
399
422
position = sum(map(len, lines))
401
423
if position > _RESERVED_HEADER_BYTES:
402
424
raise AssertionError("Could not fit the header in the"
403
425
" reserved space: %d > %d"
404
426
% (position, _RESERVED_HEADER_BYTES))
405
427
# write the rows out:
407
reserved = _RESERVED_HEADER_BYTES # reserved space for first node
429
reserved = _RESERVED_HEADER_BYTES # reserved space for first node
408
430
row.spool.flush()
409
431
row.spool.seek(0)
410
432
# copy nodes to the finalised file.
412
434
node = row.spool.read(_PAGE_SIZE)
413
435
result.write(node[reserved:])
414
436
if len(node) == _PAGE_SIZE:
415
result.write("\x00" * (reserved - position))
416
position = 0 # Only the root row actually has an offset
437
result.write(b"\x00" * (reserved - position))
438
position = 0 # Only the root row actually has an offset
417
439
copied_len = osutils.pumpfile(row.spool, result)
418
440
if copied_len != (row.nodes - 1) * _PAGE_SIZE:
419
if type(row) != _LeafBuilderRow:
441
if not isinstance(row, _LeafBuilderRow):
420
442
raise AssertionError("Incorrect amount of data copied"
421
" expected: %d, got: %d"
422
% ((row.nodes - 1) * _PAGE_SIZE,
443
" expected: %d, got: %d"
444
% ((row.nodes - 1) * _PAGE_SIZE,
425
447
size = result.tell()
442
464
efficient order for the index (in this case dictionary hash order).
444
466
if 'evil' in debug.debug_flags:
445
trace.mutter_callsite(3,
446
"iter_all_entries scales with size of history.")
467
trace.mutter_callsite(
468
3, "iter_all_entries scales with size of history.")
447
469
# Doing serial rather than ordered would be faster; but this shouldn't
448
470
# be getting called routinely anyway.
449
471
iterators = [self._iter_mem_nodes()]
458
480
"""Iterate over keys within the index.
460
482
:param keys: An iterable providing the keys to be retrieved.
461
:return: An iterable of (index, key, value, reference_lists). There is no
462
defined order for the result iteration - it will be in the most
483
:return: An iterable of (index, key, value, reference_lists). There is
484
no defined order for the result iteration - it will be in the most
463
485
efficient order for the index (keys iteration order in this case).
483
505
# Find things that are in backing indices that have not been handled
485
507
if not self._backing_indices:
486
return # We won't find anything there either
508
return # We won't find anything there either
487
509
# Remove all of the keys that we found locally
488
510
keys.difference_update(local_keys)
489
511
for backing in self._backing_indices:
524
544
yield (self,) + node[1:]
525
545
if self._key_length == 1:
529
raise errors.BadIndexKey(key)
530
if len(key) != self._key_length:
531
raise errors.BadIndexKey(key)
547
index._sanity_check_key(self, key)
533
549
node = self._nodes[key]
539
555
yield self, key, node[1]
544
raise errors.BadIndexKey(key)
545
if len(key) != self._key_length:
546
raise errors.BadIndexKey(key)
547
# find what it refers to:
548
key_dict = self._get_nodes_by_key()
550
# find the subdict to return
552
while len(elements) and elements[0] is not None:
553
key_dict = key_dict[elements[0]]
556
# a non-existant lookup.
561
key_dict = dicts.pop(-1)
562
# can't be empty or would not exist
563
item, value = key_dict.iteritems().next()
564
if type(value) == dict:
566
dicts.extend(key_dict.itervalues())
569
for value in key_dict.itervalues():
570
yield (self, ) + tuple(value)
572
yield (self, ) + key_dict
557
nodes_by_key = self._get_nodes_by_key()
558
for entry in index._iter_entries_prefix(self, nodes_by_key, keys):
574
561
def _get_nodes_by_key(self):
575
562
if self._nodes_by_key is None:
576
563
nodes_by_key = {}
577
564
if self.reference_lists:
578
for key, (references, value) in self._nodes.iteritems():
565
for key, (references, value) in viewitems(self._nodes):
579
566
key_dict = nodes_by_key
580
567
for subkey in key[:-1]:
581
568
key_dict = key_dict.setdefault(subkey, {})
582
569
key_dict[key[-1]] = key, value, references
584
for key, (references, value) in self._nodes.iteritems():
571
for key, (references, value) in viewitems(self._nodes):
585
572
key_dict = nodes_by_key
586
573
for subkey in key[:-1]:
587
574
key_dict = key_dict.setdefault(subkey, {})
595
582
For InMemoryGraphIndex the estimate is exact.
597
return len(self._nodes) + sum(backing.key_count() for backing in
598
self._backing_indices if backing is not None)
584
return len(self._nodes) + sum(
586
for backing in self._backing_indices
587
if backing is not None)
600
589
def validate(self):
601
590
"""In memory index's have no known corruption at the moment."""
604
class _LeafNode(object):
592
def __lt__(self, other):
593
if isinstance(other, type(self)):
594
return self._nodes < other._nodes
595
# Always sort existing indexes before ones that are still being built.
596
if isinstance(other, BTreeGraphIndex):
601
class _LeafNode(dict):
605
602
"""A leaf node for a serialised B+Tree index."""
607
__slots__ = ('keys', 'min_key', 'max_key')
604
__slots__ = ('min_key', 'max_key', '_keys')
609
606
def __init__(self, bytes, key_length, ref_list_length):
610
607
"""Parse bytes to create a leaf node object."""
611
608
# splitlines mangles the \r delimiters.. don't use it.
612
key_list = _btree_serializer._parse_leaf_lines(bytes,
613
key_length, ref_list_length)
609
key_list = _btree_serializer._parse_leaf_lines(
610
bytes, key_length, ref_list_length)
615
612
self.min_key = key_list[0][0]
616
613
self.max_key = key_list[-1][0]
618
615
self.min_key = self.max_key = None
619
self.keys = dict(key_list)
616
super(_LeafNode, self).__init__(key_list)
617
self._keys = dict(self)
620
"""Return a sorted list of (key, (value, refs)) items"""
621
items = sorted(self.items())
625
"""Return a sorted list of all keys."""
626
keys = sorted(self.keys())
622
630
class _InternalNode(object):
627
635
def __init__(self, bytes):
628
636
"""Parse bytes to create an internal node object."""
629
637
# splitlines mangles the \r delimiters.. don't use it.
630
self.keys = self._parse_lines(bytes.split('\n'))
638
self.keys = self._parse_lines(bytes.split(b'\n'))
632
640
def _parse_lines(self, lines):
634
642
self.offset = int(lines[1][7:])
635
643
as_st = static_tuple.StaticTuple.from_sequence
636
644
for line in lines[2:]:
639
nodes.append(as_st(map(intern, line.split('\0'))).intern())
647
# GZ 2017-05-24: Used to intern() each chunk of line as well, need
648
# to recheck performance and perhaps adapt StaticTuple to adjust.
649
nodes.append(as_st(line.split(b'\0')).intern())
671
681
self._recommended_pages = self._compute_recommended_pages()
672
682
self._root_node = None
673
683
self._base_offset = offset
684
self._leaf_factory = _LeafNode
674
685
# Default max size is 100,000 leave values
675
self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
686
self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
676
687
if unlimited_cache:
677
688
self._leaf_node_cache = {}
678
689
self._internal_node_cache = {}
684
695
self._internal_node_cache = fifo_cache.FIFOCache(100)
685
696
self._key_count = None
686
697
self._row_lengths = None
687
self._row_offsets = None # Start of each row, [-1] is the end
698
self._row_offsets = None # Start of each row, [-1] is the end
689
703
def __eq__(self, other):
690
704
"""Equal when self and other were created with the same parameters."""
692
type(self) == type(other) and
693
self._transport == other._transport and
694
self._name == other._name and
695
self._size == other._size)
706
isinstance(self, type(other))
707
and self._transport == other._transport
708
and self._name == other._name
709
and self._size == other._size)
711
def __lt__(self, other):
712
if isinstance(other, type(self)):
713
return ((self._name, self._size) < (other._name, other._size))
714
# Always sort existing indexes before ones that are still being built.
715
if isinstance(other, BTreeBuilder):
697
719
def __ne__(self, other):
698
720
return not self.__eq__(other)
732
754
pages fit in that length.
734
756
recommended_read = self._transport.recommended_page_size()
735
recommended_pages = int(math.ceil(recommended_read /
757
recommended_pages = int(math.ceil(recommended_read / _PAGE_SIZE))
737
758
return recommended_pages
739
760
def _compute_total_pages_in_index(self):
750
771
return self._row_offsets[-1]
751
772
# This is the number of pages as defined by the size of the index. They
752
773
# should be indentical.
753
total_pages = int(math.ceil(self._size / float(_PAGE_SIZE)))
774
total_pages = int(math.ceil(self._size / _PAGE_SIZE))
754
775
return total_pages
756
777
def _expand_offsets(self, offsets):
787
808
if total_pages - len(cached_offsets) <= self._recommended_pages:
788
809
# Read whatever is left
789
810
if cached_offsets:
790
expanded = [x for x in xrange(total_pages)
791
if x not in cached_offsets]
811
expanded = [x for x in range(total_pages)
812
if x not in cached_offsets]
793
expanded = range(total_pages)
814
expanded = list(range(total_pages))
794
815
if 'index' in debug.debug_flags:
795
816
trace.mutter(' reading all unread pages: %s', expanded)
845
866
if first is None:
846
867
first, end = self._find_layer_first_and_end(pos)
847
868
previous = pos - 1
849
and previous not in cached_offsets
850
and previous not in final_offsets
851
and previous >= first):
870
previous not in cached_offsets and
871
previous not in final_offsets and
852
873
next_tips.add(previous)
854
if (after < total_pages
855
and after not in cached_offsets
856
and after not in final_offsets
875
if (after < total_pages and
876
after not in cached_offsets and
877
after not in final_offsets and
858
879
next_tips.add(after)
859
880
# This would keep us from going bigger than
860
881
# recommended_pages by only expanding the first offsets.
884
905
self._get_root_node()
885
906
if ref_list_num + 1 > self.node_ref_lists:
886
907
raise ValueError('No ref list %d, index has %d ref lists'
887
% (ref_list_num, self.node_ref_lists))
908
% (ref_list_num, self.node_ref_lists))
890
911
for node in self.iter_all_entries():
910
931
def _get_offsets_to_cached_pages(self):
911
932
"""Determine what nodes we already have cached."""
912
cached_offsets = set(self._internal_node_cache.keys())
933
cached_offsets = set(self._internal_node_cache)
934
# cache may be dict or LRUCache, keys() is the common method
913
935
cached_offsets.update(self._leaf_node_cache.keys())
914
936
if self._root_node is not None:
915
937
cached_offsets.add(0)
948
970
def _cache_leaf_values(self, nodes):
949
971
"""Cache directly from key => value, skipping the btree."""
950
972
if self._leaf_value_cache is not None:
951
for node in nodes.itervalues():
952
for key, value in node.keys.iteritems():
973
for node in viewvalues(nodes):
974
for key, value in node.all_items():
953
975
if key in self._leaf_value_cache:
954
976
# Don't add the rest of the keys, we've seen this node
965
987
def iter_all_entries(self):
966
988
"""Iterate over all keys within the index.
968
:return: An iterable of (index, key, value) or (index, key, value, reference_lists).
990
:return: An iterable of (index, key, value) or
991
(index, key, value, reference_lists).
969
992
The former tuple is used when there are no reference lists in the
970
993
index, making the API compatible with simple key:value index types.
971
994
There is no defined order for the result iteration - it will be in
972
995
the most efficient order for the index.
974
997
if 'evil' in debug.debug_flags:
975
trace.mutter_callsite(3,
976
"iter_all_entries scales with size of history.")
998
trace.mutter_callsite(
999
3, "iter_all_entries scales with size of history.")
977
1000
if not self.key_count():
979
1002
if self._row_offsets[-1] == 1:
980
1003
# There is only the root node, and we read that via key_count()
981
1004
if self.node_ref_lists:
982
for key, (value, refs) in sorted(self._root_node.keys.items()):
1005
for key, (value, refs) in self._root_node.all_items():
983
1006
yield (self, key, value, refs)
985
for key, (value, refs) in sorted(self._root_node.keys.items()):
1008
for key, (value, refs) in self._root_node.all_items():
986
1009
yield (self, key, value)
988
1011
start_of_leaves = self._row_offsets[-2]
989
1012
end_of_leaves = self._row_offsets[-1]
990
needed_offsets = range(start_of_leaves, end_of_leaves)
1013
needed_offsets = list(range(start_of_leaves, end_of_leaves))
991
1014
if needed_offsets == [0]:
992
1015
# Special case when we only have a root node, as we have already
993
1016
# read everything
998
1021
# for spilling index builds to disk.
999
1022
if self.node_ref_lists:
1000
1023
for _, node in nodes:
1001
for key, (value, refs) in sorted(node.keys.items()):
1024
for key, (value, refs) in node.all_items():
1002
1025
yield (self, key, value, refs)
1004
1027
for _, node in nodes:
1005
for key, (value, refs) in sorted(node.keys.items()):
1028
for key, (value, refs) in node.all_items():
1006
1029
yield (self, key, value)
1031
1054
# function, so there is even more to be gained.
1032
1055
# iter_steps = len(in_keys) + len(fixed_keys)
1033
1056
# bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)
1034
if len(in_keys) == 1: # Bisect will always be faster for M = 1
1035
return [(bisect_right(fixed_keys, in_keys[0]), in_keys)]
1057
if len(in_keys) == 1: # Bisect will always be faster for M = 1
1058
return [(bisect.bisect_right(fixed_keys, in_keys[0]), in_keys)]
1036
1059
# elif bisect_steps < iter_steps:
1038
1061
# for key in in_keys:
1041
1064
# return [(o, offsets[o]) for o in sorted(offsets)]
1042
1065
in_keys_iter = iter(in_keys)
1043
1066
fixed_keys_iter = enumerate(fixed_keys)
1044
cur_in_key = in_keys_iter.next()
1045
cur_fixed_offset, cur_fixed_key = fixed_keys_iter.next()
1047
class InputDone(Exception): pass
1048
class FixedDone(Exception): pass
1067
cur_in_key = next(in_keys_iter)
1068
cur_fixed_offset, cur_fixed_key = next(fixed_keys_iter)
1070
class InputDone(Exception):
1073
class FixedDone(Exception):
1110
1136
positions = self._multi_bisect_right(sub_keys, node.keys)
1111
1137
node_offset = next_row_start + node.offset
1112
1138
next_nodes_and_keys.extend([(node_offset + pos, s_keys)
1113
for pos, s_keys in positions])
1139
for pos, s_keys in positions])
1114
1140
keys_at_index = next_nodes_and_keys
1115
1141
# We should now be at the _LeafNodes
1116
1142
node_indexes = [idx for idx, s_keys in keys_at_index]
1170
1195
node = nodes[node_index]
1171
1196
for next_sub_key in sub_keys:
1172
if next_sub_key in node.keys:
1173
value, refs = node.keys[next_sub_key]
1197
if next_sub_key in node:
1198
value, refs = node[next_sub_key]
1174
1199
if self.node_ref_lists:
1175
1200
yield (self, next_sub_key, value, refs)
1209
1234
if ref_list_num >= self.node_ref_lists:
1210
1235
raise ValueError('No ref list %d, index has %d ref lists'
1211
% (ref_list_num, self.node_ref_lists))
1236
% (ref_list_num, self.node_ref_lists))
1213
1238
# The main trick we are trying to accomplish is that when we find a
1214
1239
# key listing its parents, we expect that the parent key is also likely
1244
1269
# sub_keys is all of the keys we are looking for that should exist
1245
1270
# on this page, if they aren't here, then they won't be found
1246
1271
node = nodes[node_index]
1247
node_keys = node.keys
1248
1272
parents_to_check = set()
1249
1273
for next_sub_key in sub_keys:
1250
if next_sub_key not in node_keys:
1274
if next_sub_key not in node:
1251
1275
# This one is just not present in the index at all
1252
1276
missing_keys.add(next_sub_key)
1254
value, refs = node_keys[next_sub_key]
1278
value, refs = node[next_sub_key]
1255
1279
parent_keys = refs[ref_list_num]
1256
1280
parent_map[next_sub_key] = parent_keys
1257
1281
parents_to_check.update(parent_keys)
1264
1288
while parents_to_check:
1265
1289
next_parents_to_check = set()
1266
1290
for key in parents_to_check:
1267
if key in node_keys:
1268
value, refs = node_keys[key]
1292
value, refs = node[key]
1269
1293
parent_keys = refs[ref_list_num]
1270
1294
parent_map[key] = parent_keys
1271
1295
next_parents_to_check.update(parent_keys)
1291
1315
parents_not_on_page.add(key)
1293
# assert key != node.min_key and key != node.max_key
1317
# assert (key != node.min_key and
1318
# key != node.max_key)
1294
1319
# If it was going to be present, it would be on
1295
1320
# *this* page, so mark it missing.
1296
1321
missing_keys.add(key)
1333
1358
self._get_root_node()
1334
1359
# TODO: only access nodes that can satisfy the prefixes we are looking
1335
1360
# for. For now, to meet API usage (as this function is not used by
1336
# current bzrlib) just suck the entire index and iterate in memory.
1361
# current breezy) just suck the entire index and iterate in memory.
1338
1363
if self.node_ref_lists:
1339
1364
if self._key_length == 1:
1365
1390
key_dict[key[-1]] = key_value
1366
1391
if self._key_length == 1:
1367
1392
for key in keys:
1370
raise errors.BadIndexKey(key)
1371
if len(key) != self._key_length:
1372
raise errors.BadIndexKey(key)
1393
index._sanity_check_key(self, key)
1374
1395
if self.node_ref_lists:
1375
1396
value, node_refs = nodes[key]
1379
1400
except KeyError:
1385
raise errors.BadIndexKey(key)
1386
if len(key) != self._key_length:
1387
raise errors.BadIndexKey(key)
1388
# find what it refers to:
1389
key_dict = nodes_by_key
1390
elements = list(key)
1391
# find the subdict whose contents should be returned.
1393
while len(elements) and elements[0] is not None:
1394
key_dict = key_dict[elements[0]]
1397
# a non-existant lookup.
1402
key_dict = dicts.pop(-1)
1403
# can't be empty or would not exist
1404
item, value = key_dict.iteritems().next()
1405
if type(value) == dict:
1407
dicts.extend(key_dict.itervalues())
1410
for value in key_dict.itervalues():
1411
# each value is the key:value:node refs tuple
1413
yield (self, ) + value
1415
# the last thing looked up was a terminal element
1416
yield (self, ) + key_dict
1403
for entry in index._iter_entries_prefix(self, nodes_by_key, keys):
1418
1406
def key_count(self):
1419
1407
"""Return an estimate of the number of keys in this index.
1445
1433
signature = bytes[0:len(self._signature())]
1446
1434
if not signature == self._signature():
1447
raise errors.BadIndexFormatSignature(self._name, BTreeGraphIndex)
1435
raise index.BadIndexFormatSignature(self._name, BTreeGraphIndex)
1448
1436
lines = bytes[len(self._signature()):].splitlines()
1449
1437
options_line = lines[0]
1450
1438
if not options_line.startswith(_OPTION_NODE_REFS):
1451
raise errors.BadIndexOptions(self)
1439
raise index.BadIndexOptions(self)
1453
1441
self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):])
1454
1442
except ValueError:
1455
raise errors.BadIndexOptions(self)
1443
raise index.BadIndexOptions(self)
1456
1444
options_line = lines[1]
1457
1445
if not options_line.startswith(_OPTION_KEY_ELEMENTS):
1458
raise errors.BadIndexOptions(self)
1446
raise index.BadIndexOptions(self)
1460
1448
self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):])
1461
1449
except ValueError:
1462
raise errors.BadIndexOptions(self)
1450
raise index.BadIndexOptions(self)
1463
1451
options_line = lines[2]
1464
1452
if not options_line.startswith(_OPTION_LEN):
1465
raise errors.BadIndexOptions(self)
1453
raise index.BadIndexOptions(self)
1467
1455
self._key_count = int(options_line[len(_OPTION_LEN):])
1468
1456
except ValueError:
1469
raise errors.BadIndexOptions(self)
1457
raise index.BadIndexOptions(self)
1470
1458
options_line = lines[3]
1471
1459
if not options_line.startswith(_OPTION_ROW_LENGTHS):
1472
raise errors.BadIndexOptions(self)
1460
raise index.BadIndexOptions(self)
1474
self._row_lengths = map(int, [length for length in
1475
options_line[len(_OPTION_ROW_LENGTHS):].split(',')
1462
self._row_lengths = [int(length) for length in
1463
options_line[len(_OPTION_ROW_LENGTHS):].split(
1477
1466
except ValueError:
1478
raise errors.BadIndexOptions(self)
1467
raise index.BadIndexOptions(self)
1479
1468
self._compute_row_offsets()
1481
1470
# calculate the bytes we have processed
1514
1503
self._size = num_bytes - base_offset
1515
1504
# the whole thing should be parsed out of 'bytes'
1516
1505
ranges = [(start, min(_PAGE_SIZE, num_bytes - start))
1517
for start in xrange(base_offset, num_bytes, _PAGE_SIZE)]
1507
base_offset, num_bytes, _PAGE_SIZE)]
1520
1510
if offset > self._size:
1528
1518
elif bytes is not None:
1529
1519
# already have the whole file
1530
data_ranges = [(start, bytes[start:start+size])
1520
data_ranges = [(start, bytes[start:start + size])
1531
1521
for start, size in ranges]
1532
1522
elif self._file is None:
1533
1523
data_ranges = self._transport.readv(self._name, ranges)
1546
1536
bytes = zlib.decompress(data)
1547
1537
if bytes.startswith(_LEAF_FLAG):
1548
node = _LeafNode(bytes, self._key_length, self.node_ref_lists)
1538
node = self._leaf_factory(bytes, self._key_length,
1539
self.node_ref_lists)
1549
1540
elif bytes.startswith(_INTERNAL_FLAG):
1550
1541
node = _InternalNode(bytes)
1552
1543
raise AssertionError("Unknown node type for %r" % bytes)
1553
yield offset / _PAGE_SIZE, node
1544
yield offset // _PAGE_SIZE, node
1555
1546
def _signature(self):
1556
1547
"""The file signature for this index type."""
1566
1557
# We shouldn't be reading anything anyway
1568
1559
node_end = self._row_offsets[-1]
1569
for node in self._read_nodes(range(start_node, node_end)):
1560
for node in self._read_nodes(list(range(start_node, node_end))):
1564
_gcchk_factory = _LeafNode
1574
from bzrlib import _btree_serializer_pyx as _btree_serializer
1575
except ImportError, e:
1567
from . import _btree_serializer_pyx as _btree_serializer
1568
_gcchk_factory = _btree_serializer._parse_into_chk
1569
except ImportError as e:
1576
1570
osutils.failed_to_load_extension(e)
1577
from bzrlib import _btree_serializer_py as _btree_serializer
1571
from . import _btree_serializer_py as _btree_serializer