"""B+Tree indices"""
from io import BytesIO
from ..lazy_import import lazy_import
lazy_import(globals(), """
import bisect
from .index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN
_BTSIGNATURE = b"B+Tree Graph Index 2\n"
_OPTION_ROW_LENGTHS = b"row_lengths="
_LEAF_FLAG = b"type=leaf\n"
_INTERNAL_FLAG = b"type=internal\n"
_INTERNAL_OFFSET = b"offset="
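# Number of bytes reserved at the start of a row's first page; the index
# header (signature and option lines) must fit inside this space, which
# _write_nodes() checks with an assertion.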
_RESERVED_HEADER_BYTES = 120
def __init__(self):
"""Create a _BuilderRow."""
self.spool = None # tempfile.TemporaryFile(prefix='bzr-index-row-')
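# A row buffers its first page in a BytesIO spool; once a second page is
# finished, the buffered data is copied into a temporary file so large rows
# do not have to stay entirely in memory.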
def finish_node(self, pad=True):
byte_lines, _, padding = self.writer.finish()
if self.nodes == 0:
self.spool = BytesIO()
self.spool.write(b"\x00" * _RESERVED_HEADER_BYTES)
elif self.nodes == 1:
# We got bigger than 1 node, switch to a temp file
spool = tempfile.TemporaryFile(prefix='bzr-index-row-')
spool.write(self.spool.getvalue())
self.spool = spool
if not pad and padding:
:param references: An iterable of iterables of keys. Each is a
reference to another key.
:param value: The value to associate with the key. It may be any
bytes as long as it does not contain \\0 or \\n.
"""
# Ensure that 'key' is a StaticTuple
key = static_tuple.StaticTuple.from_sequence(key).intern()
# we don't care about absent_references
node_refs, _ = self._check_key_ref_value(key, references, value)
if key in self._nodes:
raise index.BadIndexDuplicateKey(key, self)
self._nodes[key] = static_tuple.StaticTuple(node_refs, value)
if self._nodes_by_key is not None and self._key_length > 1:
self._update_nodes_by_key(key, value, node_refs)
if len(self._nodes) < self._spill_at:
return
self._spill_mem_keys_to_disk()
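# _spill_mem_keys_to_disk(), excerpted below, writes the accumulated
# in-memory nodes out as a temporary backing BTreeGraphIndex once _spill_at
# keys have been added, keeping the builder's memory use bounded.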
if self._combine_backing_indices:
(new_backing_file, size,
backing_pos) = self._spill_mem_keys_and_combine()
else:
new_backing_file, size = self._spill_mem_keys_without_combining()
# Note: The transport here isn't strictly needed, because we will use
# direct access to the new_backing._file object
new_backing = BTreeGraphIndex(transport.get_transport_from_path('.'),
# GC will clean up the file
new_backing._file = new_backing_file
if self._combine_backing_indices:
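# The generator below merges several sorted node iterators: it always
# yields the entry with the smallest key and raises BadIndexDuplicateKey
# when two sources produce the same key.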
current_values = []
for iterator in iterators_to_combine:
try:
current_values.append(next(iterator))
except StopIteration:
current_values.append(None)
last = None
while True:
# Decorate candidates with the key so the built-in min() picks the
# entry with the smallest key.
candidates = [(item[1][1], item) for item
in enumerate(current_values) if item[1] is not None]
if not len(candidates):
return
selected = min(candidates)
# undecorate back to (pos, node)
selected = selected[1]
if last == selected[1][1]:
raise index.BadIndexDuplicateKey(last, self)
last = selected[1][1]
# Yield, with self as the index
yield (self,) + selected[1][1:]
pos = selected[0]
try:
current_values[pos] = next(iterators_to_combine[pos])
except StopIteration:
current_values[pos] = None
flag when writing out. This is used by the _spill_mem_keys_to_disk
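# _add_key() appends a serialised key to the current leaf page. When the
# page is full it is finished and the key is pushed into the parent
# internal node; if every row is full a new root row is created and the
# key is re-added recursively.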
if rows[-1].writer is None:
# opening a new leaf chunk;
for pos, internal_row in enumerate(rows[:-1]):
# flesh out any internal nodes that are needed to
# preserve the height of the tree
if internal_row.writer is None:
length = _PAGE_SIZE
if internal_row.nodes == 0:
length -= _RESERVED_HEADER_BYTES # padded
if allow_optimize:
optimize_for_size = self._optimize_for_size
else:
optimize_for_size = False
internal_row.writer = chunk_writer.ChunkWriter(
length, 0, optimize_for_size=optimize_for_size)
internal_row.writer.write(_INTERNAL_FLAG)
internal_row.writer.write(_INTERNAL_OFFSET
+ b"%d\n" % rows[pos + 1].nodes)
length = _PAGE_SIZE
if rows[-1].nodes == 0:
length -= _RESERVED_HEADER_BYTES # padded
rows[-1].writer = chunk_writer.ChunkWriter(
length, optimize_for_size=self._optimize_for_size)
rows[-1].writer.write(_LEAF_FLAG)
if rows[-1].writer.write(line):
# if we failed to write, despite having an empty page to write to,
# then line is too big. raising the error avoids infinite recursion
# searching for a suitably large page that will not be found.
raise index.BadIndexKey(string_key)
# this key did not fit in the node:
rows[-1].finish_node()
key_line = string_key + b"\n"
for row in reversed(rows[:-1]):
# Mark the start of the next node in the node above. If it
optimize_for_size=self._optimize_for_size)
new_row.writer.write(_INTERNAL_FLAG)
new_row.writer.write(_INTERNAL_OFFSET
+ b"%d\n" % (rows[1].nodes - 1))
new_row.writer.write(key_line)
self._add_key(string_key, line, rows,
allow_optimize=allow_optimize)
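# _write_nodes() drives the build: every flattened node is fed through
# _add_key(), the finished rows are flushed, and the resulting pages are
# concatenated (root row first) behind the index header.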
def _write_nodes(self, node_iterator, allow_optimize=True):
"""Write node_iterator out as a B+Tree.
# First key triggers the first row
rows.append(_LeafBuilderRow())
string_key, line = _btree_serializer._flatten_node(
node, self.reference_lists)
self._add_key(string_key, line, rows,
allow_optimize=allow_optimize)
for row in reversed(rows):
pad = (not isinstance(row, _LeafBuilderRow))
row.finish_node(pad=pad)
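# The header is written into the reserved space of the root page. As an
# illustration (values are made up, and the exact option prefixes come from
# the constants imported from the .index module), a small two-row index
# header looks roughly like:
#   B+Tree Graph Index 2\n
#   node_ref_lists=1\n
#   key_elements=1\n
#   len=120\n
#   row_lengths=1,3\n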
lines = [_BTSIGNATURE]
lines.append(b'%s%d\n' % (_OPTION_NODE_REFS, self.reference_lists))
lines.append(b'%s%d\n' % (_OPTION_KEY_ELEMENTS, self._key_length))
lines.append(b'%s%d\n' % (_OPTION_LEN, key_count))
row_lengths = [row.nodes for row in rows]
lines.append(_OPTION_ROW_LENGTHS + ','.join(
map(str, row_lengths)).encode('ascii') + b'\n')
if row_lengths and row_lengths[-1] > 1:
result = tempfile.NamedTemporaryFile(prefix='bzr-index-')
else:
result = BytesIO()
result.writelines(lines)
position = sum(map(len, lines))
if position > _RESERVED_HEADER_BYTES:
raise AssertionError("Could not fit the header in the"
" reserved space: %d > %d"
% (position, _RESERVED_HEADER_BYTES))
# write the rows out:
for row in rows:
reserved = _RESERVED_HEADER_BYTES # reserved space for first node
row.spool.flush()
row.spool.seek(0)
# copy nodes to the finalised file.
# Special case the first node as it may be prefixed
node = row.spool.read(_PAGE_SIZE)
result.write(node[reserved:])
if len(node) == _PAGE_SIZE:
result.write(b"\x00" * (reserved - position))
position = 0 # Only the root row actually has an offset
copied_len = osutils.pumpfile(row.spool, result)
if copied_len != (row.nodes - 1) * _PAGE_SIZE:
if not isinstance(row, _LeafBuilderRow):
raise AssertionError("Incorrect amount of data copied"
" expected: %d, got: %d"
% ((row.nodes - 1) * _PAGE_SIZE,
size = result.tell()
"""Iterate over keys within the index.
:param keys: An iterable providing the keys to be retrieved.
:return: An iterable of (index, key, value, reference_lists). There is
no defined order for the result iteration - it will be in the most
efficient order for the index (keys iteration order in this case).
"""
# Note: We don't use keys.intersection() here. If you read the C api,
# set.intersection(other) special cases when other is a set and
# will iterate the smaller of the two and lookup in the other.
# It does *not* do this for any other type (even dict, unlike
# some other set functions.) Since we expect keys is generally <<
# self._nodes, it is faster to iterate over it in a list
nodes = self._nodes
local_keys = [key for key in keys if key in nodes]
if self.reference_lists:
for key in local_keys:
node = self._nodes[key]
yield self, key, node[1], node[0]
else:
for key in local_keys:
node = self._nodes[key]
yield self, key, node[1]
# Find things that are in backing indices that have not been handled
if not self._backing_indices:
return # We won't find anything there either
# Remove all of the keys that we found locally
keys.difference_update(local_keys)
for backing in self._backing_indices:
yield self, key, node[1]
nodes_by_key = self._get_nodes_by_key()
for entry in index._iter_entries_prefix(self, nodes_by_key, keys):
yield entry
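# _get_nodes_by_key() lazily builds a nested dict keyed by key element:
# ('a', 'b', 'c') is stored as nodes_by_key['a']['b']['c'] = (key, value,
# refs) (or (key, value) when there are no reference lists); this is the
# structure the shared prefix-iteration helper walks.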
def _get_nodes_by_key(self):
if self._nodes_by_key is None:
nodes_by_key = {}
if self.reference_lists:
for key, (references, value) in self._nodes.items():
key_dict = nodes_by_key
for subkey in key[:-1]:
key_dict = key_dict.setdefault(subkey, {})
key_dict[key[-1]] = key, value, references
else:
for key, (references, value) in self._nodes.items():
key_dict = nodes_by_key
for subkey in key[:-1]:
key_dict = key_dict.setdefault(subkey, {})
For InMemoryGraphIndex the estimate is exact.
return len(self._nodes) + sum(
backing.key_count()
for backing in self._backing_indices
if backing is not None)
def validate(self):
"""In memory indexes have no known corruption at the moment."""
def __lt__(self, other):
if isinstance(other, type(self)):
return self._nodes < other._nodes
# Always sort existing indexes before ones that are still being built.
if isinstance(other, BTreeGraphIndex):
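# _LeafNode is now a dict subclass: the mapping itself is key -> (value,
# refs), while min_key/max_key record the page's key range and _keys keeps
# a plain-dict copy of the same mapping.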
class _LeafNode(dict):
"""A leaf node for a serialised B+Tree index."""
__slots__ = ('min_key', 'max_key', '_keys')
def __init__(self, bytes, key_length, ref_list_length):
"""Parse bytes to create a leaf node object."""
# splitlines mangles the \r delimiters.. don't use it.
key_list = _btree_serializer._parse_leaf_lines(
bytes, key_length, ref_list_length)
if key_list:
self.min_key = key_list[0][0]
self.max_key = key_list[-1][0]
else:
self.min_key = self.max_key = None
super(_LeafNode, self).__init__(key_list)
self._keys = dict(self)
def all_items(self):
"""Return a sorted list of (key, (value, refs)) items"""
items = sorted(self.items())
def all_keys(self):
"""Return a sorted list of all keys."""
keys = sorted(self.keys())
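# An internal node page stores a "type=internal" flag, an "offset=" line
# giving the position of its first child page within the next row, and
# then one separator key per line; lookups bisect these keys to pick a
# child.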
class _InternalNode(object):
def __init__(self, bytes):
"""Parse bytes to create an internal node object."""
# splitlines mangles the \r delimiters.. don't use it.
self.keys = self._parse_lines(bytes.split(b'\n'))
def _parse_lines(self, lines):
nodes = []
self.offset = int(lines[1][7:])
as_st = static_tuple.StaticTuple.from_sequence
for line in lines[2:]:
# GZ 2017-05-24: Used to intern() each chunk of line as well, need
# to recheck performance and perhaps adapt StaticTuple to adjust.
nodes.append(as_st(line.split(b'\0')).intern())
memory except when very large walks are done.
def __init__(self, transport, name, size, unlimited_cache=False,
offset=0):
"""Create a B+Tree index object on the index name.
:param transport: The transport to read data for the index from.
the initial read (to read the root node header) can be done
without over-reading even on empty indices, and on small indices
allows single-IO to read the entire index.
:param unlimited_cache: If set to True, then instead of using an
LRUCache with size _NODE_CACHE_SIZE, we will use a dict and always
cache all leaf nodes.
:param offset: The start of the btree index data isn't byte 0 of the
file. Instead it starts at some point later.
"""
self._transport = transport
self._name = name
self._file = None
self._recommended_pages = self._compute_recommended_pages()
self._root_node = None
self._base_offset = offset
self._leaf_factory = _LeafNode
# Default max size is 100,000 leaf values
self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
if unlimited_cache:
self._leaf_node_cache = {}
self._internal_node_cache = {}
else:
self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)
# We use a FIFO here just to prevent possible blowout. However, a
# 300k record btree has only 3k leaf nodes, and only 20 internal
# nodes. A value of 100 scales to ~100*100*100 = 1M records.
self._internal_node_cache = fifo_cache.FIFOCache(100)
self._key_count = None
self._row_lengths = None
self._row_offsets = None # Start of each row, [-1] is the end
def __eq__(self, other):
"""Equal when self and other were created with the same parameters."""
return (
isinstance(self, type(other))
and self._transport == other._transport
and self._name == other._name
and self._size == other._size)
def __lt__(self, other):
if isinstance(other, type(self)):
return ((self._name, self._size) < (other._name, other._size))
# Always sort existing indexes before ones that are still being built.
if isinstance(other, BTreeBuilder):
def __ne__(self, other):
return not self.__eq__(other)
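# Pages fetched from disk are cached by page index: page 0 becomes the
# root node, other pages go into the internal-node or leaf-node cache
# depending on whether they sit before the leaf row.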
start_of_leaves = None
for node_pos, node in self._read_nodes(sorted(nodes)):
if node_pos == 0: # Special case
self._root_node = node
else:
if start_of_leaves is None:
start_of_leaves = self._row_offsets[-2]
if node_pos < start_of_leaves:
self._internal_node_cache[node_pos] = node
else:
self._leaf_node_cache[node_pos] = node
found[node_pos] = node
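# When only a few pages are requested, the read is expanded with
# neighbouring uncached pages (bounded by self._recommended_pages) so a
# single readv() round trip fetches a useful amount of data.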
if total_pages - len(cached_offsets) <= self._recommended_pages:
# Read whatever is left
if cached_offsets:
expanded = [x for x in range(total_pages)
if x not in cached_offsets]
else:
expanded = list(range(total_pages))
if 'index' in debug.debug_flags:
trace.mutter(' reading all unread pages: %s', expanded)
if first is None:
first, end = self._find_layer_first_and_end(pos)
previous = pos - 1
previous not in cached_offsets and
previous not in final_offsets and
previous >= first):
next_tips.add(previous)
if (after < total_pages and
after not in cached_offsets and
after not in final_offsets and
next_tips.add(after)
# This would keep us from going bigger than
# recommended_pages by only expanding the first offsets.
new_tips = next_tips
return final_offsets
def clear_cache(self):
"""Clear out any cached/memoized values.
This can be called at any time, but generally it is used when we have
extracted some information, but don't expect to be requesting any more
# Note that we don't touch self._root_node or self._internal_node_cache
# We don't expect either of those to be big, and it can save
# round-trips in the future. We may re-evaluate this if InternalNode
# memory starts to be an issue.
self._leaf_node_cache.clear()
def external_references(self, ref_list_num):
if self._root_node is None:
self._get_root_node()
if ref_list_num + 1 > self.node_ref_lists:
raise ValueError('No ref list %d, index has %d ref lists'
% (ref_list_num, self.node_ref_lists))
for node in self.iter_all_entries():
def _cache_leaf_values(self, nodes):
"""Cache directly from key => value, skipping the btree."""
if self._leaf_value_cache is not None:
for node in nodes.values():
for key, value in node.all_items():
if key in self._leaf_value_cache:
# Don't add the rest of the keys, we've seen this node
def iter_all_entries(self):
"""Iterate over all keys within the index.
:return: An iterable of (index, key, value) or
(index, key, value, reference_lists).
The former tuple is used when there are no reference lists in the
index, making the API compatible with simple key:value index types.
There is no defined order for the result iteration - it will be in
the most efficient order for the index.
"""
if 'evil' in debug.debug_flags:
trace.mutter_callsite(
3, "iter_all_entries scales with size of history.")
if not self.key_count():
return
if self._row_offsets[-1] == 1:
# There is only the root node, and we read that via key_count()
if self.node_ref_lists:
for key, (value, refs) in self._root_node.all_items():
yield (self, key, value, refs)
else:
for key, (value, refs) in self._root_node.all_items():
yield (self, key, value)
start_of_leaves = self._row_offsets[-2]
end_of_leaves = self._row_offsets[-1]
needed_offsets = list(range(start_of_leaves, end_of_leaves))
if needed_offsets == [0]:
# Special case when we only have a root node, as we have already
# read everything
# for spilling index builds to disk.
if self.node_ref_lists:
for _, node in nodes:
for key, (value, refs) in node.all_items():
yield (self, key, value, refs)
else:
for _, node in nodes:
for key, (value, refs) in node.all_items():
yield (self, key, value)
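# The helper below distributes a sorted list of search keys across the gaps
# between an internal node's separator keys, returning (offset, keys) pairs.
# As an illustration (not taken from a test), fixed_keys ['m'] should split
# ['a', 'z'] into [(0, ['a']), (1, ['z'])], matching bisect_right per key.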
# function, so there is even more to be gained.
# iter_steps = len(in_keys) + len(fixed_keys)
# bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)
if len(in_keys) == 1: # Bisect will always be faster for M = 1
return [(bisect.bisect_right(fixed_keys, in_keys[0]), in_keys)]
# elif bisect_steps < iter_steps:
# for key in in_keys:
# return [(o, offsets[o]) for o in sorted(offsets)]
in_keys_iter = iter(in_keys)
fixed_keys_iter = enumerate(fixed_keys)
cur_in_key = next(in_keys_iter)
cur_fixed_offset, cur_fixed_key = next(fixed_keys_iter)
class InputDone(Exception):
pass
class FixedDone(Exception):
pass
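# Descending the tree: each visited internal node splits its share of the
# requested keys across its children via _multi_bisect_right, and the
# (child page, keys) pairs become the work list for the next row down.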
positions = self._multi_bisect_right(sub_keys, node.keys)
node_offset = next_row_start + node.offset
next_nodes_and_keys.extend([(node_offset + pos, s_keys)
for pos, s_keys in positions])
keys_at_index = next_nodes_and_keys
# We should now be at the _LeafNodes
node_indexes = [idx for idx, s_keys in keys_at_index]
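# Ancestor search: for each leaf page, keys found there contribute their
# parent references to parent_map, absent keys go to missing_keys, and any
# newly seen parents are queued up to be looked up in turn.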
# sub_keys is all of the keys we are looking for that should exist
# on this page, if they aren't here, then they won't be found
node = nodes[node_index]
parents_to_check = set()
for next_sub_key in sub_keys:
if next_sub_key not in node:
# This one is just not present in the index at all
missing_keys.add(next_sub_key)
else:
value, refs = node[next_sub_key]
parent_keys = refs[ref_list_num]
parent_map[next_sub_key] = parent_keys
parents_to_check.update(parent_keys)
except KeyError:
for entry in index._iter_entries_prefix(self, nodes_by_key, keys):
yield entry
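# Header parsing mirrors _write_nodes: the signature line is checked first,
# followed by one option line each for node_ref_lists, key_elements, len
# and row_lengths, with any malformed value raising BadIndexOptions.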
def key_count(self):
"""Return an estimate of the number of keys in this index.
signature = bytes[0:len(self._signature())]
if not signature == self._signature():
raise index.BadIndexFormatSignature(self._name, BTreeGraphIndex)
lines = bytes[len(self._signature()):].splitlines()
options_line = lines[0]
if not options_line.startswith(_OPTION_NODE_REFS):
raise index.BadIndexOptions(self)
try:
self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):])
except ValueError:
raise index.BadIndexOptions(self)
options_line = lines[1]
if not options_line.startswith(_OPTION_KEY_ELEMENTS):
raise index.BadIndexOptions(self)
try:
self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):])
except ValueError:
raise index.BadIndexOptions(self)
options_line = lines[2]
if not options_line.startswith(_OPTION_LEN):
raise index.BadIndexOptions(self)
try:
self._key_count = int(options_line[len(_OPTION_LEN):])
except ValueError:
raise index.BadIndexOptions(self)
options_line = lines[3]
if not options_line.startswith(_OPTION_ROW_LENGTHS):
raise index.BadIndexOptions(self)
try:
self._row_lengths = [int(length) for length in
options_line[len(_OPTION_ROW_LENGTHS):].split(
except ValueError:
raise index.BadIndexOptions(self)
self._compute_row_offsets()
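# Reading nodes: page indexes are converted into byte ranges (shifted by
# self._base_offset), fetched via transport.readv() or sliced from an
# already-loaded buffer, and each page is parsed into a leaf or internal
# node object.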
# calculate the bytes we have processed
# The only case where we don't know the size, is for very
# small indexes. So we read the whole thing
bytes = self._transport.get_bytes(self._name)
num_bytes = len(bytes)
self._size = num_bytes - base_offset
# the whole thing should be parsed out of 'bytes'
ranges = [(start, min(_PAGE_SIZE, num_bytes - start))
for start in range(
base_offset, num_bytes, _PAGE_SIZE)]
if offset > self._size:
' of the file %s > %s'
% (offset, self._size))
size = min(size, self._size - offset)
ranges.append((base_offset + offset, size))
elif bytes is not None:
# already have the whole file
data_ranges = [(start, bytes[start:start + size])
for start, size in ranges]
elif self._file is None:
data_ranges = self._transport.readv(self._name, ranges)
bytes = zlib.decompress(data)
if bytes.startswith(_LEAF_FLAG):
node = self._leaf_factory(bytes, self._key_length,
self.node_ref_lists)
elif bytes.startswith(_INTERNAL_FLAG):
node = _InternalNode(bytes)
else:
raise AssertionError("Unknown node type for %r" % bytes)
yield offset // _PAGE_SIZE, node
def _signature(self):
"""The file signature for this index type."""
# We shouldn't be reading anything anyway
node_end = self._row_offsets[-1]
for node in self._read_nodes(list(range(start_node, node_end))):
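# Prefer the compiled _btree_serializer_pyx extension; fall back to the
# pure-Python serializer if the extension fails to load.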
_gcchk_factory = _LeafNode
try:
from . import _btree_serializer_pyx as _btree_serializer
_gcchk_factory = _btree_serializer._parse_into_chk
except ImportError as e:
osutils.failed_to_load_extension(e)
from . import _btree_serializer_py as _btree_serializer