/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar


Viewing changes to breezy/bzr/btree_index.py

  • Committer: Jelmer Vernooij
  • Date: 2020-05-06 02:13:25 UTC
  • mfrom: (7490.7.21 work)
  • mto: This revision was merged to the branch mainline in revision 7501.
  • Revision ID: jelmer@jelmer.uk-20200506021325-awbmmqu1zyorz7sj
  • Commit message: Merge 3.1 branch.

--- breezy/bzr/btree_index.py
+++ breezy/bzr/btree_index.py
@@ -17,7 +17,7 @@
 
 """B+Tree indices"""
 
-from __future__ import absolute_import, division
+from io import BytesIO
 
 from ..lazy_import import lazy_import
 lazy_import(globals(), """
@@ -41,14 +41,6 @@
     index,
     )
 from .index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN
-from ..sixish import (
-    BytesIO,
-    map,
-    range,
-    viewitems,
-    viewkeys,
-    viewvalues,
-    )
 
 
 _BTSIGNATURE = b"B+Tree Graph Index 2\n"
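This hunk drops breezy's Python 2 compatibility shims. As a hedged sketch (assuming breezy's sixish module mirrored the six library, as these names suggest), the dict-view wrappers it re-exported reduce to plain method calls on Python 3, which is exactly the substitution the later hunks make (viewitems(self._nodes) becomes self._nodes.items(), viewvalues(nodes) becomes nodes.values()); the BytesIO re-export is likewise replaced by a direct `from io import BytesIO` in the first hunk:

# Sketch only: the correspondence the removed shims papered over.
# On Python 3 the builtin dict methods already return live views.
d = {'a': 1, 'b': 2}

items = d.items()          # was: viewitems(d)
values = d.values()        # was: viewvalues(d)
keys = d.keys()            # was: viewkeys(d)

d['c'] = 3
assert ('c', 3) in items   # views reflect later mutation, matching what
assert 3 in values         # viewitems()/viewvalues() gave on Python 2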
@@ -75,7 +67,7 @@
     def __init__(self):
         """Create a _BuilderRow."""
         self.nodes = 0
-        self.spool = None# tempfile.TemporaryFile(prefix='bzr-index-row-')
+        self.spool = None  # tempfile.TemporaryFile(prefix='bzr-index-row-')
         self.writer = None
 
     def finish_node(self, pad=True):
@@ -150,7 +142,7 @@
             of nodes that BTreeBuilder will hold in memory.
         """
         index.GraphIndexBuilder.__init__(self, reference_lists=reference_lists,
-            key_elements=key_elements)
+                                         key_elements=key_elements)
         self._spill_at = spill_at
         self._backing_indices = []
         # A map of {key: (node_refs, value)}
@@ -277,7 +269,7 @@
         while True:
             # Decorate candidates with the value to allow 2.4's min to be used.
             candidates = [(item[1][1], item) for item
-                in enumerate(current_values) if item[1] is not None]
+                          in enumerate(current_values) if item[1] is not None]
             if not len(candidates):
                 return
             selected = min(candidates)
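The "decorate" comment describes a decorate-then-min pattern: each surviving entry is paired with its sort field so the builtin min() compares that field first and the enumerate() position second. A miniature of the same shape (hypothetical data; the real code draws current_values from iterators over index nodes):

current_values = [(b'k2', b'B'), None, (b'k1', b'A')]

# Pair the sort field (item[1][1]) with (position, entry); exhausted
# iterators show up as None and are filtered out.
candidates = [(item[1][1], item) for item
              in enumerate(current_values) if item[1] is not None]

selected = min(candidates)
print(selected)  # (b'A', (2, (b'k1', b'A'))): smallest field wins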
@@ -313,22 +305,22 @@
                 if internal_row.writer is None:
                     length = _PAGE_SIZE
                     if internal_row.nodes == 0:
-                        length -= _RESERVED_HEADER_BYTES # padded
+                        length -= _RESERVED_HEADER_BYTES  # padded
                     if allow_optimize:
                         optimize_for_size = self._optimize_for_size
                     else:
                         optimize_for_size = False
-                    internal_row.writer = chunk_writer.ChunkWriter(length, 0,
-                        optimize_for_size=optimize_for_size)
+                    internal_row.writer = chunk_writer.ChunkWriter(
+                        length, 0, optimize_for_size=optimize_for_size)
                     internal_row.writer.write(_INTERNAL_FLAG)
-                    internal_row.writer.write(_INTERNAL_OFFSET +
-                        b"%d\n" % rows[pos + 1].nodes)
+                    internal_row.writer.write(_INTERNAL_OFFSET
+                                              + b"%d\n" % rows[pos + 1].nodes)
             # add a new leaf
             length = _PAGE_SIZE
             if rows[-1].nodes == 0:
-                length -= _RESERVED_HEADER_BYTES # padded
-            rows[-1].writer = chunk_writer.ChunkWriter(length,
-                optimize_for_size=self._optimize_for_size)
+                length -= _RESERVED_HEADER_BYTES  # padded
+            rows[-1].writer = chunk_writer.ChunkWriter(
+                length, optimize_for_size=self._optimize_for_size)
             rows[-1].writer.write(_LEAF_FLAG)
         if rows[-1].writer.write(line):
             # if we failed to write, despite having an empty page to write to,
@@ -365,10 +357,11 @@
                     reserved_bytes,
                     optimize_for_size=self._optimize_for_size)
                 new_row.writer.write(_INTERNAL_FLAG)
-                new_row.writer.write(_INTERNAL_OFFSET +
-                    b"%d\n" % (rows[1].nodes - 1))
+                new_row.writer.write(_INTERNAL_OFFSET
+                                     + b"%d\n" % (rows[1].nodes - 1))
                 new_row.writer.write(key_line)
-            self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
+            self._add_key(string_key, line, rows,
+                          allow_optimize=allow_optimize)
 
     def _write_nodes(self, node_iterator, allow_optimize=True):
         """Write node_iterator out as a B+Tree.
@@ -400,9 +393,10 @@
                 # First key triggers the first row
                 rows.append(_LeafBuilderRow())
             key_count += 1
-            string_key, line = _btree_serializer._flatten_node(node,
-                                    self.reference_lists)
-            self._add_key(string_key, line, rows, allow_optimize=allow_optimize)
+            string_key, line = _btree_serializer._flatten_node(
+                node, self.reference_lists)
+            self._add_key(string_key, line, rows,
+                          allow_optimize=allow_optimize)
         for row in reversed(rows):
             pad = (not isinstance(row, _LeafBuilderRow))
             row.finish_node(pad=pad)
@@ -419,14 +413,13 @@
             result = BytesIO()
         result.writelines(lines)
         position = sum(map(len, lines))
-        root_row = True
         if position > _RESERVED_HEADER_BYTES:
             raise AssertionError("Could not fit the header in the"
                                  " reserved space: %d > %d"
                                  % (position, _RESERVED_HEADER_BYTES))
         # write the rows out:
         for row in rows:
-            reserved = _RESERVED_HEADER_BYTES # reserved space for first node
+            reserved = _RESERVED_HEADER_BYTES  # reserved space for first node
             row.spool.flush()
             row.spool.seek(0)
             # copy nodes to the finalised file.
@@ -435,14 +428,14 @@
             result.write(node[reserved:])
             if len(node) == _PAGE_SIZE:
                 result.write(b"\x00" * (reserved - position))
-            position = 0 # Only the root row actually has an offset
+            position = 0  # Only the root row actually has an offset
             copied_len = osutils.pumpfile(row.spool, result)
             if copied_len != (row.nodes - 1) * _PAGE_SIZE:
                 if not isinstance(row, _LeafBuilderRow):
                     raise AssertionError("Incorrect amount of data copied"
-                        " expected: %d, got: %d"
-                        % ((row.nodes - 1) * _PAGE_SIZE,
-                           copied_len))
+                                         " expected: %d, got: %d"
+                                         % ((row.nodes - 1) * _PAGE_SIZE,
+                                            copied_len))
         result.flush()
         size = result.tell()
         result.seek(0)
@@ -464,8 +457,8 @@
             efficient order for the index (in this case dictionary hash order).
         """
         if 'evil' in debug.debug_flags:
-            trace.mutter_callsite(3,
-                "iter_all_entries scales with size of history.")
+            trace.mutter_callsite(
+                3, "iter_all_entries scales with size of history.")
         # Doing serial rather than ordered would be faster; but this shouldn't
         # be getting called routinely anyway.
         iterators = [self._iter_mem_nodes()]
@@ -480,8 +473,8 @@
         """Iterate over keys within the index.
 
         :param keys: An iterable providing the keys to be retrieved.
-        :return: An iterable of (index, key, value, reference_lists). There is no
-            defined order for the result iteration - it will be in the most
+        :return: An iterable of (index, key, value, reference_lists). There is
+            no defined order for the result iteration - it will be in the most
             efficient order for the index (keys iteration order in this case).
         """
         keys = set(keys)
@@ -505,7 +498,7 @@
         # Find things that are in backing indices that have not been handled
         # yet.
         if not self._backing_indices:
-            return # We won't find anything there either
+            return  # We won't find anything there either
         # Remove all of the keys that we found locally
         keys.difference_update(local_keys)
         for backing in self._backing_indices:
@@ -562,13 +555,13 @@
         if self._nodes_by_key is None:
             nodes_by_key = {}
             if self.reference_lists:
-                for key, (references, value) in viewitems(self._nodes):
+                for key, (references, value) in self._nodes.items():
                     key_dict = nodes_by_key
                     for subkey in key[:-1]:
                         key_dict = key_dict.setdefault(subkey, {})
                     key_dict[key[-1]] = key, value, references
             else:
-                for key, (references, value) in viewitems(self._nodes):
+                for key, (references, value) in self._nodes.items():
                     key_dict = nodes_by_key
                     for subkey in key[:-1]:
                         key_dict = key_dict.setdefault(subkey, {})
@@ -581,12 +574,22 @@
 
         For InMemoryGraphIndex the estimate is exact.
         """
-        return len(self._nodes) + sum(backing.key_count() for backing in
-            self._backing_indices if backing is not None)
+        return len(self._nodes) + sum(
+            backing.key_count()
+            for backing in self._backing_indices
+            if backing is not None)
 
     def validate(self):
         """In memory index's have no known corruption at the moment."""
 
+    def __lt__(self, other):
+        if isinstance(other, type(self)):
+            return self._nodes < other._nodes
+        # Always sort existing indexes before ones that are still being built.
+        if isinstance(other, BTreeGraphIndex):
+            return False
+        raise TypeError
+
 
 class _LeafNode(dict):
     """A leaf node for a serialised B+Tree index."""
@@ -596,8 +599,8 @@
     def __init__(self, bytes, key_length, ref_list_length):
         """Parse bytes to create a leaf node object."""
         # splitlines mangles the \r delimiters.. don't use it.
-        key_list = _btree_serializer._parse_leaf_lines(bytes,
-            key_length, ref_list_length)
+        key_list = _btree_serializer._parse_leaf_lines(
+            bytes, key_length, ref_list_length)
         if key_list:
             self.min_key = key_list[0][0]
             self.max_key = key_list[-1][0]
@@ -673,7 +676,7 @@
         self._base_offset = offset
         self._leaf_factory = _LeafNode
         # Default max size is 100,000 leave values
-        self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
+        self._leaf_value_cache = None  # lru_cache.LRUCache(100*1000)
         if unlimited_cache:
             self._leaf_node_cache = {}
             self._internal_node_cache = {}
@@ -685,7 +688,7 @@
             self._internal_node_cache = fifo_cache.FIFOCache(100)
         self._key_count = None
         self._row_lengths = None
-        self._row_offsets = None # Start of each row, [-1] is the end
+        self._row_offsets = None  # Start of each row, [-1] is the end
 
     def __hash__(self):
         return id(self)
@@ -693,10 +696,18 @@
     def __eq__(self, other):
         """Equal when self and other were created with the same parameters."""
         return (
-            isinstance(self, type(other)) and
-            self._transport == other._transport and
-            self._name == other._name and
-            self._size == other._size)
+            isinstance(self, type(other))
+            and self._transport == other._transport
+            and self._name == other._name
+            and self._size == other._size)
+
+    def __lt__(self, other):
+        if isinstance(other, type(self)):
+            return ((self._name, self._size) < (other._name, other._size))
+        # Always sort existing indexes before ones that are still being built.
+        if isinstance(other, BTreeBuilder):
+            return True
+        raise TypeError
 
     def __ne__(self, other):
         return not self.__eq__(other)
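Both new __lt__ methods exist for the same reason: Python 3 dropped Python 2's arbitrary fallback ordering between unrelated types, so sorting a mixed collection of finished indexes and in-progress builders would raise TypeError without them. The convention the two methods establish is that existing (on-disk) indexes sort before builders. A minimal sketch with hypothetical stand-in classes:

class FinishedIndex:           # stand-in for BTreeGraphIndex
    def __init__(self, name):
        self.name = name

    def __lt__(self, other):
        if isinstance(other, FinishedIndex):
            return self.name < other.name
        if isinstance(other, InMemoryBuilder):
            return True        # finished indexes always sort first
        return NotImplemented


class InMemoryBuilder:         # stand-in for BTreeBuilder
    def __lt__(self, other):
        if isinstance(other, InMemoryBuilder):
            return False       # stable sorts keep builder order
        if isinstance(other, FinishedIndex):
            return False       # builders sort after finished indexes
        return NotImplemented


mixed = [InMemoryBuilder(), FinishedIndex('b'), FinishedIndex('a')]
mixed.sort()                   # would raise TypeError on Python 3 without __lt__
print([type(x).__name__ for x in mixed])
# ['FinishedIndex', 'FinishedIndex', 'InMemoryBuilder']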
@@ -717,7 +728,7 @@
         found = {}
         start_of_leaves = None
         for node_pos, node in self._read_nodes(sorted(nodes)):
-            if node_pos == 0: # Special case
+            if node_pos == 0:  # Special case
                 self._root_node = node
             else:
                 if start_of_leaves is None:
@@ -791,7 +802,7 @@
             # Read whatever is left
             if cached_offsets:
                 expanded = [x for x in range(total_pages)
-                               if x not in cached_offsets]
+                            if x not in cached_offsets]
             else:
                 expanded = list(range(total_pages))
             if 'index' in debug.debug_flags:
@@ -848,16 +859,16 @@
                 if first is None:
                     first, end = self._find_layer_first_and_end(pos)
                 previous = pos - 1
-                if (previous > 0
-                    and previous not in cached_offsets
-                    and previous not in final_offsets
-                    and previous >= first):
+                if (previous > 0 and
+                    previous not in cached_offsets and
+                    previous not in final_offsets and
+                        previous >= first):
                     next_tips.add(previous)
                 after = pos + 1
-                if (after < total_pages
-                    and after not in cached_offsets
-                    and after not in final_offsets
-                    and after < end):
+                if (after < total_pages and
+                    after not in cached_offsets and
+                    after not in final_offsets and
+                        after < end):
                     next_tips.add(after)
                 # This would keep us from going bigger than
                 # recommended_pages by only expanding the first offsets.
@@ -887,7 +898,7 @@
             self._get_root_node()
         if ref_list_num + 1 > self.node_ref_lists:
             raise ValueError('No ref list %d, index has %d ref lists'
-                % (ref_list_num, self.node_ref_lists))
+                             % (ref_list_num, self.node_ref_lists))
         keys = set()
         refs = set()
         for node in self.iter_all_entries():
@@ -952,7 +963,7 @@
     def _cache_leaf_values(self, nodes):
         """Cache directly from key => value, skipping the btree."""
         if self._leaf_value_cache is not None:
-            for node in viewvalues(nodes):
+            for node in nodes.values():
                 for key, value in node.all_items():
                     if key in self._leaf_value_cache:
                         # Don't add the rest of the keys, we've seen this node
@@ -969,15 +980,16 @@
     def iter_all_entries(self):
         """Iterate over all keys within the index.
 
-        :return: An iterable of (index, key, value) or (index, key, value, reference_lists).
+        :return: An iterable of (index, key, value) or
+            (index, key, value, reference_lists).
             The former tuple is used when there are no reference lists in the
             index, making the API compatible with simple key:value index types.
             There is no defined order for the result iteration - it will be in
             the most efficient order for the index.
         """
         if 'evil' in debug.debug_flags:
-            trace.mutter_callsite(3,
-                "iter_all_entries scales with size of history.")
+            trace.mutter_callsite(
+                3, "iter_all_entries scales with size of history.")
         if not self.key_count():
             return
         if self._row_offsets[-1] == 1:
@@ -1035,7 +1047,7 @@
         #       function, so there is even more to be gained.
         # iter_steps = len(in_keys) + len(fixed_keys)
         # bisect_steps = len(in_keys) * math.log(len(fixed_keys), 2)
-        if len(in_keys) == 1: # Bisect will always be faster for M = 1
+        if len(in_keys) == 1:  # Bisect will always be faster for M = 1
            return [(bisect.bisect_right(fixed_keys, in_keys[0]), in_keys)]
         # elif bisect_steps < iter_steps:
         #     offsets = {}
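The commented cost model above is worth a concrete number: merging M sorted in_keys against N fixed_keys by linear iteration costs about M + N comparisons, while repeated bisection costs about M * log2(N). For M = 1 and N = 100 that is roughly 101 steps versus 7, which is why the single-key case short-circuits straight to bisect. A small sketch with made-up key tuples:

import bisect

fixed_keys = [(b'%04d' % i,) for i in range(100)]   # 100 sorted key tuples
in_keys = [(b'0042',)]

# iter_steps   ~ 1 + 100 = 101
# bisect_steps ~ 1 * log2(100) ~= 6.6
pos = bisect.bisect_right(fixed_keys, in_keys[0])
print(pos)   # 43: keys before index 43 sort <= (b'0042',)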
@@ -1048,8 +1060,11 @@
         cur_in_key = next(in_keys_iter)
         cur_fixed_offset, cur_fixed_key = next(fixed_keys_iter)
 
-        class InputDone(Exception): pass
-        class FixedDone(Exception): pass
+        class InputDone(Exception):
+            pass
+
+        class FixedDone(Exception):
+            pass
 
         output = []
         cur_out = []
@@ -1114,7 +1129,7 @@
                 positions = self._multi_bisect_right(sub_keys, node.keys)
                 node_offset = next_row_start + node.offset
                 next_nodes_and_keys.extend([(node_offset + pos, s_keys)
-                                           for pos, s_keys in positions])
+                                            for pos, s_keys in positions])
             keys_at_index = next_nodes_and_keys
         # We should now be at the _LeafNodes
         node_indexes = [idx for idx, s_keys in keys_at_index]
@@ -1163,7 +1178,6 @@
                 else:
                     needed_keys.append(key)
 
-        last_key = None
         needed_keys = keys
         if not needed_keys:
             return
@@ -1212,7 +1226,7 @@
             return set()
         if ref_list_num >= self.node_ref_lists:
             raise ValueError('No ref list %d, index has %d ref lists'
-                % (ref_list_num, self.node_ref_lists))
+                             % (ref_list_num, self.node_ref_lists))
 
         # The main trick we are trying to accomplish is that when we find a
         # key listing its parents, we expect that the parent key is also likely
@@ -1293,7 +1307,8 @@
                             # LeafNode
                             parents_not_on_page.add(key)
                         else:
-                            # assert key != node.min_key and key != node.max_key
+                            # assert (key != node.min_key and
+                            #         key != node.max_key)
                             # If it was going to be present, it would be on
                             # *this* page, so mark it missing.
                             missing_keys.add(key)
@@ -1438,8 +1453,9 @@
             raise index.BadIndexOptions(self)
         try:
             self._row_lengths = [int(length) for length in
-                options_line[len(_OPTION_ROW_LENGTHS):].split(b',')
-                if length]
+                                 options_line[len(_OPTION_ROW_LENGTHS):].split(
+                                     b',')
+                                 if length]
         except ValueError:
             raise index.BadIndexOptions(self)
         self._compute_row_offsets()
@@ -1480,7 +1496,8 @@
                     self._size = num_bytes - base_offset
                     # the whole thing should be parsed out of 'bytes'
                     ranges = [(start, min(_PAGE_SIZE, num_bytes - start))
-                        for start in range(base_offset, num_bytes, _PAGE_SIZE)]
+                              for start in range(
+                                  base_offset, num_bytes, _PAGE_SIZE)]
                     break
             else:
                 if offset > self._size:
@@ -1493,7 +1510,7 @@
             return
         elif bytes is not None:
             # already have the whole file
-            data_ranges = [(start, bytes[start:start+size])
+            data_ranges = [(start, bytes[start:start + size])
                            for start, size in ranges]
         elif self._file is None:
             data_ranges = self._transport.readv(self._name, ranges)