650
646
:return: A dict of {node_pos: node}
652
if len(nodes) > cache._max_cache:
653
trace.mutter('Requesting %s > %s nodes, not all will be cached',
654
len(nodes), cache._max_cache)
649
start_of_leaves = None
656
650
for node_pos, node in self._read_nodes(sorted(nodes)):
657
651
if node_pos == 0: # Special case
658
652
self._root_node = node
660
cache.add(node_pos, node)
654
if start_of_leaves is None:
655
start_of_leaves = self._row_offsets[-2]
656
if node_pos < start_of_leaves:
657
self._internal_node_cache.add(node_pos, node)
659
self._leaf_node_cache.add(node_pos, node)
661
660
found[node_pos] = node
663
def _compute_recommended_pages(self):
    """Convert transport's recommended_page_size into btree pages.

    recommended_page_size is in bytes, we want to know how many _PAGE_SIZE
    pages fit in that length.

    :return: The number of _PAGE_SIZE pages covered by one recommended
        read (a partial page is rounded up to a whole page).
    """
    recommended_read = self._transport.recommended_page_size()
    # float() forces true division under Python 2; ceil rounds a partial
    # trailing page up so the whole recommended read is covered.
    recommended_pages = int(math.ceil(recommended_read /
                                      float(_PAGE_SIZE)))
    return recommended_pages
def _compute_total_pages_in_index(self):
675
"""How many pages are in the index.
677
If we have read the header we will use the value stored there.
678
Otherwise it will be computed based on the length of the index.
680
if self._size is None:
681
raise AssertionError('_compute_total_pages_in_index should not be'
682
' called when self._size is None')
683
if self._root_node is not None:
684
# This is the number of pages as defined by the header
685
return self._row_offsets[-1]
686
# This is the number of pages as defined by the size of the index. They
687
# should be indentical.
688
total_pages = int(math.ceil(self._size / float(_PAGE_SIZE)))
691
def _expand_offsets(self, offsets):
    """Find extra pages to download.

    The idea is that we always want to make big-enough requests (like 64kB
    for http), so that we don't waste round trips. So given the entries
    that we already have cached and the new pages being downloaded figure
    out what other pages we might want to read.

    See also doc/developers/btree_index_prefetch.txt for more details.

    :param offsets: The offsets to be read
    :return: A list of offsets to download
    """
    if 'index' in debug.debug_flags:
        trace.mutter('expanding: %s\toffsets: %s', self._name, offsets)

    if len(offsets) >= self._recommended_pages:
        # Don't add more, we are already requesting more than enough
        if 'index' in debug.debug_flags:
            trace.mutter('  not expanding large request (%s >= %s)',
                         len(offsets), self._recommended_pages)
        return offsets
    if self._size is None:
        # Don't try anything, because we don't know where the file ends
        if 'index' in debug.debug_flags:
            trace.mutter('  not expanding without knowing index size')
        return offsets
    total_pages = self._compute_total_pages_in_index()
    cached_offsets = self._get_offsets_to_cached_pages()
    # If reading recommended_pages would read the rest of the index, just
    # read everything
    if total_pages - len(cached_offsets) <= self._recommended_pages:
        # Read whatever is left
        if cached_offsets:
            expanded = [x for x in xrange(total_pages)
                        if x not in cached_offsets]
        else:
            expanded = range(total_pages)
        if 'index' in debug.debug_flags:
            trace.mutter('  reading all unread pages: %s', expanded)
        return expanded

    if self._root_node is None:
        # ATM on the first read of the root node of a large index, we don't
        # bother pre-reading any other pages. This is because the
        # likelyhood of actually reading interesting pages is very low.
        # See doc/developers/btree_index_prefetch.txt for a discussion, and
        # a possible implementation when we are guessing that the second
        # layer index is small
        final_offsets = offsets
    else:
        tree_depth = len(self._row_lengths)
        if len(cached_offsets) < tree_depth and len(offsets) == 1:
            # We haven't read enough to justify expansion
            # If we are only going to read the root node, and 1 leaf node,
            # then it isn't worth expanding our request. Once we've read at
            # least 2 nodes, then we are probably doing a search, and we
            # start expanding our requests.
            if 'index' in debug.debug_flags:
                trace.mutter('  not expanding on first reads')
            final_offsets = offsets
        else:
            final_offsets = self._expand_to_neighbors(offsets, cached_offsets,
                                                      total_pages)

    final_offsets = sorted(final_offsets)
    if 'index' in debug.debug_flags:
        trace.mutter('expanded: %s', final_offsets)
    return final_offsets
def _expand_to_neighbors(self, offsets, cached_offsets, total_pages):
761
"""Expand requests to neighbors until we have enough pages.
763
This is called from _expand_offsets after policy has determined that we
765
We only want to expand requests within a given layer. We cheat a little
766
bit and assume all requests will be in the same layer. This is true
767
given the current design, but if it changes this algorithm may perform
770
:param offsets: requested offsets
771
:param cached_offsets: offsets for pages we currently have cached
772
:return: A set() of offsets after expansion
774
final_offsets = set(offsets)
776
new_tips = set(final_offsets)
777
while len(final_offsets) < self._recommended_pages and new_tips:
781
first, end = self._find_layer_first_and_end(pos)
784
and previous not in cached_offsets
785
and previous not in final_offsets
786
and previous >= first):
787
next_tips.add(previous)
789
if (after < total_pages
790
and after not in cached_offsets
791
and after not in final_offsets
794
# This would keep us from going bigger than
795
# recommended_pages by only expanding the first offsets.
796
# However, if we are making a 'wide' request, it is
797
# reasonable to expand all points equally.
798
# if len(final_offsets) > recommended_pages:
800
final_offsets.update(next_tips)
804
def _find_layer_first_and_end(self, offset):
805
"""Find the start/stop nodes for the layer corresponding to offset.
807
:return: (first, end)
808
first is the first node in this layer
809
end is the first node of the next layer
812
for roffset in self._row_offsets:
819
def _get_offsets_to_cached_pages(self):
820
"""Determine what nodes we already have cached."""
821
cached_offsets = set(self._internal_node_cache.keys())
822
cached_offsets.update(self._leaf_node_cache.keys())
823
if self._root_node is not None:
824
cached_offsets.add(0)
825
return cached_offsets
827
def _get_root_node(self):
828
if self._root_node is None:
829
# We may not have a root node yet
830
self._get_internal_nodes([0])
831
return self._root_node
664
833
def _get_nodes(self, cache, node_indexes):