/brz/remove-bazaar : revision 4595.17.2

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/btree_index.py

Committer: Martin
Date: 2009-11-07 08:02:13 UTC
mfrom: (4789 +trunk)
mto: This revision was merged to the branch mainline in revision 4809.
Revision ID: gzlist@googlemail.com-20091107080213-jad185091b3l69ih

Merge bzr.dev 4789 to resolve conflict from the disabling of plink auto-detection, and relocate NEWS

files added:
bzrlib/_export_c_api.h

bzrlib/_import_c_api.h

bzrlib/_simple_set_pyx.pxd

bzrlib/_simple_set_pyx.pyx

bzrlib/_static_tuple_c.c

bzrlib/_static_tuple_c.h

bzrlib/_static_tuple_c.pxd

bzrlib/_static_tuple_py.py

bzrlib/cleanup.py

bzrlib/crash.py

bzrlib/doc_generate/sphinx_conf.py

bzrlib/static_tuple.py

bzrlib/tests/features.py

bzrlib/tests/per_foreign_vcs

bzrlib/tests/per_foreign_vcs/__init__.py

bzrlib/tests/per_foreign_vcs/test_branch.py

bzrlib/tests/per_uifactory

bzrlib/tests/per_uifactory/__init__.py

bzrlib/tests/script.py

bzrlib/tests/test__simple_set.py

bzrlib/tests/test__static_tuple.py

bzrlib/tests/test_cleanup.py

bzrlib/tests/test_crash.py

bzrlib/tests/test_lock.py

bzrlib/tests/test_patches_data/binary.patch

bzrlib/tests/test_script.py

bzrlib/transport/pathfilter.py

doc/Bazaar-Logo-For-Manuals.png

doc/developers/_static

doc/developers/_static/bzr icon 16.png

doc/developers/_static/bzr.ico

doc/developers/_templates

doc/developers/_templates/layout.html

doc/developers/apport.txt

doc/developers/conf.py

doc/developers/content-filtering.txt

doc/developers/implementation-notes.txt

doc/developers/index-plain.txt

doc/developers/miscellaneous-notes.txt

doc/developers/plans.txt

doc/developers/process.txt

doc/developers/specifications.txt

doc/en/user-guide/index-plain.txt

doc/es/_static

doc/es/_static/bzr icon 16.png

doc/es/_static/bzr.ico

doc/es/_static/es

doc/es/_templates

doc/es/_templates/layout.html

doc/es/conf.py

doc/es/quick-reference/index.txt

doc/es/user-guide/index-plain.txt

doc/index.es.txt

doc/index.ru.txt

doc/ru/_static

doc/ru/_static/bzr icon 16.png

doc/ru/_static/bzr.ico

doc/ru/_static/ru

doc/ru/_templates

doc/ru/_templates/layout.html

doc/ru/conf.py

doc/ru/quick-reference/index.txt

doc/ru/user-guide/index-plain.txt

tools/generate_release_notes.py

tools/package_docs.py

tools/packaging/update-control.sh

files removed:
doc/BUILD-NOTES

doc/_static/en/quick-reference

doc/bazaar-vcs.org.kid

doc/en/developer-guide

doc/en/migration

doc/en/migration/index.txt

doc/en/user-guide/index.txt

doc/es/developer-guide

doc/es/release-notes

doc/es/user-reference

files renamed:
bzrlib/tests/test_pack_repository.py => bzrlib/tests/per_pack_repository.py

bzrlib/tests/test_versionedfile.py => bzrlib/tests/per_versionedfile.py

doc/en/developer-guide/HACKING.txt => doc/developers/HACKING.txt

doc/Makefile => doc/en/Makefile

doc/_static/ => doc/en/_static/

doc/_static/en/quick-reference/Makefile => doc/en/_static/en/Makefile

doc/_static/en/quick-reference/bzr-quick-reference.pdf => doc/en/_static/en/bzr-en-quick-reference.pdf

doc/_static/en/quick-reference/bzr-quick-reference.png => doc/en/_static/en/bzr-en-quick-reference.png

doc/_static/en/quick-reference/bzr-quick-reference.svg => doc/en/_static/en/bzr-en-quick-reference.svg

doc/_templates/ => doc/en/_templates/

doc/conf.py => doc/en/conf.py

doc/contents.txt => doc/en/index.txt

doc/make.bat => doc/en/make.bat

doc/en/user-guide/index-for-2x.txt => doc/en/user-guide/index.txt

doc/es/quick-reference/Makefile => doc/es/_static/es/Makefile

doc/es/quick-reference/quick-start-summary.pdf => doc/es/_static/es/bzr-es-quick-reference.pdf

doc/es/quick-reference/quick-start-summary.png => doc/es/_static/es/bzr-es-quick-reference.png

doc/es/quick-reference/quick-start-summary.svg => doc/es/_static/es/bzr-es-quick-reference.svg

doc/index.es.txt => doc/es/index.txt

doc/ru/quick-reference/Makefile => doc/ru/_static/ru/Makefile

doc/ru/quick-reference/quick-start-summary.pdf => doc/ru/_static/ru/bzr-ru-quick-reference.pdf

doc/ru/quick-reference/quick-start-summary.png => doc/ru/_static/ru/bzr-ru-quick-reference.png

doc/ru/quick-reference/quick-start-summary.svg => doc/ru/_static/ru/bzr-ru-quick-reference.svg

doc/index.ru.txt => doc/ru/index.txt

files modified:
.bzrignore

Makefile

NEWS

README

bzrlib/__init__.py

bzrlib/_bencode_pyx.pyx

bzrlib/_btree_serializer_pyx.pyx

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/_patiencediff_c.c

bzrlib/_readdir_pyx.pyx

bzrlib/annotate.py

bzrlib/bencode.py

bzrlib/branch.py

bzrlib/btree_index.py

bzrlib/builtins.py

bzrlib/bundle/apply_bundle.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/chk_map.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/doc_generate/autodoc_rstx.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/foreign.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/hooks.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/inventory_delta.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/mail_client.py

bzrlib/merge.py

bzrlib/merge_directive.py

bzrlib/missing.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patches.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/progress.py

bzrlib/python-compat.h

bzrlib/reconcile.py

bzrlib/reconfigure.py

bzrlib/registry.py

bzrlib/remote.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/send.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/smart/vfs.py

bzrlib/status.py

bzrlib/symbol_versioning.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_breakin.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/ftp_server/pyftpdlib_based.py

bzrlib/tests/http_utils.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_locking.py

bzrlib/tests/per_branch/test_permissions.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_intertree/test_compare.py

bzrlib/tests/per_inventory/__init__.py

bzrlib/tests/per_inventory/basics.py

bzrlib/tests/per_repository/test_add_fallback_repository.py

bzrlib/tests/per_repository/test_check.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_fileid_involved.py

bzrlib/tests/per_repository/test_reconcile.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_chk/__init__.py

bzrlib/tests/per_repository_chk/test_supported.py

bzrlib/tests/per_repository_reference/test_add_revision.py

bzrlib/tests/per_repository_reference/test_add_signature_text.py

bzrlib/tests/per_repository_reference/test_check.py

bzrlib/tests/per_transport.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_path_content_summary.py

bzrlib/tests/per_workingtree/test_basis_inventory.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_executable.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_parents.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_config.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_foreign.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_hooks.py

bzrlib/tests/test_http.py

bzrlib/tests/test_index.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_inventory_delta.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_mutabletree.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_reconfigure.py

bzrlib/tests/test_registry.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revisionspec.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_ssh_transport.py

bzrlib/tests/test_status.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_version.py

bzrlib/tests/test_version_info.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree_4.py

bzrlib/tests/test_wsgi.py

bzrlib/tests/test_xml.py

bzrlib/tests/transport_util.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/ftp/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/upgrade.py

bzrlib/util/_bencode_py.py

bzrlib/version.py

bzrlib/version_info_formats/format_rio.py

bzrlib/versionedfile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml5.py

doc/developers/bug-handling.txt

doc/developers/cycle.txt

doc/developers/dirstate.txt

doc/developers/index.txt

doc/developers/integration.txt

doc/developers/inventory.txt

doc/developers/network-protocol.txt

doc/developers/overview.txt

doc/developers/ppa.txt

doc/developers/releasing.txt

doc/developers/testing.txt

doc/en/_templates/index.html

doc/en/_templates/layout.html

doc/en/mini-tutorial/index.txt

doc/en/quick-reference/index.txt

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/browsing_history.txt

doc/en/user-guide/core_concepts.txt

doc/en/user-guide/filtered_views.txt

doc/en/user-guide/getting_help.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/merging_changes.txt

doc/en/user-guide/organizing_your_workspace.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/publishing_a_branch.txt

doc/en/user-guide/server.txt

doc/en/user-guide/specifying_revisions.txt

doc/en/user-guide/svn_plugin.txt

doc/en/user-guide/undoing_mistakes.txt

doc/en/user-guide/using_checkouts.txt

doc/en/user-guide/writing_a_plugin.txt

doc/es/mini-tutorial/index.txt

doc/es/user-guide/index.txt

doc/index.txt

setup.py

tools/packaging/build-packages.sh

tools/packaging/update-changelogs.sh

tools/packaging/update-packaging-branches.sh

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

Show diffs side-by-side

added added

removed removed

bzrlib/btree_index.py

"""B+Tree indices"""

import cStringIO

from bisect import bisect_right

import math

import tempfile

def __init__(self):

"""Create a _BuilderRow."""

self.nodes = 0

self.spool = tempfile.TemporaryFile()

self.spool = None# tempfile.TemporaryFile(prefix='bzr-index-row-')

self.writer = None

def finish_node(self, pad=True):

byte_lines, _, padding = self.writer.finish()

if self.nodes == 0:

self.spool = cStringIO.StringIO()

# padded note:

self.spool.write("\x00" * _RESERVED_HEADER_BYTES)

elif self.nodes == 1:

# We got bigger than 1 node, switch to a temp file

spool = tempfile.TemporaryFile(prefix='bzr-index-row-')

spool.write(self.spool.getvalue())

self.spool = spool

skipped_bytes = 0

if not pad and padding:

del byte_lines[-1]

156

163

node_refs, _ = self._check_key_ref_value(key, references, value)

157

164

if key in self._nodes:

158

165

raise errors.BadIndexDuplicateKey(key, self)

166

# TODO: StaticTuple

159

167

self._nodes[key] = (node_refs, value)

160

168

self._keys.add(key)

161

169

if self._nodes_by_key is not None and self._key_length > 1:

182

190

backing_pos) = self._spill_mem_keys_and_combine()

183

191

else:

184

192

new_backing_file, size = self._spill_mem_keys_without_combining()

185

dir_path, base_name = osutils.split(new_backing_file.name)

186

193

# Note: The transport here isn't strictly needed, because we will use

187

194

# direct access to the new_backing._file object

188

new_backing = BTreeGraphIndex(get_transport(dir_path),

189

base_name, size)

195

new_backing = BTreeGraphIndex(get_transport('.'), '<temp>', size)

190

196

# GC will clean up the file

191

197

new_backing._file = new_backing_file

192

198

if self._combine_backing_indices:

379

385

for row in reversed(rows):

380

386

pad = (type(row) != _LeafBuilderRow)

381

387

row.finish_node(pad=pad)

382

result = tempfile.NamedTemporaryFile(prefix='bzr-index-')

383

388

lines = [_BTSIGNATURE]

384

389

lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')

385

390

lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')

386

391

lines.append(_OPTION_LEN + str(key_count) + '\n')

387

392

row_lengths = [row.nodes for row in rows]

388

393

lines.append(_OPTION_ROW_LENGTHS + ','.join(map(str, row_lengths)) + '\n')

394

if row_lengths and row_lengths[-1] > 1:

395

result = tempfile.NamedTemporaryFile(prefix='bzr-index-')

396

else:

397

result = cStringIO.StringIO()

389

398

result.writelines(lines)

390

399

position = sum(map(len, lines))

391

400

root_row = True

402

411

# Special case the first node as it may be prefixed

403

412

node = row.spool.read(_PAGE_SIZE)

404

413

result.write(node[reserved:])

405

result.write("\x00" * (reserved - position))

414

if len(node) == _PAGE_SIZE:

415

result.write("\x00" * (reserved - position))

406

416

position = 0 # Only the root row actually has an offset

407

417

copied_len = osutils.pumpfile(row.spool, result)

408

418

if copied_len != (row.nodes - 1) * _PAGE_SIZE:

586

596

class _LeafNode(object):

587

597

"""A leaf node for a serialised B+Tree index."""

588

598

589

__slots__ = ('keys',)

599

__slots__ = ('keys', 'min_key', 'max_key')

590

600

591

601

def __init__(self, bytes, key_length, ref_list_length):

592

602

"""Parse bytes to create a leaf node object."""

593

603

# splitlines mangles the \r delimiters.. don't use it.

594

self.keys = dict(_btree_serializer._parse_leaf_lines(bytes,

595

key_length, ref_list_length))

604

key_list = _btree_serializer._parse_leaf_lines(bytes,

605

key_length, ref_list_length)

606

if key_list:

607

self.min_key = key_list[0][0]

608

self.max_key = key_list[-1][0]

609

else:

610

self.min_key = self.max_key = None

611

self.keys = dict(key_list)

596

612

597

613

598

614

class _InternalNode(object):

611

627

for line in lines[2:]:

612

628

if line == '':

613

629

break

630

# TODO: Switch to StaticTuple here.

614

631

nodes.append(tuple(map(intern, line.split('\0'))))

615

632

return nodes

616

633

622

639

memory except when very large walks are done.

623

640

"""

624

641

625

def __init__(self, transport, name, size):

642

def __init__(self, transport, name, size, unlimited_cache=False):

626

643

"""Create a B+Tree index object on the index name.

627

644

628

645

:param transport: The transport to read data for the index from.

632

649

the initial read (to read the root node header) can be done

633

650

without over-reading even on empty indices, and on small indices

634

651

allows single-IO to read the entire index.

652

:param unlimited_cache: If set to True, then instead of using an

653

LRUCache with size _NODE_CACHE_SIZE, we will use a dict and always

654

cache all leaf nodes.

635

655

"""

636

656

self._transport = transport

637

657

self._name = name

641

661

self._root_node = None

642

662

# Default max size is 100,000 leaf values

643

663

self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)

644

self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)

645

# We could limit this, but even a 300k record btree has only 3k leaf

646

# nodes, and only 20 internal nodes. So the default of 100 nodes in an

647

# LRU would mean we always cache everything anyway, no need to pay the

648

# overhead of LRU

649

self._internal_node_cache = fifo_cache.FIFOCache(100)

664

if unlimited_cache:

665

self._leaf_node_cache = {}

666

self._internal_node_cache = {}

667

else:

668

self._leaf_node_cache = lru_cache.LRUCache(_NODE_CACHE_SIZE)

669

# We use a FIFO here just to prevent possible blowout. However, a

670

# 300k record btree has only 3k leaf nodes, and only 20 internal

671

# nodes. A value of 100 scales to ~100*100*100 = 1M records.

672

self._internal_node_cache = fifo_cache.FIFOCache(100)

650

673

self._key_count = None

651

674

self._row_lengths = None

652

675

self._row_offsets = None # Start of each row, [-1] is the end

684

707

if start_of_leaves is None:

685

708

start_of_leaves = self._row_offsets[-2]

686

709

if node_pos < start_of_leaves:

687

self._internal_node_cache.add(node_pos, node)

710

self._internal_node_cache[node_pos] = node

688

711

else:

689

self._leaf_node_cache.add(node_pos, node)

712

self._leaf_node_cache[node_pos] = node

690

713

found[node_pos] = node

691

714

return found

692

715

831

854

new_tips = next_tips

832

855

return final_offsets

833

856

857

def clear_cache(self):

858

"""Clear out any cached/memoized values.

859

860

This can be called at any time, but generally it is used when we have

861

extracted some information, but don't expect to be requesting any more

862

from this index.

863

"""

864

# Note that we don't touch self._root_node or self._internal_node_cache

865

# We don't expect either of those to be big, and it can save

866

# round-trips in the future. We may re-evaluate this if InternalNode

867

# memory starts to be an issue.

868

self._leaf_node_cache.clear()

869

834

870

def external_references(self, ref_list_num):

835

871

if self._root_node is None:

836

872

self._get_root_node()

1039

1075

output.append(cur_out)

1040

1076

return output

1041

1077

1078

def _walk_through_internal_nodes(self, keys):

1079

"""Take the given set of keys, and find the corresponding LeafNodes.

1080

1081

:param keys: An unsorted iterable of keys to search for

1082

:return: (nodes, keys_at_index)

1083

nodes is a dict mapping {index: LeafNode}

1084

keys_at_index is a list of tuples of [(index, [keys for Leaf])]

1085

"""

1086

# 6 seconds spent in miss_torture using the sorted() line.

1087

# Even with out of order disk IO it seems faster not to sort it when

1088

# large queries are being made.

1089

keys_at_index = [(0, sorted(keys))]

1090

1091

for row_pos, next_row_start in enumerate(self._row_offsets[1:-1]):

1092

node_indexes = [idx for idx, s_keys in keys_at_index]

1093

nodes = self._get_internal_nodes(node_indexes)

1094

1095

next_nodes_and_keys = []

1096

for node_index, sub_keys in keys_at_index:

1097

node = nodes[node_index]

1098

positions = self._multi_bisect_right(sub_keys, node.keys)

1099

node_offset = next_row_start + node.offset

1100

next_nodes_and_keys.extend([(node_offset + pos, s_keys)

1101

for pos, s_keys in positions])

1102

keys_at_index = next_nodes_and_keys

1103

# We should now be at the _LeafNodes

1104

node_indexes = [idx for idx, s_keys in keys_at_index]

1105

1106

# TODO: We may *not* want to always read all the nodes in one

1107

# big go. Consider setting a max size on this.

1108

nodes = self._get_leaf_nodes(node_indexes)

1109

return nodes, keys_at_index

1110

1042

1111

def iter_entries(self, keys):

1043

1112

"""Iterate over keys within the index.

1044

1113

1082

1151

needed_keys = keys

1083

1152

if not needed_keys:

1084

1153

return

1085

# 6 seconds spent in miss_torture using the sorted() line.

1086

# Even with out of order disk IO it seems faster not to sort it when

1087

# large queries are being made.

1088

needed_keys = sorted(needed_keys)

1089

1090

nodes_and_keys = [(0, needed_keys)]

1091

1092

for row_pos, next_row_start in enumerate(self._row_offsets[1:-1]):

1093

node_indexes = [idx for idx, s_keys in nodes_and_keys]

1094

nodes = self._get_internal_nodes(node_indexes)

1095

1096

next_nodes_and_keys = []

1097

for node_index, sub_keys in nodes_and_keys:

1098

node = nodes[node_index]

1099

positions = self._multi_bisect_right(sub_keys, node.keys)

1100

node_offset = next_row_start + node.offset

1101

next_nodes_and_keys.extend([(node_offset + pos, s_keys)

1102

for pos, s_keys in positions])

1103

nodes_and_keys = next_nodes_and_keys

1104

# We should now be at the _LeafNodes

1105

node_indexes = [idx for idx, s_keys in nodes_and_keys]

1106

1107

# TODO: We may *not* want to always read all the nodes in one

1108

# big go. Consider setting a max size on this.

1109

1110

nodes = self._get_leaf_nodes(node_indexes)

1154

nodes, nodes_and_keys = self._walk_through_internal_nodes(needed_keys)

1111

1155

for node_index, sub_keys in nodes_and_keys:

1112

1156

if not sub_keys:

1113

1157

continue

1120

1164

else:

1121

1165

yield (self, next_sub_key, value)

1122

1166

1167

def _find_ancestors(self, keys, ref_list_num, parent_map, missing_keys):

1168

"""Find the parent_map information for the set of keys.

1169

1170

This populates the parent_map dict and missing_keys set based on the

1171

queried keys. It also can fill out an arbitrary number of parents that

1172

it finds while searching for the supplied keys.

1173

1174

It is unlikely that you want to call this directly. See

1175

"CombinedGraphIndex.find_ancestry()" for a more appropriate API.

1176

1177

:param keys: The keys whose ancestry we want to return

1178

Every key will either end up in 'parent_map' or 'missing_keys'.

1179

:param ref_list_num: This index in the ref_lists is the parents we

1180

care about.

1181

:param parent_map: {key: parent_keys} for keys that are present in this

1182

index. This may contain more entries than were in 'keys', that are

1183

reachable ancestors of the keys requested.

1184

:param missing_keys: keys which are known to be missing in this index.

1185

This may include parents that were not directly requested, but we

1186

were able to determine that they are not present in this index.

1187

:return: search_keys parents that were found but not queried to know

1188

if they are missing or present. Callers can re-query this index for

1189

those keys, and they will be placed into parent_map or missing_keys

1190

"""

1191

if not self.key_count():

1192

# We use key_count() to trigger reading the root node and

1193

# determining info about this BTreeGraphIndex

1194

# If we don't have any keys, then everything is missing

1195

missing_keys.update(keys)

1196

return set()

1197

if ref_list_num >= self.node_ref_lists:

1198

raise ValueError('No ref list %d, index has %d ref lists'

1199

% (ref_list_num, self.node_ref_lists))

1200

1201

# The main trick we are trying to accomplish is that when we find a

1202

# key listing its parents, we expect that the parent key is also likely

1203

# to sit on the same page. Allowing us to expand parents quickly

1204

# without suffering the full stack of bisecting, etc.

1205

nodes, nodes_and_keys = self._walk_through_internal_nodes(keys)

1206

1207

# These are parent keys which could not be immediately resolved on the

1208

# page where the child was present. Note that we may already be

1209

# searching for that key, and it may actually be present [or known

1210

# missing] on one of the other pages we are reading.

1211

# TODO:

1212

# We could try searching for them in the immediate previous or next

1213

# page. If they occur "later" we could put them in a pending lookup

1214

# set, and then for each node we read thereafter we could check to

1215

# see if they are present.

1216

# However, we don't know the impact of keeping this list of things

1217

# that I'm going to search for every node I come across from here on

1218

# out.

1219

# It doesn't handle the case when the parent key is missing on a

1220

# page that we *don't* read. So we already have to handle being

1221

# re-entrant for that.

1222

# Since most keys contain a date string, they are more likely to be

1223

# found earlier in the file than later, but we would know that right

1224

# away (key < min_key), and wouldn't keep searching it on every other

1225

# page that we read.

1226

# Mostly, it is an idea, one which should be benchmarked.

1227

parents_not_on_page = set()

1228

1229

for node_index, sub_keys in nodes_and_keys:

1230

if not sub_keys:

1231

continue

1232

# sub_keys is all of the keys we are looking for that should exist

1233

# on this page, if they aren't here, then they won't be found

1234

node = nodes[node_index]

1235

node_keys = node.keys

1236

parents_to_check = set()

1237

for next_sub_key in sub_keys:

1238

if next_sub_key not in node_keys:

1239

# This one is just not present in the index at all

1240

missing_keys.add(next_sub_key)

1241

else:

1242

value, refs = node_keys[next_sub_key]

1243

parent_keys = refs[ref_list_num]

1244

parent_map[next_sub_key] = parent_keys

1245

parents_to_check.update(parent_keys)

1246

# Don't look for things we've already found

1247

parents_to_check = parents_to_check.difference(parent_map)

1248

# this can be used to test the benefit of having the check loop

1249

# inlined.

1250

# parents_not_on_page.update(parents_to_check)

1251

# continue

1252

while parents_to_check:

1253

next_parents_to_check = set()

1254

for key in parents_to_check:

1255

if key in node_keys:

1256

value, refs = node_keys[key]

1257

parent_keys = refs[ref_list_num]

1258

parent_map[key] = parent_keys

1259

next_parents_to_check.update(parent_keys)

1260

else:

1261

# This parent either is genuinely missing, or should be

1262

# found on another page. Perf test whether it is better

1263

# to check if this node should fit on this page or not.

1264

# in the 'everything-in-one-pack' scenario, this *not*

1265

# doing the check is 237ms vs 243ms.

1266

# So slightly better, but I assume the standard 'lots

1267

# of packs' is going to show a reasonable improvement

1268

# from the check, because it avoids 'going around

1269

# again' for everything that is in another index

1270

# parents_not_on_page.add(key)

1271

# Missing for some reason

1272

if key < node.min_key:

1273

# in the case of bzr.dev, 3.4k/5.3k misses are

1274

# 'earlier' misses (65%)

1275

parents_not_on_page.add(key)

1276

elif key > node.max_key:

1277

# This parent key would be present on a different

1278

# LeafNode

1279

parents_not_on_page.add(key)

1280

else:

1281

# assert key != node.min_key and key != node.max_key

1282

# If it was going to be present, it would be on

1283

# *this* page, so mark it missing.

1284

missing_keys.add(key)

1285

parents_to_check = next_parents_to_check.difference(parent_map)

1286

# Might want to do another .difference() from missing_keys

1287

# parents_not_on_page could have been found on a different page, or be

1288

# known to be missing. So cull out everything that has already been

1289

# found.

1290

search_keys = parents_not_on_page.difference(

1291

parent_map).difference(missing_keys)

1292

return search_keys

1293

1123

1294

def iter_entries_prefix(self, keys):

1124

1295

"""Iterate over keys within the index using prefix matching.

1125

1296

1385

1556

1386

1557

try:

1387

1558

from bzrlib import _btree_serializer_pyx as _btree_serializer

1388

except ImportError:

1559

except ImportError, e:

1560

osutils.failed_to_load_extension(e)

1389

1561

from bzrlib import _btree_serializer_py as _btree_serializer

Older »