/brz/remove-bazaar : revision 4422.1.3

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/chk_map.py

Committer: John Arbash Meinel
Date: 2009-07-29 21:35:05 UTC
mfrom: (4576 +trunk)
mto: This revision was merged to the branch mainline in revision 4577.
Revision ID: john@arbash-meinel.com-20090729213505-tkqsvy1zfpocu75w

Merge bzr.dev 4576 in prep for NEWS

files added:
bzrlib/_annotator_py.py

bzrlib/_annotator_pyx.pyx

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/help_topics/en/diverged-branches.txt

bzrlib/plugins/launchpad/test_lp_login.py

bzrlib/tests/per_repository_reference/test_get_record_stream.py

bzrlib/tests/per_repository_reference/test_get_rev_id_for_revno.py

bzrlib/tests/per_workingtree/test_annotate_iter.py

bzrlib/tests/test__annotator.py

bzrlib/tests/test__known_graph.py

bzrlib/util/bencode.py

doc/BUILD-NOTES

doc/Makefile

doc/_static

doc/_static/bzr icon 16.png

doc/_static/bzr.ico

doc/_static/en

doc/_static/en/quick-reference

doc/_templates

doc/_templates/index.html

doc/_templates/layout.html

doc/conf.py

doc/contents.txt

doc/developers/bug-handling.txt

doc/en/migration

doc/en/migration/index.txt

doc/en/quick-reference/index.txt

doc/en/tutorials/index.txt

doc/en/upgrade-guide

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/upgrade-guide/overview.txt

doc/en/upgrade-guide/tips_and_tricks.txt

doc/en/user-guide/index-for-2x.txt

doc/es/quick-reference/quick-start-summary.pdf

doc/es/quick-reference/quick-start-summary.png

doc/index.ru.txt

doc/make.bat

doc/ru

doc/ru/mini-tutorial

doc/ru/mini-tutorial/index.txt

doc/ru/quick-reference

doc/ru/quick-reference/Makefile

doc/ru/quick-reference/quick-start-summary.pdf

doc/ru/quick-reference/quick-start-summary.png

doc/ru/quick-reference/quick-start-summary.svg

doc/ru/tutorials

doc/ru/tutorials/centralized_workflow.txt

doc/ru/tutorials/tutorial.txt

doc/ru/tutorials/using_bazaar_with_launchpad.txt

doc/ru/user-guide

doc/ru/user-guide/branching_a_project.txt

doc/ru/user-guide/core_concepts.txt

doc/ru/user-guide/images

doc/ru/user-guide/images/workflows_centralized.png

doc/ru/user-guide/images/workflows_centralized.svg

doc/ru/user-guide/images/workflows_gatekeeper.png

doc/ru/user-guide/images/workflows_gatekeeper.svg

doc/ru/user-guide/images/workflows_localcommit.png

doc/ru/user-guide/images/workflows_localcommit.svg

doc/ru/user-guide/images/workflows_peer.png

doc/ru/user-guide/images/workflows_peer.svg

doc/ru/user-guide/images/workflows_pqm.png

doc/ru/user-guide/images/workflows_pqm.svg

doc/ru/user-guide/images/workflows_shared.png

doc/ru/user-guide/images/workflows_shared.svg

doc/ru/user-guide/images/workflows_single.png

doc/ru/user-guide/images/workflows_single.svg

doc/ru/user-guide/index.txt

doc/ru/user-guide/introducing_bazaar.txt

doc/ru/user-guide/specifying_revisions.txt

doc/ru/user-guide/stacked.txt

doc/ru/user-guide/using_checkouts.txt

doc/ru/user-guide/zen.txt

tools/time_graph.py

tools/win32/bootstrap.py

tools/win32/buildout-templates

tools/win32/buildout-templates/bin

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

files removed:
doc/developers/performance-contributing.txt

files renamed:
bzrlib/_btree_serializer_c.pyx => bzrlib/_btree_serializer_pyx.pyx

bzrlib/_dirstate_helpers_c.h => bzrlib/_dirstate_helpers_pyx.h

bzrlib/_dirstate_helpers_c.pyx => bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_knit_load_data_c.pyx => bzrlib/_knit_load_data_pyx.pyx

tools/doc_generate/ => bzrlib/doc_generate/

bzrlib/tests/branch_implementations/ => bzrlib/tests/per_branch/

bzrlib/tests/bzrdir_implementations/ => bzrlib/tests/per_bzrdir/

bzrlib/tests/interrepository_implementations/ => bzrlib/tests/per_interrepository/

bzrlib/tests/intertree_implementations/ => bzrlib/tests/per_intertree/

bzrlib/tests/inventory_implementations/ => bzrlib/tests/per_inventory/

bzrlib/tests/test_transport_implementations.py => bzrlib/tests/per_transport.py

bzrlib/tests/tree_implementations/ => bzrlib/tests/per_tree/

bzrlib/tests/workingtree_implementations/ => bzrlib/tests/per_workingtree/

doc/en/quick-reference/Makefile => doc/_static/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.pdf => doc/_static/en/quick-reference/bzr-quick-reference.pdf

doc/en/quick-reference/quick-start-summary.png => doc/_static/en/quick-reference/bzr-quick-reference.png

doc/en/quick-reference/quick-start-summary.svg => doc/_static/en/quick-reference/bzr-quick-reference.svg

doc/es/guia-desarrollador/ => doc/es/developer-guide/

doc/es/referencia-rapida/ => doc/es/quick-reference/

doc/es/referencia-rapida/referencia-rapida.svg => doc/es/quick-reference/quick-start-summary.svg

doc/es/notas-version/ => doc/es/release-notes/

doc/es/guia-usuario/ => doc/es/user-guide/

doc/es/referencia/ => doc/es/user-reference/

generate_docs.py => tools/generate_docs.py

files modified:
.bzrignore

Makefile

NEWS

bzrlib/__init__.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_readdir_pyx.pyx

bzrlib/annotate.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_knit.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/btree_index.py

bzrlib/bugtracker.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bzrdir.py

bzrlib/chk_map.py

bzrlib/chk_serializer.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/dirstate.py

bzrlib/doc_generate/__init__.py

bzrlib/doc_generate/autodoc_bash_completion.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/doc_generate/autodoc_rstx.py

bzrlib/errors.py

bzrlib/export/dir_exporter.py

bzrlib/fetch.py

bzrlib/filters/__init__.py

bzrlib/foreign.py

bzrlib/graph.py

bzrlib/groupcompress.py

bzrlib/help.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/debug-flags.txt

bzrlib/help_topics/en/eol.txt

bzrlib/hooks.py

bzrlib/index.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lru_cache.py

bzrlib/merge.py

bzrlib/mutabletree.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/reconfigure.py

bzrlib/remote.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisiontree.py

bzrlib/send.py

bzrlib/serializer.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/shellcomplete.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/medium.py

bzrlib/smart/message.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/switch.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_send.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/http_server.py

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_break_lock.py

bzrlib/tests/per_branch/test_check.py

bzrlib/tests/per_branch/test_commit.py

bzrlib/tests/per_branch/test_create_checkout.py

bzrlib/tests/per_branch/test_create_clone.py

bzrlib/tests/per_branch/test_dotted_revno_to_revision_id.py

bzrlib/tests/per_branch/test_get_revision_id_to_revno_map.py

bzrlib/tests/per_branch/test_http.py

bzrlib/tests/per_branch/test_iter_merge_sorted_revisions.py

bzrlib/tests/per_branch/test_locking.py

bzrlib/tests/per_branch/test_pull.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_reconcile.py

bzrlib/tests/per_branch/test_revision_history.py

bzrlib/tests/per_branch/test_revision_id_to_dotted_revno.py

bzrlib/tests/per_branch/test_revision_id_to_revno.py

bzrlib/tests/per_branch/test_sprout.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_branch/test_tags.py

bzrlib/tests/per_branch/test_uncommit.py

bzrlib/tests/per_branch/test_update.py

bzrlib/tests/per_bzrdir/__init__.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_bzrdir/test_push.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_interrepository/test_interrepository.py

bzrlib/tests/per_intertree/__init__.py

bzrlib/tests/per_intertree/test_compare.py

bzrlib/tests/per_inventory/__init__.py

bzrlib/tests/per_inventory/basics.py

bzrlib/tests/per_lock/__init__.py

bzrlib/tests/per_lock/test_lock.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_add_inventory_by_delta.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository/test_pack.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_repository/test_write_group.py

bzrlib/tests/per_repository_reference/__init__.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_annotate_iter.py

bzrlib/tests/per_tree/test_get_file_mtime.py

bzrlib/tests/per_tree/test_get_file_with_stat.py

bzrlib/tests/per_tree/test_get_root_id.py

bzrlib/tests/per_tree/test_get_symlink_target.py

bzrlib/tests/per_tree/test_inv.py

bzrlib/tests/per_tree/test_iter_search_rules.py

bzrlib/tests/per_tree/test_list_files.py

bzrlib/tests/per_tree/test_path_content_summary.py

bzrlib/tests/per_tree/test_revision_tree.py

bzrlib/tests/per_tree/test_test_trees.py

bzrlib/tests/per_tree/test_tree.py

bzrlib/tests/per_tree/test_walkdirs.py

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_add.py

bzrlib/tests/per_workingtree/test_add_reference.py

bzrlib/tests/per_workingtree/test_basis_inventory.py

bzrlib/tests/per_workingtree/test_basis_tree.py

bzrlib/tests/per_workingtree/test_break_lock.py

bzrlib/tests/per_workingtree/test_changes_from.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_eol_conversion.py

bzrlib/tests/per_workingtree/test_executable.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_get_file_mtime.py

bzrlib/tests/per_workingtree/test_get_parent_ids.py

bzrlib/tests/per_workingtree/test_inv.py

bzrlib/tests/per_workingtree/test_is_control_filename.py

bzrlib/tests/per_workingtree/test_is_ignored.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_merge_from_branch.py

bzrlib/tests/per_workingtree/test_mkdir.py

bzrlib/tests/per_workingtree/test_move.py

bzrlib/tests/per_workingtree/test_nested_specifics.py

bzrlib/tests/per_workingtree/test_parents.py

bzrlib/tests/per_workingtree/test_paths2ids.py

bzrlib/tests/per_workingtree/test_pull.py

bzrlib/tests/per_workingtree/test_put_file.py

bzrlib/tests/per_workingtree/test_read_working_inventory.py

bzrlib/tests/per_workingtree/test_readonly.py

bzrlib/tests/per_workingtree/test_remove.py

bzrlib/tests/per_workingtree/test_rename_one.py

bzrlib/tests/per_workingtree/test_revision_tree.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/per_workingtree/test_uncommit.py

bzrlib/tests/per_workingtree/test_unversion.py

bzrlib/tests/per_workingtree/test_views.py

bzrlib/tests/per_workingtree/test_walkdirs.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_btree_index.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_eol_filters.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_export.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_filters.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_generate_docs.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_groupcompress.py

bzrlib/tests/test_help.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_request.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport_log.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/test_workingtree_4.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/ftp/__init__.py

bzrlib/transport/local.py

bzrlib/transport/log.py

bzrlib/transport/memory.py

bzrlib/transport/ssh.py

bzrlib/transport/trace.py

bzrlib/tree.py

bzrlib/tuned_gzip.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/urlutils.py

bzrlib/versionedfile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml8.py

bzrlib/xml_serializer.py

doc/developers/cycle.txt

doc/developers/index.txt

doc/developers/inventory.txt

doc/developers/performance-roadmap.txt

doc/developers/planned-change-integration.txt

doc/developers/releasing.txt

doc/en/developer-guide/HACKING.txt

doc/en/tutorials/centralized_workflow.txt

doc/en/tutorials/tutorial.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/user-guide/images/workflows_centralized.png

doc/en/user-guide/images/workflows_gatekeeper.png

doc/en/user-guide/images/workflows_localcommit.png

doc/en/user-guide/images/workflows_peer.png

doc/en/user-guide/images/workflows_pqm.png

doc/en/user-guide/images/workflows_shared.png

doc/en/user-guide/images/workflows_single.png

doc/en/user-guide/index.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/stacked.txt

doc/es/mini-tutorial/index.txt

doc/es/quick-reference/Makefile

doc/index.es.txt

doc/index.txt

setup.py

tools/win32/build_release.py

tools/win32/ostools.py

Show diffs side-by-side

added added

removed removed

bzrlib/chk_map.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

Updates to a CHKMap are done preferentially via the apply_delta method, to

allow optimisation of the update operation; but individual map/unmap calls are

possible and supported. All changes via map/unmap are buffered in memory until

the _save method is called to force serialisation of the tree. apply_delta

performs a _save implicitly.

possible and supported. Individual changes via map/unmap are buffered in memory

until the _save method is called to force serialisation of the tree.

apply_delta records its changes immediately by performing an implicit _save.

TODO:

-----

"""

import heapq

import time

from bzrlib import lazy_import

lazy_import.lazy_import(globals(), """

from bzrlib import versionedfile

from bzrlib import (

errors,

versionedfile,

)

""")

from bzrlib import (

errors,

lru_cache,

osutils,

registry,

107

108

of old_key is removed.

108

109

"""

109

110

delete_count = 0

111

# Check preconditions first.

112

new_items = set([key for (old, key, value) in delta if key is not None

113

and old is None])

114

existing_new = list(self.iteritems(key_filter=new_items))

115

if existing_new:

116

raise errors.InconsistentDeltaDelta(delta,

117

"New items are already in the map %r." % existing_new)

118

# Now apply changes.

110

119

for old, new, value in delta:

111

120

if old is not None and old != new:

112

121

self.unmap(old, check_remap=False)

121

130

122

131

def _ensure_root(self):

123

132

"""Ensure that the root node is an object not a key."""

124

if type(self._root_node) == tuple:

133

if type(self._root_node) is tuple:

125

134

# Demand-load the root

126

135

self._root_node = self._get_node(self._root_node)

127

136

135

144

:param node: A tuple key or node object.

136

145

:return: A node object.

137

146

"""

138

if type(node) == tuple:

147

if type(node) is tuple:

139

148

bytes = self._read_bytes(node)

140

149

return _deserialise(bytes, node,

141

150

search_key_func=self._search_key_func)

203

212

multiple pages.

204

213

:return: The root chk of the resulting CHKMap.

205

214

"""

206

result = CHKMap(store, None, search_key_func=search_key_func)

215

root_key = klass._create_directly(store, initial_value,

216

maximum_size=maximum_size, key_width=key_width,

217

search_key_func=search_key_func)

218

return root_key

219

220

@classmethod

221

def _create_via_map(klass, store, initial_value, maximum_size=0,

222

key_width=1, search_key_func=None):

223

result = klass(store, None, search_key_func=search_key_func)

207

224

result._root_node.set_maximum_size(maximum_size)

208

225

result._root_node._key_width = key_width

209

226

delta = []

210

227

for key, value in initial_value.items():

211

228

delta.append((None, key, value))

212

return result.apply_delta(delta)

229

root_key = result.apply_delta(delta)

230

return root_key

231

232

@classmethod

233

def _create_directly(klass, store, initial_value, maximum_size=0,

234

key_width=1, search_key_func=None):

235

node = LeafNode(search_key_func=search_key_func)

236

node.set_maximum_size(maximum_size)

237

node._key_width = key_width

238

node._items = dict(initial_value)

239

node._raw_size = sum([node._key_value_len(key, value)

240

for key,value in initial_value.iteritems()])

241

node._len = len(node._items)

242

node._compute_search_prefix()

243

node._compute_serialised_prefix()

244

if (node._len > 1

245

and maximum_size

246

and node._current_size() > maximum_size):

247

prefix, node_details = node._split(store)

248

if len(node_details) == 1:

249

raise AssertionError('Failed to split using node._split')

250

node = InternalNode(prefix, search_key_func=search_key_func)

251

node.set_maximum_size(maximum_size)

252

node._key_width = key_width

253

for split, subnode in node_details:

254

node.add_node(split, subnode)

255

keys = list(node.serialise(store))

256

return keys[-1]

213

257

214

258

def iter_changes(self, basis):

215

259

"""Iterate over the changes between basis and self.

449

493

return len(self._root_node)

450

494

451

495

def map(self, key, value):

452

"""Map a key tuple to value."""

496

"""Map a key tuple to value.

497

498

:param key: A key to map.

499

:param value: The value to assign to key.

500

"""

453

501

# Need a root object.

454

502

self._ensure_root()

455

503

prefix, node_details = self._root_node.map(self._store, key, value)

465

513

466

514

def _node_key(self, node):

467

515

"""Get the key for a node whether it's a tuple or node."""

468

if type(node) == tuple:

516

if type(node) is tuple:

469

517

return node

470

518

else:

471

519

return node._key

491

539

492

540

:return: The key of the root node.

493

541

"""

494

if type(self._root_node) == tuple:

542

if type(self._root_node) is tuple:

495

543

# Already saved.

496

544

return self._root_node

497

545

keys = list(self._root_node.serialise(self._store))

764

812

result[prefix] = node

765

813

else:

766

814

node = result[prefix]

767

node.map(store, key, value)

815

sub_prefix, node_details = node.map(store, key, value)

816

if len(node_details) > 1:

817

if prefix != sub_prefix:

818

# This node has been split and is now found via a different

819

# path

820

result.pop(prefix)

821

new_node = InternalNode(sub_prefix,

822

search_key_func=self._search_key_func)

823

new_node.set_maximum_size(self._maximum_size)

824

new_node._key_width = self._key_width

825

for split, node in node_details:

826

new_node.add_node(split, node)

827

result[prefix] = new_node

768

828

return common_prefix, result.items()

769

829

770

830

def map(self, store, key, value):

955

1015

# prefix is the key in self._items to use, key_filter is the key_filter

956

1016

# entries that would match this node

957

1017

keys = {}

1018

shortcut = False

958

1019

if key_filter is None:

1020

# yielding all nodes, yield whatever we have, and queue up a read

1021

# for whatever we are missing

1022

shortcut = True

959

1023

for prefix, node in self._items.iteritems():

960

if type(node) == tuple:

1024

if node.__class__ is tuple:

961

1025

keys[node] = (prefix, None)

962

1026

else:

963

1027

yield node, None

964

else:

965

# XXX defaultdict ?

1028

elif len(key_filter) == 1:

1029

# Technically, this path could also be handled by the first check

1030

# in 'self._node_width' in length_filters. However, we can handle

1031

# this case without spending any time building up the

1032

# prefix_to_keys, etc state.

1033

1034

# This is a bit ugly, but TIMEIT showed it to be by far the fastest

1035

# 0.626us list(key_filter)[0]

1036

# is a func() for list(), 2 mallocs, and a getitem

1037

# 0.489us [k for k in key_filter][0]

1038

# still has the mallocs, avoids the func() call

1039

# 0.350us iter(key_filter).next()

1040

# has a func() call, and mallocs an iterator

1041

# 0.125us for key in key_filter: pass

1042

# no func() overhead, might malloc an iterator

1043

# 0.105us for key in key_filter: break

1044

# no func() overhead, might malloc an iterator, probably

1045

# avoids checking an 'else' clause as part of the for

1046

for key in key_filter:

1047

break

1048

search_prefix = self._search_prefix_filter(key)

1049

if len(search_prefix) == self._node_width:

1050

# This item will match exactly, so just do a dict lookup, and

1051

# see what we can return

1052

shortcut = True

1053

try:

1054

node = self._items[search_prefix]

1055

except KeyError:

1056

# A given key can only match 1 child node, if it isn't

1057

# there, then we can just return nothing

1058

return

1059

if node.__class__ is tuple:

1060

keys[node] = (search_prefix, [key])

1061

else:

1062

# This is loaded, and the only thing that can match,

1063

# return

1064

yield node, [key]

1065

return

1066

if not shortcut:

1067

# First, convert all keys into a list of search prefixes

1068

# Aggregate common prefixes, and track the keys they come from

966

1069

prefix_to_keys = {}

967

1070

length_filters = {}

968

1071

for key in key_filter:

969

search_key = self._search_prefix_filter(key)

1072

search_prefix = self._search_prefix_filter(key)

970

1073

length_filter = length_filters.setdefault(

971

len(search_key), set())

972

length_filter.add(search_key)

973

prefix_to_keys.setdefault(search_key, []).append(key)

974

length_filters = length_filters.items()

975

for prefix, node in self._items.iteritems():

976

node_key_filter = []

977

for length, length_filter in length_filters:

978

sub_prefix = prefix[:length]

979

if sub_prefix in length_filter:

980

node_key_filter.extend(prefix_to_keys[sub_prefix])

981

if node_key_filter: # this key matched something, yield it

982

if type(node) == tuple:

983

keys[node] = (prefix, node_key_filter)

1074

len(search_prefix), set())

1075

length_filter.add(search_prefix)

1076

prefix_to_keys.setdefault(search_prefix, []).append(key)

1077

1078

if (self._node_width in length_filters

1079

and len(length_filters) == 1):

1080

# all of the search prefixes match exactly _node_width. This

1081

# means that everything is an exact match, and we can do a

1082

# lookup into self._items, rather than iterating over the items

1083

# dict.

1084

search_prefixes = length_filters[self._node_width]

1085

for search_prefix in search_prefixes:

1086

try:

1087

node = self._items[search_prefix]

1088

except KeyError:

1089

# We can ignore this one

1090

continue

1091

node_key_filter = prefix_to_keys[search_prefix]

1092

if node.__class__ is tuple:

1093

keys[node] = (search_prefix, node_key_filter)

984

1094

else:

985

1095

yield node, node_key_filter

1096

else:

1097

# The slow way. We walk every item in self._items, and check to

1098

# see if there are any matches

1099

length_filters = length_filters.items()

1100

for prefix, node in self._items.iteritems():

1101

node_key_filter = []

1102

for length, length_filter in length_filters:

1103

sub_prefix = prefix[:length]

1104

if sub_prefix in length_filter:

1105

node_key_filter.extend(prefix_to_keys[sub_prefix])

1106

if node_key_filter: # this key matched something, yield it

1107

if node.__class__ is tuple:

1108

keys[node] = (prefix, node_key_filter)

1109

else:

1110

yield node, node_key_filter

986

1111

if keys:

987

1112

# Look in the page cache for some more bytes

988

1113

found_keys = set()

1117

1242

:return: An iterable of the keys inserted by this operation.

1118

1243

"""

1119

1244

for node in self._items.itervalues():

1120

if type(node) == tuple:

1245

if type(node) is tuple:

1121

1246

# Never deserialised.

1122

1247

continue

1123

1248

if node._key is not None:

1134

1259

lines.append('%s\n' % (self._search_prefix,))

1135

1260

prefix_len = len(self._search_prefix)

1136

1261

for prefix, node in sorted(self._items.items()):

1137

if type(node) == tuple:

1262

if type(node) is tuple:

1138

1263

key = node[0]

1139

1264

else:

1140

1265

key = node._key[0]

1179

1304

raise AssertionError("unserialised nodes have no refs.")

1180

1305

refs = []

1181

1306

for value in self._items.itervalues():

1182

if type(value) == tuple:

1307

if type(value) is tuple:

1183

1308

refs.append(value)

1184

1309

else:

1185

1310

refs.append(value.key())

1286

1411

return node

1287

1412

1288

1413

1289

def _find_children_info(store, interesting_keys, uninteresting_keys, pb):

1290

"""Read the associated records, and determine what is interesting."""

1291

uninteresting_keys = set(uninteresting_keys)

1292

chks_to_read = uninteresting_keys.union(interesting_keys)

1293

next_uninteresting = set()

1294

next_interesting = set()

1295

uninteresting_items = set()

1296

interesting_items = set()

1297

interesting_to_yield = []

1298

for record in store.get_record_stream(chks_to_read, 'unordered', True):

1299

# records_read.add(record.key())

1300

if pb is not None:

1301

pb.tick()

1302

bytes = record.get_bytes_as('fulltext')

1303

# We don't care about search_key_func for this code, because we only

1304

# care about external references.

1305

node = _deserialise(bytes, record.key, search_key_func=None)

1306

if record.key in uninteresting_keys:

1307

if type(node) is InternalNode:

1308

next_uninteresting.update(node.refs())

1309

else:

1310

# We know we are at a LeafNode, so we can pass None for the

1311

# store

1312

uninteresting_items.update(node.iteritems(None))

1313

else:

1314

interesting_to_yield.append(record.key)

1315

if type(node) is InternalNode:

1316

next_interesting.update(node.refs())

1317

else:

1318

interesting_items.update(node.iteritems(None))

1319

return (next_uninteresting, uninteresting_items,

1320

next_interesting, interesting_to_yield, interesting_items)

1321

1322

1323

def _find_all_uninteresting(store, interesting_root_keys,

1324

uninteresting_root_keys, pb):

1325

"""Determine the full set of uninteresting keys."""

1326

# What about duplicates between interesting_root_keys and

1327

# uninteresting_root_keys?

1328

if not uninteresting_root_keys:

1329

# Shortcut case. We know there is nothing uninteresting to filter out

1330

# So we just let the rest of the algorithm do the work

1331

# We know there is nothing uninteresting, and we didn't have to read

1332

# any interesting records yet.

1333

return (set(), set(), set(interesting_root_keys), [], set())

1334

all_uninteresting_chks = set(uninteresting_root_keys)

1335

all_uninteresting_items = set()

1336

1337

# First step, find the direct children of both the interesting and

1338

# uninteresting set

1339

(uninteresting_keys, uninteresting_items,

1340

interesting_keys, interesting_to_yield,

1341

interesting_items) = _find_children_info(store, interesting_root_keys,

1342

uninteresting_root_keys,

1343

pb=pb)

1344

all_uninteresting_chks.update(uninteresting_keys)

1345

all_uninteresting_items.update(uninteresting_items)

1346

del uninteresting_items

1347

# Note: Exact matches between interesting and uninteresting do not need

1348

# to be search further. Non-exact matches need to be searched in case

1349

# there is a future exact-match

1350

uninteresting_keys.difference_update(interesting_keys)

1351

1352

# Second, find the full set of uninteresting bits reachable by the

1353

# uninteresting roots

1354

chks_to_read = uninteresting_keys

1355

while chks_to_read:

1356

next_chks = set()

1357

for record in store.get_record_stream(chks_to_read, 'unordered', False):

1358

# TODO: Handle 'absent'

1359

if pb is not None:

1360

pb.tick()

1414

class CHKMapDifference(object):
    """Iterate the stored pages and key,value pairs for (new - old).

    This class provides a generator over the stored CHK pages and the
    (key, value) pairs that are in any of the new maps and not in any of the
    old maps.

    Note that it may yield chk pages that are common (especially root nodes),
    but it won't yield (key,value) pairs that are common.

    Typical use is ``CHKMapDifference(...).process()``, which yields
    ``(record, items)`` tuples.
    """

    def __init__(self, store, new_root_keys, old_root_keys,
                 search_key_func, pb=None):
        """Create a difference walker.

        :param store: Object providing ``get_record_stream(keys, ordering,
            include_delta_closure)``; used to read the CHK pages.
        :param new_root_keys: Root keys of the maps whose content we want.
        :param old_root_keys: Root keys of the maps whose content should be
            excluded.
        :param search_key_func: Callable mapping an item key to its search
            key; passed to _deserialise and used to compute item prefixes.
        :param pb: Optional progress object; ``pb.tick()`` is called once per
            record read.
        """
        self._store = store
        self._new_root_keys = new_root_keys
        self._old_root_keys = old_root_keys
        self._pb = pb
        # All uninteresting chks that we have seen. By the time they are added
        # here, they should be either fully ignored, or queued up for
        # processing
        self._all_old_chks = set(self._old_root_keys)
        # All items that we have seen from the old_root_keys
        self._all_old_items = set()
        # These are interesting items which were either read, or already in the
        # interesting queue (so we don't need to walk them again)
        self._processed_new_refs = set()
        self._search_key_func = search_key_func

        # The uninteresting and interesting nodes to be searched
        self._old_queue = []
        self._new_queue = []
        # Holds the (key, value) items found when processing the root nodes,
        # waiting for the uninteresting nodes to be walked
        self._new_item_queue = []
        self._state = None

    def _read_nodes_from_store(self, keys):
        """Read and deserialise the pages for keys.

        :param keys: Keys to request from the store.
        :return: A generator of (record, node, prefix_refs, items) tuples.
            For an InternalNode, prefix_refs is ``node._items.items()`` and
            items is empty; for any other node, prefix_refs is empty and
            items is ``node._items.items()``.
        :raises errors.NoSuchRevision: if the store reports a record as
            'absent'.
        """
        # We chose not to use _page_cache, because we think in terms of records
        # to be yielded. Also, we expect to touch each page only 1 time during
        # this code. (We may want to evaluate saving the raw bytes into the
        # page cache, which would allow a working tree update after the fetch
        # to not have to read the bytes again.)
        stream = self._store.get_record_stream(keys, 'unordered', True)
        for record in stream:
            if self._pb is not None:
                self._pb.tick()
            if record.storage_kind == 'absent':
                raise errors.NoSuchRevision(self._store, record.key)
            # Named node_bytes rather than 'bytes' so the builtin is not
            # shadowed.
            node_bytes = record.get_bytes_as('fulltext')
            node = _deserialise(node_bytes, record.key,
                                search_key_func=self._search_key_func)
            # Exact type comparison is kept from the original code.
            if type(node) is InternalNode:
                # Note we don't have to do node.refs() because we know that
                # there are no children that have been pushed into this node
                prefix_refs = node._items.items()
                items = []
            else:
                prefix_refs = []
                items = node._items.items()
            yield record, node, prefix_refs, items

    def _read_old_roots(self):
        """Read the old root pages and record what they contain.

        Every item found is added to self._all_old_items and every newly seen
        child reference to self._all_old_chks.

        :return: A list of (prefix, ref) pairs for the child references that
            had not been seen before; these are candidates for the old queue.
        """
        old_chks_to_enqueue = []
        all_old_chks = self._all_old_chks
        for record, node, prefix_refs, items in \
                self._read_nodes_from_store(self._old_root_keys):
            # Uninteresting node
            prefix_refs = [p_r for p_r in prefix_refs
                           if p_r[1] not in all_old_chks]
            new_refs = [p_r[1] for p_r in prefix_refs]
            all_old_chks.update(new_refs)
            self._all_old_items.update(items)
            # Queue up the uninteresting references
            # Don't actually put them in the 'to-read' queue until we have
            # finished checking the interesting references
            old_chks_to_enqueue.extend(prefix_refs)
        return old_chks_to_enqueue

    def _enqueue_old(self, new_prefixes, old_chks_to_enqueue):
        """Queue the old references whose prefix overlaps the new content.

        :param new_prefixes: Set of prefixes that are still interesting.
        :param old_chks_to_enqueue: Iterable of (prefix, ref) pairs.

        A ref is appended to self._old_queue only if its prefix, or any
        non-empty leading part of it, is in new_prefixes. A pair with an
        empty prefix is therefore never queued.
        """
        # At this point, we have read all the uninteresting and interesting
        # items, so we can queue up the uninteresting stuff, knowing that we've
        # handled the interesting ones
        for prefix, ref in old_chks_to_enqueue:
            not_interesting = True
            # Try the longest leading slice first, down to one character.
            for i in range(len(prefix), 0, -1):
                if prefix[:i] in new_prefixes:
                    not_interesting = False
                    break
            if not_interesting:
                # This prefix is not part of the remaining 'interesting set'
                continue
            self._old_queue.append(ref)

    def _read_all_roots(self):
        """Read the root pages.

        This is structured as a generator, so that the root records can be
        yielded up to whoever needs them without any buffering.

        Yields each new root record that is not already a known old chk. As a
        side effect it fills self._new_queue, self._new_item_queue,
        self._processed_new_refs and (via _enqueue_old) self._old_queue.
        """
        # This is the bootstrap phase
        if not self._old_root_keys:
            # With no old_root_keys we can just shortcut and be ready
            # for _flush_new_queue
            self._new_queue = list(self._new_root_keys)
            return
        old_chks_to_enqueue = self._read_old_roots()
        # filter out any root keys that are already known to be uninteresting
        new_keys = set(self._new_root_keys).difference(self._all_old_chks)
        # These are prefixes that are present in new_keys that we are
        # thinking to yield
        new_prefixes = set()
        # We are about to yield all of these, so we don't want them getting
        # added a second time
        processed_new_refs = self._processed_new_refs
        processed_new_refs.update(new_keys)
        for record, node, prefix_refs, items in \
                self._read_nodes_from_store(new_keys):
            # At this level, we now know all the uninteresting references
            # So we filter and queue up whatever is remaining
            prefix_refs = [p_r for p_r in prefix_refs
                           if p_r[1] not in self._all_old_chks
                           and p_r[1] not in processed_new_refs]
            refs = [p_r[1] for p_r in prefix_refs]
            new_prefixes.update([p_r[0] for p_r in prefix_refs])
            self._new_queue.extend(refs)
            # TODO: We can potentially get multiple items here, however the
            #       current design allows for this, as callers will do the work
            #       to make the results unique. We might profile whether we
            #       gain anything by ensuring unique return values for items
            new_items = [item for item in items
                         if item not in self._all_old_items]
            self._new_item_queue.extend(new_items)
            new_prefixes.update([self._search_key_func(item[0])
                                 for item in new_items])
            processed_new_refs.update(refs)
            yield record
        # For new_prefixes we have the full length prefixes queued up.
        # However, we also need possible prefixes. (If we have a known ref to
        # 'ab', then we also need to include 'a'.) So expand the
        # new_prefixes to include all shorter prefixes
        for prefix in list(new_prefixes):
            new_prefixes.update([prefix[:i] for i in range(1, len(prefix))])
        self._enqueue_old(new_prefixes, old_chks_to_enqueue)

    def _flush_new_queue(self):
        """Walk everything reachable from the new queue.

        This is a generator of (record, items) tuples. The first tuple may be
        ``(None, new_items)`` carrying the items buffered while reading the
        roots; the following tuples carry each page record read together with
        its items that are not in self._all_old_items.
        """
        # No need to maintain the heap invariant anymore, just pull things out
        # and process them
        refs = set(self._new_queue)
        self._new_queue = []
        # First pass, flush all interesting items and convert to using direct
        # refs
        all_old_chks = self._all_old_chks
        processed_new_refs = self._processed_new_refs
        all_old_items = self._all_old_items
        new_items = [item for item in self._new_item_queue
                     if item not in all_old_items]
        self._new_item_queue = []
        if new_items:
            yield None, new_items
        refs = refs.difference(all_old_chks)
        while refs:
            next_refs = set()
            next_refs_update = next_refs.update
            # Inlining _read_nodes_from_store improves 'bzr branch bzr.dev'
            # from 1m54s to 1m51s. Consider it.
            for record, _, p_refs, items in self._read_nodes_from_store(refs):
                items = [item for item in items
                         if item not in all_old_items]
                yield record, items
                next_refs_update([p_r[1] for p_r in p_refs])
            # Drop references that are old or that have already been walked.
            next_refs = next_refs.difference(all_old_chks)
            next_refs = next_refs.difference(processed_new_refs)
            processed_new_refs.update(next_refs)
            refs = next_refs

    def _process_next_old(self):
        """Read one generation of old pages from self._old_queue.

        Items found are added to self._all_old_items; child references not
        yet in self._all_old_chks are recorded there and queued for the next
        call.
        """
        # Since we don't filter uninteresting any further than during
        # _read_all_roots, process the whole queue in a single pass.
        refs = self._old_queue
        self._old_queue = []
        all_old_chks = self._all_old_chks
        for record, _, prefix_refs, items in self._read_nodes_from_store(refs):
            self._all_old_items.update(items)
            unseen_refs = [ref for _prefix, ref in prefix_refs
                           if ref not in all_old_chks]
            self._old_queue.extend(unseen_refs)
            all_old_chks.update(unseen_refs)

    def _process_queues(self):
        """Drain the old queue, then return the _flush_new_queue generator."""
        while self._old_queue:
            self._process_next_old()
        return self._flush_new_queue()

    def process(self):
        """Yield (record, items) tuples for the whole difference.

        Root records come first, each paired with an empty list; the tuples
        produced by _process_queues follow.
        """
        for record in self._read_all_roots():
            yield record, []
        for record, items in self._process_queues():
            yield record, items

1377

1610

1378

1611

1379

1612

def iter_interesting_nodes(store, interesting_root_keys,

1390

1623

:return: Yield

1391

1624

(interesting record, {interesting key:values})

1392

1625

"""

1393

# TODO: consider that it may be more memory efficient to use the 20-byte

1394

# sha1 string, rather than tuples of hexadecimal sha1 strings.

1395

# TODO: Try to factor out a lot of the get_record_stream() calls into a

1396

# helper function similar to _read_bytes. This function should be

1397

# able to use nodes from the _page_cache as well as actually

1398

# requesting bytes from the store.

1399

1400

(all_uninteresting_chks, all_uninteresting_items, interesting_keys,

1401

interesting_to_yield, interesting_items) = _find_all_uninteresting(store,

1402

interesting_root_keys, uninteresting_root_keys, pb)

1403

1404

# Now that we know everything uninteresting, we can yield information from

1405

# our first request

1406

interesting_items.difference_update(all_uninteresting_items)

1407

interesting_to_yield = set(interesting_to_yield) - all_uninteresting_chks

1408

if interesting_items:

1409

yield None, interesting_items

1410

if interesting_to_yield:

1411

# We request these records again, rather than buffering the root

1412

# records, most likely they are still in the _group_cache anyway.

1413

for record in store.get_record_stream(interesting_to_yield,

1414

'unordered', False):

1415

yield record, []

1416

all_uninteresting_chks.update(interesting_to_yield)

1417

interesting_keys.difference_update(all_uninteresting_chks)

1418

1419

chks_to_read = interesting_keys

1420

counter = 0

1421

while chks_to_read:

1422

next_chks = set()

1423

for record in store.get_record_stream(chks_to_read, 'unordered', False):

1424

counter += 1

1425

if pb is not None:

1426

pb.update('find chk pages', counter)

1427

# TODO: Handle 'absent'?

1428

bytes = record.get_bytes_as('fulltext')

1429

# We don't care about search_key_func for this code, because we

1430

# only care about external references.

1431

node = _deserialise(bytes, record.key, search_key_func=None)

1432

if type(node) is InternalNode:

1433

# all_uninteresting_chks grows large, as it lists all nodes we

1434

# don't want to process (including already seen interesting

1435

# nodes).

1436

# small.difference_update(large) scales O(large), but

1437

# small.difference(large) scales O(small).

1438

# Also, we know we just _deserialised this node, so we can

1439

# access the dict directly.

1440

chks = set(node._items.itervalues()).difference(

1441

all_uninteresting_chks)

1442

# Is set() and .difference_update better than:

1443

# chks = [chk for chk in node.refs()

1444

# if chk not in all_uninteresting_chks]

1445

next_chks.update(chks)

1446

# These are now uninteresting everywhere else

1447

all_uninteresting_chks.update(chks)

1448

interesting_items = []

1449

else:

1450

interesting_items = [item for item in node._items.iteritems()

1451

if item not in all_uninteresting_items]

1452

# TODO: Do we need to filter out items that we have already

1453

# seen on other pages? We don't really want to buffer the

1454

# whole thing, but it does mean that callers need to

1455

# understand they may get duplicate values.

1456

# all_uninteresting_items.update(interesting_items)

1457

yield record, interesting_items

1458

chks_to_read = next_chks

1626

iterator = CHKMapDifference(store, interesting_root_keys,

1627

uninteresting_root_keys,

1628

search_key_func=store._search_key_func,

1629

pb=pb)

1630

return iterator.process()

1459

1631

1460

1632

1461

1633

try:

Older »