/brz/remove-bazaar : revision 2625.11.3

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/index.py

Committer: Martin Pool
Date: 2007-09-14 06:31:28 UTC
mfrom: (2822 +trunk)
mto: This revision was merged to the branch mainline in revision 2823.
Revision ID: mbp@sourcefrog.net-20070914063128-0p7mh6zfb4pzdg9p

merge trunk

files added:
bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_patiencediff_c.c

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_pack.py

bzrlib/bundle/serializer/v4.py

bzrlib/email_message.py

bzrlib/mail_client.py

bzrlib/multiparent.py

bzrlib/patiencediff.py

bzrlib/plugins/multiparent.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/transport_util.py

bzrlib/transport/unlistable.py

bzrlib/util/simplemapi.py

doc/developers/bundle-format4.txt

doc/developers/directory-fingerprints.txt

doc/developers/last-modified.txt

doc/developers/missing.txt

doc/developers/revision-properties.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/conflicts.txt

doc/en/user-guide/hooks.txt

doc/en/user-reference

doc/en/user-reference/hooks.txt

doc/en/user-reference/index.txt

doc/index.txt

tools/win32/survey.txt

files removed:
bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

doc/README.1st

doc/developers/scratch.txt

files renamed:
bzrlib/patiencediff.py => bzrlib/_patiencediff_py.py

bzrlib/tests/blackbox/test_bundle.py => bzrlib/tests/blackbox/test_send.py

doc/developers/HACKING => doc/developers/HACKING.txt

doc/bug_trackers.txt => doc/en/user-guide/bug_trackers.txt

doc/centralized_workflow.txt => doc/en/user-guide/centralized_workflow.txt

doc/configuration.txt => doc/en/user-guide/configuration.txt

doc/http_smart_server.txt => doc/en/user-guide/http_smart_server.txt

doc/index.txt => doc/en/user-guide/index.txt

doc/plugins.txt => doc/en/user-guide/plugins.txt

doc/server.txt => doc/en/user-guide/server.txt

doc/setting_up_email.txt => doc/en/user-guide/setting_up_email.txt

doc/shared_repository_layouts.txt => doc/en/user-guide/shared_repository_layouts.txt

doc/specifying_revisions.txt => doc/en/user-guide/specifying_revisions.txt

doc/tutorial.txt => doc/en/user-guide/tutorial.txt

doc/using_aliases.txt => doc/en/user-guide/using_aliases.txt

doc/version_info.txt => doc/en/user-guide/version_info.txt

files modified:
.bzrignore

INSTALL

Makefile

NEWS

README

bzrlib/__init__.py

bzrlib/annotate.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lockable_files.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge_directive.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patches.py

bzrlib/plugin.py

bzrlib/reconcile.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/smart/client.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/server.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_help.py

bzrlib/tests/test_http.py

bzrlib/tests/test_index.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/version.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml_serializer.py

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers/bundles.txt

doc/developers/index.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap.txt

doc/developers/performance.dot

doc/developers/profiling.txt

doc/developers/repository.txt

setup.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/ostools.py

Show diffs side-by-side

added added

removed removed

bzrlib/index.py

'CombinedGraphIndex',

'GraphIndex',

'GraphIndexBuilder',

'GraphIndexPrefixAdapter',

'InMemoryGraphIndex',

]

from cStringIO import StringIO

import re

from bzrlib import errors

from bzrlib.lazy_import import lazy_import

lazy_import(globals(), """

from bzrlib import trace

from bzrlib.trace import mutter

""")

from bzrlib import debug, errors

_OPTION_KEY_ELEMENTS = "key_elements="

_OPTION_LEN = "len="

_OPTION_NODE_REFS = "node_ref_lists="

_SIGNATURE = "Bazaar Graph Index 1\n"

VALUE := no-newline-no-null-bytes

"""

def __init__(self, reference_lists=0):

def __init__(self, reference_lists=0, key_elements=1):

"""Create a GraphIndex builder.

:param reference_lists: The number of node references lists for each

entry.

:param key_elements: The number of bytestrings in each key.

"""

self.reference_lists = reference_lists

self._keys = set()

self._nodes = {}

self._nodes_by_key = {}

self._key_length = key_elements

def _check_key(self, key):

"""Raise BadIndexKey if key is not a valid key for this index."""

if type(key) != tuple:

raise errors.BadIndexKey(key)

if self._key_length != len(key):

raise errors.BadIndexKey(key)

for element in key:

if not element or _whitespace_re.search(element) is not None:

raise errors.BadIndexKey(element)

def add_node(self, key, value, references=()):

"""Add a node to the index.

:param key: The key. keys must be whitespace-free utf8.

:param key: The key. keys are non-empty tuples containing

as many whitespace-free utf8 bytestrings as the key length

defined for this index.

:param references: An iterable of iterables of keys. Each is a

reference to another key.

:param value: The value to associate with the key. It may be any

bytes as long as it does not contain \0 or \n.

"""

if not key or _whitespace_re.search(key) is not None:

raise errors.BadIndexKey(key)

100

self._check_key(key)

101

if _newline_null_re.search(value) is not None:

102

raise errors.BadIndexValue(value)

103

if len(references) != self.reference_lists:

105

node_refs = []

106

for reference_list in references:

107

for reference in reference_list:

if _whitespace_re.search(reference) is not None:

raise errors.BadIndexKey(reference)

108

self._check_key(reference)

109

if reference not in self._nodes:

110

self._nodes[reference] = ('a', (), '')

111

node_refs.append(tuple(reference_list))

112

if key in self._nodes and self._nodes[key][0] == '':

113

raise errors.BadIndexDuplicateKey(key, self)

114

self._nodes[key] = ('', tuple(node_refs), value)

115

self._keys.add(key)

116

if self._key_length > 1:

117

key_dict = self._nodes_by_key

118

if self.reference_lists:

119

key_value = key, value, tuple(node_refs)

120

else:

121

key_value = key, value

122

# possibly should do this on-demand, but it seems likely it is

123

# always wanted

124

# For a key of (foo, bar, baz) create

125

# _nodes_by_key[foo][bar][baz] = key_value

126

for subkey in key[:-1]:

127

key_dict = key_dict.setdefault(subkey, {})

128

key_dict[key[-1]] = key_value

129

130

def finish(self):

131

lines = [_SIGNATURE]

132

lines.append(_OPTION_NODE_REFS + str(self.reference_lists) + '\n')

prefix_length = len(lines[0]) + len(lines[1])

133

lines.append(_OPTION_KEY_ELEMENTS + str(self._key_length) + '\n')

134

lines.append(_OPTION_LEN + str(len(self._keys)) + '\n')

135

prefix_length = sum(len(x) for x in lines)

136

# references are byte offsets. To avoid having to do nasty

137

# polynomial work to resolve offsets (references to later in the

100

138

# file cannot be determined until all the inbetween references have

125

163

# date - saves reaccumulating on the second pass

126

164

key_offset_info.append((key, non_ref_bytes, total_references))

127

165

# key is literal, value is literal, there are 3 null's, 1 NL

128

non_ref_bytes += len(key) + len(value) + 3 + 1

166

# key is variable length tuple, \x00 between elements

167

non_ref_bytes += sum(len(element) for element in key)

168

if self._key_length > 1:

169

non_ref_bytes += self._key_length - 1

170

# value is literal bytes, there are 3 null's, 1 NL.

171

non_ref_bytes += len(value) + 3 + 1

129

172

# one byte for absent if set.

130

173

if absent:

131

174

non_ref_bytes += 1

159

202

for reference in ref_list:

160

203

ref_addresses.append(format_string % key_addresses[reference])

161

204

flattened_references.append('\r'.join(ref_addresses))

162

lines.append("%s\0%s\0%s\0%s\n" % (key, absent,

205

string_key = '\x00'.join(key)

206

lines.append("%s\x00%s\x00%s\x00%s\n" % (string_key, absent,

163

207

'\t'.join(flattened_references), value))

164

208

lines.append('\n')

165

209

result = StringIO(''.join(lines))

177

221

Each node has the same number of key reference lists. Each key reference

178

222

list can be empty or an arbitrary length. The value is an opaque NULL

179

223

terminated string without any newlines. The storage of the index is

180

hidden in the interface: keys and key references are always bytestrings,

181

never the internal representation (e.g. dictionary offsets).

224

hidden in the interface: keys and key references are always tuples of

225

bytestrings, never the internal representation (e.g. dictionary offsets).

182

226

183

227

It is presumed that the index will not be mutated - it is static data.

184

228

196

240

"""

197

241

self._transport = transport

198

242

self._name = name

199

200

def iter_all_entries(self):

201

"""Iterate over all keys within the index.

202

203

:return: An iterable of (key, value) or (key, value, reference_lists).

204

The former tuple is used when there are no reference lists in the

205

index, making the API compatible with simple key:value index types.

206

There is no defined order for the result iteration - it will be in

207

the most efficient order for the index.

243

self._nodes = None

244

self._key_count = None

245

self._keys_by_offset = None

246

self._nodes_by_key = None

247

248

def _buffer_all(self):

249

"""Buffer all the index data.

250

251

Mutates self._nodes and self._keys_by_offset.

208

252

"""

253

if 'index' in debug.debug_flags:

254

mutter('Reading entire index %s', self._transport.abspath(self._name))

209

255

stream = self._transport.get(self._name)

210

256

self._read_prefix(stream)

257

expected_elements = 3 + self._key_length

211

258

line_count = 0

212

self.keys_by_offset = {}

259

# raw data keyed by offset

260

self._keys_by_offset = {}

261

# ready-to-return key:value or key:value, node_ref_lists

262

self._nodes = {}

263

self._nodes_by_key = {}

213

264

trailers = 0

214

265

pos = stream.tell()

215

266

for line in stream.readlines():

216

267

if line == '\n':

217

268

trailers += 1

218

269

continue

219

key, absent, references, value = line.split('\0')

270

elements = line.split('\0')

271

if len(elements) != expected_elements:

272

raise errors.BadIndexData(self)

273

# keys are tuples

274

key = tuple(elements[:self._key_length])

275

absent, references, value = elements[-3:]

220

276

value = value[:-1] # remove the newline

221

277

ref_lists = []

222

278

for ref_string in references.split('\t'):

224

280

int(ref) for ref in ref_string.split('\r') if ref

225

281

]))

226

282

ref_lists = tuple(ref_lists)

227

self.keys_by_offset[pos] = (key, absent, ref_lists, value)

283

self._keys_by_offset[pos] = (key, absent, ref_lists, value)

228

284

pos += len(line)

229

for key, absent, references, value in self.keys_by_offset.itervalues():

285

for key, absent, references, value in self._keys_by_offset.itervalues():

230

286

if absent:

231

287

continue

232

288

# resolve references:

233

289

if self.node_ref_lists:

234

290

node_refs = []

235

291

for ref_list in references:

236

node_refs.append(tuple([self.keys_by_offset[ref][0] for ref in ref_list]))

237

yield (key, value, tuple(node_refs))

292

node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))

293

node_value = (value, tuple(node_refs))

238

294

else:

239

yield (key, value)

295

node_value = value

296

self._nodes[key] = node_value

297

if self._key_length > 1:

298

subkey = list(reversed(key[:-1]))

299

key_dict = self._nodes_by_key

300

if self.node_ref_lists:

301

key_value = key, node_value[0], node_value[1]

302

else:

303

key_value = key, node_value

304

# possibly should do this on-demand, but it seems likely it is

305

# always wanted

306

# For a key of (foo, bar, baz) create

307

# _nodes_by_key[foo][bar][baz] = key_value

308

for subkey in key[:-1]:

309

key_dict = key_dict.setdefault(subkey, {})

310

key_dict[key[-1]] = key_value

311

# cache the keys for quick set intersections

312

self._keys = set(self._nodes)

240

313

if trailers != 1:

241

314

# there must be one line - the empty trailer line.

242

315

raise errors.BadIndexData(self)

243

316

317

def iter_all_entries(self):

318

"""Iterate over all keys within the index.

319

320

:return: An iterable of (key, value) or (key, value, reference_lists).

321

The former tuple is used when there are no reference lists in the

322

index, making the API compatible with simple key:value index types.

323

There is no defined order for the result iteration - it will be in

324

the most efficient order for the index.

325

"""

326

if 'evil' in debug.debug_flags:

327

trace.mutter_callsite(2,

328

"iter_all_entries scales with size of history.")

329

if self._nodes is None:

330

self._buffer_all()

331

if self.node_ref_lists:

332

for key, (value, node_ref_lists) in self._nodes.iteritems():

333

yield self, key, value, node_ref_lists

334

else:

335

for key, value in self._nodes.iteritems():

336

yield self, key, value

337

244

338

def _read_prefix(self, stream):

245

339

signature = stream.read(len(self._signature()))

246

340

if not signature == self._signature():

252

346

self.node_ref_lists = int(options_line[len(_OPTION_NODE_REFS):-1])

253

347

except ValueError:

254

348

raise errors.BadIndexOptions(self)

349

options_line = stream.readline()

350

if not options_line.startswith(_OPTION_KEY_ELEMENTS):

351

raise errors.BadIndexOptions(self)

352

try:

353

self._key_length = int(options_line[len(_OPTION_KEY_ELEMENTS):-1])

354

except ValueError:

355

raise errors.BadIndexOptions(self)

356

options_line = stream.readline()

357

if not options_line.startswith(_OPTION_LEN):

358

raise errors.BadIndexOptions(self)

359

try:

360

self._key_count = int(options_line[len(_OPTION_LEN):-1])

361

except ValueError:

362

raise errors.BadIndexOptions(self)

255

363

256

364

def iter_entries(self, keys):

257

365

"""Iterate over keys within the index.

264

372

keys = set(keys)

265

373

if not keys:

266

374

return

267

for node in self.iter_all_entries():

268

if not keys:

269

return

270

if node[0] in keys:

271

yield node

272

keys.remove(node[0])

375

if self._nodes is None:

376

self._buffer_all()

377

keys = keys.intersection(self._keys)

378

if self.node_ref_lists:

379

for key in keys:

380

value, node_refs = self._nodes[key]

381

yield self, key, value, node_refs

382

else:

383

for key in keys:

384

yield self, key, self._nodes[key]

385

386

def iter_entries_prefix(self, keys):

387

"""Iterate over keys within the index using prefix matching.

388

389

Prefix matching is applied within the tuple of a key, not to within

390

the bytestring of each key element. e.g. if you have the keys ('foo',

391

'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then

392

only the former key is returned.

393

394

:param keys: An iterable providing the key prefixes to be retrieved.

395

Each key prefix takes the form of a tuple the length of a key, but

396

with the last N elements 'None' rather than a regular bytestring.

397

The first element cannot be 'None'.

398

:return: An iterable as per iter_all_entries, but restricted to the

399

keys with a matching prefix to those supplied. No additional keys

400

will be returned, and every match that is in the index will be

401

returned.

402

"""

403

keys = set(keys)

404

if not keys:

405

return

406

# load data - also finds key lengths

407

if self._nodes is None:

408

self._buffer_all()

409

if self._key_length == 1:

410

for key in keys:

411

# sanity check

412

if key[0] is None:

413

raise errors.BadIndexKey(key)

414

if len(key) != self._key_length:

415

raise errors.BadIndexKey(key)

416

if self.node_ref_lists:

417

value, node_refs = self._nodes[key]

418

yield self, key, value, node_refs

419

else:

420

yield self, key, self._nodes[key]

421

return

422

for key in keys:

423

# sanity check

424

if key[0] is None:

425

raise errors.BadIndexKey(key)

426

if len(key) != self._key_length:

427

raise errors.BadIndexKey(key)

428

# find what it refers to:

429

key_dict = self._nodes_by_key

430

elements = list(key)

431

# find the subdict whose contents should be returned.

432

try:

433

while len(elements) and elements[0] is not None:

434

key_dict = key_dict[elements[0]]

435

elements.pop(0)

436

except KeyError:

437

# a non-existent lookup.

438

continue

439

if len(elements):

440

dicts = [key_dict]

441

while dicts:

442

key_dict = dicts.pop(-1)

443

# can't be empty or would not exist

444

item, value = key_dict.iteritems().next()

445

if type(value) == dict:

446

# push keys

447

dicts.extend(key_dict.itervalues())

448

else:

449

# yield keys

450

for value in key_dict.itervalues():

451

# each value is the key:value:node refs tuple

452

# ready to yield.

453

yield (self, ) + value

454

else:

455

# the last thing looked up was a terminal element

456

yield (self, ) + key_dict

457

458

def key_count(self):

459

"""Return an estimate of the number of keys in this index.

460

461

For GraphIndex the estimate is exact.

462

"""

463

if self._key_count is None:

464

# really this should just read the prefix

465

self._buffer_all()

466

return self._key_count

273

467

274

468

def _signature(self):

275

469

"""The file signature for this index type."""

323

517

seen_keys = set()

324

518

for index in self._indices:

325

519

for node in index.iter_all_entries():

326

if node[0] not in seen_keys:

520

if node[1] not in seen_keys:

327

521

yield node

328

seen_keys.add(node[0])

522

seen_keys.add(node[1])

329

523

330

524

def iter_entries(self, keys):

331

525

"""Iterate over keys within the index.

343

537

if not keys:

344

538

return

345

539

for node in index.iter_entries(keys):

346

keys.remove(node[0])

347

yield node

540

keys.remove(node[1])

541

yield node

542

543

def iter_entries_prefix(self, keys):

544

"""Iterate over keys within the index using prefix matching.

545

546

Duplicate keys across child indices are presumed to have the same

547

value and are only reported once.

548

549

Prefix matching is applied within the tuple of a key, not to within

550

the bytestring of each key element. e.g. if you have the keys ('foo',

551

'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then

552

only the former key is returned.

553

554

:param keys: An iterable providing the key prefixes to be retrieved.

555

Each key prefix takes the form of a tuple the length of a key, but

556

with the last N elements 'None' rather than a regular bytestring.

557

The first element cannot be 'None'.

558

:return: An iterable as per iter_all_entries, but restricted to the

559

keys with a matching prefix to those supplied. No additional keys

560

will be returned, and every match that is in the index will be

561

returned.

562

"""

563

keys = set(keys)

564

if not keys:

565

return

566

seen_keys = set()

567

for index in self._indices:

568

for node in index.iter_entries_prefix(keys):

569

if node[1] in seen_keys:

570

continue

571

seen_keys.add(node[1])

572

yield node

573

574

def key_count(self):

575

"""Return an estimate of the number of keys in this index.

576

577

For CombinedGraphIndex this is approximated by the sum of the keys of

578

the child indices. As child indices may have duplicate keys this can

579

have a maximum error of the number of child indices * largest number of

580

keys in any index.

581

"""

582

return sum((index.key_count() for index in self._indices), 0)

348

583

349

584

def validate(self):

350

585

"""Validate that everything in the index can be accessed."""

365

600

366

601

:param nodes: An iterable of (key, value, node_refs) entries to add.

367

602

"""

368

for (key, value, node_refs) in nodes:

369

self.add_node(key, value, node_refs)

603

if self.reference_lists:

604

for (key, value, node_refs) in nodes:

605

self.add_node(key, value, node_refs)

606

else:

607

for (key, value) in nodes:

608

self.add_node(key, value)

370

609

371

610

def iter_all_entries(self):

372

611

"""Iterate over all keys within the index

375

614

defined order for the result iteration - it will be in the most

376

615

efficient order for the index (in this case dictionary hash order).

377

616

"""

617

if 'evil' in debug.debug_flags:

618

trace.mutter_callsite(2,

619

"iter_all_entries scales with size of history.")

378

620

if self.reference_lists:

379

621

for key, (absent, references, value) in self._nodes.iteritems():

380

622

if not absent:

381

yield key, value, references

623

yield self, key, value, references

382

624

else:

383

625

for key, (absent, references, value) in self._nodes.iteritems():

384

626

if not absent:

385

yield key, value

627

yield self, key, value

386

628

387

629

def iter_entries(self, keys):

388

630

"""Iterate over keys within the index.

394

636

"""

395

637

keys = set(keys)

396

638

if self.reference_lists:

397

for key in keys.intersection(self._nodes):

639

for key in keys.intersection(self._keys):

398

640

node = self._nodes[key]

399

641

if not node[0]:

400

yield key, node[2], node[1]

642

yield self, key, node[2], node[1]

401

643

else:

402

for key in keys.intersection(self._nodes):

644

for key in keys.intersection(self._keys):

403

645

node = self._nodes[key]

404

646

if not node[0]:

405

yield key, node[2]

647

yield self, key, node[2]

648

649

def iter_entries_prefix(self, keys):

650

"""Iterate over keys within the index using prefix matching.

651

652

Prefix matching is applied within the tuple of a key, not to within

653

the bytestring of each key element. e.g. if you have the keys ('foo',

654

'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then

655

only the former key is returned.

656

657

:param keys: An iterable providing the key prefixes to be retrieved.

658

Each key prefix takes the form of a tuple the length of a key, but

659

with the last N elements 'None' rather than a regular bytestring.

660

The first element cannot be 'None'.

661

:return: An iterable as per iter_all_entries, but restricted to the

662

keys with a matching prefix to those supplied. No additional keys

663

will be returned, and every match that is in the index will be

664

returned.

665

"""

666

# XXX: Too much duplication with the GraphIndex class; consider finding

667

# a good place to pull out the actual common logic.

668

keys = set(keys)

669

if not keys:

670

return

671

if self._key_length == 1:

672

for key in keys:

673

# sanity check

674

if key[0] is None:

675

raise errors.BadIndexKey(key)

676

if len(key) != self._key_length:

677

raise errors.BadIndexKey(key)

678

node = self._nodes[key]

679

if node[0]:

680

continue

681

if self.reference_lists:

682

yield self, key, node[2], node[1]

683

else:

684

yield self, key, node[2]

685

return

686

for key in keys:

687

# sanity check

688

if key[0] is None:

689

raise errors.BadIndexKey(key)

690

if len(key) != self._key_length:

691

raise errors.BadIndexKey(key)

692

# find what it refers to:

693

key_dict = self._nodes_by_key

694

elements = list(key)

695

# find the subdict to return

696

try:

697

while len(elements) and elements[0] is not None:

698

key_dict = key_dict[elements[0]]

699

elements.pop(0)

700

except KeyError:

701

# a non-existent lookup.

702

continue

703

if len(elements):

704

dicts = [key_dict]

705

while dicts:

706

key_dict = dicts.pop(-1)

707

# can't be empty or would not exist

708

item, value = key_dict.iteritems().next()

709

if type(value) == dict:

710

# push keys

711

dicts.extend(key_dict.itervalues())

712

else:

713

# yield keys

714

for value in key_dict.itervalues():

715

yield (self, ) + value

716

else:

717

yield (self, ) + key_dict

718

719

def key_count(self):
    """Return an estimate of the number of keys in this index.

    For InMemoryGraphIndex the estimate is exact.

    :return: The number of entries currently held in self._keys.
    """
    return len(self._keys)

406

725

407

726

def validate(self):
    """In-memory indexes have no known corruption at the moment.

    Nothing is checked; the method simply returns None.
    """

728

729

730

class GraphIndexPrefixAdapter(object):
    """An adapter between GraphIndex with different key lengths.

    Queries against this will emit queries against the adapted Graph with the
    prefix added, queries for all items use iter_entries_prefix. The returned
    nodes will have their keys and node references adjusted to remove the
    prefix. Finally, an add_nodes_callback can be supplied - when called the
    nodes and references being added will have prefix prepended.
    """

    def __init__(self, adapted, prefix, missing_key_length,
                 add_nodes_callback=None):
        """Construct an adapter against adapted with prefix.

        :param adapted: The index that queries are forwarded to.
        :param prefix: A tuple of key elements that is prepended to every
            key and reference sent to adapted, and stripped from results.
        :param missing_key_length: The number of key elements that follow
            the prefix in adapted's keys; used to build the all-entries
            prefix query (prefix followed by that many None elements).
        :param add_nodes_callback: Optional callable that receives the list
            of prefixed nodes from add_nodes. add_nodes calls it
            unconditionally, so it must be supplied before nodes are added.
        """
        self.adapted = adapted
        self.prefix_key = prefix + (None,) * missing_key_length
        self.prefix = prefix
        self.prefix_len = len(prefix)
        self.add_nodes_callback = add_nodes_callback

    def add_nodes(self, nodes):
        """Add nodes to the index.

        :param nodes: An iterable of (key, value, node_refs) entries to add,
            or an iterable of (key, value) entries when the nodes carry no
            reference lists. The prefix is prepended to each key and to each
            reference before the nodes are passed to add_nodes_callback.
        """
        # save nodes in case its an iterator: the fallback below needs to
        # walk them a second time.
        nodes = tuple(nodes)
        try:
            # node_refs is a tuple of tuples of keys, so split it apart and
            # add the prefix to each internal reference.
            translated_nodes = [
                (self.prefix + key, value,
                 tuple(tuple(self.prefix + ref_node for ref_node in ref_list)
                       for ref_list in node_refs))
                for (key, value, node_refs) in nodes]
        except ValueError:
            # Unpacking into three names failed, so treat the nodes as
            # (key, value) pairs without reference lists.
            # XXX: TODO add an explicit interface for getting the reference
            # list status, to handle this bit of user-friendliness in the API
            # more explicitly.
            translated_nodes = [(self.prefix + key, value)
                                for (key, value) in nodes]
        self.add_nodes_callback(translated_nodes)

    def add_node(self, key, value, references=()):
        """Add a node to the index.

        :param key: The key. keys are non-empty tuples containing
            as many whitespace-free utf8 bytestrings as the key length
            defined for this index.
        :param references: An iterable of iterables of keys. Each is a
            reference to another key.
        :param value: The value to associate with the key. It may be any
            bytes as long as it does not contain \\0 or \\n.
        """
        self.add_nodes(((key, value, references), ))

    def _strip_prefix(self, an_iter):
        """Strip prefix data from nodes and return it.

        :param an_iter: An iterable of nodes as yielded by the adapted index:
            (index, key, value, reference_lists), or (index, key, value) when
            the adapted index has no reference lists.
        :return: A generator of the same nodes with the prefix removed from
            the key and from every reference.
        :raises errors.BadIndexData: If a key or reference does not start
            with the prefix.
        """
        prefix = self.prefix
        prefix_len = self.prefix_len
        for node in an_iter:
            # cross checks
            if node[1][:prefix_len] != prefix:
                raise errors.BadIndexData(self)
            stripped_key = node[1][prefix_len:]
            if len(node) < 4:
                # Node from an index without reference lists: there is
                # nothing beyond the key to check or strip.
                yield node[0], stripped_key, node[2]
                continue
            for ref_list in node[3]:
                for ref_node in ref_list:
                    if ref_node[:prefix_len] != prefix:
                        raise errors.BadIndexData(self)
            yield node[0], stripped_key, node[2], (
                tuple(tuple(ref_node[prefix_len:] for ref_node in ref_list)
                      for ref_list in node[3]))

    def iter_all_entries(self):
        """Iterate over all keys within the index

        iter_all_entries is implemented against the adapted index using
        iter_entries_prefix.

        :return: An iterable of (index, key, value, reference_lists) with the
            prefix stripped from keys and references. There is no
            defined order for the result iteration - it will be in the most
            efficient order for the index (in this case dictionary hash order).
        """
        return self._strip_prefix(
            self.adapted.iter_entries_prefix([self.prefix_key]))

    def iter_entries(self, keys):
        """Iterate over keys within the index.

        :param keys: An iterable providing the keys to be retrieved.
        :return: An iterable of (index, key, value, reference_lists) with the
            prefix stripped from keys and references. There is no
            defined order for the result iteration - it will be in the most
            efficient order for the index (keys iteration order in this case).
        """
        return self._strip_prefix(self.adapted.iter_entries(
            self.prefix + key for key in keys))

    def iter_entries_prefix(self, keys):
        """Iterate over keys within the index using prefix matching.

        Prefix matching is applied within the tuple of a key, not to within
        the bytestring of each key element. e.g. if you have the keys ('foo',
        'bar'), ('foobar', 'gam') and do a prefix search for ('foo', None) then
        only the former key is returned.

        :param keys: An iterable providing the key prefixes to be retrieved.
            Each key prefix takes the form of a tuple the length of a key, but
            with the last N elements 'None' rather than a regular bytestring.
            The first element cannot be 'None'.
        :return: An iterable as per iter_all_entries, but restricted to the
            keys with a matching prefix to those supplied. No additional keys
            will be returned, and every match that is in the index will be
            returned.
        """
        return self._strip_prefix(self.adapted.iter_entries_prefix(
            self.prefix + key for key in keys))

    def key_count(self):
        """Return an estimate of the number of keys in this index.

        For GraphIndexPrefixAdapter this is relatively expensive - key
        iteration with the prefix is done.
        """
        # Count lazily rather than building a list only to measure it.
        return sum(1 for _ in self.iter_all_entries())

    def validate(self):
        """Call the adapted's validate."""
        self.adapted.validate()

Older »