/brz/remove-bazaar : revision 2625.11.3

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

Committer: Martin Pool
Date: 2007-09-14 06:31:28 UTC
mfrom: (2822 +trunk)
mto: This revision was merged to the branch mainline in revision 2823.
Revision ID: mbp@sourcefrog.net-20070914063128-0p7mh6zfb4pzdg9p

merge trunk

files added:
bzrlib/_dirstate_helpers_c.h

bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/_patiencediff_c.c

bzrlib/benchmarks/bench_dirstate.py

bzrlib/benchmarks/bench_pack.py

bzrlib/bundle/serializer/v4.py

bzrlib/email_message.py

bzrlib/mail_client.py

bzrlib/multiparent.py

bzrlib/patiencediff.py

bzrlib/plugins/multiparent.py

bzrlib/tests/blackbox/test_bundle_info.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/commands/test_update.py

bzrlib/tests/inventory_implementations

bzrlib/tests/inventory_implementations/__init__.py

bzrlib/tests/inventory_implementations/basics.py

bzrlib/tests/repository_implementations/test_fetch.py

bzrlib/tests/repository_implementations/test_has_same_location.py

bzrlib/tests/repository_implementations/test_write_group.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_mail_client.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/test_win32utils.py

bzrlib/tests/transport_util.py

bzrlib/transport/unlistable.py

bzrlib/util/simplemapi.py

doc/developers/bundle-format4.txt

doc/developers/directory-fingerprints.txt

doc/developers/last-modified.txt

doc/developers/missing.txt

doc/developers/revision-properties.txt

doc/developers/update.txt

doc/en

doc/en/developer-guide

doc/en/mini-tutorial

doc/en/mini-tutorial/index.txt

doc/en/quick-reference

doc/en/quick-reference/Makefile

doc/en/quick-reference/quick-start-summary.svg

doc/en/release-notes

doc/en/user-guide

doc/en/user-guide/conflicts.txt

doc/en/user-guide/hooks.txt

doc/en/user-reference

doc/en/user-reference/hooks.txt

doc/en/user-reference/index.txt

doc/index.txt

tools/win32/survey.txt

files removed:
bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

doc/README.1st

doc/developers/scratch.txt

files renamed:
bzrlib/patiencediff.py => bzrlib/_patiencediff_py.py

bzrlib/tests/blackbox/test_bundle.py => bzrlib/tests/blackbox/test_send.py

doc/developers/HACKING => doc/developers/HACKING.txt

doc/bug_trackers.txt => doc/en/user-guide/bug_trackers.txt

doc/centralized_workflow.txt => doc/en/user-guide/centralized_workflow.txt

doc/configuration.txt => doc/en/user-guide/configuration.txt

doc/http_smart_server.txt => doc/en/user-guide/http_smart_server.txt

doc/index.txt => doc/en/user-guide/index.txt

doc/plugins.txt => doc/en/user-guide/plugins.txt

doc/server.txt => doc/en/user-guide/server.txt

doc/setting_up_email.txt => doc/en/user-guide/setting_up_email.txt

doc/shared_repository_layouts.txt => doc/en/user-guide/shared_repository_layouts.txt

doc/specifying_revisions.txt => doc/en/user-guide/specifying_revisions.txt

doc/tutorial.txt => doc/en/user-guide/tutorial.txt

doc/using_aliases.txt => doc/en/user-guide/using_aliases.txt

doc/version_info.txt => doc/en/user-guide/version_info.txt

files modified:
.bzrignore

INSTALL

Makefile

NEWS

README

bzrlib/__init__.py

bzrlib/annotate.py

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/delta.py

bzrlib/deprecated_graph.py

bzrlib/diff.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/help.py

bzrlib/help_topics.py

bzrlib/index.py

bzrlib/info.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lockable_files.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge_directive.py

bzrlib/msgeditor.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/pack.py

bzrlib/patches.py

bzrlib/plugin.py

bzrlib/reconcile.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/smart/client.py

bzrlib/smart/protocol.py

bzrlib/smart/repository.py

bzrlib/smart/server.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/store/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/tag.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_commit.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_revision_history.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_help.py

bzrlib/tests/test_http.py

bzrlib/tests/test_index.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_lsprof.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_pack.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_tag.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations/test_inv.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_inv.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/wsgi.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/version.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml6.py

bzrlib/xml_serializer.py

doc/bazaar-vcs.org.kid

doc/default.css

doc/developers/bundles.txt

doc/developers/index.txt

doc/developers/performance-contributing.txt

doc/developers/performance-roadmap.txt

doc/developers/performance.dot

doc/developers/profiling.txt

doc/developers/repository.txt

setup.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/rst2html.py

tools/rst2prettyhtml.py

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/ostools.py

Show diffs side-by-side

added added

removed removed

bzrlib/dirstate.py

dirstate format = header line, full checksum, row count, parent details,

ghost_details, entries;

header line = "#bazaar dirstate flat format 2", NL;

header line = "#bazaar dirstate flat format 3", NL;

full checksum = "crc32: ", ["-"], WHOLE_NUMBER, NL;

row count = "num_entries: ", digit, NL;

row count = "num_entries: ", WHOLE_NUMBER, NL;

parent_details = WHOLE NUMBER, {REVISION_ID}* NL;

ghost_details = WHOLE NUMBER, {REVISION_ID}*, NL;

entries = {entry};

118

where we need id->path mapping; we also usually read the whole file, so

119

I'm going to skip that for the moment, as we have the ability to locate

120

via bisect any path in any tree, and if we lookup things by path, we can

121

accumulate a id->path mapping as we go, which will tend to match what we

121

accumulate an id->path mapping as we go, which will tend to match what we

122

looked for.

123

124

I plan to implement this asap, so please speak up now to alter/tweak the

143

Locking:

144

Eventually reuse dirstate objects across locks IFF the dirstate file has not

145

been modified, but will require that we flush/ignore cached stat-hit data

146

because we wont want to restat all files on disk just because a lock was

146

because we won't want to restat all files on disk just because a lock was

147

acquired, yet we cannot trust the data after the previous lock was released.

148

149

Memory representation:

162

manageable number. Will scale badly on trees with 10K entries in a

163

single directory. compare with Inventory.InventoryDirectory which has

164

a dictionary for the children. No bisect capability, can only probe for

165

exact matches, or grab all elements and sorta.

166

- Whats the risk of error here? Once we have the base format being processed

165

exact matches, or grab all elements and sort.

166

- What's the risk of error here? Once we have the base format being processed

167

we should have a net win regardless of optimality. So we are going to

168

go with what seems reasonably.

168

go with what seems reasonable.

169

open questions:

170

171

maybe we should do a test profile of these core structure - 10K simulated searches/lookups/etc?

171

Maybe we should do a test profile of the core structure - 10K simulated

172

searches/lookups/etc?

172

173

174

Objects for each row?

174

175

The lifetime of Dirstate objects is current per lock, but see above for

199

200

201

"""

201

202

203

import bisect

203

204

import binascii

204

import bisect

205

import errno

206

import os

207

from stat import S_IEXEC

220

)

221

222

223

class _Bisector(object):

224

"""This just keeps track of information as we are bisecting."""

225

226

227

223

def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):

228

224

"""Convert stat values into a packed representation."""

229

225

# jam 20060614 it isn't really worth removing more entries if we

230

226

# are going to leave it in packed form.

231

227

# With only st_mtime and st_mode filesize is 5.5M and read time is 275ms

232

# With all entries filesize is 5.9M and read time is mabye 280ms

228

# With all entries, filesize is 5.9M and read time is maybe 280ms

233

229

# well within the noise margin

234

230

235

231

# base64 encoding always adds a final newline, so strip it off

462

458

if self._id_index:

463

459

self._id_index.setdefault(entry_key[2], set()).add(entry_key)

464

460

465

def _bisect(self, dir_name_list):

461

def _bisect(self, paths):

466

462

"""Bisect through the disk structure for specific rows.

467

463

468

:param dir_name_list: A list of (dir, name) pairs.

469

:return: A dict mapping (dir, name) => entry for found entries. Missing

464

:param paths: A list of paths to find

465

:return: A dict mapping path => entries for found entries. Missing

470

466

entries will not be in the map.

467

The list is not sorted, and entries will be populated

468

based on when they were read.

471

469

"""

472

470

self._requires_lock()

473

471

# We need the file pointer to be right after the initial header block

493

491

found = {}

494

492

495

493

# Avoid infinite seeking

496

max_count = 30*len(dir_name_list)

494

max_count = 30*len(paths)

497

495

count = 0

498

496

# pending is a list of places to look.

499

497

# each entry is a tuple of low, high, dir_names

501

499

# high -> the last byte offset (inclusive)

502

500

# dir_names -> The list of (dir, name) pairs that should be found in

503

501

# the [low, high] range

504

pending = [(low, high, dir_name_list)]

502

pending = [(low, high, paths)]

505

503

506

504

page_size = self._bisect_page_size

507

505

560

558

# Find what entries we are looking for, which occur before and

561

559

# after this first record.

562

560

after = start

563

first_dir_name = (first_fields[1], first_fields[2])

564

first_loc = bisect.bisect_left(cur_files, first_dir_name)

561

if first_fields[1]:

562

first_path = first_fields[1] + '/' + first_fields[2]

563

else:

564

first_path = first_fields[2]

565

first_loc = _bisect_path_left(cur_files, first_path)

565

566

567

# These exist before the current location

567

568

pre = cur_files[:first_loc]

584

585

else:

585

586

after = mid + len(block)

586

587

last_dir_name = (last_fields[1], last_fields[2])

588

last_loc = bisect.bisect_right(post, last_dir_name)

588

if last_fields[1]:

589

last_path = last_fields[1] + '/' + last_fields[2]

590

else:

591

last_path = last_fields[2]

592

last_loc = _bisect_path_right(post, last_path)

589

593

590

594

middle_files = post[:last_loc]

591

595

post = post[last_loc:]

596

600

# Either we will find them here, or we can mark them as

597

601

# missing.

598

602

599

if middle_files[0] == first_dir_name:

603

if middle_files[0] == first_path:

600

604

# We might need to go before this location

601

pre.append(first_dir_name)

602

if middle_files[-1] == last_dir_name:

603

post.insert(0, last_dir_name)

605

pre.append(first_path)

606

if middle_files[-1] == last_path:

607

post.insert(0, last_path)

604

608

605

609

# Find out what paths we have

606

paths = {first_dir_name:[first_fields]}

607

# last_dir_name might == first_dir_name so we need to be

610

paths = {first_path:[first_fields]}

611

# last_path might == first_path so we need to be

608

612

# careful if we should append rather than overwrite

609

613

if last_entry_num != first_entry_num:

610

paths.setdefault(last_dir_name, []).append(last_fields)

614

paths.setdefault(last_path, []).append(last_fields)

611

615

for num in xrange(first_entry_num+1, last_entry_num):

612

616

# TODO: jam 20070223 We are already splitting here, so

613

617

# shouldn't we just split the whole thing rather

614

618

# than doing the split again in add_one_record?

615

619

fields = entries[num].split('\0')

616

dir_name = (fields[1], fields[2])

617

paths.setdefault(dir_name, []).append(fields)

620

if fields[1]:

621

path = fields[1] + '/' + fields[2]

622

else:

623

path = fields[2]

624

paths.setdefault(path, []).append(fields)

618

625

619

for dir_name in middle_files:

620

for fields in paths.get(dir_name, []):

626

for path in middle_files:

627

for fields in paths.get(path, []):

621

628

# offset by 1 because of the opening '\0'

622

629

# consider changing fields_to_entry to avoid the

623

630

# extra list slice

624

631

entry = fields_to_entry(fields[1:])

625

found.setdefault(dir_name, []).append(entry)

632

found.setdefault(path, []).append(entry)

626

633

627

634

# Now we have split up everything into pre, middle, and post, and

628

635

# we have handled everything that fell in 'middle'.

645

652

_bisect_dirblocks is meant to find the contents of directories, which

646

653

differs from _bisect, which only finds individual entries.

647

654

648

:param dir_list: An sorted list of directory names ['', 'dir', 'foo'].

655

:param dir_list: A sorted list of directory names ['', 'dir', 'foo'].

649

656

:return: A map from dir => entries_for_dir

650

657

"""

651

658

# TODO: jam 20070223 A lot of the bisecting logic could be shared

818

825

819

826

return found

820

827

821

def _bisect_recursive(self, dir_name_list):

828

def _bisect_recursive(self, paths):

822

829

"""Bisect for entries for all paths and their children.

823

830

824

831

This will use bisect to find all records for the supplied paths. It

837

844

# Directories that have been read

838

845

processed_dirs = set()

839

846

# Get the ball rolling with the first bisect for all entries.

840

newly_found = self._bisect(dir_name_list)

847

newly_found = self._bisect(paths)

841

848

842

849

while newly_found:

843

850

# Directories that need to be read

867

874

if dir_name[0] in pending_dirs:

868

875

# This entry will be found in the dir search

869

876

continue

870

# TODO: We need to check if this entry has

871

# already been found. Otherwise we might be

872

# hitting infinite recursion.

873

877

if dir_name not in found_dir_names:

874

paths_to_search.add(dir_name)

878

paths_to_search.add(tree_info[1])

875

879

# Now we have a list of paths to look for directly, and

876

880

# directory blocks that need to be read.

877

881

# newly_found is mixing the keys between (dir, name) and path

1327

1331

be attempted.

1328

1332

:return: A tuple describing where the path is located, or should be

1329

1333

inserted. The tuple contains four fields: the block index, the row

1330

index, anda two booleans are True when the directory is present, and

1331

when the entire path is present. There is no guarantee that either

1334

index, the directory is present (boolean), the entire path is

1335

present (boolean). There is no guarantee that either

1332

1336

coordinate is currently reachable unless the found field for it is

1333

1337

True. For instance, a directory not present in the searched tree

1334

1338

may be returned with a value one greater than the current highest

1356

1360

return block_index, entry_index, True, False

1357

1361

1358

1362

def _get_entry(self, tree_index, fileid_utf8=None, path_utf8=None):

1359

"""Get the dirstate entry for path in tree tree_index

1363

"""Get the dirstate entry for path in tree tree_index.

1360

1364

1361

1365

If either file_id or path is supplied, it is used as the key to lookup.

1362

1366

If both are supplied, the fastest lookup is used, and an error is

1401

1405

continue

1402

1406

# WARNING: DO not change this code to use _get_block_entry_index

1403

1407

# as that function is not suitable: it does not use the key

1404

# to lookup, and thus the wront coordinates are returned.

1408

# to lookup, and thus the wrong coordinates are returned.

1405

1409

block = self._dirblocks[block_index][1]

1406

1410

entry_index, present = self._find_entry_index(key, block)

1407

1411

if present:

1508

1512

return self._id_index

1509

1513

1510

1514

def _get_output_lines(self, lines):

1511

"""format lines for final output.

1515

"""Format lines for final output.

1512

1516

1513

:param lines: A sequece of lines containing the parents list and the

1517

:param lines: A sequence of lines containing the parents list and the

1514

1518

path lines.

1515

1519

"""

1516

1520

output_lines = [DirState.HEADER_FORMAT_3]

1524

1528

return output_lines

1525

1529

1526

1530

def _make_deleted_row(self, fileid_utf8, parents):

1527

"""Return a deleted for for fileid_utf8."""

1531

"""Return a deleted row for fileid_utf8."""

1528

1532

return ('/', 'RECYCLED.BIN', 'file', fileid_utf8, 0, DirState.NULLSTAT,

1529

1533

''), parents

1530

1534

1550

1554

"""

1551

1555

self._read_header_if_needed()

1552

1556

if self._dirblock_state == DirState.NOT_IN_MEMORY:

1553

# move the _state_file pointer to after the header (in case bisect

1554

# has been called in the mean time)

1555

self._state_file.seek(self._end_of_header)

1556

text = self._state_file.read()

1557

# TODO: check the crc checksums. crc_measured = zlib.crc32(text)

1558

1559

fields = text.split('\0')

1560

# Remove the last blank entry

1561

trailing = fields.pop()

1562

assert trailing == ''

1563

# consider turning fields into a tuple.

1564

1565

# skip the first field which is the trailing null from the header.

1566

cur = 1

1567

# Each line now has an extra '\n' field which is not used

1568

# so we just skip over it

1569

# entry size:

1570

# 3 fields for the key

1571

# + number of fields per tree_data (5) * tree count

1572

# + newline

1573

num_present_parents = self._num_present_parents()

1574

tree_count = 1 + num_present_parents

1575

entry_size = self._fields_per_entry()

1576

expected_field_count = entry_size * self._num_entries

1577

field_count = len(fields)

1578

# this checks our adjustment, and also catches file too short.

1579

assert field_count - cur == expected_field_count, \

1580

'field count incorrect %s != %s, entry_size=%s, '\

1581

'num_entries=%s fields=%r' % (

1582

field_count - cur, expected_field_count, entry_size,

1583

self._num_entries, fields)

1584

1585

if num_present_parents == 1:

1586

# Bind external functions to local names

1587

_int = int

1588

# We access all fields in order, so we can just iterate over

1589

# them. Grab an straight iterator over the fields. (We use an

1590

# iterator because we don't want to do a lot of additions, nor

1591

# do we want to do a lot of slicing)

1592

next = iter(fields).next

1593

# Move the iterator to the current position

1594

for x in xrange(cur):

1595

next()

1596

# The two blocks here are deliberate: the root block and the

1597

# contents-of-root block.

1598

self._dirblocks = [('', []), ('', [])]

1599

current_block = self._dirblocks[0][1]

1600

current_dirname = ''

1601

append_entry = current_block.append

1602

for count in xrange(self._num_entries):

1603

dirname = next()

1604

name = next()

1605

file_id = next()

1606

if dirname != current_dirname:

1607

# new block - different dirname

1608

current_block = []

1609

current_dirname = dirname

1610

self._dirblocks.append((current_dirname, current_block))

1611

append_entry = current_block.append

1612

# we know current_dirname == dirname, so re-use it to avoid

1613

# creating new strings

1614

entry = ((current_dirname, name, file_id),

1615

[(# Current Tree

1616

next(), # minikind

1617

next(), # fingerprint

1618

_int(next()), # size

1619

next() == 'y', # executable

1620

next(), # packed_stat or revision_id

1621

1622

( # Parent 1

1623

next(), # minikind

1624

next(), # fingerprint

1625

_int(next()), # size

1626

next() == 'y', # executable

1627

next(), # packed_stat or revision_id

1628

1629

])

1630

trailing = next()

1631

assert trailing == '\n'

1632

# append the entry to the current block

1633

append_entry(entry)

1634

self._split_root_dirblock_into_contents()

1635

else:

1636

fields_to_entry = self._get_fields_to_entry()

1637

entries = [fields_to_entry(fields[pos:pos+entry_size])

1638

for pos in xrange(cur, field_count, entry_size)]

1639

self._entries_to_current_state(entries)

1640

# To convert from format 2 => format 3

1641

# self._dirblocks = sorted(self._dirblocks,

1642

# key=lambda blk:blk[0].split('/'))

1643

# To convert from format 3 => format 2

1644

# self._dirblocks = sorted(self._dirblocks)

1645

self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED

1557

_read_dirblocks(self)

1646

1558

1647

1559

def _read_header(self):

1648

1560

"""This reads in the metadata header, and the parent ids.

1676

1588

self._read_header()

1677

1589

1678

1590

def _read_prelude(self):

1679

"""Read in the prelude header of the dirstate file

1591

"""Read in the prelude header of the dirstate file.

1680

1592

1681

1593

This only reads in the stuff that is not connected to the crc

1682

1594

checksum. The position will be correct to read in the rest of

1699

1611

1700

1612

We reuse the existing file, because that prevents race conditions with

1701

1613

file creation, and use oslocks on it to prevent concurrent modification

1702

and reads - because dirstates incremental data aggretation is not

1614

and reads - because dirstate's incremental data aggregation is not

1703

1615

compatible with reading a modified file, and replacing a file in use by

1704

another process is impossible on windows.

1616

another process is impossible on Windows.

1705

1617

1706

1618

A dirstate in read only mode should be smart enough though to validate

1707

1619

that the file has not changed, and otherwise discard its cache and

1768

1680

"path_id %r is not a plain string" % (new_id,)

1769

1681

self._read_dirblocks_if_needed()

1770

1682

if len(path):

1771

# logic not written

1683

# TODO: logic not written

1772

1684

raise NotImplementedError(self.set_path_id)

1773

1685

# TODO: check new id is unique

1774

1686

entry = self._get_entry(0, path_utf8=path)

1876

1788

# this file id is at a different path in one of the

1877

1789

# other trees, so put absent pointers there

1878

1790

# This is the vertical axis in the matrix, all pointing

1879

# tot he real path.

1791

# to the real path.

1880

1792

by_path[entry_key][tree_index] = ('r', path_utf8, 0, False, '')

1881

1793

# by path consistency: Insert into an existing path record (trivial), or

1882

1794

# add a new one with relocation pointers for the other tree indexes.

1951

1863

new_iterator = new_inv.iter_entries_by_dir()

1952

1864

# we will be modifying the dirstate, so we need a stable iterator. In

1953

1865

# future we might write one, for now we just clone the state into a

1954

# list - which is a shallow copy, so each

1866

# list - which is a shallow copy.

1955

1867

old_iterator = iter(list(self._iter_entries()))

1956

1868

# both must have roots so this is safe:

1957

1869

current_new = new_iterator.next()

1976

1888

current_new_minikind = \

1977

1889

DirState._kind_to_minikind[current_new[1].kind]

1978

1890

if current_new_minikind == 't':

1979

fingerprint = current_new[1].reference_revision

1891

fingerprint = current_new[1].reference_revision or ''

1980

1892

else:

1981

1893

fingerprint = ''

1982

1894

else:

2025

1937

def _make_absent(self, current_old):

2026

1938

"""Mark current_old - an entry - as absent for tree 0.

2027

1939

2028

:return: True if this was the last details entry for they entry key:

1940

:return: True if this was the last details entry for the entry key:

2029

1941

that is, if the underlying block has had the entry removed, thus

2030

1942

shrinking in length.

2031

1943

"""

2032

1944

# build up paths that this id will be left at after the change is made,

2033

1945

# so we can update their cross references in tree 0

2034

1946

all_remaining_keys = set()

2035

# Dont check the working tree, because its going.

1947

# Dont check the working tree, because it's going.

2036

1948

for details in current_old[1][1:]:

2037

1949

if details[0] not in ('a', 'r'): # absent, relocated

2038

1950

all_remaining_keys.add(current_old[0])

2327

2239

self._split_path_cache = {}

2328

2240

2329

2241

def lock_read(self):

2330

"""Acquire a read lock on the dirstate"""

2242

"""Acquire a read lock on the dirstate."""

2331

2243

if self._lock_token is not None:

2332

2244

raise errors.LockContention(self._lock_token)

2333

2245

# TODO: jam 20070301 Rather than wiping completely, if the blocks are

2340

2252

self._wipe_state()

2341

2253

2342

2254

def lock_write(self):

2343

"""Acquire a write lock on the dirstate"""

2255

"""Acquire a write lock on the dirstate."""

2344

2256

if self._lock_token is not None:

2345

2257

raise errors.LockContention(self._lock_token)

2346

2258

# TODO: jam 20070301 Rather than wiping completely, if the blocks are

2353

2265

self._wipe_state()

2354

2266

2355

2267

def unlock(self):

2356

"""Drop any locks held on the dirstate"""

2268

"""Drop any locks held on the dirstate."""

2357

2269

if self._lock_token is None:

2358

2270

raise errors.LockNotHeld(self)

2359

2271

# TODO: jam 20070301 Rather than wiping completely, if the blocks are

2367

2279

self._split_path_cache = {}

2368

2280

2369

2281

def _requires_lock(self):

2370

"""Checks that a lock is currently held by someone on the dirstate"""

2282

"""Check that a lock is currently held by someone on the dirstate."""

2371

2283

if not self._lock_token:

2372

2284

raise errors.ObjectNotLocked(self)

2373

2285

2374

2286

2375

def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}):

2376

"""Return the index where to insert dirname into the dirblocks.

2377

2378

The return value idx is such that all directories blocks in dirblock[:idx]

2379

have names < dirname, and all blocks in dirblock[idx:] have names >=

2380

dirname.

2381

2382

Optional args lo (default 0) and hi (default len(dirblocks)) bound the

2383

slice of a to be searched.

2384

"""

2385

if hi is None:

2386

hi = len(dirblocks)

2387

try:

2388

dirname_split = cache[dirname]

2389

except KeyError:

2390

dirname_split = dirname.split('/')

2391

cache[dirname] = dirname_split

2392

while lo < hi:

2393

mid = (lo+hi)//2

2394

# Grab the dirname for the current dirblock

2395

cur = dirblocks[mid][0]

2396

try:

2397

cur_split = cache[cur]

2398

except KeyError:

2399

cur_split = cur.split('/')

2400

cache[cur] = cur_split

2401

if cur_split < dirname_split: lo = mid+1

2402

else: hi = mid

2403

return lo

2287

# Try to load the compiled form if possible

2288

try:

2289

from bzrlib._dirstate_helpers_c import (

2290

_read_dirblocks_c as _read_dirblocks,

2291

bisect_dirblock_c as bisect_dirblock,

2292

_bisect_path_left_c as _bisect_path_left,

2293

_bisect_path_right_c as _bisect_path_right,

2294

cmp_by_dirs_c as cmp_by_dirs,

2295

)

2296

except ImportError:

2297

from bzrlib._dirstate_helpers_py import (

2298

_read_dirblocks_py as _read_dirblocks,

2299

bisect_dirblock_py as bisect_dirblock,

2300

_bisect_path_left_py as _bisect_path_left,

2301

_bisect_path_right_py as _bisect_path_right,

2302

cmp_by_dirs_py as cmp_by_dirs,

2303

)

Older »