# Copyright (C) 2006-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""DirState objects record the state of a directory and its bzr metadata.

Pseudo EBNF grammar for the state file. Fields are separated by NULLs, and
lines by NL. The field delimiters are omitted in the grammar, line delimiters
are not - this is done for clarity of reading. All string data is in utf8.
MINIKIND = "f" | "d" | "l" | "a" | "r" | "t";
NL = "\\n";
NULL = "\\0";
WHOLE_NUMBER = {digit}, digit;
REVISION_ID = a non-empty utf8 string;

dirstate format = header line, full checksum, row count, parent details,
 ghost_details, entries;
header line = "#bazaar dirstate flat format 3", NL;
full checksum = "crc32: ", ["-"], WHOLE_NUMBER, NL;
row count = "num_entries: ", WHOLE_NUMBER, NL;
parent_details = WHOLE NUMBER, {REVISION_ID}*, NL;
ghost_details = WHOLE NUMBER, {REVISION_ID}*, NL;
entries = {entry};
entry = entry_key, current_entry_details, {parent_entry_details};
entry_key = dirname, basename, fileid;
current_entry_details = common_entry_details, working_entry_details;
parent_entry_details = common_entry_details, history_entry_details;
common_entry_details = MINIKIND, fingerprint, size, executable
working_entry_details = packed_stat
history_entry_details = REVISION_ID;

fingerprint = a nonempty utf8 sequence with meaning defined by minikind.
Given this definition, the following is useful to know::

    entry (aka row) - all the data for a given key.
    entry[0]: The key (dirname, basename, fileid)
    entry[1]: The tree(s) data for this path and id combination.
    entry[1][0]: The current tree
    entry[1][1]: The second tree

For an entry for a tree, we have (using tree 0 - current tree) to demonstrate::

    entry[1][0][0]: minikind
    entry[1][0][1]: fingerprint
    entry[1][0][2]: size
    entry[1][0][3]: executable
    entry[1][0][4]: packed_stat

OR (for non tree-0)::

    entry[1][1][4]: revision_id
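
As a concrete illustration (invented values), an entry for a file
``dir/name`` with one parent tree could look like::

    ((b'dir', b'name', b'file-id'), [
        (b'f', b'<sha1>', 30, False, b'<packed stat>'),   # tree 0
        (b'f', b'<sha1>', 30, False, b'<revision-id>'),   # tree 1
        ])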

There may be multiple rows at the root, one per id present in the root, so the
in memory root row is now::

    self._dirblocks[0] -> ('', [entry ...]),

and the entries in there are::

    entries[0][0]: ''
    entries[0][1]: ''
    entries[0][2]: file_id
    entries[1][0]: The tree data for the current tree for this fileid at /

'r' is a relocated entry: This path is not present in this tree with this
    id, but the id can be found at another location. The fingerprint is
    used to point to the target location.
'a' is an absent entry: In that tree the id is not present at this path.
'd' is a directory entry: This path in this tree is a directory with the
    current file id. There is no fingerprint for directories.
'f' is a file entry: As for directory, but it's a file. The fingerprint is
    the sha1 value of the file's canonical form, i.e. after any read
    filters have been applied to the convenience form stored in the working
    tree.
'l' is a symlink entry: As for directory, but a symlink. The fingerprint is
    the link target.
't' is a reference to a nested subtree; the fingerprint is the referenced
    revision.

The entries on disk and in memory are ordered according to the following keys::

    directory, as a list of components
    filename
    file-id

--- Format 1 had the following different definition: ---

rows = dirname, NULL, basename, NULL, MINIKIND, NULL, fileid_utf8, NULL,
    WHOLE NUMBER (* size *), NULL, packed stat, NULL, sha1|symlink target,
    {PARENT ROW}
PARENT ROW = NULL, revision_utf8, NULL, MINIKIND, NULL, dirname, NULL,
    basename, NULL, WHOLE NUMBER (* size *), NULL, "y" | "n", NULL,
    SHA1

PARENT ROWs are emitted for every parent that is not in the ghosts details
line. That is, if the parents are foo, bar, baz, and the ghosts are bar, then
each row will have a PARENT ROW for foo and baz, but not for bar.

In any tree, a kind of 'moved' indicates that the fingerprint field
(which we treat as opaque data specific to the 'kind' anyway) has the
details for the id of this row in that tree.

I'm strongly tempted to add an id->path index as well, but I think that
where we need an id->path mapping we also usually read the whole file, so
I'm going to skip that for the moment, as we have the ability to locate
via bisect any path in any tree, and if we lookup things by path, we can
accumulate an id->path mapping as we go, which will tend to match what we
looked for.

I plan to implement this asap, so please speak up now to alter/tweak the
design - and once we stabilise on this, I'll update the wiki page for
it.

The rationale for all this is that we want fast operations for the
common case (diff/status/commit/merge on all files) and extremely fast
operations for the less common but still frequent cases (status/diff/commit
on specific files). Operations on specific files involve a scan for all
the children of a path, *in every involved tree*, which the current
format did not accommodate.

Design priorities:

 1. Fast end to end use for bzr's top 5 use cases. (commit/diff/status/merge/???)
 2. fall back to the current object model as needed.
 3. scale usably to the largest trees known today - say 50K entries. (mozilla
    is an example of this)

Locking:

 Eventually reuse dirstate objects across locks IFF the dirstate file has not
 been modified, but will require that we flush/ignore cached stat-hit data
 because we won't want to restat all files on disk just because a lock was
 acquired, yet we cannot trust the data after the previous lock was released.

Memory representation::

 vector of all directories, and vector of the children ?
   i.e.
     root_entry = (direntry for root, [parent_direntries_for_root]),
     dirblocks = [
     ('', ['data for achild', 'data for bchild', 'data for cchild'])
     ('dir', ['achild', 'cchild', 'echild'])
     ]
   - single bisect to find N subtrees from a path spec
   - in-order for serialisation - this is 'dirblock' grouping.
   - insertion of a file '/a' affects only the '/' child-vector, that is, to
     insert 10K elements from scratch does not generate O(N^2) memmoves of a
     single vector, rather each individual vector, which tends to be limited
     to a manageable number. Will scale badly on trees with 10K entries in a
     single directory. Compare with Inventory.InventoryDirectory which has
     a dictionary for the children. No bisect capability, can only probe for
     exact matches, or grab all elements and sort.
   - What's the risk of error here? Once we have the base format being processed
     we should have a net win regardless of optimality. So we are going to
     go with what seems reasonable.
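
A minimal sketch (invented data, and simplified from the real
``bisect_dirblock`` helper, which orders on split path components) of how
this vector-of-dirblocks shape is probed::

    import bisect

    dirblocks = [(b'', []), (b'', []), (b'a', []), (b'a/b', [])]
    dirnames = [d[0] for d in dirblocks]
    # find the block holding the contents of directory 'a/b'
    block_index = bisect.bisect_left(dirnames, b'a/b', 1)   # -> 3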

Maybe we should do a test profile of the core structure - 10K simulated
searches/lookups/etc?

Objects for each row?

The lifetime of Dirstate objects is currently per lock, but see above for
possible extensions. The lifetime of a row from a dirstate is expected to be
very short in the optimistic case: which we are optimising for. For instance,
subtree status will determine from analysis of the disk data what rows need to
be examined at all, and will be able to determine from a single row whether
that file has altered or not, so we are aiming to process tens of thousands of
entries each second within the dirstate context, before exposing anything to
the larger codebase. This suggests we want the time for a single file
comparison to be < 0.1 milliseconds. That would give us 10000 paths per second
processed, and to scale to 100 thousand we'll need another order of magnitude
to do that. Now, as the lifetime for all unchanged entries is the time to
parse, stat the file on disk, and then immediately discard, the overhead of
object creation becomes a significant cost.

Figures: Creating a tuple from 3 elements was profiled at 0.0625
microseconds, whereas creating an object which is subclassed from tuple was
0.500 microseconds, and creating an object with 3 elements and slots was 3
microseconds long. 0.1 milliseconds is 100 microseconds, and ideally we'll get
down to 10 microseconds for the total processing - having 33% of that be object
creation is a huge overhead. There is a potential cost in using tuples within
each row which is that the conditional code to do comparisons may be slower
than method invocation, but method invocation is known to be slow due to stack
frame creation, so avoiding methods in these tight inner loops is unfortunately
desirable. We can consider a pyrex version of this with objects in future if
desired.
"""
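
# A rough sketch (not part of the original design notes) of how the
# object-creation figures above can be re-measured with the stdlib timeit
# module; absolute numbers vary by machine and interpreter:
#
#   python -m timeit -s "x = 1" "(x, x, x)"
#   python -m timeit -s "class T(tuple): pass" "T((1, 2, 3))"
#   python -m timeit -s "class S(object): __slots__ = ('a', 'b', 'c')" \
#       "s = S(); s.a = 1; s.b = 2; s.c = 3"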

from __future__ import absolute_import

import bisect
import operator
import os
import stat
from stat import S_IEXEC

from ..sixish import (
    viewitems,
    viewvalues,
    )

# This is the Windows equivalent of ENOTDIR
# It is defined in pywin32.winerror, but we don't want a strong dependency for
# just an error code.
ERROR_PATH_NOT_FOUND = 3
ERROR_DIRECTORY = 267


class DirstateCorrupt(errors.BzrError):

    _fmt = "The dirstate file (%(state)s) appears to be corrupt: %(msg)s"

    def __init__(self, state, msg):
        errors.BzrError.__init__(self)
        self.state = state
        self.msg = msg


class SHA1Provider(object):
    """An interface for getting sha1s of a file."""

    def sha1(self, abspath):
        """Return the sha1 of a file given its absolute path.

        :param abspath: May be a filesystem encoded absolute path
            or a unicode path.
        """
        raise NotImplementedError(self.sha1)

    def stat_and_sha1(self, abspath):
        """Return the stat and sha1 of a file given its absolute path.

        :param abspath: May be a filesystem encoded absolute path
            or a unicode path.

        Note: the stat should be the stat of the physical file
        while the sha may be the sha of its canonical content.
        """
        raise NotImplementedError(self.stat_and_sha1)


class DefaultSHA1Provider(SHA1Provider):
    """A SHA1Provider that reads directly from the filesystem."""

    def sha1(self, abspath):
        """Return the sha1 of a file given its absolute path."""
        return osutils.sha_file_by_name(abspath)

    def stat_and_sha1(self, abspath):
        """Return the stat and sha1 of a file given its absolute path."""
        with open(abspath, 'rb') as file_obj:
            statvalue = os.fstat(file_obj.fileno())
            sha1 = osutils.sha_file(file_obj)
        return statvalue, sha1
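
# Illustrative use of the SHA1Provider interface (hypothetical path):
#
#   provider = DefaultSHA1Provider()
#   sha1 = provider.sha1('/path/to/file')
#   statvalue, sha1 = provider.stat_and_sha1('/path/to/file')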


class DirState(object):
    """Record directory and metadata state for fast access.

    A dirstate is a specialised data structure for managing local working
    tree state information. It's not yet well defined whether it is platform
    specific, and if it is how we detect/parameterize that.

    Dirstates use the usual lock_write, lock_read and unlock mechanisms.
    Unlike most bzr disk formats, DirStates must be locked for reading, using
    lock_read.  (This is an os file lock internally.)  This is necessary
    because the file can be rewritten in place.

    DirStates must be explicitly written with save() to commit changes; just
    unlocking them does not write the changes to disk.
    """

    _kind_to_minikind = {
            'absent': b'a',
            'file': b'f',
            'directory': b'd',
            'relocated': b'r',
            'symlink': b'l',
            'tree-reference': b't',
        }
    _minikind_to_kind = {
            b'a': 'absent',
            b'f': 'file',
            b'd': 'directory',
            b'l': 'symlink',
            b'r': 'relocated',
            b't': 'tree-reference',
        }
    _stat_to_minikind = {
        stat.S_IFDIR: b'd',
        stat.S_IFREG: b'f',
        stat.S_IFLNK: b'l',
    }
    _to_yesno = {True: b'y', False: b'n'} # TODO profile the performance gain
     # of using int conversion rather than a dict here. AND BLAME ANDREW IF
     # THIS IS SLOWER

    # TODO: jam 20070221 Figure out what to do if we have a record that exceeds
    #       the BISECT_PAGE_SIZE. For now, we just have to make it large enough
    #       that we are sure a single record will always fit.
    BISECT_PAGE_SIZE = 4096

    NOT_IN_MEMORY = 0
    IN_MEMORY_UNMODIFIED = 1
    IN_MEMORY_MODIFIED = 2
    IN_MEMORY_HASH_MODIFIED = 3 # Only hash-cache updates

    # A pack_stat (the x's) that is just noise and will never match the output
    # of base64 encode.
    NULLSTAT = b'x' * 32
    NULL_PARENT_DETAILS = static_tuple.StaticTuple(b'a', b'', 0, False, b'')

    HEADER_FORMAT_2 = b'#bazaar dirstate flat format 2\n'
    HEADER_FORMAT_3 = b'#bazaar dirstate flat format 3\n'

    def __init__(self, path, sha1_provider, worth_saving_limit=0):
        """Create a DirState object.

        :param path: The path at which the dirstate file on disk should live.
        :param sha1_provider: an object meeting the SHA1Provider interface.
        :param worth_saving_limit: when the exact number of hash changed
            entries is known, only bother saving the dirstate if more than
            this count of entries have changed.
            -1 means never save hash changes, 0 means always save hash changes.
        """
        # _header_state and _dirblock_state represent the current state
        # of the dirstate metadata and the per-row data respectively.
        # NOT_IN_MEMORY indicates that no data is in memory
        # IN_MEMORY_UNMODIFIED indicates that what we have in memory
        #   is the same as is on disk
        # IN_MEMORY_MODIFIED indicates that we have a modified version
        #   of what is on disk.
        # In future we will add more granularity, for instance _dirblock_state
        # will probably support partially-in-memory as a separate variable,
        # allowing for partially-in-memory unmodified and partially-in-memory
        # modified states.
        self._header_state = DirState.NOT_IN_MEMORY
        self._dirblock_state = DirState.NOT_IN_MEMORY
        # If true, an error has been detected while updating the dirstate, and
        # for safety we're not going to commit to disk.
        self._changes_aborted = False
        self._dirblocks = []
        self._ghosts = []
        self._parents = []
        self._state_file = None
        self._filename = path
        self._lock_token = None
        self._lock_state = None
        self._id_index = None
        # a map from packed_stat to sha's.
        self._packed_stat_index = None
        self._end_of_header = None
        self._cutoff_time = None
        self._split_path_cache = {}
        self._bisect_page_size = DirState.BISECT_PAGE_SIZE
        self._sha1_provider = sha1_provider
        if 'hashcache' in debug.debug_flags:
            self._sha1_file = self._sha1_file_and_mutter
        else:
            self._sha1_file = self._sha1_provider.sha1
        # These two attributes provide a simple cache for lookups into the
        # dirstate in-memory vectors. By probing respectively for the last
        # block, and for the next entry, we save nearly 2 bisections per path
        # during commit.
        self._last_block_index = None
        self._last_entry_index = None
        # The set of known hash changes
        self._known_hash_changes = set()
        # How many hash changed entries can we have without saving
        self._worth_saving_limit = worth_saving_limit
        self._config_stack = config.LocationStack(urlutils.local_path_to_url(
            path))

    def __repr__(self):
        return "%s(%r)" % \
            (self.__class__.__name__, self._filename)

    def _mark_modified(self, hash_changed_entries=None, header_modified=False):
        """Mark this dirstate as modified.

        :param hash_changed_entries: if non-None, mark just these entries as
            having their hash modified.
        :param header_modified: mark the header modified as well, not just the
            dirblocks.
        """
        #trace.mutter_callsite(3, "modified hash entries: %s", hash_changed_entries)
        if hash_changed_entries:
            self._known_hash_changes.update([e[0] for e in hash_changed_entries])
            if self._dirblock_state in (DirState.NOT_IN_MEMORY,
                                        DirState.IN_MEMORY_UNMODIFIED):
                # If the dirstate is already marked as IN_MEMORY_MODIFIED, then
                # that takes precedence.
                self._dirblock_state = DirState.IN_MEMORY_HASH_MODIFIED
        else:
            # TODO: Since we now have a IN_MEMORY_HASH_MODIFIED state, we
            #       should fail noisily if someone tries to set
            #       IN_MEMORY_MODIFIED but we don't have a write-lock!
            # We don't know exactly what changed so disable smart saving
            self._dirblock_state = DirState.IN_MEMORY_MODIFIED
        if header_modified:
            self._header_state = DirState.IN_MEMORY_MODIFIED

    def _mark_unmodified(self):
        """Mark this dirstate as unmodified."""
        self._header_state = DirState.IN_MEMORY_UNMODIFIED
        self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
        self._known_hash_changes = set()

    def add(self, path, file_id, kind, stat, fingerprint):
        """Add a path to be tracked.

        :param path: The path within the dirstate - '' is the root, 'foo' is the
            path foo within the root, 'foo/bar' is the path bar within foo
            within the root.
        :param file_id: The file id of the path being added.
        :param kind: The kind of the path, as a string like 'file',
            'directory', etc.
        :param stat: The output of os.lstat for the path.
        :param fingerprint: The sha value of the file's canonical form (i.e.
            after any read filters have been applied),
            or the target of a symlink,
            or the referenced revision id for tree-references,
            or '' for directories.
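
        An illustrative call (values invented) might look like::

            state.add('dir/file.txt', b'file-id', 'file', stat_value, sha1)
        """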
        # find the block it's in.
        # find the location in the block.
        # check it's not there
        # add it.
        #------- copied from inventory.ensure_normalized_name - keep synced.
        # --- normalized_filename wants a unicode basename only, so get one.
        dirname, basename = osutils.split(path)
        # we don't import normalized_filename directly because we want to be
        # able to change the implementation at runtime for tests.
        norm_name, can_access = osutils.normalized_filename(basename)
        if norm_name != basename:
            if can_access:
                basename = norm_name
            else:
                raise errors.InvalidNormalization(path)
        # you should never have files called . or ..; just add the directory
        # in the parent, or according to the special treatment for the root
        if basename == '.' or basename == '..':
            raise errors.InvalidEntryName(path)
        # now that we've normalised, we need the correct utf8 path and
        # dirname and basename elements. This single encode and split should be
        # faster than three separate encodes.
        utf8path = (dirname + '/' + basename).strip('/').encode('utf8')
        dirname, basename = osutils.split(utf8path)
        # uses __class__ for speed; the check is needed for safety
        if file_id.__class__ is not bytes:
            raise AssertionError(
                "must be a utf8 file_id not %s" % (type(file_id), ))
        # Make sure the file_id does not exist in this tree
        rename_from = None
        file_id_entry = self._get_entry(0, fileid_utf8=file_id, include_deleted=True)
        if file_id_entry != (None, None):
            if file_id_entry[1][0][0] == b'a':
                if file_id_entry[0] != (dirname, basename, file_id):
                    # set the old name's current operation to rename
                    self.update_minimal(file_id_entry[0],
                        b'r', path_utf8=b'', packed_stat=b'',
                        fingerprint=utf8path)
                    rename_from = file_id_entry[0][0:2]
            else:
                path = osutils.pathjoin(file_id_entry[0][0], file_id_entry[0][1])
                kind = DirState._minikind_to_kind[file_id_entry[1][0][0]]
                info = '%s:%s' % (kind, path)
                raise errors.DuplicateFileId(file_id, info)
        first_key = (dirname, basename, b'')
        block_index, present = self._find_block_index_from_key(first_key)
        if present:
            # check the path is not in the tree
            block = self._dirblocks[block_index][1]
            entry_index, _ = self._find_entry_index(first_key, block)
            while (entry_index < len(block) and
                   block[entry_index][0][0:2] == first_key[0:2]):
                if block[entry_index][1][0][0] not in (b'a', b'r'):
                    # this path is in the dirstate in the current tree.
                    raise Exception("adding already added path!")
                entry_index += 1
        else:
            # The block where we want to put the file is not present. But it
            # might be because the directory was empty, or not loaded yet. Look
            # for a parent entry, if not found, raise NotVersionedError
            parent_dir, parent_base = osutils.split(dirname)
            parent_block_idx, parent_entry_idx, _, parent_present = \
                self._get_block_entry_index(parent_dir, parent_base, 0)
            if not parent_present:
                raise errors.NotVersionedError(path, str(self))
            self._ensure_block(parent_block_idx, parent_entry_idx, dirname)
        block = self._dirblocks[block_index][1]
        entry_key = (dirname, basename, file_id)
        if stat is None:
            size = 0
            packed_stat = DirState.NULLSTAT
        else:
            size = stat.st_size
            packed_stat = pack_stat(stat)
        parent_info = self._empty_parent_info()
        minikind = DirState._kind_to_minikind[kind]
        if rename_from is not None:
            if rename_from[0]:
                old_path_utf8 = b'%s/%s' % rename_from
            else:
                old_path_utf8 = rename_from[1]
            parent_info[0] = (b'r', old_path_utf8, 0, False, b'')
        if kind == 'file':
            entry_data = entry_key, [
                (minikind, fingerprint, size, False, packed_stat),
                ] + parent_info
        elif kind == 'directory':
            entry_data = entry_key, [
                (minikind, b'', 0, False, packed_stat),
                ] + parent_info
        elif kind == 'symlink':
            entry_data = entry_key, [
                (minikind, fingerprint, size, False, packed_stat),
                ] + parent_info
        elif kind == 'tree-reference':
            entry_data = entry_key, [
                (minikind, fingerprint, 0, False, packed_stat),
                ] + parent_info
        else:
            raise errors.BzrError('unknown kind %r' % kind)
        entry_index, present = self._find_entry_index(entry_key, block)
        if not present:
            block.insert(entry_index, entry_data)
        else:
            if block[entry_index][1][0][0] != b'a':
                raise AssertionError(" %r(%r) already added" % (basename, file_id))
            block[entry_index][1][0] = entry_data[1][0]

        if kind == 'directory':
            # insert a new dirblock
            self._ensure_block(block_index, entry_index, utf8path)
        self._mark_modified()
        if self._id_index:
            self._add_to_id_index(self._id_index, entry_key)

    def _bisect(self, paths):
        """Bisect through the disk structure for specific rows.

        :param paths: A list of paths to find
        :return: A dict mapping path => entries for found entries. Missing
            entries will not be in the map.
            The list is not sorted, and entries will be populated
            based on when they were read.
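
        For example (illustrative only), ``self._bisect([b'dir/name'])``
        would return ``{b'dir/name': [entry, ...]}`` when the path exists
        on disk, and ``{}`` when it does not.
        """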
        self._requires_lock()
        # We need the file pointer to be right after the initial header block
        self._read_header_if_needed()
        # If _dirblock_state was in memory, we should just return info from
        # there, this function is only meant to handle when we want to read
        # from disk.
        if self._dirblock_state != DirState.NOT_IN_MEMORY:
            raise AssertionError("bad dirblock state %r" % self._dirblock_state)

        # The disk representation is generally info + '\0\n\0' at the end. But
        # for bisecting, it is easier to treat this as '\0' + info + '\0\n'
        # Because it means we can sync on the '\n'
        state_file = self._state_file
        file_size = os.fstat(state_file.fileno()).st_size
        # We end up with 2 extra fields, we should have a trailing '\n' to
        # ensure that we read the whole record, and we should have a precursor
        # '' which ensures that we start after the previous '\n'
        entry_field_count = self._fields_per_entry() + 1

        low = self._end_of_header
        high = file_size - 1 # Ignore the final '\0'
        # Map from (dir, name) => entry
        found = {}

        # Avoid infinite seeking
        max_count = 30*len(paths)
        count = 0
        # pending is a list of places to look.
        # each entry is a tuple of low, high, dir_names
        #   low -> the first byte offset to read (inclusive)
        #   high -> the last byte offset (inclusive)
        #   dir_names -> The list of (dir, name) pairs that should be found in
        #                the [low, high] range
        pending = [(low, high, paths)]

        page_size = self._bisect_page_size

        fields_to_entry = self._get_fields_to_entry()

        while pending:
            low, high, cur_files = pending.pop()

            if not cur_files or low >= high:
                # Nothing to find
                continue

            count += 1
            if count > max_count:
                raise errors.BzrError('Too many seeks, most likely a bug.')

            mid = max(low, (low+high-page_size)//2)

            state_file.seek(mid)
            # limit the read size, so we don't end up reading data that we have
            # already read.
            read_size = min(page_size, (high-mid)+1)
            block = state_file.read(read_size)

            start = mid
            entries = block.split(b'\n')

            if len(entries) < 2:
                # We didn't find a '\n', so we cannot have found any records.
                # So put this range back and try again. But we know we have to
                # increase the page size, because a single read did not contain
                # a record break (so records must be larger than page_size)
                page_size *= 2
                pending.append((low, high, cur_files))
                continue

            # Check the first and last entries, in case they are partial, or if
            # we don't care about the rest of this page
            first_entry_num = 0
            first_fields = entries[0].split(b'\0')
            if len(first_fields) < entry_field_count:
                # We didn't get the complete first entry
                # so move start, and grab the next, which
                # should be a full entry
                start += len(entries[0])+1
                first_fields = entries[1].split(b'\0')
                first_entry_num = 1

            if len(first_fields) <= 2:
                # We didn't even get a filename here... what do we do?
                # Try a large page size and repeat this query
                page_size *= 2
                pending.append((low, high, cur_files))
                continue
            else:
                # Find what entries we are looking for, which occur before and
                # after this first record.
                after = start
                if first_fields[1]:
                    first_path = first_fields[1] + b'/' + first_fields[2]
                else:
                    first_path = first_fields[2]
                first_loc = _bisect_path_left(cur_files, first_path)

                # These exist before the current location
                pre = cur_files[:first_loc]
                # These occur after the current location, which may be in the
                # data we read, or might be after the last entry
                post = cur_files[first_loc:]

            if post and len(first_fields) >= entry_field_count:
                # We have files after the first entry

                # Parse the last entry
                last_entry_num = len(entries)-1
                last_fields = entries[last_entry_num].split(b'\0')
                if len(last_fields) < entry_field_count:
                    # The very last hunk was not complete,
                    # read the previous hunk
                    after = mid + len(block) - len(entries[-1])
                    last_entry_num -= 1
                    last_fields = entries[last_entry_num].split(b'\0')
                else:
                    after = mid + len(block)

                if last_fields[1]:
                    last_path = last_fields[1] + b'/' + last_fields[2]
                else:
                    last_path = last_fields[2]
                last_loc = _bisect_path_right(post, last_path)

                middle_files = post[:last_loc]
                post = post[last_loc:]

                if middle_files:
                    # We have files that should occur in this block
                    # (>= first, <= last)
                    # Either we will find them here, or we can mark them as
                    # missing.

                    if middle_files[0] == first_path:
                        # We might need to go before this location
                        pre.append(first_path)
                    if middle_files[-1] == last_path:
                        post.insert(0, last_path)

                    # Find out what paths we have
                    paths = {first_path:[first_fields]}
                    # last_path might == first_path so we need to be
                    # careful if we should append rather than overwrite
                    if last_entry_num != first_entry_num:
                        paths.setdefault(last_path, []).append(last_fields)
                    for num in range(first_entry_num+1, last_entry_num):
                        # TODO: jam 20070223 We are already splitting here, so
                        #       shouldn't we just split the whole thing rather
                        #       than doing the split again in add_one_record?
                        fields = entries[num].split(b'\0')
                        if fields[1]:
                            path = fields[1] + b'/' + fields[2]
                        else:
                            path = fields[2]
                        paths.setdefault(path, []).append(fields)

                    for path in middle_files:
                        for fields in paths.get(path, []):
                            # offset by 1 because of the opening '\0'
                            # consider changing fields_to_entry to avoid the
                            # extra list slice
                            entry = fields_to_entry(fields[1:])
                            found.setdefault(path, []).append(entry)

            # Now we have split up everything into pre, middle, and post, and
            # we have handled everything that fell in 'middle'.
            # We add 'post' first, so that we prefer to seek towards the
            # beginning, so that we will tend to go as early as we need, and
            # then only seek forward after that.
            if post:
                pending.append((after, high, post))
            if pre:
                pending.append((low, start-1, pre))

        # Consider that we may want to return the directory entries in sorted
        # order. For now, we just return them in whatever order we found them,
        # and leave it up to the caller if they care if it is ordered or not.
        return found

    def _bisect_dirblocks(self, dir_list):
        """Bisect through the disk structure to find entries in given dirs.

        _bisect_dirblocks is meant to find the contents of directories, which
        differs from _bisect, which only finds individual entries.

        :param dir_list: A sorted list of directory names ['', 'dir', 'foo'].
        :return: A map from dir => entries_for_dir
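
        For example (illustrative only), ``self._bisect_dirblocks([b'', b'dir'])``
        would return something like ``{b'': [...], b'dir': [...]}``, one list
        of entries per directory found on disk.
        """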
        # TODO: jam 20070223 A lot of the bisecting logic could be shared
        #       between this and _bisect. It would require parameterizing the
        #       inner loop with a function, though. We should evaluate the
        #       performance difference.
        self._requires_lock()
        # We need the file pointer to be right after the initial header block
        self._read_header_if_needed()
        # If _dirblock_state was in memory, we should just return info from
        # there, this function is only meant to handle when we want to read
        # from disk.
        if self._dirblock_state != DirState.NOT_IN_MEMORY:
            raise AssertionError("bad dirblock state %r" % self._dirblock_state)
        # The disk representation is generally info + '\0\n\0' at the end. But
        # for bisecting, it is easier to treat this as '\0' + info + '\0\n'
        # Because it means we can sync on the '\n'
        state_file = self._state_file
        file_size = os.fstat(state_file.fileno()).st_size
        # We end up with 2 extra fields, we should have a trailing '\n' to
        # ensure that we read the whole record, and we should have a precursor
        # '' which ensures that we start after the previous '\n'
        entry_field_count = self._fields_per_entry() + 1

        low = self._end_of_header
        high = file_size - 1 # Ignore the final '\0'
        # Map from dir => entry
        found = {}

        # Avoid infinite seeking
        max_count = 30*len(dir_list)
        count = 0
        # pending is a list of places to look.
        # each entry is a tuple of low, high, dir_names
        #   low -> the first byte offset to read (inclusive)
        #   high -> the last byte offset (inclusive)
        #   dirs -> The list of directories that should be found in
        #           the [low, high] range
        pending = [(low, high, dir_list)]

        page_size = self._bisect_page_size

        fields_to_entry = self._get_fields_to_entry()

        while pending:
            low, high, cur_dirs = pending.pop()

            if not cur_dirs or low >= high:
                # Nothing to find
                continue

            count += 1
            if count > max_count:
                raise errors.BzrError('Too many seeks, most likely a bug.')

            mid = max(low, (low+high-page_size)//2)

            state_file.seek(mid)
            # limit the read size, so we don't end up reading data that we have
            # already read.
            read_size = min(page_size, (high-mid)+1)
            block = state_file.read(read_size)

            start = mid
            entries = block.split(b'\n')

            if len(entries) < 2:
                # We didn't find a '\n', so we cannot have found any records.
                # So put this range back and try again. But we know we have to
                # increase the page size, because a single read did not contain
                # a record break (so records must be larger than page_size)
                page_size *= 2
                pending.append((low, high, cur_dirs))
                continue

            # Check the first and last entries, in case they are partial, or if
            # we don't care about the rest of this page
            first_entry_num = 0
            first_fields = entries[0].split(b'\0')
            if len(first_fields) < entry_field_count:
                # We didn't get the complete first entry
                # so move start, and grab the next, which
                # should be a full entry
                start += len(entries[0])+1
                first_fields = entries[1].split(b'\0')
                first_entry_num = 1

            if len(first_fields) <= 1:
                # We didn't even get a dirname here... what do we do?
                # Try a large page size and repeat this query
                page_size *= 2
                pending.append((low, high, cur_dirs))
                continue
            else:
                # Find what entries we are looking for, which occur before and
                # after this first record.
                after = start
                first_dir = first_fields[1]
                first_loc = bisect.bisect_left(cur_dirs, first_dir)

                # These exist before the current location
                pre = cur_dirs[:first_loc]
                # These occur after the current location, which may be in the
                # data we read, or might be after the last entry
                post = cur_dirs[first_loc:]

            if post and len(first_fields) >= entry_field_count:
                # We have records to look at after the first entry

                # Parse the last entry
                last_entry_num = len(entries)-1
                last_fields = entries[last_entry_num].split(b'\0')
                if len(last_fields) < entry_field_count:
                    # The very last hunk was not complete,
                    # read the previous hunk
                    after = mid + len(block) - len(entries[-1])
                    last_entry_num -= 1
                    last_fields = entries[last_entry_num].split(b'\0')
                else:
                    after = mid + len(block)

                last_dir = last_fields[1]
                last_loc = bisect.bisect_right(post, last_dir)

                middle_files = post[:last_loc]
                post = post[last_loc:]

                if middle_files:
                    # We have files that should occur in this block
                    # (>= first, <= last)
                    # Either we will find them here, or we can mark them as
                    # missing.

                    if middle_files[0] == first_dir:
                        # We might need to go before this location
                        pre.append(first_dir)
                    if middle_files[-1] == last_dir:
                        post.insert(0, last_dir)

                    # Find out what paths we have
                    paths = {first_dir:[first_fields]}
                    # last_dir might == first_dir so we need to be
                    # careful if we should append rather than overwrite
                    if last_entry_num != first_entry_num:
                        paths.setdefault(last_dir, []).append(last_fields)
                    for num in range(first_entry_num+1, last_entry_num):
                        # TODO: jam 20070223 We are already splitting here, so
                        #       shouldn't we just split the whole thing rather
                        #       than doing the split again in add_one_record?
                        fields = entries[num].split(b'\0')
                        paths.setdefault(fields[1], []).append(fields)

                    for cur_dir in middle_files:
                        for fields in paths.get(cur_dir, []):
                            # offset by 1 because of the opening '\0'
                            # consider changing fields_to_entry to avoid the
                            # extra list slice
                            entry = fields_to_entry(fields[1:])
                            found.setdefault(cur_dir, []).append(entry)

            # Now we have split up everything into pre, middle, and post, and
            # we have handled everything that fell in 'middle'.
            # We add 'post' first, so that we prefer to seek towards the
            # beginning, so that we will tend to go as early as we need, and
            # then only seek forward after that.
            if post:
                pending.append((after, high, post))
            if pre:
                pending.append((low, start-1, pre))

        return found

    def _bisect_recursive(self, paths):
        """Bisect for entries for all paths and their children.

        This will use bisect to find all records for the supplied paths. It
        will then continue to bisect for any records which are marked as
        directories. (and renames?)

        :param paths: A sorted list of (dir, name) pairs
             eg: [('', 'a'), ('', 'f'), ('a/b', 'c')]
        :return: A dictionary mapping (dir, name, file_id) => [tree_info]
        """
        # Map from (dir, name, file_id) => [tree_info]
        found = {}

        found_dir_names = set()

        # Directories that have been read
        processed_dirs = set()
        # Get the ball rolling with the first bisect for all entries.
        newly_found = self._bisect(paths)

        while newly_found:
            # Directories that need to be read
            pending_dirs = set()
            paths_to_search = set()
            for entry_list in viewvalues(newly_found):
                for dir_name_id, trees_info in entry_list:
                    found[dir_name_id] = trees_info
                    found_dir_names.add(dir_name_id[:2])
                    for tree_info in trees_info:
                        minikind = tree_info[0]
                        if minikind == b'd':
                            subdir, name, file_id = dir_name_id
                            path = osutils.pathjoin(subdir, name)
                            # If we already processed this one as a directory,
                            # we don't need to do the extra work again.
                            if path not in processed_dirs:
                                pending_dirs.add(path)
                        elif minikind == b'r':
                            # Rename, we need to directly search the target
                            # which is contained in the fingerprint column
                            dir_name = osutils.split(tree_info[1])
                            if dir_name[0] in pending_dirs:
                                # This entry will be found in the dir search
                                continue
                            if dir_name not in found_dir_names:
                                paths_to_search.add(tree_info[1])
            # Now we have a list of paths to look for directly, and
            # directory blocks that need to be read.
            # newly_found is mixing the keys between (dir, name) and path
            # entries, but that is okay, because we only really care about the
            # file_id for now.
            newly_found = self._bisect(sorted(paths_to_search))
            newly_found.update(self._bisect_dirblocks(sorted(pending_dirs)))
            processed_dirs.update(pending_dirs)
        return found

    def _discard_merge_parents(self):
        """Discard any parent trees beyond the first.

        Note that if this fails the dirstate is corrupted.

        After this function returns the dirstate contains 2 trees, neither of
        which are ghosted.
        """
        self._read_header_if_needed()
        parents = self.get_parent_ids()
        if len(parents) < 1:
            return
        # only require all dirblocks if we are doing a full-pass removal.
        self._read_dirblocks_if_needed()
        dead_patterns = {(b'a', b'r'), (b'a', b'a'), (b'r', b'r'), (b'r', b'a')}
        def iter_entries_removable():
            for block in self._dirblocks:
                deleted_positions = []
                for pos, entry in enumerate(block[1]):
                    yield entry
                    if (entry[1][0][0], entry[1][1][0]) in dead_patterns:
                        deleted_positions.append(pos)
                if deleted_positions:
                    if len(deleted_positions) == len(block[1]):
                        del block[1][:]
                    else:
                        for pos in reversed(deleted_positions):
                            del block[1][pos]
        # if the first parent is a ghost:
        if parents[0] in self.get_ghosts():
            empty_parent = [DirState.NULL_PARENT_DETAILS]
            for entry in iter_entries_removable():
                entry[1][1:] = empty_parent
        else:
            for entry in iter_entries_removable():
                del entry[1][2:]

        self._parents = [parents[0]]
        self._mark_modified(header_modified=True)

    def _empty_parent_info(self):
        return [DirState.NULL_PARENT_DETAILS] * (len(self._parents) -
                                                 len(self._ghosts))

    def _ensure_block(self, parent_block_index, parent_row_index, dirname):
        """Ensure a block for dirname exists.

        This function exists to let callers which know that there is a
        directory dirname ensure that the block for it exists. This block can
        fail to exist because of demand loading, or because a directory had no
        children. In either case it is not an error. It is however an error to
        call this if there is no parent entry for the directory, and thus the
        function requires the coordinates of such an entry to be provided.

        The root row is special cased and can be indicated with a parent block
        and row of -1.

        :param parent_block_index: The index of the block in which dirname's row
            exists.
        :param parent_row_index: The index in the parent block where the row
            exists.
        :param dirname: The utf8 dirname to ensure there is a block for.
        :return: The index for the block.
        """
        if dirname == b'' and parent_row_index == 0 and parent_block_index == 0:
            # This is the signature of the root row, and the
            # contents-of-root row is always index 1
            return 1
        # the basename of the directory must be the end of its full name.
        if not (parent_block_index == -1 and
                parent_row_index == -1 and dirname == b''):
            if not dirname.endswith(
                    self._dirblocks[parent_block_index][1][parent_row_index][0][1]):
                raise AssertionError("bad dirname %r" % dirname)
        block_index, present = self._find_block_index_from_key((dirname, b'', b''))
        if not present:
            ## In future, when doing partial parsing, this should load and
            # populate the entire block.
            self._dirblocks.insert(block_index, (dirname, []))
        return block_index

    def _entries_to_current_state(self, new_entries):
        """Load new_entries into self.dirblocks.

        Process new_entries into the current state object, making them the active
        state.  The entries are grouped together by directory to form dirblocks.

        :param new_entries: A sorted list of entries. This function does not sort
            to prevent unneeded overhead when callers have a sorted list already.
        """
        if new_entries[0][0][0:2] != (b'', b''):
            raise AssertionError(
                "Missing root row %r" % (new_entries[0][0],))
        # The two blocks here are deliberate: the root block and the
        # contents-of-root block.
        self._dirblocks = [(b'', []), (b'', [])]
        current_block = self._dirblocks[0][1]
        current_dirname = b''
        root_key = (b'', b'')
        append_entry = current_block.append
        for entry in new_entries:
            if entry[0][0] != current_dirname:
                # new block - different dirname
                current_block = []
                current_dirname = entry[0][0]
                self._dirblocks.append((current_dirname, current_block))
                append_entry = current_block.append
            # append the entry to the current block
            append_entry(entry)
        self._split_root_dirblock_into_contents()

    def _split_root_dirblock_into_contents(self):
        """Split the root dirblocks into root and contents-of-root.

        After parsing by path, we end up with root entries and contents-of-root
        entries in the same block. This loop splits them out again.
        """
        # The above loop leaves the "root block" entries mixed with the
        # "contents-of-root block". But we don't want an if check on
        # all entries, so instead we just fix it up here.
        if self._dirblocks[1] != (b'', []):
            raise ValueError("bad dirblock start %r" % (self._dirblocks[1],))
        root_block = []
        contents_of_root_block = []
        for entry in self._dirblocks[0][1]:
            if not entry[0][1]: # This is a root entry
                root_block.append(entry)
            else:
                contents_of_root_block.append(entry)
        self._dirblocks[0] = (b'', root_block)
        self._dirblocks[1] = (b'', contents_of_root_block)

    def _entries_for_path(self, path):
        """Return a list with all the entries that match path for all ids."""
        dirname, basename = os.path.split(path)
        key = (dirname, basename, b'')
        block_index, present = self._find_block_index_from_key(key)
        if not present:
            # the block which should contain path is absent.
            return []
        result = []
        block = self._dirblocks[block_index][1]
        entry_index, _ = self._find_entry_index(key, block)
        # we may need to look at multiple entries at this path: walk while the
        # specific_files match.
        while (entry_index < len(block) and
               block[entry_index][0][0:2] == key[0:2]):
            result.append(block[entry_index])
            entry_index += 1
        return result

    def _entry_to_line(self, entry):
        """Serialize entry to a NULL delimited line ready for _get_output_lines.

        :param entry: An entry_tuple as defined in the module docstring.
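
        The serialised form for an entry with no parent trees looks roughly
        like this (illustrative, with the fields joined by NULs)::

            dirname basename file-id minikind fingerprint size y|n packed_stat
        """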
        entire_entry = list(entry[0])
        for tree_number, tree_data in enumerate(entry[1]):
            # (minikind, fingerprint, size, executable, tree_specific_string)
            entire_entry.extend(tree_data)
            # 3 for the key, 5 for the fields per tree.
            tree_offset = 3 + tree_number * 5
            # minikind
            entire_entry[tree_offset + 0] = tree_data[0]
            # size
            entire_entry[tree_offset + 2] = b'%d' % tree_data[2]
            # executable
            entire_entry[tree_offset + 3] = DirState._to_yesno[tree_data[3]]
        return b'\0'.join(entire_entry)

    def _fields_per_entry(self):
        """How many null separated fields should be in each entry row.

        Each line now has an extra '\\n' field which is not used
        so we just skip over it

        entry size::
            3 fields for the key
            + number of fields per tree_data (5) * tree count
            + newline
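
        As a worked example: with one parent tree present, tree_count is 2,
        so each row carries 3 + 5 * 2 + 1 = 14 fields.
        """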
        tree_count = 1 + self._num_present_parents()
        return 3 + 5 * tree_count + 1

    def _find_block(self, key, add_if_missing=False):
        """Return the block that key should be present in.

        :param key: A dirstate entry key.
        :return: The block tuple.
        """
        block_index, present = self._find_block_index_from_key(key)
        if not present:
            if not add_if_missing:
                # check to see if key is versioned itself - we might want to
                # add it anyway, because dirs with no entries dont get a
                # dirblock at parse time.
                # This is an uncommon branch to take: most dirs have children,
                # and most code works with versioned paths.
                parent_base, parent_name = osutils.split(key[0])
                if not self._get_block_entry_index(parent_base, parent_name, 0)[3]:
                    # some parent path has not been added - its an error to add
                    # this child
                    raise errors.NotVersionedError(key[0:2], str(self))
            self._dirblocks.insert(block_index, (key[0], []))
        return self._dirblocks[block_index]

    def _find_block_index_from_key(self, key):
        """Find the dirblock index for a key.

        :return: The block index, True if the block for the key is present.
        """
        if key[0:2] == (b'', b''):
            return 0, True
        try:
            if (self._last_block_index is not None and
                self._dirblocks[self._last_block_index][0] == key[0]):
                return self._last_block_index, True
        except IndexError:
            pass
        block_index = bisect_dirblock(self._dirblocks, key[0], 1,
                                      cache=self._split_path_cache)
        # _right returns one-past-where-key is so we have to subtract
        # one to use it. we use _right here because there are two
        # '' blocks - the root, and the contents of root
        # we always have a minimum of 2 in self._dirblocks: root and
        # root-contents, and for '', we get 2 back, so this is
        # simple and correct:
        present = (block_index < len(self._dirblocks) and
                   self._dirblocks[block_index][0] == key[0])
        self._last_block_index = block_index
        # Reset the entry index cache to the beginning of the block.
        self._last_entry_index = -1
        return block_index, present

    def _find_entry_index(self, key, block):
        """Find the entry index for a key in a block.

        :return: The entry index, True if the entry for the key is present.
        """
        len_block = len(block)
        try:
            if self._last_entry_index is not None:
                # mini-bisect here.
                entry_index = self._last_entry_index + 1
                # A hit is when the key is after the last slot, and before or
                # equal to the next slot.
                if ((entry_index > 0 and block[entry_index - 1][0] < key) and
                    key <= block[entry_index][0]):
                    self._last_entry_index = entry_index
                    present = (block[entry_index][0] == key)
                    return entry_index, present
        except IndexError:
            pass
        entry_index = bisect.bisect_left(block, (key, []))
        present = (entry_index < len_block and
                   block[entry_index][0] == key)
        self._last_entry_index = entry_index
        return entry_index, present

    @staticmethod
    def from_tree(tree, dir_state_filename, sha1_provider=None):
        """Create a dirstate from a bzr Tree.

        :param tree: The tree which should provide parent information and
            inventory data.
        :param sha1_provider: an object meeting the SHA1Provider interface.
            If None, a DefaultSHA1Provider is used.
        :return: a DirState object which is currently locked for writing.
            (it was locked by DirState.initialize)
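
        A typical (illustrative) use, honouring the locking contract above::

            state = DirState.from_tree(tree, 'dirstate')
            try:
                state.save()
            finally:
                state.unlock()
        """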
        result = DirState.initialize(dir_state_filename,
            sha1_provider=sha1_provider)
        try:
            with tree.lock_read():
                try:
                    parent_ids = tree.get_parent_ids()
                    num_parents = len(parent_ids)
                    parent_trees = []
                    for parent_id in parent_ids:
                        parent_tree = tree.branch.repository.revision_tree(parent_id)
                        parent_trees.append((parent_id, parent_tree))
                        parent_tree.lock_read()
                    result.set_parent_trees(parent_trees, [])
                    result.set_state_from_inventory(tree.root_inventory)
                finally:
                    for revid, parent_tree in parent_trees:
                        parent_tree.unlock()
        except:
            # The caller won't have a chance to unlock this, so make sure we
            # cleanup ourselves
            result.unlock()
            raise
        return result

    def _check_delta_is_valid(self, delta):
        return list(inventory._check_delta_unique_ids(
                    inventory._check_delta_unique_old_paths(
                    inventory._check_delta_unique_new_paths(
                    inventory._check_delta_ids_match_entry(
                    inventory._check_delta_ids_are_valid(
                    inventory._check_delta_new_path_entry_both_or_None(delta)))))))

    def update_by_delta(self, delta):
        """Apply an inventory delta to the dirstate for tree 0.

        This is the workhorse for apply_inventory_delta in dirstate based
        trees.

        :param delta: An inventory delta. See Inventory.apply_delta for
            details.
        """
        self._read_dirblocks_if_needed()
        encode = cache_utf8.encode
        insertions = {}
        removals = {}
        # Accumulate parent references (path_utf8, id), to check for parentless
        # items or items placed under files/links/tree-references. We get
        # references from every item in the delta that is not a deletion and
        # is not itself the root.
        parents = set()
        # Added ids must not be in the dirstate already. This set holds those
        # ids.
        new_ids = set()
        # This loop transforms the delta to single atomic operations that can
        # be executed and validated.
        delta = sorted(self._check_delta_is_valid(delta), reverse=True)
        for old_path, new_path, file_id, inv_entry in delta:
            if file_id.__class__ is not bytes:
                raise AssertionError(
                    "must be a utf8 file_id not %s" % (type(file_id), ))
            if (file_id in insertions) or (file_id in removals):
                self._raise_invalid(old_path or new_path, file_id,
                    "repeated file_id")
            if old_path is not None:
                old_path = old_path.encode('utf-8')
                removals[file_id] = old_path
            else:
                new_ids.add(file_id)
            if new_path is not None:
                if inv_entry is None:
                    self._raise_invalid(new_path, file_id,
                        "new_path with no entry")
                new_path = new_path.encode('utf-8')
                dirname_utf8, basename = osutils.split(new_path)
                if basename:
                    parents.add((dirname_utf8, inv_entry.parent_id))
                key = (dirname_utf8, basename, file_id)
                minikind = DirState._kind_to_minikind[inv_entry.kind]
                if minikind == b't':
                    fingerprint = inv_entry.reference_revision or b''
                else:
                    fingerprint = b''
                insertions[file_id] = (key, minikind, inv_entry.executable,
                                       fingerprint, new_path)
            # Transform moves into delete+add pairs
            if None not in (old_path, new_path):
                for child in self._iter_child_entries(0, old_path):
                    if child[0][2] in insertions or child[0][2] in removals:
                        continue
                    child_dirname = child[0][0]
                    child_basename = child[0][1]
                    minikind = child[1][0][0]
                    fingerprint = child[1][0][4]
                    executable = child[1][0][3]
                    old_child_path = osutils.pathjoin(child_dirname,
                                                      child_basename)
                    removals[child[0][2]] = old_child_path
                    child_suffix = child_dirname[len(old_path):]
                    new_child_dirname = (new_path + child_suffix)
                    key = (new_child_dirname, child_basename, child[0][2])
                    new_child_path = osutils.pathjoin(new_child_dirname,
                                                      child_basename)
                    insertions[child[0][2]] = (key, minikind, executable,
                                               fingerprint, new_child_path)
        self._check_delta_ids_absent(new_ids, delta, 0)
        try:
            self._apply_removals(viewitems(removals))
            self._apply_insertions(viewvalues(insertions))
            # Validate parents
            self._after_delta_check_parents(parents, 0)
        except errors.BzrError as e:
            self._changes_aborted = True
            if 'integrity error' not in str(e):
                raise
            # _get_entry raises BzrError when a request is inconsistent; we
            # want such errors to be shown as InconsistentDelta - and that
            # fits the behaviour we trigger.
            raise errors.InconsistentDeltaDelta(delta,
                "error from _get_entry. %s" % (e,))

    def _apply_removals(self, removals):
        for file_id, path in sorted(removals, reverse=True,
                                    key=operator.itemgetter(1)):
            dirname, basename = osutils.split(path)
            block_i, entry_i, d_present, f_present = \
                self._get_block_entry_index(dirname, basename, 0)
            try:
                entry = self._dirblocks[block_i][1][entry_i]
            except IndexError:
                self._raise_invalid(path, file_id,
                    "Wrong path for old path.")
            if not f_present or entry[1][0][0] in (b'a', b'r'):
                self._raise_invalid(path, file_id,
                    "Wrong path for old path.")
            if file_id != entry[0][2]:
                self._raise_invalid(path, file_id,
                    "Attempt to remove path has wrong id - found %r."
                    % entry[0][2])
            self._make_absent(entry)
            # See if we have a malformed delta: deleting a directory must not
            # leave crud behind. This increases the number of bisects needed
            # substantially, but deletion or renames of large numbers of paths
            # is rare enough it shouldn't be an issue (famous last words?) RBC
            # 20080730.
            block_i, entry_i, d_present, f_present = \
                self._get_block_entry_index(path, b'', 0)
            if d_present:
                # The dir block is still present in the dirstate; this could
                # be due to it being in a parent tree, or a corrupt delta.
                for child_entry in self._dirblocks[block_i][1]:
                    if child_entry[1][0][0] not in (b'r', b'a'):
                        self._raise_invalid(path, entry[0][2],
                            "The file id was deleted but its children were "
                            "not deleted.")

    def _apply_insertions(self, adds):
        try:
            for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
                self.update_minimal(key, minikind, executable, fingerprint,
                                    path_utf8=path_utf8)
        except errors.NotVersionedError:
            self._raise_invalid(path_utf8.decode('utf8'), key[2],
                "Missing parent")

    def update_basis_by_delta(self, delta, new_revid):
        """Update the parents of this tree after a commit.

        This gives the tree one parent, with revision id new_revid. The
        inventory delta is applied to the current basis tree to generate the
        inventory for the parent new_revid, and all other parent trees are
        discarded.

        Note that an exception during the operation of this method will leave
        the dirstate in a corrupt state where it should not be saved.

        :param new_revid: The new revision id for the trees parent.
        :param delta: An inventory delta (see apply_inventory_delta) describing
            the changes from the current left most parent revision to new_revid.
        """
1479
self._read_dirblocks_if_needed()
1480
self._discard_merge_parents()
1481
if self._ghosts != []:
1482
raise NotImplementedError(self.update_basis_by_delta)
1483
if len(self._parents) == 0:
1484
# setup a blank tree, the most simple way.
1485
empty_parent = DirState.NULL_PARENT_DETAILS
1486
for entry in self._iter_entries():
1487
entry[1].append(empty_parent)
1488
self._parents.append(new_revid)
1490
self._parents[0] = new_revid
1492
delta = sorted(self._check_delta_is_valid(delta), reverse=True)
1496
# The paths this function accepts are unicode and must be encoded as we
1498
encode = cache_utf8.encode
1499
inv_to_entry = self._inv_entry_to_details
1500
# delta is now (deletes, changes), (adds) in reverse lexographical
1502
# deletes in reverse lexographic order are safe to process in situ.
1503
# renames are not, as a rename from any path could go to a path
1504
# lexographically lower, so we transform renames into delete, add pairs,
1505
# expanding them recursively as needed.
1506
# At the same time, to reduce interface friction we convert the input
1507
# inventory entries to dirstate.
1508
root_only = (b'', b'')
1509
# Accumulate parent references (path_utf8, id), to check for parentless
1510
# items or items placed under files/links/tree-references. We get
1511
# references from every item in the delta that is not a deletion and
1512
# is not itself the root.
1514
# Added ids must not be in the dirstate already. This set holds those
1517
for old_path, new_path, file_id, inv_entry in delta:
1518
if file_id.__class__ is not bytes:
1519
raise AssertionError(
1520
"must be a utf8 file_id not %s" % (type(file_id), ))
1521
if inv_entry is not None and file_id != inv_entry.file_id:
1522
self._raise_invalid(new_path, file_id,
1523
"mismatched entry file_id %r" % inv_entry)
1524
if new_path is None:
1525
new_path_utf8 = None
1527
if inv_entry is None:
1528
                    self._raise_invalid(new_path, file_id,
                                        "new_path with no entry")
                new_path_utf8 = encode(new_path)
                # note the parent for validation
                dirname_utf8, basename_utf8 = osutils.split(new_path_utf8)
                if basename_utf8:
                    parents.add((dirname_utf8, inv_entry.parent_id))
            if old_path is None:
                old_path_utf8 = None
            else:
                old_path_utf8 = encode(old_path)
            if old_path is None:
                adds.append((None, new_path_utf8, file_id,
                             inv_to_entry(inv_entry), True))
                new_ids.add(file_id)
            elif new_path is None:
                deletes.append((old_path_utf8, None, file_id, None, True))
            elif (old_path, new_path) == root_only:
                # change things in-place
                # Note: the case of a parent directory changing its file_id
                #       tends to break optimizations here, because officially
                #       the file has actually been moved, it just happens to
                #       end up at the same path. If we can figure out how to
                #       handle that case, we can avoid a lot of add+delete
                #       pairs for objects that stay put.
                # elif old_path == new_path:
                changes.append((old_path_utf8, new_path_utf8, file_id,
                                inv_to_entry(inv_entry)))
            else:
                # Because renames must preserve their children we must have
                # processed all relocations and removes before hand. The sort
                # order ensures we've examined the child paths, but we also
                # have to execute the removals, or the split to an add/delete
                # pair will result in the deleted item being reinserted, or
                # renamed items being reinserted twice - and possibly at the
                # wrong place. Splitting into a delete/add pair also simplifies
                # the handling of entries with ('f', ...), ('r' ...) because
                # the target of the 'r' is old_path here, and we add that to
                # deletes, meaning that the add handler does not need to check
                # for 'r' items on every pass.
                self._update_basis_apply_deletes(deletes)
                deletes = []
                # Split into an add/delete pair recursively.
                adds.append((old_path_utf8, new_path_utf8, file_id,
                             inv_to_entry(inv_entry), False))
                # Expunge deletes that we've seen so that deleted/renamed
                # children of a rename directory are handled correctly.
                new_deletes = reversed(list(
                    self._iter_child_entries(1, old_path_utf8)))
                # Remove the current contents of the tree at orig_path, and
                # reinsert at the correct new path.
                for entry in new_deletes:
                    child_dirname, child_basename, child_file_id = entry[0]
                    if child_dirname:
                        source_path = child_dirname + b'/' + child_basename
                    else:
                        source_path = child_basename
                    if new_path_utf8:
                        target_path = \
                            new_path_utf8 + source_path[len(old_path_utf8):]
                    else:
                        if old_path_utf8 == b'':
                            raise AssertionError("cannot rename directory to"
                                                 " itself")
                        target_path = source_path[len(old_path_utf8) + 1:]
                    adds.append((None, target_path, entry[0][2], entry[1][1], False))
                    deletes.append(
                        (source_path, target_path, entry[0][2], None, False))
                deletes.append(
                    (old_path_utf8, new_path_utf8, file_id, None, False))

        self._check_delta_ids_absent(new_ids, delta, 1)
        try:
            # Finish expunging deletes/first half of renames.
            self._update_basis_apply_deletes(deletes)
            # Reinstate second half of renames and new paths.
            self._update_basis_apply_adds(adds)
            # Apply in-situ changes.
            self._update_basis_apply_changes(changes)
            if parents:
                self._after_delta_check_parents(parents, 1)
        except errors.BzrError as e:
            self._changes_aborted = True
            if 'integrity error' not in str(e):
                raise
            # _get_entry raises BzrError when a request is inconsistent; we
            # want such errors to be shown as InconsistentDelta - and that
            # fits the behaviour we trigger.
            raise errors.InconsistentDeltaDelta(delta,
                                                "error from _get_entry. %s" % (e,))
        self._mark_modified(header_modified=True)
        self._id_index = None
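        # Illustrative sketch (added comment, not original code; paths and
        # ids are invented): a delta renaming 'a' to 'b' and removing 'c'
        # arrives roughly as
        #   [(u'a', u'b', b'a-id', <InventoryEntry for b>),
        #    (u'c', None, b'c-id', None)]
        # and is decomposed above into a delete/add pair for the rename plus
        # a plain delete before being applied to tree 1 (the basis).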

    def _check_delta_ids_absent(self, new_ids, delta, tree_index):
        """Check that none of the file_ids in new_ids are present in a tree."""
        if not new_ids:
            return
        id_index = self._get_id_index()
        for file_id in new_ids:
            for key in id_index.get(file_id, ()):
                block_i, entry_i, d_present, f_present = \
                    self._get_block_entry_index(key[0], key[1], tree_index)
                if not f_present:
                    # In a different tree
                    continue
                entry = self._dirblocks[block_i][1][entry_i]
                if entry[0][2] != file_id:
                    # Different file_id, so not what we want.
                    continue
                self._raise_invalid(
                    (b"%s/%s" % key[0:2]).decode('utf8'), file_id,
                    "This file_id is new in the delta but already present in "
                    "the target")

    def _raise_invalid(self, path, file_id, reason):
        self._changes_aborted = True
        raise errors.InconsistentDelta(path, file_id, reason)

    def _update_basis_apply_adds(self, adds):
        """Apply a sequence of adds to tree 1 during update_basis_by_delta.

        They may be adds, or renames that have been split into add/delete
        pairs.

        :param adds: A sequence of adds. Each add is a tuple:
            (None, new_path_utf8, file_id, (entry_details), real_add). real_add
            is False when the add is the second half of a remove-and-reinsert
            pair created to handle renames and deletes.
        """
        # Adds are accumulated partly from renames, so can be in any input
        # order - sort it.
        # TODO: we may want to sort in dirblocks order. That way each entry
        #       will end up in the same directory, allowing the _get_entry
        #       fast-path for looking up 2 items in the same dir work.
        adds.sort(key=lambda x: x[1])
        # adds is now in lexicographic order, which places all parents before
        # their children, so we can process it linearly.
        st = static_tuple.StaticTuple
        for old_path, new_path, file_id, new_details, real_add in adds:
            dirname, basename = osutils.split(new_path)
            entry_key = st(dirname, basename, file_id)
            block_index, present = self._find_block_index_from_key(entry_key)
            if not present:
                # The block where we want to put the file is not present.
                # However, it might have just been an empty directory. Look for
                # the parent in the basis-so-far before throwing an error.
                parent_dir, parent_base = osutils.split(dirname)
                parent_block_idx, parent_entry_idx, _, parent_present = \
                    self._get_block_entry_index(parent_dir, parent_base, 1)
                if not parent_present:
                    self._raise_invalid(new_path, file_id,
                                        "Unable to find block for this record."
                                        " Was the parent added?")
                self._ensure_block(parent_block_idx, parent_entry_idx, dirname)

            block = self._dirblocks[block_index][1]
            entry_index, present = self._find_entry_index(entry_key, block)
            if real_add:
                if old_path is not None:
                    self._raise_invalid(new_path, file_id,
                                        'considered a real add but still had old_path at %s'
                                        % (old_path,))
            if present:
                entry = block[entry_index]
                basis_kind = entry[1][1][0]
                if basis_kind == b'a':
                    entry[1][1] = new_details
                elif basis_kind == b'r':
                    raise NotImplementedError()
                else:
                    self._raise_invalid(new_path, file_id,
                                        "An entry was marked as a new add"
                                        " but the basis target already existed")
            else:
                # The exact key was not found in the block. However, we need to
                # check if there is a key next to us that would have matched.
                # We only need to check 2 locations, because there are only 2
                # trees.
                for maybe_index in range(entry_index - 1, entry_index + 1):
                    if maybe_index < 0 or maybe_index >= len(block):
                        continue
                    maybe_entry = block[maybe_index]
                    if maybe_entry[0][:2] != (dirname, basename):
                        # Just a random neighbor
                        continue
                    if maybe_entry[0][2] == file_id:
                        raise AssertionError(
                            '_find_entry_index didnt find a key match'
                            ' but walking the data did, for %s'
                            % (entry_key,))
                    basis_kind = maybe_entry[1][1][0]
                    if basis_kind not in (b'a', b'r'):
                        self._raise_invalid(new_path, file_id,
                                            "we have an add record for path, but the path"
                                            " is already present with another file_id %s"
                                            % (maybe_entry[0][2],))

                entry = (entry_key, [DirState.NULL_PARENT_DETAILS,
                                     new_details])
                block.insert(entry_index, entry)

            active_kind = entry[1][0][0]
            if active_kind == b'a':
                # The active record shows up as absent, this could be genuine,
                # or it could be present at some other location. We need to
                # verify.
                id_index = self._get_id_index()
                # The id_index may not be perfectly accurate for tree1, because
                # we haven't been keeping it updated. However, it should be
                # fine for tree0, and that gives us enough info for what we
                # need.
                keys = id_index.get(file_id, ())
                for key in keys:
                    block_i, entry_i, d_present, f_present = \
                        self._get_block_entry_index(key[0], key[1], 0)
                    if not f_present:
                        continue
                    active_entry = self._dirblocks[block_i][1][entry_i]
                    if (active_entry[0][2] != file_id):
                        # Some other file is at this path, we don't need to
                        # link it.
                        continue
                    real_active_kind = active_entry[1][0][0]
                    if real_active_kind in (b'a', b'r'):
                        # We found a record, which was not *this* record,
                        # which matches the file_id, but is not actually
                        # present. Something seems *really* wrong.
                        self._raise_invalid(new_path, file_id,
                                            "We found a tree0 entry that doesnt make sense")
                    # Now, we've found a tree0 entry which matches the file_id
                    # but is at a different location. So update them to be
                    # rename records.
                    active_dir, active_name = active_entry[0][:2]
                    if active_dir:
                        active_path = active_dir + b'/' + active_name
                    else:
                        active_path = active_name
                    active_entry[1][1] = st(b'r', new_path, 0, False, b'')
                    entry[1][0] = st(b'r', active_path, 0, False, b'')
            elif active_kind == b'r':
                raise NotImplementedError()

            new_kind = new_details[0]
            if new_kind == b'd':
                self._ensure_block(block_index, entry_index, new_path)
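        # Illustrative sketch (added comment, not original code; the values
        # are invented): per the docstring above, a genuine add and the
        # second half of a rename look like, respectively:
        #   (None, b'dir/new-file', b'file-id', details, True)
        #   (b'dir/old', b'dir/new-file', b'file-id', details, False)
        # where details is a tree-1 tuple such as
        # (b'f', sha1, size, executable, revision_id).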

    def _update_basis_apply_changes(self, changes):
        """Apply a sequence of changes to tree 1 during update_basis_by_delta.

        :param changes: A sequence of changes. Each change is a tuple:
            (path_utf8, path_utf8, file_id, (entry_details))
        """
        for old_path, new_path, file_id, new_details in changes:
            # the entry for this file_id must be in tree 0.
            entry = self._get_entry(1, file_id, new_path)
            if entry[0] is None or entry[1][1][0] in (b'a', b'r'):
                self._raise_invalid(new_path, file_id,
                                    'changed entry considered not present')
            entry[1][1] = new_details

    def _update_basis_apply_deletes(self, deletes):
        """Apply a sequence of deletes to tree 1 during update_basis_by_delta.

        They may be deletes, or renames that have been split into add/delete
        pairs.

        :param deletes: A sequence of deletes. Each delete is a tuple:
            (old_path_utf8, new_path_utf8, file_id, None, real_delete).
            real_delete is True when the desired outcome is an actual deletion
            rather than the rename handling logic temporarily deleting a path
            during the replacement of a parent.
        """
        null = DirState.NULL_PARENT_DETAILS
        for old_path, new_path, file_id, _, real_delete in deletes:
            if real_delete != (new_path is None):
                self._raise_invalid(old_path, file_id, "bad delete delta")
            # the entry for this file_id must be in tree 1.
            dirname, basename = osutils.split(old_path)
            block_index, entry_index, dir_present, file_present = \
                self._get_block_entry_index(dirname, basename, 1)
            if not file_present:
                self._raise_invalid(old_path, file_id,
                                    'basis tree does not contain removed entry')
            entry = self._dirblocks[block_index][1][entry_index]
            # The state of the entry in the 'active' WT
            active_kind = entry[1][0][0]
            if entry[0][2] != file_id:
                self._raise_invalid(old_path, file_id,
                                    'mismatched file_id in tree 1')
            dir_block = ()
            old_kind = entry[1][1][0]
            if active_kind in b'ar':
                # The active tree doesn't have this file_id.
                # The basis tree is changing this record. If this is a
                # rename, then we don't want the record here at all
                # anymore. If it is just an in-place change, we want the
                # record here, but we'll add it if we need to. So we just
                # delete it
                if active_kind == b'r':
                    active_path = entry[1][0][1]
                    active_entry = self._get_entry(0, file_id, active_path)
                    if active_entry[1][1][0] != b'r':
                        self._raise_invalid(old_path, file_id,
                                            "Dirstate did not have matching rename entries")
                    elif active_entry[1][0][0] in b'ar':
                        self._raise_invalid(old_path, file_id,
                                            "Dirstate had a rename pointing at an inactive"
                                            " tree0")
                    active_entry[1][1] = null
                del self._dirblocks[block_index][1][entry_index]
                if old_kind == b'd':
                    # This was a directory, and the active tree says it
                    # doesn't exist, and now the basis tree says it doesn't
                    # exist. Remove its dirblock if present
                    (dir_block_index,
                     present) = self._find_block_index_from_key(
                        (old_path, b'', b''))
                    if present:
                        dir_block = self._dirblocks[dir_block_index][1]
                        if not dir_block:
                            # This entry is empty, go ahead and just remove it
                            del self._dirblocks[dir_block_index]
            else:
                # There is still an active record, so just mark this
                # removed.
                entry[1][1] = null
                block_i, entry_i, d_present, f_present = \
                    self._get_block_entry_index(old_path, b'', 1)
                if d_present:
                    dir_block = self._dirblocks[block_i][1]
            for child_entry in dir_block:
                child_basis_kind = child_entry[1][1][0]
                if child_basis_kind not in b'ar':
                    self._raise_invalid(old_path, file_id,
                                        "The file id was deleted but its children were "
                                        "not deleted.")

    def _after_delta_check_parents(self, parents, index):
        """Check that parents required by the delta are all intact.

        :param parents: An iterable of (path_utf8, file_id) tuples which are
            required to be present in tree 'index' at path_utf8 with id file_id
            and be a directory.
        :param index: The column in the dirstate to check for parents in.
        """
        for dirname_utf8, file_id in parents:
            # Get the entry - this ensures that file_id, dirname_utf8 exists
            # and has the right file id.
            entry = self._get_entry(index, file_id, dirname_utf8)
            if entry[1] is None:
                self._raise_invalid(dirname_utf8.decode('utf8'),
                                    file_id, "This parent is not present.")
            # Parents of things must be directories
            if entry[1][index][0] != b'd':
                self._raise_invalid(dirname_utf8.decode('utf8'),
                                    file_id, "This parent is not a directory.")

    def _observed_sha1(self, entry, sha1, stat_value,
                       _stat_to_minikind=_stat_to_minikind):
        """Note the sha1 of a file.

        :param entry: The entry the sha1 is for.
        :param sha1: The observed sha1.
        :param stat_value: The os.lstat for the file.
        """
        try:
            minikind = _stat_to_minikind[stat_value.st_mode & 0o170000]
        except KeyError:
            # Unhandled kind
            return None
        if minikind == b'f':
            if self._cutoff_time is None:
                self._sha_cutoff_time()
            if (stat_value.st_mtime < self._cutoff_time
                    and stat_value.st_ctime < self._cutoff_time):
                entry[1][0] = (b'f', sha1, stat_value.st_size, entry[1][0][3],
                               pack_stat(stat_value))
                self._mark_modified([entry])

    def _sha_cutoff_time(self):
        """Return cutoff time.

        Files modified more recently than this time are at risk of being
        undetectably modified and so can't be cached.
        """
        # Cache the cutoff time as long as we hold a lock.
        # time.time() isn't super expensive (approx 3.38us), but
        # when you call it 50,000 times it adds up.
        # For comparison, os.lstat() costs 7.2us if it is hot.
        self._cutoff_time = int(time.time()) - 3
        return self._cutoff_time
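        # Illustrative sketch (added comment, not original code): with the 3
        # second margin above, a file stat'd at t=1000 is only cacheable if
        # both its mtime and ctime are below 997, i.e. old enough that a
        # write racing with this scan could not leave the same stat behind.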

    def _lstat(self, abspath, entry):
        """Return the os.lstat value for this path."""
        return os.lstat(abspath)

    def _sha1_file_and_mutter(self, abspath):
        # when -Dhashcache is turned on, this is monkey-patched in to log
        # file reads
        trace.mutter("dirstate sha1 " + abspath)
        return self._sha1_provider.sha1(abspath)

    def _is_executable(self, mode, old_executable):
        """Is this file executable?"""
        return bool(S_IEXEC & mode)

    def _is_executable_win32(self, mode, old_executable):
        """On win32 the executable bit is stored in the dirstate."""
        return old_executable

    if sys.platform == 'win32':
        _is_executable = _is_executable_win32

    def _read_link(self, abspath, old_link):
        """Read the target of a symlink"""
        # TODO: jam 200700301 On Win32, this could just return the value
        #       already in memory. However, this really needs to be done at a
        #       higher level, because there either won't be anything on disk,
        #       or the thing on disk will be a file.
        fs_encoding = osutils._fs_enc
        if isinstance(abspath, text_type):
            # abspath is defined as the path to pass to lstat. readlink is
            # buggy in python < 2.6 (it doesn't encode unicode path into FS
            # encoding), so we need to encode ourselves knowing that unicode
            # paths are produced by UnicodeDirReader on purpose.
            abspath = abspath.encode(fs_encoding)
        target = os.readlink(abspath)
        if fs_encoding not in ('utf-8', 'ascii'):
            # Change encoding if needed
            target = target.decode(fs_encoding).encode('UTF-8')
        return target

    def get_ghosts(self):
        """Return a list of the parent tree revision ids that are ghosts."""
        self._read_header_if_needed()
        return self._ghosts

    def get_lines(self):
        """Serialise the entire dirstate to a sequence of lines."""
        if (self._header_state == DirState.IN_MEMORY_UNMODIFIED and
                self._dirblock_state == DirState.IN_MEMORY_UNMODIFIED):
            # read what's on disk.
            self._state_file.seek(0)
            return self._state_file.readlines()
        lines = []
        lines.append(self._get_parents_line(self.get_parent_ids()))
        lines.append(self._get_ghosts_line(self._ghosts))
        lines.extend(self._iter_entry_lines())
        return self._get_output_lines(lines)

    def _get_ghosts_line(self, ghost_ids):
        """Create a line for the state file for ghost information."""
        return b'\0'.join([b'%d' % len(ghost_ids)] + ghost_ids)
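        # Illustrative example (added comment, not original code), assuming
        # one ghost parent:
        #   _get_ghosts_line([b'ghost-rev-id']) == b'1\x00ghost-rev-id'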

    def _get_parents_line(self, parent_ids):
        """Create a line for the state file for parents information."""
        return b'\0'.join([b'%d' % len(parent_ids)] + parent_ids)

    def _iter_entry_lines(self):
        """Create lines for entries."""
        return map(self._entry_to_line, self._iter_entries())

    def _get_fields_to_entry(self):
        """Get a function which converts entry fields into an entry record.

        This handles size and executable, as well as parent records.

        :return: A function which takes a list of fields, and returns an
            appropriate record for storing in memory.
        """
        # This is intentionally unrolled for performance
        num_present_parents = self._num_present_parents()
        if num_present_parents == 0:
            def fields_to_entry_0_parents(fields, _int=int):
                path_name_file_id_key = (fields[0], fields[1], fields[2])
                return (path_name_file_id_key, [
                    (  # Current tree
                        fields[3],          # minikind
                        fields[4],          # fingerprint
                        _int(fields[5]),    # size
                        fields[6] == b'y',  # executable
                        fields[7],          # packed_stat or revision_id
                    )])
            return fields_to_entry_0_parents
        elif num_present_parents == 1:
            def fields_to_entry_1_parent(fields, _int=int):
                path_name_file_id_key = (fields[0], fields[1], fields[2])
                return (path_name_file_id_key, [
                    (  # Current tree
                        fields[3],           # minikind
                        fields[4],           # fingerprint
                        _int(fields[5]),     # size
                        fields[6] == b'y',   # executable
                        fields[7],           # packed_stat or revision_id
                    ),
                    (  # Parent 1
                        fields[8],           # minikind
                        fields[9],           # fingerprint
                        _int(fields[10]),    # size
                        fields[11] == b'y',  # executable
                        fields[12],          # packed_stat or revision_id
                    ),
                    ])
            return fields_to_entry_1_parent
        elif num_present_parents == 2:
            def fields_to_entry_2_parents(fields, _int=int):
                path_name_file_id_key = (fields[0], fields[1], fields[2])
                return (path_name_file_id_key, [
                    (  # Current tree
                        fields[3],           # minikind
                        fields[4],           # fingerprint
                        _int(fields[5]),     # size
                        fields[6] == b'y',   # executable
                        fields[7],           # packed_stat or revision_id
                    ),
                    (  # Parent 1
                        fields[8],           # minikind
                        fields[9],           # fingerprint
                        _int(fields[10]),    # size
                        fields[11] == b'y',  # executable
                        fields[12],          # packed_stat or revision_id
                    ),
                    (  # Parent 2
                        fields[13],          # minikind
                        fields[14],          # fingerprint
                        _int(fields[15]),    # size
                        fields[16] == b'y',  # executable
                        fields[17],          # packed_stat or revision_id
                    ),
                    ])
            return fields_to_entry_2_parents
        else:
            def fields_to_entry_n_parents(fields, _int=int):
                path_name_file_id_key = (fields[0], fields[1], fields[2])
                trees = [(fields[cur],              # minikind
                          fields[cur + 1],          # fingerprint
                          _int(fields[cur + 2]),    # size
                          fields[cur + 3] == b'y',  # executable
                          fields[cur + 4],          # stat or revision_id
                          ) for cur in range(3, len(fields) - 1, 5)]
                return path_name_file_id_key, trees
            return fields_to_entry_n_parents
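            # Illustrative sketch (added comment, not original code; values
            # are invented): with no parents, a row split on NUL yields
            # fields such as
            #   [b'', b'a.txt', b'an-id', b'f', b'<sha1>', b'12', b'n',
            #    b'<packed-stat>']
            # which the returned function maps to
            #   ((b'', b'a.txt', b'an-id'),
            #    [(b'f', b'<sha1>', 12, False, b'<packed-stat>')])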

    def get_parent_ids(self):
        """Return a list of the parent tree ids for the directory state."""
        self._read_header_if_needed()
        return list(self._parents)

    def _get_block_entry_index(self, dirname, basename, tree_index):
        """Get the coordinates for a path in the state structure.

        :param dirname: The utf8 dirname to lookup.
        :param basename: The utf8 basename to lookup.
        :param tree_index: The index of the tree for which this lookup should
            be attempted.
        :return: A tuple describing where the path is located, or should be
            inserted. The tuple contains four fields: the block index, the row
            index, the directory is present (boolean), the entire path is
            present (boolean). There is no guarantee that either
            coordinate is currently reachable unless the found field for it is
            True. For instance, a directory not present in the searched tree
            may be returned with a value one greater than the current highest
            block offset. The directory present field will always be True when
            the path present field is True. The directory present field does
            NOT indicate that the directory is present in the searched tree,
            rather it indicates that there are at least some files in some
            tree present there.
        """
        self._read_dirblocks_if_needed()
        key = dirname, basename, b''
        block_index, present = self._find_block_index_from_key(key)
        if not present:
            # no such directory - return the dir index and 0 for the row.
            return block_index, 0, False, False
        block = self._dirblocks[block_index][1]  # access the entries only
        entry_index, present = self._find_entry_index(key, block)
        # linear search through entries at this path to find the one
        # requested.
        while entry_index < len(block) and block[entry_index][0][1] == basename:
            if block[entry_index][1][tree_index][0] not in (b'a', b'r'):
                # neither absent or relocated
                return block_index, entry_index, True, True
            entry_index += 1
        return block_index, entry_index, True, False
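        # Illustrative sketch (added comment, not original code): looking up
        # a path that exists in the searched tree yields something like
        # (block_index, entry_index, True, True); if its directory has no
        # block yet, (insertion_index, 0, False, False) is returned instead.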

    def _get_entry(self, tree_index, fileid_utf8=None, path_utf8=None,
                   include_deleted=False):
        """Get the dirstate entry for path in tree tree_index.

        If either file_id or path is supplied, it is used as the key to lookup.
        If both are supplied, the fastest lookup is used, and an error is
        raised if they do not both point at the same row.

        :param tree_index: The index of the tree we wish to locate this path
            in. If the path is present in that tree, the entry containing its
            details is returned, otherwise (None, None) is returned
            0 is the working tree, higher indexes are successive parent
            trees.
        :param fileid_utf8: A utf8 file_id to look up.
        :param path_utf8: An utf8 path to be looked up.
        :param include_deleted: If True, and performing a lookup via
            fileid_utf8 rather than path_utf8, return an entry for deleted
            (absent) paths.
        :return: The dirstate entry tuple for path, or (None, None)
        """
        self._read_dirblocks_if_needed()
        if path_utf8 is not None:
            if not isinstance(path_utf8, bytes):
                raise errors.BzrError('path_utf8 is not bytes: %s %r'
                                      % (type(path_utf8), path_utf8))
            # path lookups are faster
            dirname, basename = osutils.split(path_utf8)
            block_index, entry_index, dir_present, file_present = \
                self._get_block_entry_index(dirname, basename, tree_index)
            if not file_present:
                return None, None
            entry = self._dirblocks[block_index][1][entry_index]
            if not (entry[0][2] and entry[1][tree_index][0] not in (b'a', b'r')):
                raise AssertionError('unversioned entry?')
            if fileid_utf8:
                if entry[0][2] != fileid_utf8:
                    self._changes_aborted = True
                    raise errors.BzrError('integrity error ? : mismatching'
                                          ' tree_index, file_id and path')
            return entry
        else:
            possible_keys = self._get_id_index().get(fileid_utf8, ())
            if not possible_keys:
                return None, None
            for key in possible_keys:
                block_index, present = \
                    self._find_block_index_from_key(key)
                # strange, probably indicates an out of date
                # id index - for now, allow this.
                if not present:
                    continue
                # WARNING: DO not change this code to use _get_block_entry_index
                # as that function is not suitable: it does not use the key
                # to lookup, and thus the wrong coordinates are returned.
                block = self._dirblocks[block_index][1]
                entry_index, present = self._find_entry_index(key, block)
                if present:
                    entry = self._dirblocks[block_index][1][entry_index]
                    # TODO: We might want to assert that entry[0][2] ==
                    #       fileid_utf8.
                    # GZ 2017-06-09: Hoist set of minkinds somewhere
                    if entry[1][tree_index][0] in {b'f', b'd', b'l', b't'}:
                        # this is the result we are looking for: the
                        # real home of this file_id in this tree.
                        return entry
                    if entry[1][tree_index][0] == b'a':
                        # there is no home for this entry in this tree
                        if include_deleted:
                            return entry
                        return None, None
                    if entry[1][tree_index][0] != b'r':
                        raise AssertionError(
                            "entry %r has invalid minikind %r for tree %r"
                            % (entry,
                               entry[1][tree_index][0],
                               tree_index))
                    real_path = entry[1][tree_index][1]
                    return self._get_entry(tree_index, fileid_utf8=fileid_utf8,
                                           path_utf8=real_path)
            return None, None
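        # Illustrative sketch (added comment, not original code; names are
        # invented): both of these should locate the same row, the path form
        # without consulting the id index:
        #   state._get_entry(0, path_utf8=b'dir/file')
        #   state._get_entry(0, fileid_utf8=b'file-id')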

    @classmethod
    def initialize(cls, path, sha1_provider=None):
        """Create a new dirstate on path.

        The new dirstate will be an empty tree - that is it has no parents,
        and only a root node - which has id ROOT_ID.

        :param path: The name of the file for the dirstate.
        :param sha1_provider: an object meeting the SHA1Provider interface.
            If None, a DefaultSHA1Provider is used.
        :return: A write-locked DirState object.
        """
        # This constructs a new DirState object on a path, sets the _state_file
        # to a new empty file for that path. It then calls _set_data() with our
        # stock empty dirstate information - a root with ROOT_ID, no children,
        # and no parents. Finally it calls save() to ensure that this data will
        # persist.
        if sha1_provider is None:
            sha1_provider = DefaultSHA1Provider()
        result = cls(path, sha1_provider)
        # root dir and root dir contents with no children.
        empty_tree_dirblocks = [(b'', []), (b'', [])]
        # a new root directory, with a NULLSTAT.
        empty_tree_dirblocks[0][1].append(
            ((b'', b'', inventory.ROOT_ID), [
                (b'd', b'', 0, False, DirState.NULLSTAT),
            ]))
        result.lock_write()
        try:
            result._set_data([], empty_tree_dirblocks)
            result.save()
        except:
            result.unlock()
            raise
        return result

    @staticmethod
    def _inv_entry_to_details(inv_entry):
        """Convert an inventory entry (from a revision tree) to state details.

        :param inv_entry: An inventory entry whose sha1 and link targets can be
            relied upon, and which has a revision set.
        :return: A details tuple - the details for a single tree at a path +
            id.
        """
        kind = inv_entry.kind
        minikind = DirState._kind_to_minikind[kind]
        tree_data = inv_entry.revision
        if kind == 'directory':
            fingerprint = b''
            size = 0
            executable = False
        elif kind == 'symlink':
            if inv_entry.symlink_target is None:
                fingerprint = b''
            else:
                fingerprint = inv_entry.symlink_target.encode('utf8')
            size = 0
            executable = False
        elif kind == 'file':
            fingerprint = inv_entry.text_sha1 or b''
            size = inv_entry.text_size or 0
            executable = inv_entry.executable
        elif kind == 'tree-reference':
            fingerprint = inv_entry.reference_revision or b''
            size = 0
            executable = False
        else:
            raise Exception("can't pack %s" % inv_entry)
        return static_tuple.StaticTuple(minikind, fingerprint, size,
                                        executable, tree_data)
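        # Illustrative sketch (added comment, not original code; values are
        # invented): a versioned file entry becomes roughly
        #   (b'f', b'<text_sha1>', 1024, False, b'<revision-id>')
        # while a directory carries an empty fingerprint and size 0:
        #   (b'd', b'', 0, False, b'<revision-id>')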

    def _iter_child_entries(self, tree_index, path_utf8):
        """Iterate over all the entries that are children of path_utf8.

        This only returns entries that are present (not in 'a', 'r') in
        tree_index. tree_index data is not refreshed, so if tree 0 is used,
        results may differ from that obtained if paths were statted to
        determine what ones were directories.

        Asking for the children of a non-directory will return an empty
        iterator.
        """
        pending_dirs = []
        next_pending_dirs = [path_utf8]
        absent = (b'a', b'r')
        while next_pending_dirs:
            pending_dirs = next_pending_dirs
            next_pending_dirs = []
            for path in pending_dirs:
                block_index, present = self._find_block_index_from_key(
                    (path, b'', b''))
                if block_index == 0:
                    block_index = 1
                    if len(self._dirblocks) == 1:
                        # asked for the children of the root with no other
                        # contents.
                        return
                if not present:
                    # children of a non-directory asked for.
                    continue
                block = self._dirblocks[block_index]
                for entry in block[1]:
                    kind = entry[1][tree_index][0]
                    if kind not in absent:
                        yield entry
                    if kind == b'd':
                        if entry[0][0]:
                            path = entry[0][0] + b'/' + entry[0][1]
                        else:
                            path = entry[0][1]
                        next_pending_dirs.append(path)

    def _iter_entries(self):
        """Iterate over all the entries in the dirstate.

        Each yielded item is an entry in the standard format described in the
        docstring of breezy.dirstate.
        """
        self._read_dirblocks_if_needed()
        for directory in self._dirblocks:
            for entry in directory[1]:
                yield entry

    def _get_id_index(self):
        """Get an id index of self._dirblocks.

        This maps from file_id => [(directory, name, file_id)] entries where
        that file_id appears in one of the trees.
        """
        if self._id_index is None:
            id_index = {}
            for key, tree_details in self._iter_entries():
                self._add_to_id_index(id_index, key)
            self._id_index = id_index
        return self._id_index

    def _add_to_id_index(self, id_index, entry_key):
        """Add this entry to the _id_index mapping."""
        # This code used to use a set for every entry in the id_index. However,
        # it is *rare* to have more than one entry. So a set is a large
        # overkill. And even when we do, we won't ever have more than the
        # number of parent trees. Which is still a small number (rarely >2). As
        # such, we use a simple tuple, and do our own uniqueness checks. While
        # the 'in' check is O(N) since N is nicely bounded it shouldn't ever
        # cause quadratic failure.
        file_id = entry_key[2]
        entry_key = static_tuple.StaticTuple.from_sequence(entry_key)
        if file_id not in id_index:
            id_index[file_id] = static_tuple.StaticTuple(entry_key,)
        else:
            entry_keys = id_index[file_id]
            if entry_key not in entry_keys:
                id_index[file_id] = entry_keys + (entry_key,)
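        # Illustrative sketch (added comment, not original code; keys are
        # invented): after adding (b'', b'a', b'id') and then
        # (b'', b'b', b'id') for the same id,
        #   id_index[b'id'] == ((b'', b'a', b'id'), (b'', b'b', b'id'))
        # i.e. a plain tuple grown one key at a time, never a set.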

    def _remove_from_id_index(self, id_index, entry_key):
        """Remove this entry from the _id_index mapping.

        It is a programming error to call this when the entry_key is not
        already present.
        """
        file_id = entry_key[2]
        entry_keys = list(id_index[file_id])
        entry_keys.remove(entry_key)
        id_index[file_id] = static_tuple.StaticTuple.from_sequence(entry_keys)

    def _get_output_lines(self, lines):
        """Format lines for final output.

        :param lines: A sequence of lines containing the parents list and the
            path lines.
        """
        output_lines = [DirState.HEADER_FORMAT_3]
        lines.append(b'')  # a final newline
        inventory_text = b'\0\n\0'.join(lines)
        output_lines.append(b'crc32: %d\n' % (zlib.crc32(inventory_text),))
        # -3, 1 for num parents, 1 for ghosts, 1 for final newline
        num_entries = len(lines) - 3
        output_lines.append(b'num_entries: %d\n' % (num_entries,))
        output_lines.append(inventory_text)
        return output_lines
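        # Illustrative sketch (added comment, not original code): the result
        # is [header, b'crc32: ...\n', b'num_entries: ...\n', <joined text>],
        # where the crc32 covers only the joined parents/ghosts/entry text,
        # so the checksum can be verified without reparsing the prelude.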

    def _make_deleted_row(self, fileid_utf8, parents):
        """Return a deleted row for fileid_utf8."""
        return (b'/', b'RECYCLED.BIN', b'file', fileid_utf8, 0,
                DirState.NULLSTAT, b''), parents

    def _num_present_parents(self):
        """The number of parent entries in each record row."""
        return len(self._parents) - len(self._ghosts)

    @classmethod
    def on_file(cls, path, sha1_provider=None, worth_saving_limit=0):
        """Construct a DirState on the file at path "path".

        :param path: The path at which the dirstate file on disk should live.
        :param sha1_provider: an object meeting the SHA1Provider interface.
            If None, a DefaultSHA1Provider is used.
        :param worth_saving_limit: when the exact number of hash changed
            entries is known, only bother saving the dirstate if more than
            this count of entries have changed. -1 means never save.
        :return: An unlocked DirState object, associated with the given path.
        """
        if sha1_provider is None:
            sha1_provider = DefaultSHA1Provider()
        result = cls(path, sha1_provider,
                     worth_saving_limit=worth_saving_limit)
        return result

    def _read_dirblocks_if_needed(self):
        """Read in all the dirblocks from the file if they are not in memory.

        This populates self._dirblocks, and sets self._dirblock_state to
        IN_MEMORY_UNMODIFIED. It is not currently ready for incremental block
        loading.
        """
        self._read_header_if_needed()
        if self._dirblock_state == DirState.NOT_IN_MEMORY:
            _read_dirblocks(self)

    def _read_header(self):
        """This reads in the metadata header, and the parent ids.

        After reading in, the file should be positioned at the null
        just before the start of the first record in the file.

        :return: (expected crc checksum, number of entries, parent list)
        """
        self._read_prelude()
        parent_line = self._state_file.readline()
        info = parent_line.split(b'\0')
        num_parents = int(info[0])
        self._parents = info[1:-1]
        ghost_line = self._state_file.readline()
        info = ghost_line.split(b'\0')
        num_ghosts = int(info[1])
        self._ghosts = info[2:-1]
        self._header_state = DirState.IN_MEMORY_UNMODIFIED
        self._end_of_header = self._state_file.tell()

    def _read_header_if_needed(self):
        """Read the header of the dirstate file if needed."""
        # inline this as it will be called a lot
        if not self._lock_token:
            raise errors.ObjectNotLocked(self)
        if self._header_state == DirState.NOT_IN_MEMORY:
            self._read_header()

    def _read_prelude(self):
        """Read in the prelude header of the dirstate file.

        This only reads in the stuff that is not connected to the crc
        checksum. The position will be correct to read in the rest of
        the file and check the checksum after this point.
        The next entry in the file should be the number of parents,
        and their ids. Followed by a newline.
        """
        header = self._state_file.readline()
        if header != DirState.HEADER_FORMAT_3:
            raise errors.BzrError(
                'invalid header line: %r' % (header,))
        crc_line = self._state_file.readline()
        if not crc_line.startswith(b'crc32: '):
            raise errors.BzrError('missing crc32 checksum: %r' % crc_line)
        self.crc_expected = int(crc_line[len(b'crc32: '):-1])
        num_entries_line = self._state_file.readline()
        if not num_entries_line.startswith(b'num_entries: '):
            raise errors.BzrError('missing num_entries line')
        self._num_entries = int(num_entries_line[len(b'num_entries: '):-1])

    def sha1_from_stat(self, path, stat_result):
        """Find a sha1 given a stat lookup."""
        return self._get_packed_stat_index().get(pack_stat(stat_result), None)

    def _get_packed_stat_index(self):
        """Get a packed_stat index of self._dirblocks."""
        if self._packed_stat_index is None:
            index = {}
            for key, tree_details in self._iter_entries():
                if tree_details[0][0] == b'f':
                    index[tree_details[0][4]] = tree_details[0][1]
            self._packed_stat_index = index
        return self._packed_stat_index

    def save(self):
        """Save any pending changes created during this session.

        We reuse the existing file, because that prevents race conditions with
        file creation, and use oslocks on it to prevent concurrent modification
        and reads - because dirstate's incremental data aggregation is not
        compatible with reading a modified file, and replacing a file in use by
        another process is impossible on Windows.

        A dirstate in read only mode should be smart enough though to validate
        that the file has not changed, and otherwise discard its cache and
        start over, to allow for fine grained read lock duration, so 'status'
        won't block 'commit' - for example.
        """
        if self._changes_aborted:
            # Should this be a warning? For now, I'm expecting that places that
            # mark it inconsistent will warn, making a warning here redundant.
            trace.mutter('Not saving DirState because '
                         '_changes_aborted is set.')
            return
        # TODO: Since we now distinguish IN_MEMORY_MODIFIED from
        #       IN_MEMORY_HASH_MODIFIED, we should only fail quietly if we fail
        #       to save an IN_MEMORY_HASH_MODIFIED, and fail *noisily* if we
        #       fail to save IN_MEMORY_MODIFIED
        if not self._worth_saving():
            return

        grabbed_write_lock = False
        if self._lock_state != 'w':
            grabbed_write_lock, new_lock = self._lock_token.temporary_write_lock()
            # Switch over to the new lock, as the old one may be closed.
            # TODO: jam 20070315 We should validate the disk file has
            #       not changed contents, since temporary_write_lock may
            #       not be an atomic operation.
            self._lock_token = new_lock
            self._state_file = new_lock.f
            if not grabbed_write_lock:
                # We couldn't grab a write lock, so we switch back to a read one
                return
        try:
            lines = self.get_lines()
            self._state_file.seek(0)
            self._state_file.writelines(lines)
            self._state_file.truncate()
            self._state_file.flush()
            self._maybe_fdatasync()
            self._mark_unmodified()
        finally:
            if grabbed_write_lock:
                self._lock_token = self._lock_token.restore_read_lock()
                self._state_file = self._lock_token.f
                # TODO: jam 20070315 We should validate the disk file has
                #       not changed contents. Since restore_read_lock may
                #       not be an atomic operation.

    def _maybe_fdatasync(self):
        """Flush to disk if possible and if not configured off."""
        if self._config_stack.get('dirstate.fdatasync'):
            osutils.fdatasync(self._state_file.fileno())

    def _worth_saving(self):
        """Is it worth saving the dirstate or not?"""
        if (self._header_state == DirState.IN_MEMORY_MODIFIED
                or self._dirblock_state == DirState.IN_MEMORY_MODIFIED):
            return True
        if self._dirblock_state == DirState.IN_MEMORY_HASH_MODIFIED:
            if self._worth_saving_limit == -1:
                # We never save hash changes when the limit is -1
                return False
            # If we're using smart saving and only a small number of
            # entries have changed their hash, don't bother saving. John has
            # suggested using a heuristic here based on the size of the
            # changed files and/or tree. For now, we go with a configurable
            # number of changes, keeping the calculation time
            # as low overhead as possible. (This also keeps all existing
            # tests passing as the default is 0, i.e. always save.)
            if len(self._known_hash_changes) >= self._worth_saving_limit:
                return True
        return False

    def _set_data(self, parent_ids, dirblocks):
        """Set the full dirstate data in memory.

        This is an internal function used to completely replace the objects
        in memory state. It puts the dirstate into state 'full-dirty'.

        :param parent_ids: A list of parent tree revision ids.
        :param dirblocks: A list containing one tuple for each directory in the
            tree. Each tuple contains the directory path and a list of entries
            found in that directory.
        """
        # our memory copy is now authoritative.
        self._dirblocks = dirblocks
        self._mark_modified(header_modified=True)
        self._parents = list(parent_ids)
        self._id_index = None
        self._packed_stat_index = None

    def set_path_id(self, path, new_id):
        """Change the id of path to new_id in the current working tree.

        :param path: The path inside the tree to set - '' is the root, 'foo'
            is the path foo in the root.
        :param new_id: The new id to assign to the path. This must be a utf8
            file id (not unicode, and not None).
        """
        self._read_dirblocks_if_needed()
        if len(path):
            # TODO: logic not written
            raise NotImplementedError(self.set_path_id)
        # TODO: check new id is unique
        entry = self._get_entry(0, path_utf8=path)
        if entry[0][2] == new_id:
            # Nothing to change.
            return
        if new_id.__class__ != bytes:
            raise AssertionError(
                "must be a utf8 file_id not %s" % (type(new_id), ))
        # mark the old path absent, and insert a new root path
        self._make_absent(entry)
        self.update_minimal((b'', b'', new_id), b'd',
                            path_utf8=b'', packed_stat=entry[1][0][4])
        self._mark_modified()

    def set_parent_trees(self, trees, ghosts):
        """Set the parent trees for the dirstate.

        :param trees: A list of revision_id, tree tuples. tree must be provided
            even if the revision_id refers to a ghost: supply an empty tree in
            this case.
        :param ghosts: A list of the revision_ids that are ghosts at the time
            of setting.
        """
        # TODO: generate a list of parent indexes to preserve to save
        # processing specific parent trees. In the common case one tree will
        # be preserved - the left most parent.
        # TODO: if the parent tree is a dirstate, we might want to walk them
        # all by path in parallel for 'optimal' common-case performance.
        # generate new root row.
        self._read_dirblocks_if_needed()
        # TODO future sketch: Examine the existing parents to generate a change
        # map and then walk the new parent trees only, mapping them into the
        # dirstate. Walk the dirstate at the same time to remove unreferenced
        # entries.
        # for now:
        # sketch: loop over all entries in the dirstate, cherry picking
        # entries from the parent trees, if they are not ghost trees.
        # after we finish walking the dirstate, all entries not in the dirstate
        # are deletes, so we want to append them to the end as per the design
        # discussions. So do a set difference on ids with the parents to
        # get deletes, and add them to the end.
        # During the update process we need to answer the following questions:
        # - find other keys containing a fileid in order to create cross-path
        #   links. We don't trivially use the inventory from other trees
        #   because this leads to either double touching, or to accessing
        #   missing keys,
        # - find other keys containing a path
        # We accumulate each entry via this dictionary, including the root
        by_path = {}
        id_index = {}
        # we could do parallel iterators, but because file id data may be
        # scattered throughout, we don't save on index overhead: we have to
        # look at everything anyway. We can probably save cycles by reusing
        # parent data and doing an incremental update when adding an additional
        # parent, but for now the common cases are adding a new parent (merge),
        # and replacing completely (commit), and commit is more common: so
        # optimise merge later.

        # ---- start generation of full tree mapping data
        # what trees should we use?
        parent_trees = [tree for rev_id, tree in trees if rev_id not in ghosts]
        # how many trees do we end up with
        parent_count = len(parent_trees)
        st = static_tuple.StaticTuple

        # one: the current tree
        for entry in self._iter_entries():
            # skip entries not in the current tree
            if entry[1][0][0] in (b'a', b'r'):  # absent, relocated
                continue
            by_path[entry[0]] = [entry[1][0]] + \
                [DirState.NULL_PARENT_DETAILS] * parent_count
            # TODO: Possibly inline this, since we know it isn't present yet
            #       id_index[entry[0][2]] = (entry[0],)
            self._add_to_id_index(id_index, entry[0])

        # now the parent trees:
        for tree_index, tree in enumerate(parent_trees):
            # the index is off by one, adjust it.
            tree_index = tree_index + 1
            # when we add new locations for a fileid we need these ranges for
            # any fileid in this tree as we set the by_path[id] to:
            # already_processed_tree_details + new_details + new_location_suffix
            # the suffix is from tree_index+1:parent_count+1.
            new_location_suffix = [DirState.NULL_PARENT_DETAILS] * (parent_count - tree_index)
            # now stitch in all the entries from this tree
            last_dirname = None
            for path, entry in tree.iter_entries_by_dir():
                # here we process each trees details for each item in the tree.
                # we first update any existing entries for the id at other paths,
                # then we either create or update the entry for the id at the
                # right path, and finally we add (if needed) a mapping from
                # file_id to this path. We do it in this order to allow us to
                # avoid checking all known paths for the id when generating a
                # new entry at this path: by adding the id->path mapping last,
                # all the mappings are valid and have correct relocation
                # records where needed.
                file_id = entry.file_id
                path_utf8 = path.encode('utf8')
                dirname, basename = osutils.split(path_utf8)
                if dirname == last_dirname:
                    # Try to re-use objects as much as possible
                    dirname = last_dirname
                else:
                    last_dirname = dirname
                new_entry_key = st(dirname, basename, file_id)
                # tree index consistency: All other paths for this id in this tree
                # index must point to the correct path.
                entry_keys = id_index.get(file_id, ())
                for entry_key in entry_keys:
                    # TODO:PROFILING: It might be faster to just update
                    # rather than checking if we need to, and then overwrite
                    # the one we are located at.
                    if entry_key != new_entry_key:
                        # this file id is at a different path in one of the
                        # other trees, so put absent pointers there
                        # This is the vertical axis in the matrix, all pointing
                        # to the real path.
                        by_path[entry_key][tree_index] = st(b'r', path_utf8, 0,
                                                            False, b'')
                # by path consistency: Insert into an existing path record
                # (trivial), or add a new one with relocation pointers for the
                # other tree indexes.
                if new_entry_key in entry_keys:
                    # there is already an entry where this data belongs, just
                    # insert it.
                    by_path[new_entry_key][tree_index] = \
                        self._inv_entry_to_details(entry)
                else:
                    # add relocated entries to the horizontal axis - this row
                    # mapping from path,id. We need to look up the correct path
                    # for the indexes from 0 to tree_index -1
                    new_details = []
                    for lookup_index in range(tree_index):
                        # boundary case: this is the first occurrence of file_id
                        # so there are no id_indexes, possibly take this out of
                        # the loop?
                        if not len(entry_keys):
                            new_details.append(DirState.NULL_PARENT_DETAILS)
                        else:
                            # grab any one entry, use it to find the right path.
                            a_key = next(iter(entry_keys))
                            if by_path[a_key][lookup_index][0] in (b'r', b'a'):
                                # it's a pointer or missing statement, use it as
                                # is.
                                new_details.append(by_path[a_key][lookup_index])
                            else:
                                # we have the right key, make a pointer to it.
                                real_path = (b'/'.join(a_key[0:2])).strip(b'/')
                                new_details.append(st(b'r', real_path, 0, False,
                                                      b''))
                    new_details.append(self._inv_entry_to_details(entry))
                    new_details.extend(new_location_suffix)
                    by_path[new_entry_key] = new_details
                    self._add_to_id_index(id_index, new_entry_key)
        # --- end generation of full tree mappings

        # sort and output all the entries
        new_entries = self._sort_entries(viewitems(by_path))
        self._entries_to_current_state(new_entries)
        self._parents = [rev_id for rev_id, tree in trees]
        self._ghosts = list(ghosts)
        self._mark_modified(header_modified=True)
        self._id_index = id_index

    def _sort_entries(self, entry_list):
        """Given a list of entries, sort them into the right order.

        This is done when constructing a new dirstate from trees - normally we
        try to keep everything in sorted blocks all the time, but sometimes
        it's easier to sort after the fact.
        """
        # When sorting, we usually have 10x more entries than directories. (69k
        # total entries, 4k directories). So cache the results of splitting.
        # Saving time and objects. Also, use StaticTuple to avoid putting all
        # of these objects into python's garbage collector.
        split_dirs = {}

        def _key(entry, _split_dirs=split_dirs, _st=static_tuple.StaticTuple):
            # sort by: directory parts, file name, file id
            dirpath, fname, file_id = entry[0]
            try:
                split = _split_dirs[dirpath]
            except KeyError:
                split = _st.from_sequence(dirpath.split(b'/'))
                _split_dirs[dirpath] = split
            return _st(split, fname, file_id)
        return sorted(entry_list, key=_key)
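        # Illustrative sketch (added comment, not original code): keying on
        # the split components makes b'a/b' sort before b'a-b' (block order),
        # whereas a raw byte comparison would put b'a-b' first, since
        # b'-' < b'/'.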
2778
def set_state_from_inventory(self, new_inv):
2779
"""Set new_inv as the current state.
2781
This API is called by tree transform, and will usually occur with
2782
existing parent trees.
2784
:param new_inv: The inventory object to set current state from.
2786
if 'evil' in debug.debug_flags:
2787
trace.mutter_callsite(1,
2788
"set_state_from_inventory called; please mutate the tree instead")
2789
tracing = 'dirstate' in debug.debug_flags
2791
trace.mutter("set_state_from_inventory trace:")
2792
self._read_dirblocks_if_needed()
2794
# Two iterators: current data and new data, both in dirblock order.
2795
# We zip them together, which tells about entries that are new in the
2796
# inventory, or removed in the inventory, or present in both and
2799
# You might think we could just synthesize a new dirstate directly
2800
# since we're processing it in the right order. However, we need to
2801
# also consider there may be any number of parent trees and relocation
2802
# pointers, and we don't want to duplicate that here.
2803
new_iterator = new_inv.iter_entries_by_dir()
2804
# we will be modifying the dirstate, so we need a stable iterator. In
2805
# future we might write one, for now we just clone the state into a
2806
# list using a copy so that we see every original item and don't have
2807
# to adjust the position when items are inserted or deleted in the
2808
# underlying dirstate.
2809
old_iterator = iter(list(self._iter_entries()))
2810
# both must have roots so this is safe:
2811
current_new = next(new_iterator)
2812
current_old = next(old_iterator)
2813
def advance(iterator):
2815
return next(iterator)
2816
except StopIteration:
2818
while current_new or current_old:
2819
# skip entries in old that are not really there
2820
if current_old and current_old[1][0][0] in (b'a', b'r'):
2821
# relocated or absent
2822
current_old = advance(old_iterator)
2825
# convert new into dirblock style
2826
new_path_utf8 = current_new[0].encode('utf8')
2827
new_dirname, new_basename = osutils.split(new_path_utf8)
2828
new_id = current_new[1].file_id
2829
new_entry_key = (new_dirname, new_basename, new_id)
2830
current_new_minikind = \
2831
DirState._kind_to_minikind[current_new[1].kind]
2832
if current_new_minikind == b't':
2833
fingerprint = current_new[1].reference_revision or b''
2835
# We normally only insert or remove records, or update
2836
# them when it has significantly changed. Then we want to
2837
# erase its fingerprint. Unaffected records should
2838
# normally not be updated at all.
2841
# for safety disable variables
2842
new_path_utf8 = new_dirname = new_basename = new_id = \
2843
new_entry_key = None
2844
# 5 cases, we dont have a value that is strictly greater than everything, so
2845
# we make both end conditions explicit
2847
# old is finished: insert current_new into the state.
2849
trace.mutter("Appending from new '%s'.",
2850
new_path_utf8.decode('utf8'))
2851
self.update_minimal(new_entry_key, current_new_minikind,
2852
executable=current_new[1].executable,
2853
path_utf8=new_path_utf8, fingerprint=fingerprint,
2855
current_new = advance(new_iterator)
2856
elif not current_new:
2859
trace.mutter("Truncating from old '%s/%s'.",
2860
current_old[0][0].decode('utf8'),
2861
current_old[0][1].decode('utf8'))
2862
self._make_absent(current_old)
2863
current_old = advance(old_iterator)
2864
elif new_entry_key == current_old[0]:
2865
# same - common case
2866
# We're looking at the same path and id in both the dirstate
2867
# and inventory, so just need to update the fields in the
2868
# dirstate from the one in the inventory.
2869
# TODO: update the record if anything significant has changed.
2870
# the minimal required trigger is if the execute bit or cached
2872
if (current_old[1][0][3] != current_new[1].executable or
2873
current_old[1][0][0] != current_new_minikind):
2875
trace.mutter("Updating in-place change '%s'.",
2876
new_path_utf8.decode('utf8'))
2877
self.update_minimal(current_old[0], current_new_minikind,
2878
executable=current_new[1].executable,
2879
path_utf8=new_path_utf8, fingerprint=fingerprint,
2881
# both sides are dealt with, move on
2882
current_old = advance(old_iterator)
2883
current_new = advance(new_iterator)
2884
elif (lt_by_dirs(new_dirname, current_old[0][0])
2885
or (new_dirname == current_old[0][0]
2886
and new_entry_key[1:] < current_old[0][1:])):
2888
# add a entry for this and advance new
2890
trace.mutter("Inserting from new '%s'.",
2891
new_path_utf8.decode('utf8'))
2892
self.update_minimal(new_entry_key, current_new_minikind,
2893
executable=current_new[1].executable,
2894
path_utf8=new_path_utf8, fingerprint=fingerprint,
2896
current_new = advance(new_iterator)
2898
# we've advanced past the place where the old key would be,
2899
# without seeing it in the new list. so it must be gone.
2901
trace.mutter("Deleting from old '%s/%s'.",
2902
current_old[0][0].decode('utf8'),
2903
current_old[0][1].decode('utf8'))
2904
self._make_absent(current_old)
2905
current_old = advance(old_iterator)
2906
self._mark_modified()
2907
self._id_index = None
2908
self._packed_stat_index = None
2910
trace.mutter("set_state_from_inventory complete.")

    def set_state_from_scratch(self, working_inv, parent_trees, parent_ghosts):
        """Wipe the currently stored state and set it to something new.

        This is a hard-reset for the data we are working with.
        """
        # Technically, we really want a write lock, but until we write, we
        # don't really need it.
        self._requires_lock()
        # root dir and root dir contents with no children. We have to have a
        # root for set_state_from_inventory to work correctly.
        empty_root = ((b'', b'', inventory.ROOT_ID),
                      [(b'd', b'', 0, False, DirState.NULLSTAT)])
        empty_tree_dirblocks = [(b'', [empty_root]), (b'', [])]
        self._set_data([], empty_tree_dirblocks)
        self.set_state_from_inventory(working_inv)
        self.set_parent_trees(parent_trees, parent_ghosts)

    def _make_absent(self, current_old):
        """Mark current_old - an entry - as absent for tree 0.

        :return: True if this was the last details entry for the entry key:
            that is, if the underlying block has had the entry removed, thus
            shrinking in length.
        """
        # build up paths that this id will be left at after the change is made,
        # so we can update their cross references in tree 0
        all_remaining_keys = set()
        # Don't check the working tree, because it's going.
        for details in current_old[1][1:]:
            if details[0] not in (b'a', b'r'):  # absent, relocated
                all_remaining_keys.add(current_old[0])
            elif details[0] == b'r':  # relocated
                # record the key for the real path.
                all_remaining_keys.add(
                    tuple(osutils.split(details[1])) + (current_old[0][2],))
            # absent rows are not present at any path.
        last_reference = current_old[0] not in all_remaining_keys
        if last_reference:
            # the current row consists entirely of the current item (being
            # marked absent), and relocated or absent entries for the other
            # trees: Remove it, it's meaningless.
            block = self._find_block(current_old[0])
            entry_index, present = self._find_entry_index(current_old[0],
                                                          block[1])
            if not present:
                raise AssertionError(
                    'could not find entry for %s' % (current_old,))
            block[1].pop(entry_index)
            # if we have an id_index in use, remove this key from it for this
            # id.
            if self._id_index is not None:
                self._remove_from_id_index(self._id_index, current_old[0])
        # update all remaining keys for this id to record it as absent. The
        # existing details may either be the record we are marking as deleted
        # (if there were other trees with the id present at this path), or may
        # be relocations.
        for update_key in all_remaining_keys:
            update_block_index, present = \
                self._find_block_index_from_key(update_key)
            if not present:
                raise AssertionError(
                    'could not find block for %s' % (update_key,))
            update_entry_index, present = \
                self._find_entry_index(
                    update_key, self._dirblocks[update_block_index][1])
            if not present:
                raise AssertionError(
                    'could not find entry for %s' % (update_key,))
            update_tree_details = \
                self._dirblocks[update_block_index][1][update_entry_index][1]
            # it must not be absent at the moment
            if update_tree_details[0][0] == b'a':  # absent
                raise AssertionError('bad row %r' % (update_tree_details,))
            update_tree_details[0] = DirState.NULL_PARENT_DETAILS
        self._mark_modified()
        return last_reference

    def update_minimal(self, key, minikind, executable=False, fingerprint=b'',
                       packed_stat=None, size=0, path_utf8=None, fullscan=False):
        """Update an entry to the state in tree 0.

        This will either create a new entry at 'key' or update an existing one.
        It also makes sure that any other records which might mention this are
        updated as well.

        :param key: (dir, name, file_id) for the new entry
        :param minikind: The type for the entry ('f' == 'file', 'd' ==
            'directory'), etc.
        :param executable: Should the executable bit be set?
        :param fingerprint: Simple fingerprint for new entry: canonical-form
            sha1 for files, referenced revision id for subtrees, etc.
        :param packed_stat: Packed stat value for new entry.
        :param size: Size information for new entry
        :param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing
            the extra lookup.
        :param fullscan: If True then a complete scan of the dirstate is being
            done and checking for duplicate rows should not be done. This
            should only be set by set_state_from_inventory and similar methods.

        If packed_stat and fingerprint are not given, they're invalidated in
        the entry.
        """
        block = self._find_block(key)[1]
        if packed_stat is None:
            packed_stat = DirState.NULLSTAT
        # XXX: Some callers pass '' as the packed_stat, and it seems to be
        # sometimes present in the dirstate - this seems oddly inconsistent.
        # mbp 20071008
        entry_index, present = self._find_entry_index(key, block)
        new_details = (minikind, fingerprint, size, executable, packed_stat)
        id_index = self._get_id_index()
        if not present:
            # New record. Check there isn't an entry at this path already.
            if not fullscan:
                low_index, _ = self._find_entry_index(key[0:2] + (b'',), block)
                while low_index < len(block):
                    entry = block[low_index]
                    if entry[0][0:2] == key[0:2]:
                        if entry[1][0][0] not in (b'a', b'r'):
                            # This entry has the same path (but a different id) as
                            # the new entry we're adding, and is present in this
                            # tree.
                            self._raise_invalid(
                                (b"%s/%s" % key[0:2]).decode('utf8'), key[2],
                                "Attempt to add item at path already occupied by "
                                "id %r" % entry[0][2])
                        low_index += 1
                    else:
                        break
            # new entry, synthesis cross reference here,
            existing_keys = id_index.get(key[2], ())
            if not existing_keys:
                # not currently in the state, simplest case
                new_entry = key, [new_details] + self._empty_parent_info()
            else:
                # present at one or more existing other paths.
                # grab one of them and use it to generate parent
                # relocation/absent entries.
                new_entry = key, [new_details]
                # existing_keys can be changed as we iterate.
                for other_key in tuple(existing_keys):
                    # change the record at other to be a pointer to this new
                    # record. The loop looks similar to the change to
                    # relocations when updating an existing record but it's not:
                    # the test for existing kinds is different: this can be
                    # factored out to a helper though.
                    other_block_index, present = self._find_block_index_from_key(
                        other_key)
                    if not present:
                        raise AssertionError('could not find block for %s' % (
                            other_key,))
                    other_block = self._dirblocks[other_block_index][1]
                    other_entry_index, present = self._find_entry_index(
                        other_key, other_block)
                    if not present:
                        raise AssertionError(
                            'update_minimal: could not find other entry for %s'
                            % (other_key,))
                    if path_utf8 is None:
                        raise AssertionError('no path')
                    # Turn this other location into a reference to the new
                    # location. This also updates the aliased iterator
                    # (current_old in set_state_from_inventory) so that the old
                    # entry, if not already examined, is skipped over by that
                    # loop.
                    other_entry = other_block[other_entry_index]
                    other_entry[1][0] = (b'r', path_utf8, 0, False, b'')
                    if self._maybe_remove_row(other_block, other_entry_index,
                                              id_index):
                        # If the row holding this was removed, we need to
                        # recompute where this entry goes
                        entry_index, _ = self._find_entry_index(key, block)

                # This loop:
                # adds a tuple to the new details for each column
                #  - either by copying an existing relocation pointer inside that column
                #  - or by creating a new pointer to the right row inside that column
                num_present_parents = self._num_present_parents()
                if num_present_parents:
                    # TODO: This re-evaluates the existing_keys set, do we need
                    #       to do that ourselves?
                    other_key = list(existing_keys)[0]
                for lookup_index in range(1, num_present_parents + 1):
                    # grab any one entry, use it to find the right path.
                    # TODO: optimise this to reduce memory use in highly
                    # fragmented situations by reusing the relocation
                    # records.
                    update_block_index, present = \
                        self._find_block_index_from_key(other_key)
                    if not present:
                        raise AssertionError(
                            'could not find block for %s' % (other_key,))
                    update_entry_index, present = \
                        self._find_entry_index(
                            other_key, self._dirblocks[update_block_index][1])
                    if not present:
                        raise AssertionError(
                            'update_minimal: could not find entry for %s'
                            % (other_key,))
                    update_details = self._dirblocks[update_block_index][1][
                        update_entry_index][1][lookup_index]
                    if update_details[0] in (b'a', b'r'):  # relocated, absent
                        # it's a pointer or absent in lookup_index's tree, use
                        # it as is.
                        new_entry[1].append(update_details)
                    else:
                        # we have the right key, make a pointer to it.
                        pointer_path = osutils.pathjoin(*other_key[0:2])
                        new_entry[1].append((b'r', pointer_path, 0, False, b''))
            block.insert(entry_index, new_entry)
            self._add_to_id_index(id_index, key)
        else:
            # Does the new state matter?
            block[entry_index][1][0] = new_details
            # parents cannot be affected by what we do.
            # other occurrences of this id can be found
            # from the id index.
            # ---
            # tree index consistency: All other paths for this id in this tree
            # index must point to the correct path. We have to loop here because
            # we may have passed entries in the state with this file id already
            # that were absent - where parent entries are - and they need to be
            # converted to relocated.
            if path_utf8 is None:
                raise AssertionError('no path')
            existing_keys = id_index.get(key[2], ())
            if key not in existing_keys:
                raise AssertionError('We found the entry in the blocks, but'
                                     ' the key is not in the id_index.'
                                     ' key: %s, existing_keys: %s'
                                     % (key, existing_keys))
            for entry_key in existing_keys:
                # TODO:PROFILING: It might be faster to just update
                # rather than checking if we need to, and then overwrite
                # the one we are located at.
                if entry_key != key:
                    # this file id is at a different path in one of the
                    # other trees, so put absent pointers there
                    # This is the vertical axis in the matrix, all pointing
                    # to the real path.
                    block_index, present = \
                        self._find_block_index_from_key(entry_key)
                    if not present:
                        raise AssertionError('not present: %r', entry_key)
                    entry_index, present = self._find_entry_index(
                        entry_key, self._dirblocks[block_index][1])
                    if not present:
                        raise AssertionError('not present: %r', entry_key)
                    self._dirblocks[block_index][1][entry_index][1][0] = \
                        (b'r', path_utf8, 0, False, b'')
        # add a containing dirblock if needed.
        if new_details[0] == b'd':
            # GZ 2017-06-09: Using pathjoin why?
            subdir_key = (osutils.pathjoin(*key[0:2]), b'', b'')
            block_index, present = self._find_block_index_from_key(subdir_key)
            if not present:
                self._dirblocks.insert(block_index, (subdir_key[0], []))

        self._mark_modified()
3156
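    # A minimal sketch of how update_minimal shapes a row (hypothetical
    # values; assumes a write-locked state with its dirblocks in memory):
    #
    #   key = (b'dir', b'file.txt', b'file-id-1')  # (dirname, basename, id)
    #   state.update_minimal(key, b'f', path_utf8=b'dir/file.txt')
    #
    # Afterwards the block for b'dir' holds an entry whose tree-0 details are
    # (b'f', b'', 0, False, DirState.NULLSTAT), and any other row for
    # b'file-id-1' has had its tree-0 details rewritten to the relocation
    # pointer (b'r', b'dir/file.txt', 0, False, b'').
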
    def _maybe_remove_row(self, block, index, id_index):
        """Remove index if it is absent or relocated across the row.

        id_index is updated accordingly.
        :return: True if we removed the row, False otherwise
        """
        present_in_row = False
        entry = block[index]
        for column in entry[1]:
            if column[0] not in (b'a', b'r'):
                present_in_row = True
                break
        if not present_in_row:
            block.pop(index)
            self._remove_from_id_index(id_index, entry[0])
            return True
        return False

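    # Illustrative sketch (hypothetical row, assumed to be registered in
    # id_index): a row that is absent or relocated in every column carries no
    # information, so _maybe_remove_row drops it from the block and id_index:
    #
    #   block = [((b'', b'old.txt', b'id-1'),
    #             [(b'r', b'new.txt', 0, False, b''),  # tree 0: relocated
    #              (b'a', b'', 0, False, b'')])]       # tree 1: absent
    #   state._maybe_remove_row(block, 0, state._get_id_index())  # -> True
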
    def _validate(self):
        """Check that invariants on the dirblock are correct.

        This can be useful in debugging; it shouldn't be necessary in
        normal code.

        This must be called with a lock held.
        """
        # NOTE: This must always raise AssertionError not just assert,
        # otherwise it may not behave properly under python -O

        # TODO: All entries must have some content that's not 'a' or 'r',
        # otherwise it could just be removed.

        # TODO: All relocations must point directly to a real entry.

        # TODO: No repeated keys.

        from pprint import pformat
        self._read_dirblocks_if_needed()
        if len(self._dirblocks) > 0:
            if not self._dirblocks[0][0] == b'':
                raise AssertionError(
                    "dirblocks don't start with root block:\n"
                    + pformat(self._dirblocks))
        if len(self._dirblocks) > 1:
            if not self._dirblocks[1][0] == b'':
                raise AssertionError(
                    "dirblocks missing root directory:\n"
                    + pformat(self._dirblocks))
        # the dirblocks are sorted by their path components, name, and dir id
        dir_names = [d[0].split(b'/')
                     for d in self._dirblocks[1:]]
        if dir_names != sorted(dir_names):
            raise AssertionError(
                "dir names are not in sorted order:\n"
                + pformat(self._dirblocks)
                + "\nkeys:\n"
                + pformat(dir_names))
        for dirblock in self._dirblocks:
            # within each dirblock, the entries are sorted by filename and
            # then by id.
            for entry in dirblock[1]:
                if dirblock[0] != entry[0][0]:
                    raise AssertionError(
                        "entry key for %r "
                        "doesn't match directory name in\n%r" %
                        (entry, pformat(dirblock)))
            if dirblock[1] != sorted(dirblock[1]):
                raise AssertionError(
                    "dirblock for %r is not sorted:\n%s" %
                    (dirblock[0], pformat(dirblock)))

        def check_valid_parent():
            """Check that the current entry has a valid parent.

            This makes sure that the parent has a record,
            and that the parent isn't marked as "absent" in the
            current tree. (It is invalid to have a non-absent file in an absent
            directory.)
            """
            if entry[0][0:2] == (b'', b''):
                # There should be no parent for the root row
                return
            parent_entry = self._get_entry(tree_index, path_utf8=entry[0][0])
            if parent_entry == (None, None):
                raise AssertionError(
                    "no parent entry for: %s in tree %s"
                    % (this_path, tree_index))
            if parent_entry[1][tree_index][0] != b'd':
                raise AssertionError(
                    "Parent entry for %s is not marked as a valid"
                    " directory. %s" % (this_path, parent_entry,))

        # For each file id, for each tree: either
        # the file id is not present at all; all rows with that id in the
        # key have it marked as 'absent'
        # OR the file id is present under exactly one name; any other entries
        # that mention that id point to the correct name.
        #
        # We check this with a dict per tree pointing either to the present
        # name, or None if absent.
        tree_count = self._num_present_parents() + 1
        id_path_maps = [{} for _ in range(tree_count)]
        # Make sure that all renamed entries point to the correct location.
        for entry in self._iter_entries():
            file_id = entry[0][2]
            this_path = osutils.pathjoin(entry[0][0], entry[0][1])
            if len(entry[1]) != tree_count:
                raise AssertionError(
                    "wrong number of entry details for row\n%s"
                    ",\nexpected %d" %
                    (pformat(entry), tree_count))
            absent_positions = 0
            for tree_index, tree_state in enumerate(entry[1]):
                this_tree_map = id_path_maps[tree_index]
                minikind = tree_state[0]
                if minikind in (b'a', b'r'):
                    absent_positions += 1
                # have we seen this id before in this column?
                if file_id in this_tree_map:
                    previous_path, previous_loc = this_tree_map[file_id]
                    # any later mention of this file must be consistent with
                    # what was said before
                    if minikind == b'a':
                        if previous_path is not None:
                            raise AssertionError(
                                "file %s is absent in row %r but also present "
                                "at %r" %
                                (file_id, entry, previous_path))
                    elif minikind == b'r':
                        target_location = tree_state[1]
                        if previous_path != target_location:
                            raise AssertionError(
                                "file %s relocation in row %r but also at %r"
                                % (file_id, entry, previous_path))
                    else:
                        # a file, directory, etc - may have been previously
                        # pointed to by a relocation, which must point here
                        if previous_path != this_path:
                            raise AssertionError(
                                "entry %r inconsistent with previous path %r "
                                "seen at %r" %
                                (entry, previous_path, previous_loc))
                        check_valid_parent()
                else:
                    if minikind == b'a':
                        # absent; should not occur anywhere else
                        this_tree_map[file_id] = None, this_path
                    elif minikind == b'r':
                        # relocation, must occur at expected location
                        this_tree_map[file_id] = tree_state[1], this_path
                    else:
                        this_tree_map[file_id] = this_path, this_path
                        check_valid_parent()
            if absent_positions == tree_count:
                raise AssertionError(
                    "entry %r has no data for any tree." % (entry,))
        if self._id_index is not None:
            for file_id, entry_keys in viewitems(self._id_index):
                for entry_key in entry_keys:
                    # Check that the entry in the map is pointing to the same
                    # file_id
                    if entry_key[2] != file_id:
                        raise AssertionError(
                            'file_id %r did not match entry key %s'
                            % (file_id, entry_key))
                    # And that from this entry key, we can look up the
                    # original record
                    block_index, present = self._find_block_index_from_key(
                        entry_key)
                    if not present:
                        raise AssertionError(
                            'missing block for entry key: %r' % (entry_key,))
                    entry_index, present = self._find_entry_index(
                        entry_key, self._dirblocks[block_index][1])
                    if not present:
                        raise AssertionError(
                            'missing entry for key: %r' % (entry_key,))
                if len(entry_keys) != len(set(entry_keys)):
                    raise AssertionError(
                        'id_index contained non-unique data for %s'
                        % (entry_keys,))

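    # For illustration (hypothetical rows; `sha` and `packed_stat` stand in
    # for real values): a rename of b'a.txt' to b'b.txt' between the basis
    # (tree 1) and the working tree (tree 0) passes the checks above with:
    #
    #   ((b'', b'b.txt', b'id-1'), [(b'f', sha, 10, False, packed_stat),
    #                               (b'r', b'a.txt', 0, False, b'')])
    #   ((b'', b'a.txt', b'id-1'), [(b'r', b'b.txt', 0, False, b''),
    #                               (b'f', sha, 10, False, b'rev-1')])
    #
    # Each relocation points at the path where the id is really present in
    # that tree, which is exactly what _validate verifies.
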
    def _wipe_state(self):
        """Forget all state information about the dirstate."""
        self._header_state = DirState.NOT_IN_MEMORY
        self._dirblock_state = DirState.NOT_IN_MEMORY
        self._changes_aborted = False
        self._parents = []
        self._ghosts = []
        self._dirblocks = []
        self._id_index = None
        self._packed_stat_index = None
        self._end_of_header = None
        self._cutoff_time = None
        self._split_path_cache = {}

    def lock_read(self):
        """Acquire a read lock on the dirstate."""
        if self._lock_token is not None:
            raise errors.LockContention(self._lock_token)
        # TODO: jam 20070301 Rather than wiping completely, if the blocks are
        #       already in memory, we could read just the header and check for
        #       any modification. If not modified, we can just leave things
        #       alone
        self._lock_token = lock.ReadLock(self._filename)
        self._lock_state = 'r'
        self._state_file = self._lock_token.f
        self._wipe_state()
        return lock.LogicalLockResult(self.unlock)

    def lock_write(self):
        """Acquire a write lock on the dirstate."""
        if self._lock_token is not None:
            raise errors.LockContention(self._lock_token)
        # TODO: jam 20070301 Rather than wiping completely, if the blocks are
        #       already in memory, we could read just the header and check for
        #       any modification. If not modified, we can just leave things
        #       alone
        self._lock_token = lock.WriteLock(self._filename)
        self._lock_state = 'w'
        self._state_file = self._lock_token.f
        self._wipe_state()
        return lock.LogicalLockResult(self.unlock, self._lock_token)

    def unlock(self):
        """Drop any locks held on the dirstate."""
        if self._lock_token is None:
            raise errors.LockNotHeld(self)
        # TODO: jam 20070301 Rather than wiping completely, if the blocks are
        #       already in memory, we could read just the header and check for
        #       any modification. If not modified, we can just leave things
        #       alone
        self._state_file = None
        self._lock_state = None
        self._lock_token.unlock()
        self._lock_token = None
        self._split_path_cache = {}

    def _requires_lock(self):
        """Check that a lock is currently held by someone on the dirstate."""
        if not self._lock_token:
            raise errors.ObjectNotLocked(self)
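

# Illustrative locking pattern (hypothetical path; a sketch only):
#
#   state = DirState.on_file('dirstate')
#   state.lock_read()
#   try:
#       entry = state._get_entry(0, path_utf8=b'dir/file.txt')
#   finally:
#       state.unlock()
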
def py_update_entry(state, entry, abspath, stat_value,
                    _stat_to_minikind=DirState._stat_to_minikind):
    """Update the entry based on what is actually on disk.

    This function only calculates the sha if it needs to - if the entry is
    uncachable, or clearly different to the first parent's entry, no sha
    is calculated, and None is returned.

    :param state: The dirstate this entry is in.
    :param entry: This is the dirblock entry for the file in question.
    :param abspath: The path on disk for this file.
    :param stat_value: The stat value done on the path.
    :return: None, or the sha1 hexdigest of the file (40 bytes) or link
        target of a symlink.
    """
    try:
        minikind = _stat_to_minikind[stat_value.st_mode & 0o170000]
    except KeyError:
        # Unhandled kind
        return None
    packed_stat = pack_stat(stat_value)
    (saved_minikind, saved_link_or_sha1, saved_file_size,
     saved_executable, saved_packed_stat) = entry[1][0]

    if minikind == b'd' and saved_minikind == b't':
        minikind = b't'
    if (minikind == saved_minikind
            and packed_stat == saved_packed_stat):
        # The stat hasn't changed since we saved, so we can re-use the
        # saved sha hash.
        if minikind == b'd':
            return None

        # size should also be in packed_stat
        if saved_file_size == stat_value.st_size:
            return saved_link_or_sha1

    # If we have gotten this far, that means that we need to actually
    # process this entry.
    link_or_sha1 = None
    worth_saving = True
    if minikind == b'f':
        executable = state._is_executable(stat_value.st_mode,
                                          saved_executable)
        if state._cutoff_time is None:
            state._sha_cutoff_time()
        if (stat_value.st_mtime < state._cutoff_time
                and stat_value.st_ctime < state._cutoff_time
                and len(entry[1]) > 1
                and entry[1][1][0] != b'a'):
            # Could check for size changes for further optimised
            # avoidance of sha1's. However the most prominent case of
            # over-sha1-ing is during initial add, which this catches.
            # Besides, if content filtering happens, size and sha
            # are calculated at the same time, so checking just the size
            # gains nothing w.r.t. performance.
            link_or_sha1 = state._sha1_file(abspath)
            entry[1][0] = (b'f', link_or_sha1, stat_value.st_size,
                           executable, packed_stat)
        else:
            entry[1][0] = (b'f', b'', stat_value.st_size,
                           executable, DirState.NULLSTAT)
            worth_saving = False
    elif minikind == b'd':
        link_or_sha1 = None
        entry[1][0] = (b'd', b'', 0, False, packed_stat)
        if saved_minikind != b'd':
            # This changed from something into a directory. Make sure we
            # have a directory block for it. This doesn't happen very
            # often, so this doesn't have to be super fast.
            block_index, entry_index, dir_present, file_present = \
                state._get_block_entry_index(entry[0][0], entry[0][1], 0)
            state._ensure_block(block_index, entry_index,
                                osutils.pathjoin(entry[0][0], entry[0][1]))
        else:
            worth_saving = False
    elif minikind == b'l':
        if saved_minikind == b'l':
            worth_saving = False
        link_or_sha1 = state._read_link(abspath, saved_link_or_sha1)
        if state._cutoff_time is None:
            state._sha_cutoff_time()
        if (stat_value.st_mtime < state._cutoff_time
                and stat_value.st_ctime < state._cutoff_time):
            entry[1][0] = (b'l', link_or_sha1, stat_value.st_size,
                           False, packed_stat)
        else:
            entry[1][0] = (b'l', b'', stat_value.st_size,
                           False, DirState.NULLSTAT)
    if worth_saving:
        state._mark_modified([entry])
    return link_or_sha1
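

# A minimal usage sketch for py_update_entry (hypothetical paths; `state`
# must be locked and `entry` must be the dirblock entry for the same path):
#
#   stat_value = os.lstat('/path/to/tree/dir/file.txt')
#   sha1 = py_update_entry(state, entry, '/path/to/tree/dir/file.txt',
#                          stat_value)
#
# The return value is the file's sha1 (or a symlink's target) when it was
# computed or could be reused from the stat cache, and None when the file
# changed too recently for its hash to be cached safely.

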
class ProcessEntryPython(object):

    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id",
                 "last_source_parent", "last_target_parent", "include_unchanged",
                 "partial", "use_filesystem_for_exec", "utf8_decode",
                 "searched_specific_files", "search_specific_files",
                 "searched_exact_paths", "search_specific_file_parents", "seen_ids",
                 "state", "source_index", "target_index", "want_unversioned", "tree"]

    def __init__(self, include_unchanged, use_filesystem_for_exec,
                 search_specific_files, state, source_index, target_index,
                 want_unversioned, tree):
        self.old_dirname_to_file_id = {}
        self.new_dirname_to_file_id = {}
        # Are we doing a partial iter_changes?
        self.partial = search_specific_files != {''}
        # Using a list so that we can access the values and change them in
        # nested scope. Each one is [path, file_id, entry]
        self.last_source_parent = [None, None]
        self.last_target_parent = [None, None]
        self.include_unchanged = include_unchanged
        self.use_filesystem_for_exec = use_filesystem_for_exec
        self.utf8_decode = cache_utf8._utf8_decode
        # For each search index in each path at or under each element of
        # search_specific_files, if the detail is relocated: add the id, and
        # add the relocated path as one to search if it's not searched already.
        # If the detail is not relocated, add the id.
        self.searched_specific_files = set()
        # When we search exact paths without expanding downwards, we record
        # that here.
        self.searched_exact_paths = set()
        self.search_specific_files = search_specific_files
        # The parents up to the root of the paths we are searching.
        # After all normal paths are returned, these specific items are returned.
        self.search_specific_file_parents = set()
        # The ids we've sent out in the delta.
        self.seen_ids = set()
        self.state = state
        self.source_index = source_index
        self.target_index = target_index
        if target_index != 0:
            # A lot of code in here depends on target_index == 0
            raise errors.BzrError('unsupported target index')
        self.want_unversioned = want_unversioned
        self.tree = tree

    def _process_entry(self, entry, path_info, pathjoin=osutils.pathjoin):
        """Compare an entry and real disk to generate delta information.

        :param path_info: top_relpath, basename, kind, lstat, abspath for
            the path of entry. If None, then the path is considered absent in
            the target (Perhaps we should pass in a concrete entry for this ?)
            Basename is returned as a utf8 string because we expect this
            tuple will be ignored, and don't want to take the time to
            decode.
        :return: (iter_changes_result, changed). If the entry has not been
            handled then changed is None. Otherwise it is False if no content
            or metadata changes have occurred, and True if any content or
            metadata change has occurred. If self.include_unchanged is True
            then if changed is not None, iter_changes_result will always be a
            result tuple. Otherwise, iter_changes_result is None unless
            changed is True.
        """
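        # Shape of iter_changes_result, for reference (values illustrative):
        #   (file_id,
        #    (source_path, target_path),  # unicode paths, None when absent
        #    changed_content,             # True if file content differs
        #    (source_versioned, target_versioned),
        #    (source_parent_id, target_parent_id),
        #    (source_name, target_name),
        #    (source_kind, target_kind),  # e.g. 'file', 'directory'
        #    (source_exec, target_exec))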
        if self.source_index is None:
            source_details = DirState.NULL_PARENT_DETAILS
        else:
            source_details = entry[1][self.source_index]
        # GZ 2017-06-09: Eck, more sets.
        _fdltr = {b'f', b'd', b'l', b't', b'r'}
        _fdlt = {b'f', b'd', b'l', b't'}
        _ra = (b'r', b'a')
        target_details = entry[1][self.target_index]
        target_minikind = target_details[0]
        if path_info is not None and target_minikind in _fdlt:
            if not (self.target_index == 0):
                raise AssertionError()
            link_or_sha1 = update_entry(self.state, entry,
                                        abspath=path_info[4],
                                        stat_value=path_info[3])
            # The entry may have been modified by update_entry
            target_details = entry[1][self.target_index]
            target_minikind = target_details[0]
        else:
            link_or_sha1 = None
        file_id = entry[0][2]
        source_minikind = source_details[0]
        if source_minikind in _fdltr and target_minikind in _fdlt:
            # claimed content in both: diff
            #   r    | fdlt   |      | add source to search, add id path move and perform
            #        |        |      | diff check on source-target
            #   r    | fdlt   |  a   | dangling file that was present in the basis.
            #        |        |      | ???
            if source_minikind == b'r':
                # add the source to the search path to find any children it
                # has. TODO ? : only add if it is a container ?
                if not osutils.is_inside_any(self.searched_specific_files,
                                             source_details[1]):
                    self.search_specific_files.add(source_details[1])
                # generate the old path; this is needed for stating later
                # as well.
                old_path = source_details[1]
                old_dirname, old_basename = os.path.split(old_path)
                path = pathjoin(entry[0][0], entry[0][1])
                old_entry = self.state._get_entry(self.source_index,
                                                  path_utf8=old_path)
                # update the source details variable to be the real
                # location.
                if old_entry == (None, None):
                    raise DirstateCorrupt(
                        self.state._filename,
                        "entry '%s/%s' is considered renamed from %r"
                        " but source does not exist\n"
                        "entry: %s" % (entry[0][0], entry[0][1], old_path,
                                       entry))
                source_details = old_entry[1][self.source_index]
                source_minikind = source_details[0]
            else:
                old_dirname = entry[0][0]
                old_basename = entry[0][1]
                old_path = path = None
            if path_info is None:
                # the file is missing on disk, show as removed.
                content_change = True
                target_kind = None
                target_exec = False
            else:
                # source and target are both versioned and disk file is present.
                target_kind = path_info[2]
                if target_kind == 'directory':
                    if path is None:
                        old_path = path = pathjoin(old_dirname, old_basename)
                    self.new_dirname_to_file_id[path] = file_id
                    if source_minikind != b'd':
                        content_change = True
                    else:
                        # directories have no fingerprint
                        content_change = False
                    target_exec = False
                elif target_kind == 'file':
                    if source_minikind != b'f':
                        content_change = True
                    else:
                        # Check the sha. We can't just rely on the size as
                        # content filtering may mean different sizes actually
                        # map to the same content
                        if link_or_sha1 is None:
                            # Stat cache miss:
                            statvalue, link_or_sha1 = \
                                self.state._sha1_provider.stat_and_sha1(
                                    path_info[4])
                            self.state._observed_sha1(entry, link_or_sha1,
                                                      statvalue)
                        content_change = (link_or_sha1 != source_details[1])
                    # Target details is updated at update_entry time
                    if self.use_filesystem_for_exec:
                        # We don't need S_ISREG here, because we are sure
                        # we are dealing with a file.
                        target_exec = bool(stat.S_IEXEC & path_info[3].st_mode)
                    else:
                        target_exec = target_details[3]
                elif target_kind == 'symlink':
                    if source_minikind != b'l':
                        content_change = True
                    else:
                        content_change = (link_or_sha1 != source_details[1])
                    target_exec = False
                elif target_kind == 'tree-reference':
                    if source_minikind != b't':
                        content_change = True
                    else:
                        content_change = False
                    target_exec = False
                else:
                    if path is None:
                        path = pathjoin(old_dirname, old_basename)
                    raise errors.BadFileKindError(path, path_info[2])
            if source_minikind == b'd':
                if path is None:
                    old_path = path = pathjoin(old_dirname, old_basename)
                self.old_dirname_to_file_id[old_path] = file_id
            # parent id is the entry for the path in the target tree
            if old_basename and old_dirname == self.last_source_parent[0]:
                source_parent_id = self.last_source_parent[1]
            else:
                try:
                    source_parent_id = self.old_dirname_to_file_id[old_dirname]
                except KeyError:
                    source_parent_entry = self.state._get_entry(
                        self.source_index, path_utf8=old_dirname)
                    source_parent_id = source_parent_entry[0][2]
                if source_parent_id == entry[0][2]:
                    # This is the root, so the parent is None
                    source_parent_id = None
                else:
                    self.last_source_parent[0] = old_dirname
                    self.last_source_parent[1] = source_parent_id
            new_dirname = entry[0][0]
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
                target_parent_id = self.last_target_parent[1]
            else:
                try:
                    target_parent_id = self.new_dirname_to_file_id[new_dirname]
                except KeyError:
                    # TODO: We don't always need to do the lookup, because the
                    # parent entry will be the same as the source entry.
                    target_parent_entry = self.state._get_entry(
                        self.target_index, path_utf8=new_dirname)
                    if target_parent_entry == (None, None):
                        raise AssertionError(
                            "Could not find target parent in wt: %s\nparent of: %s"
                            % (new_dirname, entry))
                    target_parent_id = target_parent_entry[0][2]
                if target_parent_id == entry[0][2]:
                    # This is the root, so the parent is None
                    target_parent_id = None
                else:
                    self.last_target_parent[0] = new_dirname
                    self.last_target_parent[1] = target_parent_id

            source_exec = source_details[3]
            changed = (content_change
                       or source_parent_id != target_parent_id
                       or old_basename != entry[0][1]
                       or source_exec != target_exec
                       )
            if not changed and not self.include_unchanged:
                return None, False
            else:
                if old_path is None:
                    old_path = path = pathjoin(old_dirname, old_basename)
                    old_path_u = self.utf8_decode(old_path)[0]
                    path_u = old_path_u
                else:
                    old_path_u = self.utf8_decode(old_path)[0]
                    if old_path == path:
                        path_u = old_path_u
                    else:
                        path_u = self.utf8_decode(path)[0]
                source_kind = DirState._minikind_to_kind[source_minikind]
                return (entry[0][2],
                        (old_path_u, path_u),
                        content_change,
                        (True, True),
                        (source_parent_id, target_parent_id),
                        (self.utf8_decode(old_basename)[0],
                         self.utf8_decode(entry[0][1])[0]),
                        (source_kind, target_kind),
                        (source_exec, target_exec)), changed
        elif source_minikind in b'a' and target_minikind in _fdlt:
            # looks like a new file
            path = pathjoin(entry[0][0], entry[0][1])
            # parent id is the entry for the path in the target tree
            # TODO: these are the same for an entire directory: cache em.
            parent_id = self.state._get_entry(self.target_index,
                                              path_utf8=entry[0][0])[0][2]
            if parent_id == entry[0][2]:
                parent_id = None
            if path_info is not None:
                # Present on disk:
                if self.use_filesystem_for_exec:
                    # We need S_ISREG here, because we aren't sure if this
                    # is a file or not.
                    target_exec = bool(
                        stat.S_ISREG(path_info[3].st_mode)
                        and stat.S_IEXEC & path_info[3].st_mode)
                else:
                    target_exec = target_details[3]
                return (entry[0][2],
                        (None, self.utf8_decode(path)[0]),
                        True,
                        (False, True),
                        (None, parent_id),
                        (None, self.utf8_decode(entry[0][1])[0]),
                        (None, path_info[2]),
                        (None, target_exec)), True
            else:
                # It's a missing file, report it as such.
                return (entry[0][2],
                        (None, self.utf8_decode(path)[0]),
                        True,
                        (False, True),
                        (None, parent_id),
                        (None, self.utf8_decode(entry[0][1])[0]),
                        (None, None),
                        (None, False)), True
        elif source_minikind in _fdlt and target_minikind in b'a':
            # unversioned, possibly, or possibly not deleted: we don't care.
            # if it's still on disk, *and* there's no other entry at this
            # path [we don't know this in this routine at the moment -
            # perhaps we should change this] - then it would be an unknown.
            old_path = pathjoin(entry[0][0], entry[0][1])
            # parent id is the entry for the path in the target tree
            parent_id = self.state._get_entry(self.source_index,
                                              path_utf8=entry[0][0])[0][2]
            if parent_id == entry[0][2]:
                parent_id = None
            return (entry[0][2],
                    (self.utf8_decode(old_path)[0], None),
                    True,
                    (True, False),
                    (parent_id, None),
                    (self.utf8_decode(entry[0][1])[0], None),
                    (DirState._minikind_to_kind[source_minikind], None),
                    (source_details[3], None)), True
        elif source_minikind in _fdlt and target_minikind in b'r':
            # a rename; could be a true rename, or a rename inherited from
            # a renamed parent. TODO: handle this efficiently. It's not a
            # common case to rename dirs though, so a correct but slow
            # implementation will do.
            if not osutils.is_inside_any(self.searched_specific_files,
                                         target_details[1]):
                self.search_specific_files.add(target_details[1])
        elif source_minikind in _ra and target_minikind in _ra:
            # neither of the selected trees contain this file,
            # so skip over it. This is not currently directly tested, but
            # is indirectly via test_too_much.TestCommands.test_conflicts.
            pass
        else:
            raise AssertionError("don't know how to compare "
                                 "source_minikind=%r, target_minikind=%r"
                                 % (source_minikind, target_minikind))
        return None, None

    def __iter__(self):
        return self

    def _gather_result_for_consistency(self, result):
        """Check a result we will yield to make sure we are consistent later.

        This gathers result's parents into a set to output later.

        :param result: A result tuple.
        """
        if not self.partial or not result[0]:
            return
        self.seen_ids.add(result[0])
        new_path = result[1][1]
        if new_path:
            # Not the root and not a delete: queue up the parents of the path.
            self.search_specific_file_parents.update(
                p.encode('utf8') for p in osutils.parent_directories(new_path))
            # Add the root directory which parent_directories does not
            # provide.
            self.search_specific_file_parents.add(b'')

    def iter_changes(self):
        """Iterate over the changes."""
        utf8_decode = cache_utf8._utf8_decode
        _lt_by_dirs = lt_by_dirs
        _process_entry = self._process_entry
        search_specific_files = self.search_specific_files
        searched_specific_files = self.searched_specific_files
        splitpath = osutils.splitpath

        # compare source_index and target_index at or under each element of
        # search_specific_files, following the comparison table below. Note
        # that we only want to do diff operations when the target is fdl,
        # because that's when the walkdirs logic will have exposed the
        # pathinfo for the target.
        # cases:
        #
        # Source | Target | disk | action
        #   r    | fdlt   |      | add source to search, add id path move and perform
        #        |        |      | diff check on source-target
        #   r    | fdlt   |  a   | dangling file that was present in the basis.
        #        |        |      | ???
        #   r    |  a     |      | add source to search
        #   r    |  a     |  a   |
        #   r    |  r     |      | this path is present in a non-examined tree, skip.
        #   r    |  r     |  a   | this path is present in a non-examined tree, skip.
        #   a    | fdlt   |      | add new id
        #   a    | fdlt   |  a   | dangling locally added file, skip
        #   a    |  a     |      | not present in either tree, skip
        #   a    |  a     |  a   | not present in any tree, skip
        #   a    |  r     |      | not present in either tree at this path, skip as it
        #        |        |      | may not be selected by the users list of paths.
        #   a    |  r     |  a   | not present in either tree at this path, skip as it
        #        |        |      | may not be selected by the users list of paths.
        #  fdlt  | fdlt   |      | content in both: diff them
        #  fdlt  | fdlt   |  a   | deleted locally, but not unversioned - show as deleted ?
        #  fdlt  |  a     |      | unversioned: output deleted id for now
        #  fdlt  |  a     |  a   | unversioned and deleted: output deleted id
        #  fdlt  |  r     |      | relocated in this tree, so add target to search.
        #        |        |      | Don't diff, we will see an r,fd; pair when we reach
        #        |        |      | this id at the other path.
        #  fdlt  |  r     |  a   | relocated in this tree, so add target to search.
        #        |        |      | Don't diff, we will see an r,fd; pair when we reach
        #        |        |      | this id at the other path.

        # TODO: jam 20070516 - Avoid the _get_entry lookup overhead by
        #       keeping a cache of directories that we have seen.

        while search_specific_files:
            # TODO: the pending list should be lexically sorted?  the
            #       interface doesn't require it.
            current_root = search_specific_files.pop()
            current_root_unicode = current_root.decode('utf8')
            searched_specific_files.add(current_root)
            # process the entries for this containing directory: the rest will be
            # found by their parents recursively.
            root_entries = self.state._entries_for_path(current_root)
            root_abspath = self.tree.abspath(current_root_unicode)
            try:
                root_stat = os.lstat(root_abspath)
            except OSError as e:
                if e.errno == errno.ENOENT:
                    # the path does not exist: let _process_entry know that.
                    root_dir_info = None
                else:
                    # some other random error: hand it up.
                    raise
            else:
                root_dir_info = (b'', current_root,
                                 osutils.file_kind_from_stat_mode(
                                     root_stat.st_mode),
                                 root_stat, root_abspath)
                if root_dir_info[2] == 'directory':
                    if self.tree._directory_is_tree_reference(
                            current_root.decode('utf8')):
                        root_dir_info = root_dir_info[:2] + \
                            ('tree-reference',) + root_dir_info[3:]

            if not root_entries and not root_dir_info:
                # this specified path is not present at all, skip it.
                continue
            path_handled = False
            for entry in root_entries:
                result, changed = _process_entry(entry, root_dir_info)
                if changed is not None:
                    path_handled = True
                    if changed:
                        self._gather_result_for_consistency(result)
                    if changed or self.include_unchanged:
                        yield result
            if self.want_unversioned and not path_handled and root_dir_info:
                new_executable = bool(
                    stat.S_ISREG(root_dir_info[3].st_mode)
                    and stat.S_IEXEC & root_dir_info[3].st_mode)
                yield (None,
                       (None, current_root_unicode),
                       True,
                       (False, False),
                       (None, None),
                       (None, splitpath(current_root_unicode)[-1]),
                       (None, root_dir_info[2]),
                       (None, new_executable)
                       )
            initial_key = (current_root, b'', b'')
            block_index, _ = self.state._find_block_index_from_key(initial_key)
            if block_index == 0:
                # we have processed the total root already, but because the
                # initial key matched it we should skip it here.
                block_index += 1
            if root_dir_info and root_dir_info[2] == 'tree-reference':
                current_dir_info = None
            else:
                dir_iterator = osutils._walkdirs_utf8(root_abspath,
                                                      prefix=current_root)
                try:
                    current_dir_info = next(dir_iterator)
                except OSError as e:
                    # on win32, python2.4 has e.errno == ERROR_DIRECTORY, but
                    # python 2.5 has e.errno == EINVAL,
                    #            and e.winerror == ERROR_DIRECTORY
                    e_winerror = getattr(e, 'winerror', None)
                    win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND)
                    # there may be directories in the inventory even though
                    # this path is not a file on disk: so mark it as end of
                    # iterator
                    if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
                        current_dir_info = None
                    elif (sys.platform == 'win32'
                          and (e.errno in win_errors
                               or e_winerror in win_errors)):
                        current_dir_info = None
                    else:
                        raise
                else:
                    if current_dir_info[0][0] == b'':
                        # remove .bzr from iteration
                        bzr_index = bisect.bisect_left(
                            current_dir_info[1], (b'.bzr',))
                        if current_dir_info[1][bzr_index][0] != b'.bzr':
                            raise AssertionError()
                        del current_dir_info[1][bzr_index]
            # walk until both the directory listing and the versioned metadata
            # are exhausted.
            if (block_index < len(self.state._dirblocks) and
                osutils.is_inside(current_root,
                                  self.state._dirblocks[block_index][0])):
                current_block = self.state._dirblocks[block_index]
            else:
                current_block = None
            while (current_dir_info is not None or
                   current_block is not None):
                if (current_dir_info and current_block
                        and current_dir_info[0][0] != current_block[0]):
                    if _lt_by_dirs(current_dir_info[0][0], current_block[0]):
                        # filesystem data refers to paths not covered by the
                        # dirblock. this has two possibilities:
                        # A) it is versioned but empty, so there is no block
                        #    for it
                        # B) it is not versioned.

                        # if (A) then we need to recurse into it to check for
                        # new unknown files or directories.
                        # if (B) then we should ignore it, because we don't
                        # recurse into unknown directories.
                        path_index = 0
                        while path_index < len(current_dir_info[1]):
                            current_path_info = current_dir_info[1][path_index]
                            if self.want_unversioned:
                                if current_path_info[2] == 'directory':
                                    if self.tree._directory_is_tree_reference(
                                            current_path_info[0].decode('utf8')):
                                        current_path_info = \
                                            current_path_info[:2] + \
                                            ('tree-reference',) + \
                                            current_path_info[3:]
                                new_executable = bool(
                                    stat.S_ISREG(current_path_info[3].st_mode)
                                    and stat.S_IEXEC & current_path_info[3].st_mode)
                                yield (None,
                                       (None, utf8_decode(current_path_info[0])[0]),
                                       True,
                                       (False, False),
                                       (None, None),
                                       (None, utf8_decode(current_path_info[1])[0]),
                                       (None, current_path_info[2]),
                                       (None, new_executable))
                            # don't descend into this unversioned path if it
                            # is a dir
                            if current_path_info[2] in ('directory',
                                                        'tree-reference'):
                                del current_dir_info[1][path_index]
                                path_index -= 1
                            path_index += 1

                        # This dir info has been handled, go to the next
                        try:
                            current_dir_info = next(dir_iterator)
                        except StopIteration:
                            current_dir_info = None
                    else:
                        # We have a dirblock entry for this location, but there
                        # is no filesystem path for this. This is most likely
                        # because a directory was removed from the disk.
                        # We don't have to report the missing directory,
                        # because that should have already been handled, but we
                        # need to handle all of the files that are contained
                        # within.
                        for current_entry in current_block[1]:
                            # entry referring to file not present on disk.
                            # advance the entry only, after processing.
                            result, changed = _process_entry(current_entry,
                                                             None)
                            if changed is not None:
                                if changed:
                                    self._gather_result_for_consistency(result)
                                if changed or self.include_unchanged:
                                    yield result
                        block_index += 1
                        if (block_index < len(self.state._dirblocks) and
                            osutils.is_inside(current_root,
                                              self.state._dirblocks[block_index][0])):
                            current_block = self.state._dirblocks[block_index]
                        else:
                            current_block = None
                    continue
                entry_index = 0
                if current_block and entry_index < len(current_block[1]):
                    current_entry = current_block[1][entry_index]
                else:
                    current_entry = None
                advance_entry = True
                path_index = 0
                if current_dir_info and path_index < len(current_dir_info[1]):
                    current_path_info = current_dir_info[1][path_index]
                    if current_path_info[2] == 'directory':
                        if self.tree._directory_is_tree_reference(
                                current_path_info[0].decode('utf8')):
                            current_path_info = current_path_info[:2] + \
                                ('tree-reference',) + current_path_info[3:]
                else:
                    current_path_info = None
                advance_path = True
                path_handled = False
                while (current_entry is not None or
                       current_path_info is not None):
                    if current_entry is None:
                        # the check for path_handled when the path is advanced
                        # will yield this path if needed.
                        pass
                    elif current_path_info is None:
                        # no path is fine: the per entry code will handle it.
                        result, changed = _process_entry(current_entry,
                                                         current_path_info)
                        if changed is not None:
                            if changed:
                                self._gather_result_for_consistency(result)
                            if changed or self.include_unchanged:
                                yield result
                    elif (current_entry[0][1] != current_path_info[1]
                          or current_entry[1][self.target_index][0] in (b'a', b'r')):
                        # The current path on disk doesn't match the dirblock
                        # record. Either the dirblock is marked as absent, or
                        # the file on disk is not present at all in the
                        # dirblock. Either way, report about the dirblock
                        # entry, and let other code handle the filesystem one.

                        # Compare the basename for these files to determine
                        # who goes first.
                        if current_path_info[1] < current_entry[0][1]:
                            # extra file on disk: pass for now, but only
                            # increment the path, not the entry
                            advance_entry = False
                        else:
                            # entry referring to file not present on disk.
                            # advance the entry only, after processing.
                            result, changed = _process_entry(current_entry,
                                                             None)
                            if changed is not None:
                                if changed:
                                    self._gather_result_for_consistency(result)
                                if changed or self.include_unchanged:
                                    yield result
                            advance_path = False
                    else:
                        result, changed = _process_entry(current_entry,
                                                         current_path_info)
                        if changed is not None:
                            path_handled = True
                            if changed:
                                self._gather_result_for_consistency(result)
                            if changed or self.include_unchanged:
                                yield result
                    if advance_entry and current_entry is not None:
                        entry_index += 1
                        if entry_index < len(current_block[1]):
                            current_entry = current_block[1][entry_index]
                        else:
                            current_entry = None
                    else:
                        advance_entry = True  # reset the advance flag
                    if advance_path and current_path_info is not None:
                        if not path_handled:
                            # unversioned in all regards
                            if self.want_unversioned:
                                new_executable = bool(
                                    stat.S_ISREG(current_path_info[3].st_mode)
                                    and stat.S_IEXEC & current_path_info[3].st_mode)
                                try:
                                    relpath_unicode = utf8_decode(
                                        current_path_info[0])[0]
                                except UnicodeDecodeError:
                                    raise errors.BadFilenameEncoding(
                                        current_path_info[0], osutils._fs_enc)
                                yield (None,
                                       (None, relpath_unicode),
                                       True,
                                       (False, False),
                                       (None, None),
                                       (None, utf8_decode(current_path_info[1])[0]),
                                       (None, current_path_info[2]),
                                       (None, new_executable))
                            # don't descend into this unversioned path if it
                            # is a dir
                            if current_path_info[2] in ('directory',):
                                del current_dir_info[1][path_index]
                                path_index -= 1
                        # don't descend the disk iterator into any tree
                        # reference.
                        if current_path_info[2] == 'tree-reference':
                            del current_dir_info[1][path_index]
                            path_index -= 1
                        path_index += 1
                        if path_index < len(current_dir_info[1]):
                            current_path_info = current_dir_info[1][path_index]
                            if current_path_info[2] == 'directory':
                                if self.tree._directory_is_tree_reference(
                                        current_path_info[0].decode('utf8')):
                                    current_path_info = current_path_info[:2] + \
                                        ('tree-reference',) + current_path_info[3:]
                        else:
                            current_path_info = None
                        path_handled = False
                    else:
                        advance_path = True  # reset the advance flag.
                if current_block is not None:
                    block_index += 1
                    if (block_index < len(self.state._dirblocks) and
                        osutils.is_inside(current_root,
                                          self.state._dirblocks[block_index][0])):
                        current_block = self.state._dirblocks[block_index]
                    else:
                        current_block = None
                if current_dir_info is not None:
                    try:
                        current_dir_info = next(dir_iterator)
                    except StopIteration:
                        current_dir_info = None
        for result in self._iter_specific_file_parents():
            yield result

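    # For reference, an unversioned file found on disk is reported with a
    # None file id and None "old" values, e.g. (values illustrative):
    #
    #   (None, (None, u'dir/unknown.txt'), True, (False, False),
    #    (None, None), (None, u'unknown.txt'), (None, 'file'),
    #    (None, False))
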
    def _iter_specific_file_parents(self):
        """Iter over the specific file parents."""
        while self.search_specific_file_parents:
            # Process the parent directories for the paths we were iterating.
            # Even in extremely large trees this should be modest, so currently
            # no attempt is made to optimise.
            path_utf8 = self.search_specific_file_parents.pop()
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
                # We've examined this path.
                continue
            if path_utf8 in self.searched_exact_paths:
                # We've examined this path.
                continue
            path_entries = self.state._entries_for_path(path_utf8)
            # We need either one or two entries. If the path in
            # self.target_index has moved (so the entry in source_index is in
            # 'ar') then we need to also look for the entry for this path in
            # self.source_index, to output the appropriate delete-or-rename.
            selected_entries = []
            found_item = False
            for candidate_entry in path_entries:
                # Find entries present in target at this path:
                if candidate_entry[1][self.target_index][0] not in (b'a', b'r'):
                    found_item = True
                    selected_entries.append(candidate_entry)
                # Find entries present in source at this path:
                elif (self.source_index is not None and
                      candidate_entry[1][self.source_index][0] not in
                      (b'a', b'r')):
                    found_item = True
                    if candidate_entry[1][self.target_index][0] == b'a':
                        # Deleted, emit it here.
                        selected_entries.append(candidate_entry)
                    else:
                        # renamed, emit it when we process the directory it
                        # ended up at.
                        self.search_specific_file_parents.add(
                            candidate_entry[1][self.target_index][1])
            if not found_item:
                raise AssertionError(
                    "Missing entry for specific path parent %r, %r" % (
                        path_utf8, path_entries))
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
            for entry in selected_entries:
                if entry[0][2] in self.seen_ids:
                    continue
                result, changed = self._process_entry(entry, path_info)
                if changed is None:
                    raise AssertionError(
                        "Got entry<->path mismatch for specific path "
                        "%r entry %r path_info %r " % (
                            path_utf8, entry, path_info))
                # Only include changes - we're outside the user's requested
                # expansion.
                if changed:
                    self._gather_result_for_consistency(result)
                    if (result[6][0] == 'directory' and
                            result[6][1] != 'directory'):
                        # This stopped being a directory, the old children have
                        # to be included.
                        if entry[1][self.source_index][0] == b'r':
                            # renamed, take the source path
                            entry_path_utf8 = entry[1][self.source_index][1]
                        else:
                            entry_path_utf8 = path_utf8
                        initial_key = (entry_path_utf8, b'', b'')
                        block_index, _ = self.state._find_block_index_from_key(
                            initial_key)
                        if block_index == 0:
                            # The children of the root are in block index 1.
                            block_index += 1
                        current_block = None
                        if block_index < len(self.state._dirblocks):
                            current_block = self.state._dirblocks[block_index]
                            if not osutils.is_inside(
                                    entry_path_utf8, current_block[0]):
                                # No entries for this directory at all.
                                current_block = None
                        if current_block is not None:
                            for entry in current_block[1]:
                                if entry[1][self.source_index][0] in (b'a', b'r'):
                                    # Not in the source tree, so doesn't have
                                    # to be included.
                                    continue
                                # Path of the entry itself.
                                self.search_specific_file_parents.add(
                                    osutils.pathjoin(*entry[0][:2]))
                if changed or self.include_unchanged:
                    yield result
            self.searched_exact_paths.add(path_utf8)

    def _path_info(self, utf8_path, unicode_path):
        """Generate path_info for unicode_path.

        :return: None if unicode_path does not exist, or a path_info tuple.
        """
        abspath = self.tree.abspath(unicode_path)
        try:
            stat = os.lstat(abspath)
        except OSError as e:
            if e.errno == errno.ENOENT:
                # the path does not exist.
                return None
            else:
                raise
        utf8_basename = utf8_path.rsplit(b'/', 1)[-1]
        dir_info = (utf8_path, utf8_basename,
                    osutils.file_kind_from_stat_mode(stat.st_mode), stat,
                    abspath)
        if dir_info[2] == 'directory':
            if self.tree._directory_is_tree_reference(unicode_path):
                dir_info = dir_info[:2] + \
                    ('tree-reference',) + dir_info[3:]
        return dir_info


# Try to load the compiled form if possible
try:
    from ._dirstate_helpers_pyx import (
        _read_dirblocks,
        bisect_dirblock,
        _bisect_path_left,
        _bisect_path_right,
        lt_by_dirs,
        pack_stat,
        ProcessEntryC as _process_entry,
        update_entry as update_entry,
        )
except ImportError as e:
    osutils.failed_to_load_extension(e)
    from ._dirstate_helpers_py import (
        _read_dirblocks,
        bisect_dirblock,
        _bisect_path_left,
        _bisect_path_right,
        lt_by_dirs,
        pack_stat,
        )
    # FIXME: It would be nice to be able to track moved lines so that the
    # corresponding python code can be moved to the _dirstate_helpers_py
    # module. I don't want to break the history for this important piece of
    # code so I left the code here -- vila 20090622
    update_entry = py_update_entry
    _process_entry = ProcessEntryPython