lines by NL. The field delimiters are omitted in the grammar, line delimiters
are not - this is done for clarity of reading. All string data is in utf8.
::

    MINIKIND = "f" | "d" | "l" | "a" | "r" | "t";
    NL = "\n";
    NULL = "\0";
    WHOLE_NUMBER = {digit}, digit;
    REVISION_ID = a non-empty utf8 string;

    dirstate format = header line, full checksum, row count, parent_details,
     ghost_details, entries;
    header line = "#bazaar dirstate flat format 3", NL;
    full checksum = "crc32: ", ["-"], WHOLE_NUMBER, NL;
    row count = "num_entries: ", WHOLE_NUMBER, NL;
    parent_details = WHOLE_NUMBER, {REVISION_ID}, NL;
    ghost_details = WHOLE_NUMBER, {REVISION_ID}, NL;

    entry = entry_key, current_entry_details, {parent_entry_details};
    entry_key = dirname, basename, fileid;
    current_entry_details = common_entry_details, working_entry_details;
    parent_entry_details = common_entry_details, history_entry_details;
    common_entry_details = MINIKIND, fingerprint, size, executable;
    working_entry_details = packed_stat;
    history_entry_details = REVISION_ID;

    fingerprint = a nonempty utf8 sequence with meaning defined by minikind.

Given this definition, the following is useful to know::

    entry (aka row) - all the data for a given key.
    entry[0]: The key (dirname, basename, fileid)
    entry[0][0]: dirname
    entry[0][1]: basename
    entry[0][2]: fileid
    entry[1]: The tree(s) data for this path and id combination.
    entry[1][0]: The current tree
    entry[1][1]: The second tree

For an entry for a tree, we have (using tree 0 - current tree) to demonstrate::

    entry[1][0][0]: minikind
    entry[1][0][1]: fingerprint
    entry[1][0][2]: size
    entry[1][0][3]: executable
    entry[1][0][4]: packed_stat
    OR (for non tree-0):
    entry[1][1][4]: revision_id
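As an illustrative sketch (not text from the module itself), an entry for a
tracked file with one parent tree would therefore look like this in memory,
where sha1, size, packed_stat and revision_id stand for concrete values::

    entry = ((b'src', b'foo.c', b'file-id-1'),          # entry_key
             [(b'f', sha1, size, False, packed_stat),   # tree 0: working
              (b'f', sha1, size, False, revision_id)])  # tree 1: basis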
There may be multiple rows at the root, one per id present in the root, so the
in memory root row is now::

    self._dirblocks[0] -> ('', [entry ...]),

and the entries in there are::

    entries[0][0]: b''
    entries[0][1]: b''
    entries[0][2]: file_id
    entries[1][0]: The tree data for the current tree for this fileid at /

Kinds::

    b'r' is a relocated entry: This path is not present in this tree with this
        id, but the id can be found at another location. The fingerprint is
        used to point to the target location.
    b'a' is an absent entry: In that tree the id is not present at this path.
    b'd' is a directory entry: This path in this tree is a directory with the
        current file id. There is no fingerprint for directories.
    b'f' is a file entry: As for directory, but it's a file. The fingerprint is
        the sha1 value of the file's canonical form, i.e. after any read
        filters have been applied to the convenience form stored in the working
        tree.
    b'l' is a symlink entry: As for directory, but a symlink. The fingerprint is
        the link target.
    b't' is a reference to a nested subtree; the fingerprint is the referenced
        revision.
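To illustrate the relocation records described above (a sketch, not from the
module): if file-id b'id-1' lives at foo in the basis tree and was renamed to
bar in the working tree, the two rows cross-reference each other through their
b'r' fingerprints::

    (b'', b'bar', b'id-1'): [(b'f', sha1, size, False, packed_stat),  # tree 0
                             (b'r', b'foo', 0, False, b'')]           # tree 1
    (b'', b'foo', b'id-1'): [(b'r', b'bar', 0, False, b''),           # tree 0
                             (b'f', sha1, size, False, revision_id)]  # tree 1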
The entries on disk and in memory are ordered according to the following keys::

    directory, as a list of components
    filename
    file-id
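A minimal sketch of that ordering as a Python sort key (illustrative; the
module implements this with its own helpers, and entry_sort_key is a
hypothetical name)::

    def entry_sort_key(entry):
        dirname, basename, file_id = entry[0]
        # directory as a list of components, then filename, then file-id
        return (dirname.split(b'/'), basename, file_id)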
--- Format 1 had the following different definition: ---
rows = dirname, NULL, basename, NULL, MINIKIND, NULL, fileid_utf8, NULL,
    WHOLE NUMBER (* size *), NULL, packed stat, NULL, sha1|symlink target,
    {PARENT ROW}
PARENT ROW = NULL, revision_utf8, NULL, MINIKIND, NULL, dirname, NULL,
    basename, NULL, WHOLE NUMBER (* size *), NULL, "y" | "n", NULL,
    SHA1

PARENT ROWs are emitted for every parent that is not in the ghosts details
line. That is, if the parents are foo, bar, baz, and the ghosts are bar, then
each row will have a PARENT ROW for foo and baz, but not for bar.
ERROR_DIRECTORY = 267
if not getattr(struct, '_compile', None):
    # Cannot pre-compile the dirstate pack_stat
    def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):
        """Convert stat values into a packed representation."""
        return _encode(_pack('>LLLLLL', st.st_size, int(st.st_mtime),
            int(st.st_ctime), st.st_dev, st.st_ino & 0xFFFFFFFF,
            st.st_mode))[:-1]
else:
    # compile the struct compiler we need, so as to only do it once
    from _struct import Struct
    _compiled_pack = Struct('>LLLLLL').pack

    def pack_stat(st, _encode=binascii.b2a_base64, _pack=_compiled_pack):
        """Convert stat values into a packed representation."""
        # jam 20060614 it isn't really worth removing more entries if we
        # are going to leave it in packed form.
        # With only st_mtime and st_mode filesize is 5.5M and read time is 275ms
        # With all entries, filesize is 5.9M and read time is maybe 280ms
        # well within the noise margin
        # base64 encoding always adds a final newline, so strip it off
        # The current version
        return _encode(_pack(st.st_size, int(st.st_mtime), int(st.st_ctime),
            st.st_dev, st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1]
        # This is 0.060s / 1.520s faster by not encoding as much information
        # return _encode(_pack('>LL', int(st.st_mtime), st.st_mode))[:-1]
        # This is not strictly faster than _encode(_pack())[:-1]
        # return '%X.%X.%X.%X.%X.%X' % (
        #     st.st_size, int(st.st_mtime), int(st.st_ctime),
        #     st.st_dev, st.st_ino, st.st_mode)
        # Similar to the _encode(_pack('>LL'))
        # return '%X.%X' % (int(st.st_mtime), st.st_mode)
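For example (an illustrative sketch, assuming the stat fields of the target
fit the packed '>LLLLLL' layout), packing a stat result yields a short base64
token with the trailing newline stripped::

    import os
    st = os.stat('.')
    key = pack_stat(st)
    # Equal stat values always produce equal keys, which is all the dirstate
    # needs; the token is opaque and only ever compared for equality.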
class DirstateCorrupt(errors.BzrError):

    _fmt = "The dirstate file (%(state)s) appears to be corrupt: %(msg)s"

    def __init__(self, state, msg):
        errors.BzrError.__init__(self)
        self.state = state
        self.msg = msg

class SHA1Provider(object):
        self._last_block_index = None
        self._last_entry_index = None
        # The set of known hash changes
        self._known_hash_changes = set()
        # How many hash changed entries can we have without saving
        self._worth_saving_limit = worth_saving_limit
        self._config_stack = config.LocationStack(urlutils.local_path_to_url(
            path))

    def __repr__(self):
        return "%s(%r)" % \
            (self.__class__.__name__, self._filename)
    def _mark_modified(self, hash_changed_entries=None, header_modified=False):
        """Mark this dirstate as modified.

        :param hash_changed_entries: if non-None, mark just these entries as
            having their hash modified.
        :param header_modified: mark the header modified as well, not just the
            dirblocks.
        """
        #trace.mutter_callsite(3, "modified hash entries: %s", hash_changed_entries)
        if hash_changed_entries:
            self._known_hash_changes.update([e[0] for e in hash_changed_entries])
            if self._dirblock_state in (DirState.NOT_IN_MEMORY,
                                        DirState.IN_MEMORY_UNMODIFIED):
                # If the dirstate is already marked as IN_MEMORY_MODIFIED, then
                # that takes precedence.
                self._dirblock_state = DirState.IN_MEMORY_HASH_MODIFIED
        else:
            # TODO: Since we now have an IN_MEMORY_HASH_MODIFIED state, we
            #       should fail noisily if someone tries to set
            #       IN_MEMORY_MODIFIED but we don't have a write-lock!
            # We don't know exactly what changed so disable smart saving
            self._dirblock_state = DirState.IN_MEMORY_MODIFIED
        if header_modified:
            self._header_state = DirState.IN_MEMORY_MODIFIED
    def _mark_unmodified(self):
        """Mark this dirstate as unmodified."""
        self._header_state = DirState.IN_MEMORY_UNMODIFIED
        self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
        self._known_hash_changes = set()
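The effect of the two markers can be summarised as follows (an illustrative
table, abbreviating the DirState state constants)::

    # before                 call                                   after
    # IN_MEMORY_UNMODIFIED   _mark_modified(hash_changed_entries)   IN_MEMORY_HASH_MODIFIED
    # IN_MEMORY_MODIFIED     _mark_modified(hash_changed_entries)   IN_MEMORY_MODIFIED (kept)
    # any state              _mark_modified()                       IN_MEMORY_MODIFIED
    # any state              _mark_unmodified()                     IN_MEMORY_UNMODIFIED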
    def add(self, path, file_id, kind, stat, fingerprint):
        """Add a path to be tracked.

        :param path: The path within the dirstate - b'' is the root, 'foo' is the
            path foo within the root, 'foo/bar' is the path bar within foo
            within the root.
        :param file_id: The file id of the path being added.
                if basename_utf8:
                    parents.add((dirname_utf8, inv_entry.parent_id))
            if old_path is None:
                old_path_utf8 = None
            else:
                old_path_utf8 = encode(old_path)
            if old_path is None:
                adds.append((None, new_path_utf8, file_id,
                             inv_to_entry(inv_entry), True))
                new_ids.add(file_id)
            elif new_path is None:
                deletes.append((old_path_utf8, None, file_id, None, True))
            elif (old_path, new_path) == root_only:
                # change things in-place
                # Note: the case of a parent directory changing its file_id
                #       tends to break optimizations here, because officially
                #       the file has actually been moved, it just happens to
                #       end up at the same path. If we can figure out how to
                #       handle that case, we can avoid a lot of add+delete
                #       pairs for objects that stay put.
                # elif old_path == new_path:
                changes.append((old_path_utf8, new_path_utf8, file_id,
                                inv_to_entry(inv_entry)))
            else:
                # Renames:
                # Because renames must preserve their children we must have
                # processed all relocations and removes before hand. The sort
                # order ensures we've examined the child paths, but we also
                # have to execute the removals, or the split to an add/delete
                # pair will result in the deleted item being reinserted, or
                # renamed items being reinserted twice - and possibly at the
                # wrong place. Splitting into a delete/add pair also simplifies
                # the handling of entries with (b'f', ...), (b'r' ...) because
                # the target of the b'r' is old_path here, and we add that to
                # deletes, meaning that the add handler does not need to check
                # for b'r' items on every pass.
                self._update_basis_apply_deletes(deletes)
                deletes = []
                # Split into an add/delete pair recursively.
                adds.append((old_path_utf8, new_path_utf8, file_id,
                             inv_to_entry(inv_entry), False))
                # Expunge deletes that we've seen so that deleted/renamed
                # children of a rename directory are handled correctly.
                new_deletes = reversed(list(
                    self._iter_child_entries(1, old_path_utf8)))
                # Remove the current contents of the tree at orig_path, and
                # reinsert at the correct new path.
                for entry in new_deletes:
                    child_dirname, child_basename, child_file_id = entry[0]
                    if child_dirname:
                        source_path = child_dirname + b'/' + child_basename
                    else:
                        source_path = child_basename
                    if new_path_utf8:
                        target_path = \
                            new_path_utf8 + source_path[len(old_path_utf8):]
                    else:
                        if old_path_utf8 == b'':
                            raise AssertionError("cannot rename directory to"
                                                 " itself")
                        target_path = source_path[len(old_path_utf8) + 1:]
                    adds.append((None, target_path, entry[0][2], entry[1][1], False))
                    deletes.append(
                        (source_path, target_path, entry[0][2], None, False))
                deletes.append(
                    (old_path_utf8, new_path_utf8, file_id, None, False))
        self._check_delta_ids_absent(new_ids, delta, 1)
        try:
            # Finish expunging deletes/first half of renames.
        # Adds are accumulated partly from renames, so can be in any input
        # order - sort it.
        # TODO: we may want to sort in dirblocks order. That way each entry
        #       will end up in the same directory, allowing the _get_entry
        #       fast-path for looking up 2 items in the same dir work.
        adds.sort(key=lambda x: x[1])
        # adds is now in lexicographic order, which places all parents before
        # their children, so we can process it linearly.
        st = static_tuple.StaticTuple
        for old_path, new_path, file_id, new_details, real_add in adds:
            dirname, basename = osutils.split(new_path)
            entry_key = st(dirname, basename, file_id)
            block_index, present = self._find_block_index_from_key(entry_key)
            if not present:
                # The block where we want to put the file is not present.
                # However, it might have just been an empty directory. Look for
                # the parent in the basis-so-far before throwing an error.
                parent_dir, parent_base = osutils.split(dirname)
                parent_block_idx, parent_entry_idx, _, parent_present = \
                    self._get_block_entry_index(parent_dir, parent_base, 1)
                if not parent_present:
                    self._raise_invalid(new_path, file_id,
                        "Unable to find block for this record."
                        " Was the parent added?")
                self._ensure_block(parent_block_idx, parent_entry_idx, dirname)

            block = self._dirblocks[block_index][1]
            entry_index, present = self._find_entry_index(entry_key, block)
            if real_add:
                if old_path is not None:
                    self._raise_invalid(new_path, file_id,
                        'considered a real add but still had old_path at %s'
                        % (old_path,))
            if present:
                entry = block[entry_index]
                basis_kind = entry[1][1][0]
                if basis_kind == b'a':
                    entry[1][1] = new_details
                elif basis_kind == b'r':
                    raise NotImplementedError()
                else:
                    self._raise_invalid(new_path, file_id,
                        "An entry was marked as a new add"
                        " but the basis target already existed")
            else:
                # The exact key was not found in the block. However, we need to
                # check if there is a key next to us that would have matched.
                # We only need to check 2 locations, because there are only 2
                # trees.
                for maybe_index in range(entry_index - 1, entry_index + 1):
                    if maybe_index < 0 or maybe_index >= len(block):
                        continue
                    maybe_entry = block[maybe_index]
                    if maybe_entry[0][:2] != (dirname, basename):
                        # Just a random neighbor
                        continue
                    if maybe_entry[0][2] == file_id:
                        raise AssertionError(
                            "_find_entry_index didn't find a key match"
                            ' but walking the data did, for %s'
                            % (entry_key,))
                    basis_kind = maybe_entry[1][1][0]
                    if basis_kind not in (b'a', b'r'):
                        self._raise_invalid(new_path, file_id,
                            "we have an add record for path, but the path"
                            " is already present with another file_id %s"
                            % (maybe_entry[0][2],))

                entry = (entry_key, [DirState.NULL_PARENT_DETAILS,
                                     new_details])
                block.insert(entry_index, entry)

            active_kind = entry[1][0][0]
            if active_kind == b'a':
                # The active record shows up as absent, this could be genuine,
                # or it could be present at some other location. We need to
                # verify.
                id_index = self._get_id_index()
                # The id_index may not be perfectly accurate for tree1, because
                # we haven't been keeping it updated. However, it should be
                # fine for tree0, and that gives us enough info for what we
                # need.
                keys = id_index.get(file_id, ())
                for key in keys:
                    block_i, entry_i, d_present, f_present = \
                        self._get_block_entry_index(key[0], key[1], 0)
                    if not f_present:
                        continue
                    active_entry = self._dirblocks[block_i][1][entry_i]
                    if (active_entry[0][2] != file_id):
                        # Some other file is at this path, we don't need to
                        # link it.
                        continue
                    real_active_kind = active_entry[1][0][0]
                    if real_active_kind in (b'a', b'r'):
                        # We found a record, which was not *this* record,
                        # which matches the file_id, but is not actually
                        # present. Something seems *really* wrong.
                        self._raise_invalid(new_path, file_id,
                            "We found a tree0 entry that doesn't make sense")
                    # Now, we've found a tree0 entry which matches the file_id
                    # but is at a different location. So update them to be
                    # rename records.
                    active_dir, active_name = active_entry[0][:2]
                    if active_dir:
                        active_path = active_dir + b'/' + active_name
                    else:
                        active_path = active_name
                    active_entry[1][1] = st(b'r', new_path, 0, False, b'')
                    entry[1][0] = st(b'r', active_path, 0, False, b'')
            elif active_kind == b'r':
                raise NotImplementedError()

            new_kind = new_details[0]
            if new_kind == b'd':
                self._ensure_block(block_index, entry_index, new_path)
    def _update_basis_apply_changes(self, changes):
        """Apply a sequence of changes to tree 1 during update_basis_by_delta.
        null = DirState.NULL_PARENT_DETAILS
        for old_path, new_path, file_id, _, real_delete in deletes:
            if real_delete != (new_path is None):
                self._raise_invalid(old_path, file_id, "bad delete delta")
            # the entry for this file_id must be in tree 1.
            dirname, basename = osutils.split(old_path)
            block_index, entry_index, dir_present, file_present = \
                self._get_block_entry_index(dirname, basename, 1)
            if not file_present:
                self._raise_invalid(old_path, file_id,
                    'basis tree does not contain removed entry')
            entry = self._dirblocks[block_index][1][entry_index]
            # The state of the entry in the 'active' WT
            active_kind = entry[1][0][0]
            if entry[0][2] != file_id:
                self._raise_invalid(old_path, file_id,
                    'mismatched file_id in tree 1')
            old_kind = entry[1][1][0]
            if active_kind in b'ar':
                # The active tree doesn't have this file_id.
                # The basis tree is changing this record. If this is a
                # rename, then we don't want the record here at all
                # anymore. If it is just an in-place change, we want the
                # record here, but we'll add it if we need to. So we just
                # delete it
                if active_kind == b'r':
                    active_path = entry[1][0][1]
                    active_entry = self._get_entry(0, file_id, active_path)
                    if active_entry[1][1][0] != b'r':
                        self._raise_invalid(old_path, file_id,
                            "Dirstate did not have matching rename entries")
                    elif active_entry[1][0][0] in b'ar':
                        self._raise_invalid(old_path, file_id,
                            "Dirstate had a rename pointing at an inactive"
                            " tree0")
                    active_entry[1][1] = null
                del self._dirblocks[block_index][1][entry_index]
                if old_kind == b'd':
                    # This was a directory, and the active tree says it
                    # doesn't exist, and now the basis tree says it doesn't
                    # exist. Remove its dirblock if present
                    (dir_block_index,
                     present) = self._find_block_index_from_key(
                        (old_path, b'', b''))
                    if present:
                        dir_block = self._dirblocks[dir_block_index][1]
                        if not dir_block:
                            # This entry is empty, go ahead and just remove it
                            del self._dirblocks[dir_block_index]
            else:
                # There is still an active record, so just mark this
                # removed.
                entry[1][1] = null
                block_i, entry_i, d_present, f_present = \
                    self._get_block_entry_index(old_path, b'', 1)
                if d_present:
                    dir_block = self._dirblocks[block_i][1]
                    for child_entry in dir_block:
                        child_basis_kind = child_entry[1][1][0]
                        if child_basis_kind not in b'ar':
                            self._raise_invalid(old_path, file_id,
                                "The file id was deleted but its children were "
                                "not deleted.")
    def _after_delta_check_parents(self, parents, index):
        """Check that parents required by the delta are all intact.
    def _get_id_index(self):
        """Get an id index of self._dirblocks.

        This maps from file_id => [(directory, name, file_id)] entries where
        that file_id appears in one of the trees.
        """
        if self._id_index is None:
            id_index = {}
            for key, tree_details in self._iter_entries():
                self._add_to_id_index(id_index, key)
            self._id_index = id_index
        return self._id_index
    def _add_to_id_index(self, id_index, entry_key):
        """Add this entry to the _id_index mapping."""
        # This code used to use a set for every entry in the id_index. However,
        # it is *rare* to have more than one entry. So a set is a large
        # overkill. And even when we do, we won't ever have more than the
        # number of parent trees. Which is still a small number (rarely >2). As
        # such, we use a simple tuple, and do our own uniqueness checks. While
        # the 'in' check is O(N) since N is nicely bounded it shouldn't ever
        # cause quadratic failure.
        file_id = entry_key[2]
        entry_key = static_tuple.StaticTuple.from_sequence(entry_key)
        if file_id not in id_index:
            id_index[file_id] = static_tuple.StaticTuple(entry_key,)
        else:
            entry_keys = id_index[file_id]
            if entry_key not in entry_keys:
                id_index[file_id] = entry_keys + (entry_key,)
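A standalone sketch of this tuple-based index (illustrative, using plain
tuples in place of StaticTuple)::

    id_index = {}

    def add_to_id_index(id_index, entry_key):
        file_id = entry_key[2]
        if file_id not in id_index:
            id_index[file_id] = (entry_key,)
        else:
            entry_keys = id_index[file_id]
            if entry_key not in entry_keys:  # O(N), N bounded by tree count
                id_index[file_id] = entry_keys + (entry_key,)

    add_to_id_index(id_index, (b'', b'foo', b'id-1'))
    add_to_id_index(id_index, (b'', b'bar', b'id-1'))  # e.g. a rename target
    # id_index[b'id-1'] == ((b'', b'foo', b'id-1'), (b'', b'bar', b'id-1'))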
    def _remove_from_id_index(self, id_index, entry_key):
        """Remove this entry from the _id_index mapping.

        It is a programming error to call this when the entry_key is not
        present.
        """
        file_id = entry_key[2]
        entry_keys = list(id_index[file_id])
        entry_keys.remove(entry_key)
        id_index[file_id] = static_tuple.StaticTuple.from_sequence(entry_keys)
    def _get_output_lines(self, lines):
        """Format lines for final output.

        :param lines: A sequence of lines containing the parents list and the
            path lines.
        """
        output_lines = [DirState.HEADER_FORMAT_3]
        lines.append(b'')  # a final newline
        inventory_text = b'\0\n\0'.join(lines)
        output_lines.append(b'crc32: %d\n' % (zlib.crc32(inventory_text),))
        # -3, 1 for num parents, 1 for ghosts, 1 for final newline
        num_entries = len(lines) - 3
        output_lines.append(b'num_entries: %d\n' % (num_entries,))
        output_lines.append(inventory_text)
        return output_lines
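For instance (an illustrative sketch with a made-up payload; real payload
lines are produced elsewhere in the module), the framing matches the grammar
at the top of the file::

    import zlib
    lines = [b'1 parent-rev-1', b'0', b'...entry rows...']  # hypothetical
    lines.append(b'')  # a final newline
    inventory_text = b'\0\n\0'.join(lines)
    output = [b'#bazaar dirstate flat format 3\n',
              b'crc32: %d\n' % (zlib.crc32(inventory_text),),
              b'num_entries: %d\n' % (len(lines) - 3,),
              inventory_text]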
    def _make_deleted_row(self, fileid_utf8, parents):
        """Return a deleted row for fileid_utf8."""
        return (b'/', b'RECYCLED.BIN', b'file', fileid_utf8, 0, DirState.NULLSTAT,
            b''), parents

    def _num_present_parents(self):
        """The number of parent entries in each record row."""
        return len(self._parents) - len(self._ghosts)
    @classmethod
    def on_file(cls, path, sha1_provider=None, worth_saving_limit=0):
        """Construct a DirState on the file at path "path".

        :param path: The path at which the dirstate file on disk should live.
        :param sha1_provider: an object meeting the SHA1Provider interface.
            If None, a DefaultSHA1Provider is used.
        :param worth_saving_limit: when the exact number of hash changed
            entries is known, only bother saving the dirstate if more than
            this count of entries have changed. -1 means never save.
        :return: An unlocked DirState object, associated with the given path.
        """
        if sha1_provider is None:
            sha1_provider = DefaultSHA1Provider()
        result = cls(path, sha1_provider,
                     worth_saving_limit=worth_saving_limit)
        return result
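Typical use might look like this (an illustrative sketch; the exact dirstate
path depends on the working tree layout)::

    state = DirState.on_file('/path/to/.bzr/checkout/dirstate',
                             worth_saving_limit=10)
    state.lock_read()
    try:
        pass  # inspect entries here
    finally:
        state.unlock()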
    def _read_dirblocks_if_needed(self):
            raise errors.BzrError(
                'invalid header line: %r' % (header,))
        crc_line = self._state_file.readline()
        if not crc_line.startswith(b'crc32: '):
            raise errors.BzrError('missing crc32 checksum: %r' % crc_line)
        self.crc_expected = int(crc_line[len(b'crc32: '):-1])
        num_entries_line = self._state_file.readline()
        if not num_entries_line.startswith(b'num_entries: '):
            raise errors.BzrError('missing num_entries line')
        self._num_entries = int(num_entries_line[len(b'num_entries: '):-1])
    def sha1_from_stat(self, path, stat_result):
        """Find a sha1 given a stat lookup."""
        return self._get_packed_stat_index().get(pack_stat(stat_result), None)

    def _get_packed_stat_index(self):
        """Get a packed_stat index of self._dirblocks."""
        if self._packed_stat_index is None:
            index = {}
            for key, tree_details in self._iter_entries():
                if tree_details[0][0] == b'f':
                    index[tree_details[0][4]] = tree_details[0][1]
            self._packed_stat_index = index
        return self._packed_stat_index
            trace.mutter('Not saving DirState because '
                         '_changes_aborted is set.')
            return
        # TODO: Since we now distinguish IN_MEMORY_MODIFIED from
        #       IN_MEMORY_HASH_MODIFIED, we should only fail quietly if we fail
        #       to save an IN_MEMORY_HASH_MODIFIED, and fail *noisily* if we
        #       fail to save IN_MEMORY_MODIFIED
        if not self._worth_saving():
            return

        grabbed_write_lock = False
        if self._lock_state != 'w':
            grabbed_write_lock, new_lock = self._lock_token.temporary_write_lock()
            # Switch over to the new lock, as the old one may be closed.
            # TODO: jam 20070315 We should validate the disk file has
            #       not changed contents, since temporary_write_lock may
            #       not be an atomic operation.
            self._lock_token = new_lock
            self._state_file = new_lock.f
            if not grabbed_write_lock:
                # We couldn't grab a write lock, so we switch back to a read one
                return
        try:
            lines = self.get_lines()
            self._state_file.seek(0)
            self._state_file.writelines(lines)
            self._state_file.truncate()
            self._state_file.flush()
            self._maybe_fdatasync()
            self._mark_unmodified()
        finally:
            if grabbed_write_lock:
                self._lock_token = self._lock_token.restore_read_lock()
                self._state_file = self._lock_token.f
                # TODO: jam 20070315 We should validate the disk file has
                #       not changed contents. Since restore_read_lock may
                #       not be an atomic operation.
    def _maybe_fdatasync(self):
        """Flush to disk if possible and if not configured off."""
        if self._config_stack.get('dirstate.fdatasync'):
            osutils.fdatasync(self._state_file.fileno())
    def _worth_saving(self):
        """Is it worth saving the dirstate or not?"""
        if (self._header_state == DirState.IN_MEMORY_MODIFIED
                or self._dirblock_state == DirState.IN_MEMORY_MODIFIED):
            return True
        if self._dirblock_state == DirState.IN_MEMORY_HASH_MODIFIED:
            if self._worth_saving_limit == -1:
                # We never save hash changes when the limit is -1
                return False
            # If we're using smart saving and only a small number of
            # entries have changed their hash, don't bother saving. John has
            # suggested using a heuristic here based on the size of the
            # changed files and/or tree. For now, we go with a configurable
            # number of changes, keeping the calculation time
            # as low overhead as possible. (This also keeps all existing
            # tests passing as the default is 0, i.e. always save.)
            if len(self._known_hash_changes) >= self._worth_saving_limit:
                return True
        return False
    def _set_data(self, parent_ids, dirblocks):
        """Set the full dirstate data in memory.
                # mapping from path,id. We need to look up the correct path
                # for the indexes from 0 to tree_index -1
                new_details = []
                for lookup_index in range(tree_index):
                    # boundary case: this is the first occurrence of file_id
                    # so there are no id_indexes, possibly take this out of
                    # the loop?
                    if not len(entry_keys):
                        new_details.append(DirState.NULL_PARENT_DETAILS)
                    else:
                        # grab any one entry, use it to find the right path.
                        # TODO: optimise this to reduce memory use in highly
                        #       fragmented situations by reusing the relocation
                        #       records.
                        a_key = next(iter(entry_keys))
                        if by_path[a_key][lookup_index][0] in (b'r', b'a'):
                            # it's a pointer or missing statement, use it as
                            # is.
                            new_details.append(by_path[a_key][lookup_index])
                        else:
                            # we have the right key, make a pointer to it.
                            real_path = (b'/'.join(a_key[0:2])).strip(b'/')
                            new_details.append(st(b'r', real_path, 0, False,
                                                  b''))
                new_details.append(self._inv_entry_to_details(entry))
                new_details.extend(new_location_suffix)
                by_path[new_entry_key] = new_details
                self._add_to_id_index(id_index, new_entry_key)
        # --- end generation of full tree mappings

        # sort and output all the entries
        new_entries = self._sort_entries(viewitems(by_path))
        self._entries_to_current_state(new_entries)
        self._parents = [rev_id for rev_id, tree in trees]
        self._ghosts = list(ghosts)
        self._mark_modified(header_modified=True)
        self._id_index = id_index
    def _sort_entries(self, entry_list):
                if not present:
                    raise AssertionError('not present: %r', entry_key)
                self._dirblocks[block_index][1][entry_index][1][0] = \
                    (b'r', path_utf8, 0, False, b'')

        # add a containing dirblock if needed.
        if new_details[0] == b'd':
            # GZ 2017-06-09: Using pathjoin why?
            subdir_key = (osutils.pathjoin(*key[0:2]), b'', b'')
            block_index, present = self._find_block_index_from_key(subdir_key)
            if not present:
                self._dirblocks.insert(block_index, (subdir_key[0], []))

        self._mark_modified()
    def _maybe_remove_row(self, block, index, id_index):
        """Remove index if it is absent or relocated across the row.

        id_index is updated accordingly.
        :return: True if we removed the row, False otherwise
        """
        present_in_row = False
        entry = block[index]
        for column in entry[1]:
            if column[0] not in (b'a', b'r'):
                present_in_row = True
                break
        if not present_in_row:
            block.pop(index)
            self._remove_from_id_index(id_index, entry[0])
            return True
        return False
    def _validate(self):
        """Check that invariants on the dirblock are correct.