lines by NL. The field delimiters are omitted in the grammar, line delimiters
are not - this is done for clarity of reading. All string data is in utf8.
MINIKIND = "f" | "d" | "l" | "a" | "r" | "t";
WHOLE_NUMBER = {digit}, digit;
REVISION_ID = a non-empty utf8 string;

dirstate format = header line, full checksum, row count, parent details,
 ghost_details, entries;
header line = "#bazaar dirstate flat format 3", NL;
full checksum = "crc32: ", ["-"], WHOLE_NUMBER, NL;
row count = "num_entries: ", WHOLE_NUMBER, NL;
parent_details = WHOLE_NUMBER, {REVISION_ID}*, NL;
ghost_details = WHOLE_NUMBER, {REVISION_ID}*, NL;

entry = entry_key, current_entry_details, {parent_entry_details};
entry_key = dirname, basename, fileid;
current_entry_details = common_entry_details, working_entry_details;
parent_entry_details = common_entry_details, history_entry_details;
common_entry_details = MINIKIND, fingerprint, size, executable;
working_entry_details = packed_stat;
history_entry_details = REVISION_ID;

fingerprint = a nonempty utf8 sequence with meaning defined by minikind.
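
For example, the first three lines of a dirstate file might read as follows
(the checksum and entry count values here are illustrative only)::

#bazaar dirstate flat format 3
crc32: 1993633683
num_entries: 12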

Given this definition, the following is useful to know::

entry (aka row) - all the data for a given key.
entry[0]: The key (dirname, basename, fileid)
entry[0][0]: dirname
entry[0][1]: basename
entry[0][2]: fileid
entry[1]: The tree(s) data for this path and id combination.
entry[1][0]: The current tree
entry[1][1]: The second tree

For an entry for a tree, we have (using tree 0, the current tree, to
demonstrate)::

entry[1][0][0]: minikind
entry[1][0][1]: fingerprint
entry[1][0][2]: size
entry[1][0][3]: executable
entry[1][0][4]: packed_stat

OR (for non tree-0)::

entry[1][1][4]: revision_id
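
As a concrete illustration (not from the original text), an entry for a file
at the root might look like this, with hypothetical sha1, size and
packed_stat values::

entry = ((b'', b'file1', b'file1-id'),               # entry[0]: the key
         [(b'f', sha1, 42, False, packed_stat),      # entry[1][0]: tree 0
          (b'f', sha1, 42, False, b'parent-revid')]) # entry[1][1]: tree 1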

There may be multiple rows at the root, one per id present in the root, so the
in memory root row is now::

self._dirblocks[0] -> ('', [entry ...]),

and the entries in there are::

entries[0][0]: b''
entries[0][1]: b''
entries[0][2]: file_id
entries[1][0]: The tree data for the current tree for this fileid at /
etc. etc. ...

b'r' is a relocated entry: This path is not present in this tree with this
id, but the id can be found at another location. The fingerprint is
used to point to the target location.
b'a' is an absent entry: In that tree the id is not present at this path.
b'd' is a directory entry: This path in this tree is a directory with the
current file id. There is no fingerprint for directories.
b'f' is a file entry: As for directory, but it's a file. The fingerprint is
the sha1 value of the file's canonical form, i.e. after any read
filters have been applied to the convenience form stored in the working
tree.
b'l' is a symlink entry: As for directory, but a symlink. The fingerprint is
the link target.
b't' is a reference to a nested subtree; the fingerprint is the referenced
revision.
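
As a compact summary (a sketch, not code from this module), the fingerprint
field is interpreted per minikind roughly as::

FINGERPRINT_MEANING = {
    b'f': "sha1 of the file's canonical form",
    b'd': "unused - directories have no fingerprint",
    b'l': "the symlink target",
    b'a': "unused - the id is absent at this path",
    b'r': "the path this id is relocated to",
    b't': "the referenced nested-subtree revision",
}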

The entries on disk and in memory are ordered according to the following keys::

directory, as a list of components
filename
file-id
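
A minimal sketch of a matching sort-key function (illustrative, not part of
this module)::

def entry_sort_key(entry):
    dirname, basename, file_id = entry[0]
    return (dirname.split(b'/'), basename, file_id)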

--- Format 1 had the following different definition: ---

rows = dirname, NULL, basename, NULL, MINIKIND, NULL, fileid_utf8, NULL,
WHOLE_NUMBER (* size *), NULL, packed stat, NULL, sha1|symlink target,
{PARENT ROW}
PARENT ROW = NULL, revision_utf8, NULL, MINIKIND, NULL, dirname, NULL,
basename, NULL, WHOLE_NUMBER (* size *), NULL, "y" | "n", NULL,
SHA1

PARENT ROWs are emitted for every parent that is not in the ghosts details
line. That is, if the parents are foo, bar, baz, and the ghosts are bar, then
each row will have a PARENT ROW for foo and baz, but not for bar.

ERROR_DIRECTORY = 267


class DirstateCorrupt(errors.BzrError):

    _fmt = "The dirstate file (%(state)s) appears to be corrupt: %(msg)s"

    def __init__(self, state, msg):
        errors.BzrError.__init__(self)
        self.state = state
        self.msg = msg
if not getattr(struct, '_compile', None):
    # Cannot pre-compile the dirstate pack_stat
    def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):
        """Convert stat values into a packed representation."""
        return _encode(_pack('>LLLLLL', st.st_size, int(st.st_mtime),
            int(st.st_ctime), st.st_dev, st.st_ino & 0xFFFFFFFF,
            st.st_mode))[:-1]
else:
    # compile the struct compiler we need, so as to only do it once
    from _struct import Struct
    _compiled_pack = Struct('>LLLLLL').pack

    def pack_stat(st, _encode=binascii.b2a_base64, _pack=_compiled_pack):
        """Convert stat values into a packed representation."""
        # jam 20060614 it isn't really worth removing more entries if we
        # are going to leave it in packed form.
        # With only st_mtime and st_mode filesize is 5.5M and read time is 275ms
        # With all entries, filesize is 5.9M and read time is maybe 280ms
        # well within the noise margin

        # base64 encoding always adds a final newline, so strip it off
        # The current version
        return _encode(_pack(st.st_size, int(st.st_mtime), int(st.st_ctime),
            st.st_dev, st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1]
        # This is 0.060s / 1.520s faster by not encoding as much information
        # return _encode(_pack('>LL', int(st.st_mtime), st.st_mode))[:-1]
        # This is not strictly faster than _encode(_pack())[:-1]
        # return '%X.%X.%X.%X.%X.%X' % (
        #     st.st_size, int(st.st_mtime), int(st.st_ctime),
        #     st.st_dev, st.st_ino, st.st_mode)
        # Similar to the _encode(_pack('>LL'))
        # return '%X.%X' % (int(st.st_mtime), st.st_mode)
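
# A minimal usage sketch (illustrative, not part of the original module):
#
#     packed = pack_stat(os.stat('some-file'))
#
# The result is a short, opaque base64 token; the dirstate only compares
# these tokens for equality and never unpacks them.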


class SHA1Provider(object):
        self._last_block_index = None
        self._last_entry_index = None
        # The set of known hash changes
        self._known_hash_changes = set()
        # How many hash changed entries can we have without saving
        self._worth_saving_limit = worth_saving_limit
        self._config_stack = config.LocationStack(urlutils.local_path_to_url(
            path))

    def __repr__(self):
        return "%s(%r)" % \
            (self.__class__.__name__, self._filename)

    def _mark_modified(self, hash_changed_entries=None, header_modified=False):
        """Mark this dirstate as modified.

        :param hash_changed_entries: if non-None, mark just these entries as
            having their hash modified.
        :param header_modified: mark the header modified as well, not just the
            list of hash changes.
        """
        #trace.mutter_callsite(3, "modified hash entries: %s", hash_changed_entries)
        if hash_changed_entries:
            self._known_hash_changes.update([e[0] for e in hash_changed_entries])
            if self._dirblock_state in (DirState.NOT_IN_MEMORY,
                                        DirState.IN_MEMORY_UNMODIFIED):
                # If the dirstate is already marked as IN_MEMORY_MODIFIED, then
                # that takes precedence.
                self._dirblock_state = DirState.IN_MEMORY_HASH_MODIFIED
        else:
            # TODO: Since we now have an IN_MEMORY_HASH_MODIFIED state, we
            #       should fail noisily if someone tries to set
            #       IN_MEMORY_MODIFIED but we don't have a write-lock!
            # We don't know exactly what changed so disable smart saving
            self._dirblock_state = DirState.IN_MEMORY_MODIFIED
        if header_modified:
            self._header_state = DirState.IN_MEMORY_MODIFIED

    def _mark_unmodified(self):
        """Mark this dirstate as unmodified."""
        self._header_state = DirState.IN_MEMORY_UNMODIFIED
        self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
        self._known_hash_changes = set()

    def add(self, path, file_id, kind, stat, fingerprint):
        """Add a path to be tracked.

        :param path: The path within the dirstate - b'' is the root, 'foo' is the
            path foo within the root, 'foo/bar' is the path bar within foo
            within the root.
        :param file_id: The file id of the path being added.
                if basename_utf8:
                    parents.add((dirname_utf8, inv_entry.parent_id))
            if old_path is None:
                old_path_utf8 = None
            else:
                old_path_utf8 = encode(old_path)
            if old_path is None:
                adds.append((None, new_path_utf8, file_id,
                             inv_to_entry(inv_entry), True))
                new_ids.add(file_id)
            elif new_path is None:
                deletes.append((old_path_utf8, None, file_id, None, True))
            elif (old_path, new_path) == root_only:
                # change things in-place
                # Note: the case of a parent directory changing its file_id
                #       tends to break optimizations here, because officially
                #       the file has actually been moved, it just happens to
                #       end up at the same path. If we can figure out how to
                #       handle that case, we can avoid a lot of add+delete
                #       pairs for objects that stay put.
                # elif old_path == new_path:
                changes.append((old_path_utf8, new_path_utf8, file_id,
                                inv_to_entry(inv_entry)))
            else:
                # Renames:
                # Because renames must preserve their children we must have
                # processed all relocations and removes before hand. The sort
                # order ensures we've examined the child paths, but we also
                # have to execute the removals, or the split to an add/delete
                # pair will result in the deleted item being reinserted, or
                # renamed items being reinserted twice - and possibly at the
                # wrong place. Splitting into a delete/add pair also simplifies
                # the handling of entries with (b'f', ...), (b'r' ...) because
                # the target of the b'r' is old_path here, and we add that to
                # deletes, meaning that the add handler does not need to check
                # for b'r' items on every pass.
                self._update_basis_apply_deletes(deletes)
                deletes = []
                # Split into an add/delete pair recursively.
                adds.append((old_path_utf8, new_path_utf8, file_id,
                             inv_to_entry(inv_entry), False))
                # Expunge deletes that we've seen so that deleted/renamed
                # children of a rename directory are handled correctly.
                new_deletes = reversed(list(
                    self._iter_child_entries(1, old_path_utf8)))
                # Remove the current contents of the tree at orig_path, and
                # reinsert at the correct new path.
                for entry in new_deletes:
                    child_dirname, child_basename, child_file_id = entry[0]
                    if child_dirname:
                        source_path = child_dirname + b'/' + child_basename
                    else:
                        source_path = child_basename
                    if new_path_utf8:
                        target_path = \
                            new_path_utf8 + source_path[len(old_path_utf8):]
                    else:
                        if old_path_utf8 == b'':
                            raise AssertionError("cannot rename directory to"
                                                 " itself")
                        target_path = source_path[len(old_path_utf8) + 1:]
                    adds.append((None, target_path, entry[0][2], entry[1][1], False))
                    deletes.append(
                        (source_path, target_path, entry[0][2], None, False))
                deletes.append(
                    (old_path_utf8, new_path_utf8, file_id, None, False))

        self._check_delta_ids_absent(new_ids, delta, 1)
        try:
            # Finish expunging deletes/first half of renames.
        # Adds are accumulated partly from renames, so can be in any input
        # order - sort it.
        # TODO: we may want to sort in dirblocks order. That way each entry
        #       will end up in the same directory, allowing the _get_entry
        #       fast-path for looking up 2 items in the same dir work.
        adds.sort(key=lambda x: x[1])
        # adds is now in lexicographic order, which places all parents before
        # their children, so we can process it linearly.
        st = static_tuple.StaticTuple
        for old_path, new_path, file_id, new_details, real_add in adds:
            dirname, basename = osutils.split(new_path)
            entry_key = st(dirname, basename, file_id)
            block_index, present = self._find_block_index_from_key(entry_key)
            if not present:
                # The block where we want to put the file is not present.
                # However, it might have just been an empty directory. Look for
                # the parent in the basis-so-far before throwing an error.
                parent_dir, parent_base = osutils.split(dirname)
                parent_block_idx, parent_entry_idx, _, parent_present = \
                    self._get_block_entry_index(parent_dir, parent_base, 1)
                if not parent_present:
                    self._raise_invalid(new_path, file_id,
                                        "Unable to find block for this record."
                                        " Was the parent added?")
                self._ensure_block(parent_block_idx, parent_entry_idx, dirname)
            block = self._dirblocks[block_index][1]
            entry_index, present = self._find_entry_index(entry_key, block)
            if real_add:
                if old_path is not None:
                    self._raise_invalid(new_path, file_id,
                                        'considered a real add but still had old_path at %s'
                                        % (old_path,))
            if present:
                entry = block[entry_index]
                basis_kind = entry[1][1][0]
                if basis_kind == b'a':
                    entry[1][1] = new_details
                elif basis_kind == b'r':
                    raise NotImplementedError()
                else:
                    self._raise_invalid(new_path, file_id,
                                        "An entry was marked as a new add"
                                        " but the basis target already existed")
            else:
                # The exact key was not found in the block. However, we need to
                # check if there is a key next to us that would have matched.
                # We only need to check 2 locations, because there are only 2
                # entries that can match.
                for maybe_index in range(entry_index - 1, entry_index + 1):
                    if maybe_index < 0 or maybe_index >= len(block):
                        continue
                    maybe_entry = block[maybe_index]
                    if maybe_entry[0][:2] != (dirname, basename):
                        # Just a random neighbor
                        continue
                    if maybe_entry[0][2] == file_id:
                        raise AssertionError(
                            '_find_entry_index didnt find a key match'
                            ' but walking the data did, for %s'
                            % (entry_key,))
                    basis_kind = maybe_entry[1][1][0]
                    if basis_kind not in (b'a', b'r'):
                        self._raise_invalid(new_path, file_id,
                                            "we have an add record for path, but the path"
                                            " is already present with another file_id %s"
                                            % (maybe_entry[0][2],))
                entry = (entry_key, [DirState.NULL_PARENT_DETAILS,
                                     new_details])
                block.insert(entry_index, entry)
            active_kind = entry[1][0][0]
            if active_kind == b'a':
                # The active record shows up as absent, this could be genuine,
                # or it could be present at some other location. We need to
                # verify.
                id_index = self._get_id_index()
                # The id_index may not be perfectly accurate for tree1, because
                # we haven't been keeping it updated. However, it should be
                # fine for tree0, and that gives us enough info for what we
                # need.
                keys = id_index.get(file_id, ())
                for key in keys:
                    block_i, entry_i, d_present, f_present = \
                        self._get_block_entry_index(key[0], key[1], 0)
                    if not f_present:
                        continue
                    active_entry = self._dirblocks[block_i][1][entry_i]
                    if (active_entry[0][2] != file_id):
                        # Some other file is at this path, we don't need to
                        # link it.
                        continue
                    real_active_kind = active_entry[1][0][0]
                    if real_active_kind in (b'a', b'r'):
                        # We found a record, which was not *this* record,
                        # which matches the file_id, but is not actually
                        # present. Something seems *really* wrong.
                        self._raise_invalid(new_path, file_id,
                                            "We found a tree0 entry that doesnt make sense")
                    # Now, we've found a tree0 entry which matches the file_id
                    # but is at a different location. So update them to be
                    # rename records.
                    active_dir, active_name = active_entry[0][:2]
                    if active_dir:
                        active_path = active_dir + b'/' + active_name
                    else:
                        active_path = active_name
                    active_entry[1][1] = st(b'r', new_path, 0, False, b'')
                    entry[1][0] = st(b'r', active_path, 0, False, b'')
            elif active_kind == b'r':
                raise NotImplementedError()
            new_kind = new_details[0]
            if new_kind == b'd':
                self._ensure_block(block_index, entry_index, new_path)

    def _update_basis_apply_changes(self, changes):
        """Apply a sequence of changes to tree 1 during update_basis_by_delta.
        null = DirState.NULL_PARENT_DETAILS
        for old_path, new_path, file_id, _, real_delete in deletes:
            if real_delete != (new_path is None):
                self._raise_invalid(old_path, file_id, "bad delete delta")
            # the entry for this file_id must be in tree 1.
            dirname, basename = osutils.split(old_path)
            block_index, entry_index, dir_present, file_present = \
                self._get_block_entry_index(dirname, basename, 1)
            if not file_present:
                self._raise_invalid(old_path, file_id,
                                    'basis tree does not contain removed entry')
            entry = self._dirblocks[block_index][1][entry_index]
            # The state of the entry in the 'active' WT
            active_kind = entry[1][0][0]
            if entry[0][2] != file_id:
                self._raise_invalid(old_path, file_id,
                                    'mismatched file_id in tree 1')
            old_kind = entry[1][1][0]
            if active_kind in b'ar':
                # The active tree doesn't have this file_id.
                # The basis tree is changing this record. If this is a
                # rename, then we don't want the record here at all
                # anymore. If it is just an in-place change, we want the
                # record here, but we'll add it if we need to. So we just
                # delete it
                if active_kind == b'r':
                    active_path = entry[1][0][1]
                    active_entry = self._get_entry(0, file_id, active_path)
                    if active_entry[1][1][0] != b'r':
                        self._raise_invalid(old_path, file_id,
                                            "Dirstate did not have matching rename entries")
                    elif active_entry[1][0][0] in b'ar':
                        self._raise_invalid(old_path, file_id,
                                            "Dirstate had a rename pointing at an inactive"
                                            " tree0")
                    active_entry[1][1] = null
                del self._dirblocks[block_index][1][entry_index]
                if old_kind == b'd':
                    # This was a directory, and the active tree says it
                    # doesn't exist, and now the basis tree says it doesn't
                    # exist. Remove its dirblock if present
                    (dir_block_index,
                     present) = self._find_block_index_from_key(
                        (old_path, b'', b''))
                    if present:
                        dir_block = self._dirblocks[dir_block_index][1]
                        if not dir_block:
                            # This entry is empty, go ahead and just remove it
                            del self._dirblocks[dir_block_index]
            else:
                # There is still an active record, so just mark this
                # removed.
                entry[1][1] = null
                block_i, entry_i, d_present, f_present = \
                    self._get_block_entry_index(old_path, b'', 1)
                if d_present:
                    dir_block = self._dirblocks[block_i][1]
                    for child_entry in dir_block:
                        child_basis_kind = child_entry[1][1][0]
                        if child_basis_kind not in b'ar':
                            self._raise_invalid(old_path, file_id,
                                                "The file id was deleted but its children were "
                                                "not deleted.")

    def _after_delta_check_parents(self, parents, index):
        """Check that parents required by the delta are all intact.

    def _get_id_index(self):
        """Get an id index of self._dirblocks.

        This maps from file_id => [(directory, name, file_id)] entries where
        that file_id appears in one of the trees.
        """
        if self._id_index is None:
            id_index = {}
            for key, tree_details in self._iter_entries():
                self._add_to_id_index(id_index, key)
            self._id_index = id_index
        return self._id_index

    def _add_to_id_index(self, id_index, entry_key):
        """Add this entry to the _id_index mapping."""
        # This code used to use a set for every entry in the id_index. However,
        # it is *rare* to have more than one entry. So a set is a large
        # overkill. And even when we do, we won't ever have more than the
        # number of parent trees. Which is still a small number (rarely >2). As
        # such, we use a simple tuple, and do our own uniqueness checks. While
        # the 'in' check is O(N) since N is nicely bounded it shouldn't ever
        # cause quadratic failure.
        file_id = entry_key[2]
        entry_key = static_tuple.StaticTuple.from_sequence(entry_key)
        if file_id not in id_index:
            id_index[file_id] = static_tuple.StaticTuple(entry_key,)
        else:
            entry_keys = id_index[file_id]
            if entry_key not in entry_keys:
                id_index[file_id] = entry_keys + (entry_key,)
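
    # Illustrative example (not part of the original module): after adding
    # the keys (b'', b'a', b'id-1') and (b'dir', b'a', b'id-1'), id_index
    # maps b'id-1' to the tuple
    #     ((b'', b'a', b'id-1'), (b'dir', b'a', b'id-1'))
    # rather than to a set, for the memory reasons described above.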

    def _remove_from_id_index(self, id_index, entry_key):
        """Remove this entry from the _id_index mapping.

        It is a programming error to call this when the entry_key is not
        already present.
        """
        file_id = entry_key[2]
        entry_keys = list(id_index[file_id])
        entry_keys.remove(entry_key)
        id_index[file_id] = static_tuple.StaticTuple.from_sequence(entry_keys)

    def _get_output_lines(self, lines):
        """Format lines for final output."""
        output_lines = [DirState.HEADER_FORMAT_3]
        lines.append(b'')  # a final newline
        inventory_text = b'\0\n\0'.join(lines)
        output_lines.append(b'crc32: %d\n' % (zlib.crc32(inventory_text),))
        # -3, 1 for num parents, 1 for ghosts, 1 for final newline
        num_entries = len(lines) - 3
        output_lines.append(b'num_entries: %d\n' % (num_entries,))
        output_lines.append(inventory_text)
        return output_lines

    def _make_deleted_row(self, fileid_utf8, parents):
        """Return a deleted row for fileid_utf8."""
        return (b'/', b'RECYCLED.BIN', b'file', fileid_utf8, 0, DirState.NULLSTAT,
                b''), parents

    def _num_present_parents(self):
        """The number of parent entries in each record row."""
        return len(self._parents) - len(self._ghosts)
    @classmethod
    def on_file(cls, path, sha1_provider=None, worth_saving_limit=0):
        """Construct a DirState on the file at path "path".

        :param path: The path at which the dirstate file on disk should live.
        :param sha1_provider: an object meeting the SHA1Provider interface.
            If None, a DefaultSHA1Provider is used.
        :param worth_saving_limit: when the exact number of hash changed
            entries is known, only bother saving the dirstate if more than
            this count of entries have changed. -1 means never save.
        :return: An unlocked DirState object, associated with the given path.
        """
        if sha1_provider is None:
            sha1_provider = DefaultSHA1Provider()
        result = cls(path, sha1_provider,
                     worth_saving_limit=worth_saving_limit)
        return result

    def _read_dirblocks_if_needed(self):
            raise errors.BzrError(
                'invalid header line: %r' % (header,))
        crc_line = self._state_file.readline()
        if not crc_line.startswith(b'crc32: '):
            raise errors.BzrError('missing crc32 checksum: %r' % crc_line)
        self.crc_expected = int(crc_line[len(b'crc32: '):-1])
        num_entries_line = self._state_file.readline()
        if not num_entries_line.startswith(b'num_entries: '):
            raise errors.BzrError('missing num_entries line')
        self._num_entries = int(num_entries_line[len(b'num_entries: '):-1])

    def sha1_from_stat(self, path, stat_result):
        """Find a sha1 given a stat lookup."""
        return self._get_packed_stat_index().get(pack_stat(stat_result), None)

    def _get_packed_stat_index(self):
        """Get a packed_stat index of self._dirblocks."""
        if self._packed_stat_index is None:
            index = {}
            for key, tree_details in self._iter_entries():
                if tree_details[0][0] == b'f':
                    index[tree_details[0][4]] = tree_details[0][1]
            self._packed_stat_index = index
        return self._packed_stat_index
            trace.mutter('Not saving DirState because '
                         '_changes_aborted is set.')
            return
        # TODO: Since we now distinguish IN_MEMORY_MODIFIED from
        #       IN_MEMORY_HASH_MODIFIED, we should only fail quietly if we fail
        #       to save an IN_MEMORY_HASH_MODIFIED, and fail *noisily* if we
        #       fail to save IN_MEMORY_MODIFIED
        if not self._worth_saving():
            return

        grabbed_write_lock = False
        if self._lock_state != 'w':
            grabbed_write_lock, new_lock = self._lock_token.temporary_write_lock()
            # Switch over to the new lock, as the old one may be closed.
            # TODO: jam 20070315 We should validate the disk file has
            #       not changed contents, since temporary_write_lock may
            #       not be an atomic operation.
            self._lock_token = new_lock
            self._state_file = new_lock.f
            if not grabbed_write_lock:
                # We couldn't grab a write lock, so we switch back to a read one
                return
        try:
            lines = self.get_lines()
            self._state_file.seek(0)
            self._state_file.writelines(lines)
            self._state_file.truncate()
            self._state_file.flush()
            self._maybe_fdatasync()
            self._mark_unmodified()
        finally:
            if grabbed_write_lock:
                self._lock_token = self._lock_token.restore_read_lock()
                self._state_file = self._lock_token.f
                # TODO: jam 20070315 We should validate the disk file has
                #       not changed contents. Since restore_read_lock may
                #       not be an atomic operation.

    def _maybe_fdatasync(self):
        """Flush to disk if possible and if not configured off."""
        if self._config_stack.get('dirstate.fdatasync'):
            osutils.fdatasync(self._state_file.fileno())

    def _worth_saving(self):
        """Is it worth saving the dirstate or not?"""
        if (self._header_state == DirState.IN_MEMORY_MODIFIED
                or self._dirblock_state == DirState.IN_MEMORY_MODIFIED):
            return True
        if self._dirblock_state == DirState.IN_MEMORY_HASH_MODIFIED:
            if self._worth_saving_limit == -1:
                # We never save hash changes when the limit is -1
                return False
            # If we're using smart saving and only a small number of
            # entries have changed their hash, don't bother saving. John has
            # suggested using a heuristic here based on the size of the
            # changed files and/or tree. For now, we go with a configurable
            # number of changes, keeping the calculation time
            # as low overhead as possible. (This also keeps all existing
            # tests passing as the default is 0, i.e. always save.)
            if len(self._known_hash_changes) >= self._worth_saving_limit:
                return True
        return False

    def _set_data(self, parent_ids, dirblocks):
        """Set the full dirstate data in memory.
                        # mapping from path,id. We need to look up the correct path
                        # for the indexes from 0 to tree_index - 1
                        new_details = []
                        for lookup_index in range(tree_index):
                            # boundary case: this is the first occurrence of file_id
                            # so there are no id_indexes, possibly take this out of
                            # the loop?
                            if not len(entry_keys):
                                new_details.append(DirState.NULL_PARENT_DETAILS)
                            else:
                                # grab any one entry, use it to find the right path.
                                a_key = next(iter(entry_keys))
                                if by_path[a_key][lookup_index][0] in (b'r', b'a'):
                                    # it's a pointer or missing statement, use it as
                                    # is.
                                    new_details.append(by_path[a_key][lookup_index])
                                else:
                                    # we have the right key, make a pointer to it.
                                    real_path = (b'/'.join(a_key[0:2])).strip(b'/')
                                    new_details.append(st(b'r', real_path, 0, False,
                                                          b''))
                        new_details.append(self._inv_entry_to_details(entry))
                        new_details.extend(new_location_suffix)
                        by_path[new_entry_key] = new_details
                        self._add_to_id_index(id_index, new_entry_key)
        # --- end generation of full tree mappings

        # sort and output all the entries
        new_entries = self._sort_entries(viewitems(by_path))
        self._entries_to_current_state(new_entries)
        self._parents = [rev_id for rev_id, tree in trees]
        self._ghosts = list(ghosts)
        self._mark_modified(header_modified=True)
        self._id_index = id_index

    def _sort_entries(self, entry_list):
            if not present:
                raise AssertionError('not present: %r', entry_key)
            self._dirblocks[block_index][1][entry_index][1][0] = \
                (b'r', path_utf8, 0, False, b'')
        # add a containing dirblock if needed.
        if new_details[0] == b'd':
            # GZ 2017-06-09: Using pathjoin why?
            subdir_key = (osutils.pathjoin(*key[0:2]), b'', b'')
            block_index, present = self._find_block_index_from_key(subdir_key)
            if not present:
                self._dirblocks.insert(block_index, (subdir_key[0], []))

        self._mark_modified()

    def _maybe_remove_row(self, block, index, id_index):
        """Remove index if it is absent or relocated across the row.

        id_index is updated accordingly.
        :return: True if we removed the row, False otherwise
        """
        present_in_row = False
        entry = block[index]
        for column in entry[1]:
            if column[0] not in (b'a', b'r'):
                present_in_row = True
                break
        if not present_in_row:
            block.pop(index)
            self._remove_from_id_index(id_index, entry[0])
            return True
        return False

    def _validate(self):
        """Check that invariants on the dirblock are correct.