716
714
class CommonInventory(object):
717
"""Basic inventory logic, defined in terms of primitives like has_id."""
715
"""Basic inventory logic, defined in terms of primitives like has_id.
717
An inventory is the metadata about the contents of a tree.
719
This is broadly a map from file_id to entries such as directories, files,
720
symlinks and tree references. Each entry maintains its own metadata like
721
SHA1 and length for files, or children for a directory.
723
Entries can be looked up either by path or by file_id.
725
InventoryEntry objects must not be modified after they are
726
inserted, other than through the Inventory API.
719
729
def __contains__(self, file_id):
720
730
"""True if this entry contains a file with given id.
1023
1033
class Inventory(CommonInventory):
1024
"""Inventory of versioned files in a tree.
1026
This describes which file_id is present at each point in the tree,
1027
and possibly the SHA-1 or other information about the file.
1028
Entries can be looked up either by path or by file_id.
1030
The inventory represents a typical unix file tree, with
1031
directories containing files and subdirectories. We never store
1032
the full path to a file, because renaming a directory implicitly
1033
moves all of its contents. This class internally maintains a
1034
"""Mutable dict based in-memory inventory.
1036
We never store the full path to a file, because renaming a directory
1037
implicitly moves all of its contents. This class internally maintains a
1034
1038
lookup tree that allows the children under a directory to be
1035
1039
returned quickly.
1037
InventoryEntry objects must not be modified after they are
1038
inserted, other than through the Inventory API.
1040
1041
>>> inv = Inventory()
1041
1042
>>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
1042
1043
InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT', sha1=None, len=None, revision=None)
1043
1044
>>> inv['123-123'].name
1046
May be treated as an iterator or set to look up file ids:
1047
Id's may be looked up from paths:
1048
>>> bool(inv.path2id('hello.c'))
1049
>>> inv.path2id('hello.c')
1050
1051
>>> '123-123' in inv
1053
May also look up by name:
1054
There are iterators over the contents:
1055
>>> [x[0] for x in inv.iter_entries()]
1056
>>> [entry[0] for entry in inv.iter_entries()]
1056
1057
['', u'hello.c']
1057
>>> inv = Inventory('TREE_ROOT-12345678-12345678')
1058
>>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
1059
Traceback (most recent call last):
1060
BzrError: parent_id {TREE_ROOT} not in inventory
1061
>>> inv.add(InventoryFile('123-123', 'hello.c', 'TREE_ROOT-12345678-12345678'))
1062
InventoryFile('123-123', 'hello.c', parent_id='TREE_ROOT-12345678-12345678', sha1=None, len=None, revision=None)
1064
1060
def __init__(self, root_id=ROOT_ID, revision_id=None):
1065
1061
"""Create or read an inventory.
1090
1086
def apply_delta(self, delta):
1091
1087
"""Apply a delta to this inventory.
1089
See the inventory developers documentation for the theory behind
1092
If delta application fails the inventory is left in an indeterminate
1093
state and must not be used.
1093
1095
:param delta: A list of changes to apply. After all the changes are
1094
1096
applied the final inventory must be internally consistent, but it
1095
1097
is ok to supply changes which, if only half-applied would have an
1127
1129
# Check that the delta is legal. It would be nice if this could be
1128
1130
# done within the loops below but it's safer to validate the delta
1129
# before starting to mutate the inventory.
1130
unique_file_ids = set([f for _, _, f, _ in delta])
1131
if len(unique_file_ids) != len(delta):
1132
raise AssertionError("a file-id appears multiple times in %r"
1131
# before starting to mutate the inventory, as there isn't a rollback
1133
list(_check_delta_unique_ids(_check_delta_unique_new_paths(
1134
_check_delta_unique_old_paths(_check_delta_ids_match_entry(
1135
_check_delta_ids_are_valid(
1136
_check_delta_new_path_entry_both_or_None(
1137
1140
# Remove all affected items which were in the original inventory,
1140
1143
# modified children remaining by the time we examine it.
1141
1144
for old_path, file_id in sorted(((op, f) for op, np, f, e in delta
1142
1145
if op is not None), reverse=True):
1143
if file_id not in self:
1146
1146
# Preserve unaltered children of file_id for later reinsertion.
1147
1147
file_id_children = getattr(self[file_id], 'children', {})
1148
1148
if len(file_id_children):
1149
1149
children[file_id] = file_id_children
1150
if self.id2path(file_id) != old_path:
1151
raise errors.InconsistentDelta(old_path, file_id,
1152
"Entry was at wrong other path %r." % self.id2path(file_id))
1150
1153
# Remove file_id and the unaltered children. If file_id is not
1151
1154
# being deleted it will be reinserted back later.
1152
1155
self.remove_recursive_id(file_id)
1155
1158
# longest, ensuring that items which were modified and whose parents in
1156
1159
# the resulting inventory were also modified, are inserted after their
1158
for new_path, new_entry in sorted((np, e) for op, np, f, e in
1161
for new_path, f, new_entry in sorted((np, f, e) for op, np, f, e in
1159
1162
delta if np is not None):
1160
1163
if new_entry.kind == 'directory':
1161
1164
# Pop the child which to allow detection of children whose
1166
1169
replacement.revision = new_entry.revision
1167
1170
replacement.children = children.pop(replacement.file_id, {})
1168
1171
new_entry = replacement
1174
except errors.DuplicateFileId:
1175
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1176
"New id is already present in target.")
1177
except AttributeError:
1178
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1179
"Parent is not a directory.")
1180
if self.id2path(new_entry.file_id) != new_path:
1181
raise errors.InconsistentDelta(new_path, new_entry.file_id,
1182
"New path is not consistent with parent path.")
1170
1183
if len(children):
1171
1184
# Get the parent id that was deleted
1172
1185
parent_id, children = children.popitem()
1252
1265
To add a file to a branch ready to be committed, use Branch.add,
1253
1266
which calls this.
1255
Returns the new entry object.
1257
1270
if entry.file_id in self._byid:
1258
1271
raise errors.DuplicateFileId(entry.file_id,
1259
1272
self._byid[entry.file_id])
1261
1273
if entry.parent_id is None:
1262
1274
self.root = entry
1265
1277
parent = self._byid[entry.parent_id]
1266
1278
except KeyError:
1267
raise BzrError("parent_id {%s} not in inventory" %
1279
raise errors.InconsistentDelta("<unknown>", entry.parent_id,
1280
"Parent not in inventory.")
1270
1281
if entry.name in parent.children:
1271
raise BzrError("%s is already versioned" %
1272
osutils.pathjoin(self.id2path(parent.file_id),
1273
entry.name).encode('utf-8'))
1282
raise errors.InconsistentDelta(
1283
self.id2path(parent.children[entry.name].file_id),
1285
"Path already versioned")
1274
1286
parent.children[entry.name] = entry
1275
1287
return self._add_child(entry)
1469
1481
self._fileid_to_entry_cache = {}
1470
1482
self._path_to_fileid_cache = {}
1471
1483
self._search_key_name = search_key_name
1486
def __eq__(self, other):
1487
"""Compare two sets by comparing their contents."""
1488
if not isinstance(other, CHKInventory):
1489
return NotImplemented
1491
this_key = self.id_to_entry.key()
1492
other_key = other.id_to_entry.key()
1493
this_pid_key = self.parent_id_basename_to_file_id.key()
1494
other_pid_key = other.parent_id_basename_to_file_id.key()
1495
if None in (this_key, this_pid_key, other_key, other_pid_key):
1497
return this_key == other_key and this_pid_key == other_pid_key
1473
1499
def _entry_to_bytes(self, entry):
1474
1500
"""Serialise entry as a single bytestring.
1586
1616
search_key_func=search_key_func)
1587
1617
result.id_to_entry._ensure_root()
1588
1618
result.id_to_entry._root_node.set_maximum_size(maximum_size)
1589
parent_id_basename_delta = []
1619
# Change to apply to the parent_id_basename delta. The dict maps
1620
# (parent_id, basename) -> (old_key, new_value). We use a dict because
1621
# when a path has its id replaced (e.g. the root is changed, or someone
1622
# does bzr mv a b, bzr mv c a, we should output a single change to this
1623
# map rather than two.
1624
parent_id_basename_delta = {}
1590
1625
if self.parent_id_basename_to_file_id is not None:
1591
1626
result.parent_id_basename_to_file_id = chk_map.CHKMap(
1592
1627
self.parent_id_basename_to_file_id._store,
1602
1637
result.parent_id_basename_to_file_id = None
1603
1638
result.root_id = self.root_id
1604
1639
id_to_entry_delta = []
1640
# inventory_delta is only traversed once, so we just update the
1642
# Check for repeated file ids
1643
inventory_delta = _check_delta_unique_ids(inventory_delta)
1644
# Repeated old paths
1645
inventory_delta = _check_delta_unique_old_paths(inventory_delta)
1646
# Check for repeated new paths
1647
inventory_delta = _check_delta_unique_new_paths(inventory_delta)
1648
# Check for entries that don't match the fileid
1649
inventory_delta = _check_delta_ids_match_entry(inventory_delta)
1650
# Check for nonsense fileids
1651
inventory_delta = _check_delta_ids_are_valid(inventory_delta)
1652
# Check for new_path <-> entry consistency
1653
inventory_delta = _check_delta_new_path_entry_both_or_None(
1655
# All changed entries need to have their parents be directories and be
1656
# at the right path. This set contains (path, id) tuples.
1658
# When we delete an item, all the children of it must be either deleted
1659
# or altered in their own right. As we batch process the change via
1660
# CHKMap.apply_delta, we build a set of things to use to validate the
1605
1664
for old_path, new_path, file_id, entry in inventory_delta:
1606
1665
# file id changes
1607
1666
if new_path == '':
1616
1675
del result._path_to_fileid_cache[old_path]
1617
1676
except KeyError:
1678
deletes.add(file_id)
1620
1680
new_key = (file_id,)
1621
1681
new_value = result._entry_to_bytes(entry)
1622
1682
# Update caches. It's worth doing this whether
1623
1683
# we're propagating the old caches or not.
1624
1684
result._path_to_fileid_cache[new_path] = file_id
1685
parents.add((split(new_path)[0], entry.parent_id))
1625
1686
if old_path is None:
1628
1689
old_key = (file_id,)
1690
if self.id2path(file_id) != old_path:
1691
raise errors.InconsistentDelta(old_path, file_id,
1692
"Entry was at wrong other path %r." %
1693
self.id2path(file_id))
1694
altered.add(file_id)
1629
1695
id_to_entry_delta.append((old_key, new_key, new_value))
1630
1696
if result.parent_id_basename_to_file_id is not None:
1631
1697
# parent_id, basename changes
1641
1707
new_key = self._parent_id_basename_key(entry)
1642
1708
new_value = file_id
1709
# If the two keys are the same, the value will be unchanged
1710
# as its always the file id for this entry.
1643
1711
if old_key != new_key:
1644
# If the two keys are the same, the value will be unchanged
1645
# as its always the file id.
1646
parent_id_basename_delta.append((old_key, new_key, new_value))
1712
# Transform a change into explicit delete/add preserving
1713
# a possible match on the key from a different file id.
1714
if old_key is not None:
1715
parent_id_basename_delta.setdefault(
1716
old_key, [None, None])[0] = old_key
1717
if new_key is not None:
1718
parent_id_basename_delta.setdefault(
1719
new_key, [None, None])[1] = new_value
1720
# validate that deletes are complete.
1721
for file_id in deletes:
1722
entry = self[file_id]
1723
if entry.kind != 'directory':
1725
# This loop could potentially be better by using the id_basename
1726
# map to just get the child file ids.
1727
for child in entry.children.values():
1728
if child.file_id not in altered:
1729
raise errors.InconsistentDelta(self.id2path(child.file_id),
1730
child.file_id, "Child not deleted or reparented when "
1647
1732
result.id_to_entry.apply_delta(id_to_entry_delta)
1648
1733
if parent_id_basename_delta:
1649
result.parent_id_basename_to_file_id.apply_delta(parent_id_basename_delta)
1734
# Transform the parent_id_basename delta data into a linear delta
1735
# with only one record for a given key. Optimally this would allow
1736
# re-keying, but its simpler to just output that as a delete+add
1737
# to spend less time calculating the delta.
1739
for key, (old_key, value) in parent_id_basename_delta.iteritems():
1740
if value is not None:
1741
delta_list.append((old_key, key, value))
1743
delta_list.append((old_key, None, None))
1744
result.parent_id_basename_to_file_id.apply_delta(delta_list)
1745
parents.discard(('', None))
1746
for parent_path, parent in parents:
1748
if result[parent].kind != 'directory':
1749
raise errors.InconsistentDelta(result.id2path(parent), parent,
1750
'Not a directory, but given children')
1751
except errors.NoSuchId:
1752
raise errors.InconsistentDelta("<unknown>", parent,
1753
"Parent is not present in resulting inventory.")
1754
if result.path2id(parent_path) != parent:
1755
raise errors.InconsistentDelta(parent_path, parent,
1756
"Parent has wrong path %r." % result.path2id(parent_path))
1716
1823
:param maximum_size: The CHKMap node size limit.
1717
1824
:param search_key_name: The identifier for the search key function
1719
result = CHKInventory(search_key_name)
1826
result = klass(search_key_name)
1720
1827
result.revision_id = inventory.revision_id
1721
1828
result.root_id = inventory.root.file_id
1722
search_key_func = chk_map.search_key_registry.get(search_key_name)
1723
result.id_to_entry = chk_map.CHKMap(chk_store, None, search_key_func)
1724
result.id_to_entry._root_node.set_maximum_size(maximum_size)
1726
result.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store,
1727
None, search_key_func)
1728
result.parent_id_basename_to_file_id._root_node.set_maximum_size(
1730
result.parent_id_basename_to_file_id._root_node._key_width = 2
1731
parent_id_delta = []
1830
entry_to_bytes = result._entry_to_bytes
1831
parent_id_basename_key = result._parent_id_basename_key
1832
id_to_entry_dict = {}
1833
parent_id_basename_dict = {}
1732
1834
for path, entry in inventory.iter_entries():
1733
file_id_delta.append((None, (entry.file_id,),
1734
result._entry_to_bytes(entry)))
1735
parent_id_delta.append(
1736
(None, result._parent_id_basename_key(entry),
1738
result.id_to_entry.apply_delta(file_id_delta)
1739
result.parent_id_basename_to_file_id.apply_delta(parent_id_delta)
1835
id_to_entry_dict[(entry.file_id,)] = entry_to_bytes(entry)
1836
p_id_key = parent_id_basename_key(entry)
1837
parent_id_basename_dict[p_id_key] = entry.file_id
1839
result._populate_from_dicts(chk_store, id_to_entry_dict,
1840
parent_id_basename_dict, maximum_size=maximum_size)
1843
def _populate_from_dicts(self, chk_store, id_to_entry_dict,
1844
parent_id_basename_dict, maximum_size):
1845
search_key_func = chk_map.search_key_registry.get(self._search_key_name)
1846
root_key = chk_map.CHKMap.from_dict(chk_store, id_to_entry_dict,
1847
maximum_size=maximum_size, key_width=1,
1848
search_key_func=search_key_func)
1849
self.id_to_entry = chk_map.CHKMap(chk_store, root_key,
1851
root_key = chk_map.CHKMap.from_dict(chk_store,
1852
parent_id_basename_dict,
1853
maximum_size=maximum_size, key_width=2,
1854
search_key_func=search_key_func)
1855
self.parent_id_basename_to_file_id = chk_map.CHKMap(chk_store,
1856
root_key, search_key_func)
1742
1858
def _parent_id_basename_key(self, entry):
1743
1859
"""Create a key for a entry in a parent_id_basename_to_file_id index."""
1744
1860
if entry.parent_id is not None:
1903
2019
def path2id(self, name):
1904
2020
"""See CommonInventory.path2id()."""
2021
# TODO: perhaps support negative hits?
1905
2022
result = self._path_to_fileid_cache.get(name, None)
1907
result = CommonInventory.path2id(self, name)
1908
self._path_to_fileid_cache[name] = result
2023
if result is not None:
2025
if isinstance(name, basestring):
2026
names = osutils.splitpath(name)
2029
current_id = self.root_id
2030
if current_id is None:
2032
parent_id_index = self.parent_id_basename_to_file_id
2033
for basename in names:
2034
# TODO: Cache each path we figure out in this function.
2035
basename_utf8 = basename.encode('utf8')
2036
key_filter = [(current_id, basename_utf8)]
2038
for (parent_id, name_utf8), file_id in parent_id_index.iteritems(
2039
key_filter=key_filter):
2040
if parent_id != current_id or name_utf8 != basename_utf8:
2041
raise errors.BzrError("corrupt inventory lookup! "
2042
"%r %r %r %r" % (parent_id, current_id, name_utf8,
2046
current_id = file_id
2047
self._path_to_fileid_cache[name] = current_id
1911
2050
def to_lines(self):
1912
2051
"""Serialise the inventory to lines."""
2046
2185
_NAME_RE = re.compile(r'^[^/\\]+$')
2048
2187
return bool(_NAME_RE.match(name))
2190
def _check_delta_unique_ids(delta):
2191
"""Decorate a delta and check that the file ids in it are unique.
2193
:return: A generator over delta.
2197
length = len(ids) + 1
2199
if len(ids) != length:
2200
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2205
def _check_delta_unique_new_paths(delta):
2206
"""Decorate a delta and check that the new paths in it are unique.
2208
:return: A generator over delta.
2212
length = len(paths) + 1
2214
if path is not None:
2216
if len(paths) != length:
2217
raise errors.InconsistentDelta(path, item[2], "repeated path")
2221
def _check_delta_unique_old_paths(delta):
2222
"""Decorate a delta and check that the old paths in it are unique.
2224
:return: A generator over delta.
2228
length = len(paths) + 1
2230
if path is not None:
2232
if len(paths) != length:
2233
raise errors.InconsistentDelta(path, item[2], "repeated path")
2237
def _check_delta_ids_are_valid(delta):
2238
"""Decorate a delta and check that the ids in it are valid.
2240
:return: A generator over delta.
2245
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2246
"entry with file_id None %r" % entry)
2247
if type(item[2]) != str:
2248
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2249
"entry with non bytes file_id %r" % entry)
2253
def _check_delta_ids_match_entry(delta):
2254
"""Decorate a delta and check that the ids in it match the entry.file_id.
2256
:return: A generator over delta.
2260
if entry is not None:
2261
if entry.file_id != item[2]:
2262
raise errors.InconsistentDelta(item[0] or item[1], item[2],
2263
"mismatched id with %r" % entry)
2267
def _check_delta_new_path_entry_both_or_None(delta):
2268
"""Decorate a delta and check that the new_path and entry are paired.
2270
:return: A generator over delta.
2275
if new_path is None and entry is not None:
2276
raise errors.InconsistentDelta(item[0], item[1],
2277
"Entry with no new_path")
2278
if new_path is not None and entry is None:
2279
raise errors.InconsistentDelta(new_path, item[1],
2280
"new_path with no entry")