# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Inventory delta serialisation.

See doc/developers/inventory.txt for the design and rationalisation.

In this module the interesting classes are:
 - InventoryDeltaSerializer - object to read/write inventory deltas.
"""

# Only names that actually exist in this module may be listed here;
# otherwise ``from <module> import *`` fails with AttributeError.
__all__ = ['InventoryDeltaSerializer']

from bzrlib import errors, lazy_regex
28
from bzrlib.osutils import basename, sha_string, sha_strings
29
from bzrlib import inventory
30
from bzrlib.revision import NULL_REVISION
31
from bzrlib.tsort import topo_sort
34
def _directory_content(entry):
35
"""Serialise the content component of entry which is a directory.
37
:param entry: An InventoryDirectory.
42
def _file_content(entry):
43
"""Serialise the content component of entry which is a file.
45
:param entry: An InventoryFile.
51
size_exec_sha = (entry.text_size, exec_bytes, entry.text_sha1)
52
if None in size_exec_sha:
53
raise errors.BzrError('Missing size or sha for %s' % entry.file_id)
54
return "file\x00%d\x00%s\x00%s" % size_exec_sha
57
def _link_content(entry):
58
"""Serialise the content component of entry which is a symlink.
60
:param entry: An InventoryLink.
62
target = entry.symlink_target
64
raise errors.BzrError('Missing target for %s' % entry.file_id)
65
return "link\x00%s" % target.encode('utf8')
68
def _reference_content(entry):
69
"""Serialise the content component of entry which is a tree-reference.
71
:param entry: A TreeReference.
73
tree_revision = entry.reference_revision
74
if tree_revision is None:
75
raise errors.BzrError('Missing reference revision for %s' % entry.file_id)
76
return "tree\x00%s" % tree_revision
79
def _dir_to_entry(content, name, parent_id, file_id, last_modified,
                  _type=inventory.InventoryDirectory):
    """Convert a dir content record to an InventoryDirectory.

    :param content: The content fields of the record; unused for
        directories.
    :param name: The name of the entry.
    :param parent_id: The file id of the parent directory.
    :param file_id: The file id of the entry.
    :param last_modified: The revision id stored as the entry's revision.
    :param _type: The class to instantiate; bound as a default argument.
    :return: The new entry.
    """
    result = _type(file_id, name, parent_id)
    result.revision = last_modified
    # NOTE(review): the return was missing from the damaged listing;
    # _parse_entry returns the value of this call.
    return result


def _file_to_entry(content, name, parent_id, file_id, last_modified,
                   _type=inventory.InventoryFile):
    """Convert a file content record to an InventoryFile.

    :param content: The content fields of the record: content[1] is the
        text size, content[2] the executable marker and content[3] the
        text sha1.
    :param name: The name of the entry.
    :param parent_id: The file id of the parent directory.
    :param file_id: The file id of the entry.
    :param last_modified: The revision id stored as the entry's revision.
    :param _type: The class to instantiate; bound as a default argument.
    :return: The new entry.
    """
    result = _type(file_id, name, parent_id)
    result.revision = last_modified
    result.text_size = int(content[1])
    result.text_sha1 = content[3]
    # NOTE(review): the condition was missing from the damaged listing. Any
    # non-empty executable field is read as "executable", matching the
    # empty field written for non-executable files.
    result.executable = bool(content[2])
    return result


def _link_to_entry(content, name, parent_id, file_id, last_modified,
                   _type=inventory.InventoryLink):
    """Convert a link content record to an InventoryLink.

    :param content: The content fields of the record; content[1] is the
        UTF-8 encoded symlink target.
    :param name: The name of the entry.
    :param parent_id: The file id of the parent directory.
    :param file_id: The file id of the entry.
    :param last_modified: The revision id stored as the entry's revision.
    :param _type: The class to instantiate; bound as a default argument.
    :return: The new entry.
    """
    result = _type(file_id, name, parent_id)
    result.revision = last_modified
    result.symlink_target = content[1].decode('utf8')
    # NOTE(review): the return was missing from the damaged listing;
    # _parse_entry returns the value of this call.
    return result


def _tree_to_entry(content, name, parent_id, file_id, last_modified,
                   _type=inventory.TreeReference):
    """Convert a tree content record to a TreeReference.

    :param content: The content fields of the record; content[1] is the
        reference revision.
    :param name: The name of the entry.
    :param parent_id: The file id of the parent directory.
    :param file_id: The file id of the entry.
    :param last_modified: The revision id stored as the entry's revision.
    :param _type: The class to instantiate; bound as a default argument.
    :return: The new entry.
    """
    result = _type(file_id, name, parent_id)
    result.revision = last_modified
    result.reference_revision = content[1]
    # NOTE(review): the return was missing from the damaged listing;
    # _parse_entry returns the value of this call.
    return result


class InventoryDeltaSerializer(object):
    """Serialize and deserialize inventory deltas."""

    # NOTE(review): this class was rebuilt from a damaged listing in which
    # all indentation and every short line (closing brackets, ``else:``,
    # short ``if``/``return`` statements) had been lost. The restored lines
    # follow from the surrounding code but should be confirmed upstream.

    FORMAT_1 = 'bzr inventory delta v1 (bzr 1.14)'
    # Matches the leading fields of a serialised delta line, capturing the
    # path, the file id and the revision id; the parent id field in between
    # is skipped.
    _file_ids_altered_regex = lazy_regex.lazy_compile(
        '^(?P<path_utf8>[^\x00]+)\x00(?P<file_id>[^\x00]+)\x00[^\x00]*\x00'
        '(?P<revision_id>[^\x00]+)\x00'
        )

    def __init__(self, versioned_root, tree_references):
        """Create an InventoryDeltaSerializer.

        :param versioned_root: If True, any root entry that is seen is expected
            to be versioned, and root entries can have any fileid.
        :param tree_references: If True support tree-reference entries.
        """
        self._versioned_root = versioned_root
        self._tree_references = tree_references
        # Maps an entry kind to the function serialising its content.
        self._entry_to_content = {
            'directory': _directory_content,
            'file': _file_content,
            'symlink': _link_content,
        }
        # Without tree-reference support such entries have no serialiser,
        # so _delta_item_to_line fails with KeyError on them.
        if tree_references:
            self._entry_to_content['tree-reference'] = _reference_content

    def delta_to_lines(self, old_name, new_name, delta_to_new):
        """Return a line sequence for delta_to_new.

        :param old_name: A UTF8 revision id for the old inventory. May be
            NULL_REVISION if there is no older inventory and delta_to_new
            includes the entire inventory contents.
        :param new_name: The version name of the inventory we create with this
            delta.
        :param delta_to_new: An inventory delta such as Inventory.apply_delta
            takes.
        :return: The serialised delta as lines.
        :raises BzrError: If a delta item serialises to something that is
            not a str, or cannot be serialised at all.
        """
        # Five placeholders for the header lines, filled in after sorting.
        lines = ['', '', '', '', '']
        to_line = self._delta_item_to_line
        for delta_item in delta_to_new:
            lines.append(to_line(delta_item))
            if lines[-1].__class__ != str:
                raise errors.BzrError(
                    'to_line generated non-str output %r' % lines[-1])
        # Empty strings sort before every delta line, so the placeholders
        # stay in slots 0-4 while the delta lines are put in sorted order.
        lines.sort()
        lines[0] = "format: %s\n" % InventoryDeltaSerializer.FORMAT_1
        lines[1] = "parent: %s\n" % old_name
        lines[2] = "version: %s\n" % new_name
        lines[3] = "versioned_root: %s\n" % self._serialize_bool(
            self._versioned_root)
        lines[4] = "tree_references: %s\n" % self._serialize_bool(
            self._tree_references)
        return lines

    def _serialize_bool(self, value):
        """Return "true" or "false" according to the truth of value."""
        if value:
            return "true"
        else:
            return "false"

    def _delta_item_to_line(self, delta_item):
        """Convert delta_item to a line.

        :param delta_item: An (oldpath, newpath, file_id, entry) tuple. The
            old path is ignored; a newpath of None marks a deletion.
        :return: One newline-terminated record holding the new path, file
            id, parent id, last-modified revision and content, separated
            by NUL bytes.
        :raises BzrError: If an unversioned root has a file id other than
            TREE_ROOT or carries a revision, or if any other entry has no
            revision.
        """
        _, newpath, file_id, entry = delta_item
        if newpath is None:
            # delete
            newpath_utf8 = 'None'
            parent_id = ''
            last_modified = NULL_REVISION
            content = 'deleted\x00\x00'
        else:
            # TODO: Test real-world utf8 cache hit rate. It may be a win.
            newpath_utf8 = '/' + newpath.encode('utf8')
            # Serialise None as ''
            parent_id = entry.parent_id or ''
            last_modified = entry.revision
            # special cases for /: an unversioned root must be TREE_ROOT
            # with no revision, and is written with NULL_REVISION.
            if newpath_utf8 == '/' and not self._versioned_root:
                if file_id != 'TREE_ROOT':
                    raise errors.BzrError(
                        'file_id %s is not TREE_ROOT for /' % file_id)
                if last_modified is not None:
                    raise errors.BzrError(
                        'Version present for / in %s' % file_id)
                last_modified = NULL_REVISION
            if last_modified is None:
                raise errors.BzrError("no version for fileid %s" % file_id)
            content = self._entry_to_content[entry.kind](entry)
        return ("%s\x00%s\x00%s\x00%s\x00%s\n" %
                (newpath_utf8, file_id, parent_id, last_modified, content))

    def _deserialize_bool(self, value):
        """Return the bool spelled by value.

        :param value: Either "true" or "false".
        :raises BzrError: If value is neither of those.
        """
        if value == "true":
            return True
        elif value == "false":
            return False
        else:
            raise errors.BzrError("value %r is not a bool" % (value,))

    def parse_text_bytes(self, bytes):
        """Parse the text bytes of a journal entry.

        :param bytes: The bytes to parse. This can be obtained by calling
            delta_to_lines and then doing ''.join(delta_lines).
        :return: (parent_id, new_id, inventory_delta)
        :raises BzrError: If a header line is missing or malformed, if the
            serialised flags differ from this serializer's settings, or if
            a record has a duplicate file id, a forbidden versioned root,
            a special revision id or a forbidden tree reference.
        """
        lines = bytes.split('\n')[:-1]  # discard the last empty line
        if (not lines
                or lines[0] != 'format: %s' % InventoryDeltaSerializer.FORMAT_1):
            raise errors.BzrError('unknown format %r' % lines[0:1])
        if len(lines) < 2 or not lines[1].startswith('parent: '):
            raise errors.BzrError('missing parent: marker')
        journal_parent_id = lines[1][8:]
        if len(lines) < 3 or not lines[2].startswith('version: '):
            raise errors.BzrError('missing version: marker')
        journal_version_id = lines[2][9:]
        if len(lines) < 4 or not lines[3].startswith('versioned_root: '):
            raise errors.BzrError('missing versioned_root: marker')
        journal_versioned_root = self._deserialize_bool(lines[3][16:])
        if len(lines) < 5 or not lines[4].startswith('tree_references: '):
            raise errors.BzrError('missing tree_references: marker')
        journal_tree_references = self._deserialize_bool(lines[4][17:])
        if journal_versioned_root != self._versioned_root:
            raise errors.BzrError(
                "serialized versioned_root flag is wrong: %s" %
                (journal_versioned_root,))
        if journal_tree_references != self._tree_references:
            raise errors.BzrError(
                "serialized tree_references flag is wrong: %s" %
                (journal_tree_references,))
        result = []
        seen_ids = set()
        # NOTE(review): a 'deleted' record written by _delta_item_to_line has
        # no converter in _parse_entry, so parsing a delta that contains a
        # deletion raises KeyError there.
        # The first five lines are the header validated above.
        for line in lines[5:]:
            newpath_utf8, file_id, parent_id, last_modified, content \
                = line.split('\x00', 4)
            parent_id = parent_id or None
            if file_id in seen_ids:
                raise errors.BzrError(
                    "duplicate file id in journal entry %r" % lines)
            seen_ids.add(file_id)
            if newpath_utf8 == '/' and not journal_versioned_root and (
                    last_modified != 'null:' or file_id != 'TREE_ROOT'):
                raise errors.BzrError("Versioned root found: %r" % line)
            elif last_modified[-1] == ':':
                raise errors.BzrError('special revisionid found: %r' % line)
            if not journal_tree_references and content.startswith('tree\x00'):
                raise errors.BzrError("Tree reference found: %r" % line)
            content_tuple = tuple(content.split('\x00'))
            entry = _parse_entry(
                newpath_utf8, file_id, parent_id, last_modified, content_tuple)
            oldpath = None  # XXX: apply_delta ignores this value.
            delta_item = (oldpath, newpath_utf8, file_id, entry)
            result.append(delta_item)
        return journal_parent_id, journal_version_id, result


def _parse_entry(utf8_path, file_id, parent_id, last_modified, content):
    """Build an inventory entry from the fields of one delta record.

    :param utf8_path: The UTF-8 encoded path, including its leading '/'.
    :param file_id: The file id of the entry.
    :param parent_id: The file id of the parent directory.
    :param last_modified: The revision id stored as the entry's revision.
    :param content: Sequence of content fields; content[0] names the kind
        ('dir', 'file', 'link' or 'tree').
    :return: The entry built by the converter for that kind.
    :raises KeyError: If content[0] is not one of the known kinds.
    """
    # NOTE(review): there is no converter for the 'deleted' content that
    # the serializer writes for deletions, so such records raise KeyError.
    entry_factory = {
        'dir': _dir_to_entry,
        'file': _file_to_entry,
        'link': _link_to_entry,
        'tree': _tree_to_entry,
    }
    # Drop the leading '/' before decoding; the entry's name is the
    # basename of the remaining path.
    path = utf8_path[1:].decode('utf8')
    name = basename(path)
    return entry_factory[content[0]](
        content, name, parent_id, file_id, last_modified)