17
17
"""Serializer object for CHK based inventory storage."""
23
from .. import lazy_import
19
from __future__ import absolute_import
21
from cStringIO import StringIO
23
from bzrlib import lazy_import
24
24
lazy_import.lazy_import(globals(),
26
from breezy.bzr import (
35
34
revision as _mod_revision,
42
39
def _validate_properties(props, _decode=cache_utf8._utf8_decode):
    """Decode a mapping of UTF-8 revision properties into a new dict.

    :param props: Mapping whose keys and values are UTF-8 encoded.
    :param _decode: Decoder callable; it is invoked as ``_decode(data)`` or
        ``_decode(data, errors)`` and the first element of its result is
        used.  Bound as a default argument so the lookup happens once.
    :return: A new dict with every key and value decoded.  ``props`` itself
        is not modified.
    """
    # TODO: we really want an 'isascii' check for key
    # Values are decoded with the 'surrogateescape' error handler; keys are
    # decoded with the decoder's default error handling.
    return {
        _decode(key)[0]: _decode(value, 'surrogateescape')[0]
        for key, value in props.items()
    }
42
for key, value in props.iteritems():
43
props[key] = _decode(value)[0]
48
47
def _is_format_10(value):
65
64
# TODO: add a 'validate_utf8' for things like revision_id and file_id
66
65
# and a validator for parent-ids
67
_schema = {b'format': (None, int, _is_format_10),
68
b'committer': ('committer', bytes, cache_utf8.decode),
69
b'timezone': ('timezone', int, None),
70
b'timestamp': ('timestamp', bytes, float),
71
b'revision-id': ('revision_id', bytes, None),
72
b'parent-ids': ('parent_ids', list, None),
73
b'inventory-sha1': ('inventory_sha1', bytes, None),
74
b'message': ('message', bytes, cache_utf8.decode),
75
b'properties': ('properties', dict, _validate_properties),
66
_schema = {'format': (None, int, _is_format_10),
67
'committer': ('committer', str, cache_utf8.decode),
68
'timezone': ('timezone', int, None),
69
'timestamp': ('timestamp', str, float),
70
'revision-id': ('revision_id', str, None),
71
'parent-ids': ('parent_ids', list, None),
72
'inventory-sha1': ('inventory_sha1', str, None),
73
'message': ('message', str, cache_utf8.decode),
74
'properties': ('properties', dict, _validate_properties),
78
77
def write_revision_to_string(self, rev):
79
78
encode_utf8 = cache_utf8._utf8_encode
81
80
# This lets us control the ordering, so that we are able to create
85
(b"committer", encode_utf8(rev.committer)[0]),
84
("committer", encode_utf8(rev.committer)[0]),
87
86
if rev.timezone is not None:
88
ret.append((b"timezone", rev.timezone))
87
ret.append(("timezone", rev.timezone))
89
88
# For bzr revisions, the most common property is just 'branch-nick'
90
89
# which changes infrequently.
92
for key, value in rev.properties.items():
93
revprops[encode_utf8(key)[0]] = encode_utf8(value, 'surrogateescape')[0]
94
ret.append((b'properties', revprops))
91
for key, value in rev.properties.iteritems():
92
revprops[key] = encode_utf8(value)[0]
93
ret.append(('properties', revprops))
96
(b"timestamp", b"%.3f" % rev.timestamp),
97
(b"revision-id", rev.revision_id),
98
(b"parent-ids", rev.parent_ids),
99
(b"inventory-sha1", rev.inventory_sha1),
100
(b"message", encode_utf8(rev.message)[0]),
95
("timestamp", "%.3f" % rev.timestamp),
96
("revision-id", rev.revision_id),
97
("parent-ids", rev.parent_ids),
98
("inventory-sha1", rev.inventory_sha1),
99
("message", encode_utf8(rev.message)[0]),
102
101
return bencode.bencode(ret)
104
def write_revision_to_lines(self, rev):
    """Serialize ``rev`` and return the serialized text split into lines.

    :param rev: The revision to serialize; it is passed unchanged to
        ``self.write_revision_to_string``.
    :return: The result of ``write_revision_to_string(rev)`` split on line
        boundaries, with the line terminators kept on each element.
    """
    serialized = self.write_revision_to_string(rev)
    # keepends=True so that joining the pieces reproduces the original text.
    return serialized.splitlines(True)
103
def write_revision(self, rev, f):
    """Serialize ``rev`` and write the result to ``f``.

    :param rev: The revision to serialize; it is passed unchanged to
        ``self.write_revision_to_string``.
    :param f: Object with a ``write`` method that receives the serialized
        text in a single call.
    :return: None.
    """
    serialized = self.write_revision_to_string(rev)
    f.write(serialized)
107
106
def read_revision_from_string(self, text):
108
107
# TODO: consider writing a Revision decoder, rather than using the
110
109
# However, to decode all 25k revisions of bzr takes approx 1.3s
111
110
# If we remove all extra validation that goes down to about 1.2s.
112
111
# Of that time, probably 0.6s is spent in bencode.bdecode().
113
# Regardless 'time brz log' of everything is 7+s, so 1.3s to
112
# Regardless 'time bzr log' of everything is 7+s, so 1.3s to
114
113
# extract revision texts isn't a majority of time.
115
114
ret = bencode.bdecode(text)
116
115
if not isinstance(ret, list):
130
129
value = validator(value)
131
130
bits[var_name] = value
132
131
if len(bits) != len(schema):
133
missing = [key for key, (var_name, _, _) in schema.items()
132
missing = [key for key, (var_name, _, _) in schema.iteritems()
134
133
if var_name not in bits]
135
134
raise ValueError('Revision text was missing expected keys %s.'
136
135
' text %r' % (missing, text))
145
144
class CHKSerializer(serializer.Serializer):
146
145
"""A CHKInventory based serializer with 'plain' behaviour."""
149
148
revision_format_num = None
150
149
support_altered_by_hack = False
151
supported_kinds = {'file', 'directory', 'symlink', 'tree-reference'}
150
supported_kinds = set(['file', 'directory', 'symlink', 'tree-reference'])
153
152
def __init__(self, node_size, search_key_name):
154
153
self.maximum_size = node_size
158
157
return_from_cache=False):
159
158
"""Construct from XML Element"""
160
159
inv = xml_serializer.unpack_inventory_flat(elt, self.format_num,
161
xml_serializer.unpack_inventory_entry, entry_cache,
160
xml_serializer.unpack_inventory_entry, entry_cache,
165
def read_inventory_from_lines(self, xml_lines, revision_id=None,
166
entry_cache=None, return_from_cache=False):
164
def read_inventory_from_string(self, xml_string, revision_id=None,
165
entry_cache=None, return_from_cache=False):
167
166
"""Read xml_string into an inventory object.
169
168
:param xml_string: The xml to read.
181
180
return self._unpack_inventory(
182
xml_serializer.fromstringlist(xml_lines), revision_id,
181
xml_serializer.fromstring(xml_string), revision_id,
183
182
entry_cache=entry_cache,
184
183
return_from_cache=return_from_cache)
185
except xml_serializer.ParseError as e:
186
raise serializer.UnexpectedInventoryFormat(e)
184
except xml_serializer.ParseError, e:
185
raise errors.UnexpectedInventoryFormat(e)
188
187
def read_inventory(self, f, revision_id=None):
189
188
"""Read an inventory from a file-like object."""
192
191
return self._unpack_inventory(self._read_element(f),
196
except xml_serializer.ParseError as e:
197
raise serializer.UnexpectedInventoryFormat(e)
195
except xml_serializer.ParseError, e:
196
raise errors.UnexpectedInventoryFormat(e)
199
198
def write_inventory_to_lines(self, inv):
    """Return a list of lines with the encoded inventory.

    :param inv: The inventory to encode.
    :return: Whatever ``self.write_inventory`` returns when it is called
        with ``None`` in place of an output file.
    """
    # Passing None as the file argument asks write_inventory for its return
    # value only, without an output file to write to.
    return self.write_inventory(inv, None)
203
def write_inventory_to_chunks(self, inv):
    """Return the encoded inventory as a list of chunks.

    :param inv: The inventory to encode.
    :return: Whatever ``self.write_inventory`` returns when it is called
        with ``None`` in place of an output file.
    """
    # Passing None as the file argument asks write_inventory for its return
    # value only, without an output file to write to.
    return self.write_inventory(inv, None)
202
def write_inventory_to_string(self, inv, working=False):
203
"""Just call write_inventory with a StringIO and return the value.
205
:param working: If True skip history data - text_sha1, text_size,
206
reference_revision, symlink_target.
209
self.write_inventory(inv, sio, working)
210
return sio.getvalue()
207
212
def write_inventory(self, inv, f, working=False):
208
213
"""Write inventory to a file.
218
223
append = output.append
219
224
if inv.revision_id is not None:
222
xml_serializer.encode_and_escape(inv.revision_id), b'"'])
225
revid1 = ' revision_id="'
226
revid2 = xml_serializer.encode_and_escape(inv.revision_id)
225
append(b'<inventory format="%s"%s>\n' % (
226
self.format_num, revid))
227
append(b'<directory file_id="%s" name="%s" revision="%s" />\n' % (
230
append('<inventory format="%s"%s%s>\n' % (
231
self.format_num, revid1, revid2))
232
append('<directory file_id="%s name="%s revision="%s />\n' % (
228
233
xml_serializer.encode_and_escape(inv.root.file_id),
229
234
xml_serializer.encode_and_escape(inv.root.name),
230
235
xml_serializer.encode_and_escape(inv.root.revision)))
231
236
xml_serializer.serialize_inventory_flat(inv,
233
root_id=None, supported_kinds=self.supported_kinds,
238
root_id=None, supported_kinds=self.supported_kinds,
235
240
if f is not None:
236
241
f.writelines(output)
240
chk_serializer_255_bigpage = CHKSerializer(65536, b'hash-255-way')
245
chk_serializer_255_bigpage = CHKSerializer(65536, 'hash-255-way')
243
248
class CHKBEncodeSerializer(BEncodeRevisionSerializer1, CHKSerializer):
244
249
"""A CHKInventory and BEncode based serializer with 'plain' behaviour."""
249
chk_bencode_serializer = CHKBEncodeSerializer(65536, b'hash-255-way')
254
chk_bencode_serializer = CHKBEncodeSerializer(65536, 'hash-255-way')