# Copyright (C) 2008, 2009, 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Serializer object for CHK based inventory storage."""
19
from __future__ import absolute_import
21
from .. import lazy_import
22
lazy_import.lazy_import(globals(),
24
from breezy.bzr import (
32
revision as _mod_revision,
37
from ..sixish import (
42
def _validate_properties(props, _decode=cache_utf8._utf8_decode):
    """Decode the keys and values of a revision-properties mapping.

    :param props: Mapping whose keys and values are each passed to
        ``_decode`` (presumably UTF-8 byte strings coming from bdecode --
        confirm against the caller).
    :param _decode: Callable returning a tuple whose first item is the
        decoded text. It is bound as a default argument, so the attribute
        lookup happens once, when the function is defined.
    :return: A new dict mapping each decoded key to its decoded value.
    """
    # TODO: we really want an 'isascii' check for key
    # A new dict is built; ``props`` itself is left untouched.
    return {_decode(key)[0]: _decode(value)[0] for key, value in props.items()}
48
def _is_format_10(value):
50
raise ValueError('Format number was not recognized, expected 10 got %d'
55
# Class header and class-level attributes of the bencode revision serializer.
# _schema drives decoding: for every key expected in the decoded revision it
# names the Revision attribute to fill, the exact type the decoded value must
# have, and an optional callable applied to the value (None = keep as is).
# b'format' maps to the attribute name None because it is checked but not
# handed on to Revision.
# NOTE(review): in this view the class body is not indented, the docstring
# has no closing quotes, the _schema literal has no closing brace, and bare
# integers sit between the statements (they look like line-number residue).
# Confirm against the real file before relying on this block.
class BEncodeRevisionSerializer1(object):
56
"""Simple revision serializer based around bencode.
59
squashes_xml_invalid_characters = False
61
# Maps {key:(Revision attribute, bencode_type, validator)}
62
# This tells us what kind we expect bdecode to create, what variable on
63
# Revision we should be using, and a function to call to validate/transform
65
# TODO: add a 'validate_utf8' for things like revision_id and file_id
66
# and a validator for parent-ids
67
_schema = {b'format': (None, int, _is_format_10),
68
b'committer': ('committer', bytes, cache_utf8.decode),
69
b'timezone': ('timezone', int, None),
70
b'timestamp': ('timestamp', bytes, float),
71
b'revision-id': ('revision_id', bytes, None),
72
b'parent-ids': ('parent_ids', list, None),
73
b'inventory-sha1': ('inventory_sha1', bytes, None),
74
b'message': ('message', bytes, cache_utf8.decode),
75
b'properties': ('properties', dict, _validate_properties),
78
def write_revision_to_string(self, rev):
    """Serialize ``rev`` to a bencoded byte string.

    The fields are collected as a list of (key, value) pairs so that their
    order is fixed. ``timezone`` is only written when it is not None.

    :param rev: The revision to serialize. Its committer, message and
        property keys/values are encoded with ``cache_utf8._utf8_encode``;
        revision_id, parent_ids and inventory_sha1 are written as they are.
    :return: The result of ``bencode.bencode`` for the field list.
    """
    encode_utf8 = cache_utf8._utf8_encode
    # Use a list of tuples rather than a dict.
    # This lets us control the ordering, so the output is predictable.
    ret = [
        # The reader's schema requires a b'format' entry equal to 10.
        (b"format", 10),
        (b"committer", encode_utf8(rev.committer)[0]),
    ]
    if rev.timezone is not None:
        ret.append((b"timezone", rev.timezone))
    # For bzr revisions, the most common property is just 'branch-nick'
    # which changes infrequently.
    revprops = {}
    for key, value in rev.properties.items():
        revprops[encode_utf8(key)[0]] = encode_utf8(value)[0]
    ret.append((b'properties', revprops))
    ret.extend([
        # The timestamp is stored as text with millisecond precision.
        (b"timestamp", b"%.3f" % rev.timestamp),
        (b"revision-id", rev.revision_id),
        (b"parent-ids", rev.parent_ids),
        (b"inventory-sha1", rev.inventory_sha1),
        (b"message", encode_utf8(rev.message)[0]),
    ])
    return bencode.bencode(ret)
104
def write_revision(self, rev, f):
    """Serialize ``rev`` and write the result to ``f``.

    :param rev: The revision to serialize; it is handed unchanged to
        ``write_revision_to_string``.
    :param f: An object with a ``write`` method that receives the
        serialized text in a single call.
    """
    f.write(self.write_revision_to_string(rev))
107
def read_revision_from_string(self, text):
    """Decode a bencoded revision text into a Revision object.

    Every decoded (key, value) pair is looked up in ``self._schema``, its
    type is checked, the optional validator is applied, and the result is
    passed to ``_mod_revision.Revision`` as a keyword argument.

    :param text: The serialized revision text.
    :return: The ``_mod_revision.Revision`` built from the decoded fields.
    :raises ValueError: If the decoded text is not a list, a value does not
        have the type the schema expects, or an expected key is missing.
        Validators may raise errors of their own.
    :raises KeyError: If the text contains a key that is not in the schema.
    """
    # TODO: consider writing a Revision decoder, rather than using the
    # generic bencode decoder
    # However, to decode all 25k revisions of bzr takes approx 1.3s
    # If we remove all extra validation that goes down to about 1.2s.
    # Of that time, probably 0.6s is spent in bencode.bdecode().
    # Regardless 'time brz log' of everything is 7+s, so 1.3s to
    # extract revision texts isn't a majority of time.
    ret = bencode.bdecode(text)
    if not isinstance(ret, list):
        raise ValueError("invalid revision text")
    schema = self._schema
    # timezone is allowed to be missing, but should be set
    bits = {'timezone': None}
    for key, value in ret:
        # Raises KeyError if the key is not part of the schema. A key that
        # appears twice is not rejected: the later value replaces the
        # earlier one.
        var_name, expected_type, validator = schema[key]
        # Exact class match on purpose: subclasses are not accepted.
        if value.__class__ is not expected_type:
            raise ValueError('key %s did not conform to the expected type'
                             ' %s, but was %s'
                             % (key, expected_type, type(value)))
        if validator is not None:
            value = validator(value)
        bits[var_name] = value
    if len(bits) != len(schema):
        missing = [key for key, (var_name, _, _) in schema.items()
                   if var_name not in bits]
        raise ValueError('Revision text was missing expected keys %s.'
                         ' text %r' % (missing, text))
    del bits[None]  # Get rid of 'format' since it doesn't get mapped
    rev = _mod_revision.Revision(**bits)
    return rev
141
def read_revision(self, f):
    """Read a serialized revision from ``f`` and decode it.

    :param f: An object whose ``read()`` returns the complete revision text.
    :return: Whatever ``read_revision_from_string`` returns for that text.
    """
    return self.read_revision_from_string(f.read())
145
# Class header and class-level attributes of the CHK inventory serializer.
# NOTE(review): methods of this class read self.format_num, but no
# format_num assignment is visible in this view -- confirm where it is set.
# The class body is not indented here and bare integers sit between the
# statements (they look like line-number residue).
class CHKSerializer(serializer.Serializer):
146
"""A CHKInventory based serializer with 'plain' behaviour."""
149
revision_format_num = None
150
support_altered_by_hack = False
151
# Passed to xml_serializer.serialize_inventory_flat when writing inventories.
supported_kinds = {'file', 'directory', 'symlink', 'tree-reference'}
153
def __init__(self, node_size, search_key_name):
154
self.maximum_size = node_size
155
self.search_key_name = search_key_name
157
# Build an inventory from a parsed XML element by delegating to
# xml_serializer.unpack_inventory_flat, passing self.format_num,
# xml_serializer.unpack_inventory_entry and entry_cache.
# NOTE(review): the call below is cut off in this view (no closing
# parenthesis and no return statement), so any further arguments and the
# return value cannot be confirmed from here. revision_id is not used in the
# visible lines. The bare integers look like line-number residue.
def _unpack_inventory(self, elt, revision_id=None, entry_cache=None,
158
return_from_cache=False):
159
"""Construct from XML Element"""
160
inv = xml_serializer.unpack_inventory_flat(elt, self.format_num,
161
xml_serializer.unpack_inventory_entry, entry_cache,
165
def read_inventory_from_string(self, xml_string, revision_id=None,
                               entry_cache=None, return_from_cache=False):
    """Read xml_string into an inventory object.

    :param xml_string: The xml to read.
    :param revision_id: If not-None, the expected revision id of the
        inventory.
    :param entry_cache: An optional cache of InventoryEntry objects. If
        supplied we will look up entries via (file_id, revision_id) which
        should map to a valid InventoryEntry (File/Directory/etc) object.
    :param return_from_cache: Return entries directly from the cache,
        rather than copying them first. This is only safe if the caller
        promises not to mutate the returned inventory entries, but it can
        make some operations significantly faster.
    :return: The result of ``_unpack_inventory`` for the parsed element.
    :raises errors.UnexpectedInventoryFormat: If parsing or unpacking raises
        ``xml_serializer.ParseError``.
    """
    try:
        return self._unpack_inventory(
            xml_serializer.fromstring(xml_string), revision_id,
            entry_cache=entry_cache,
            return_from_cache=return_from_cache)
    except xml_serializer.ParseError as e:
        # Report parse failures using the serializer-level error type.
        raise errors.UnexpectedInventoryFormat(e)
188
# Read an inventory from a file-like object: the element returned by
# self._read_element(f) is handed to self._unpack_inventory, and an
# xml_serializer.ParseError is re-raised as errors.UnexpectedInventoryFormat.
# NOTE(review): in this view the `except` has no matching `try`, and the
# _unpack_inventory call is left open, so the remaining arguments (and any
# cleanup of `f`) cannot be confirmed from here. The bare integers look like
# line-number residue.
def read_inventory(self, f, revision_id=None):
189
"""Read an inventory from a file-like object."""
192
return self._unpack_inventory(self._read_element(f),
196
except xml_serializer.ParseError as e:
197
raise errors.UnexpectedInventoryFormat(e)
199
def write_inventory_to_lines(self, inv):
    """Return a list of lines with the encoded inventory.

    :param inv: The inventory to encode.
    :return: Whatever ``write_inventory`` returns when it is given no file.
    """
    # Passing None as the file asks write_inventory for the lines only.
    return self.write_inventory(inv, None)
203
def write_inventory_to_string(self, inv, working=False):
    """Just call write_inventory with a BytesIO and return the value.

    :param inv: The inventory to encode.
    :param working: If True skip history data - text_sha1, text_size,
        reference_revision, symlink_target.
    :return: Everything ``write_inventory`` wrote to the buffer, as bytes.
    """
    # Imported locally so this method does not depend on which BytesIO alias
    # the module's import block provides.
    from io import BytesIO
    sio = BytesIO()
    self.write_inventory(inv, sio, working)
    return sio.getvalue()
213
# Serialize an inventory as XML lines: an <inventory> header carrying
# self.format_num (plus the revision id when inv.revision_id is not None), a
# <directory> line for inv.root, then the remaining entries through
# xml_serializer.serialize_inventory_flat.
# NOTE(review): the format strings open each attribute quote but do not close
# it (e.g. file_id="%s name="%s), and revid1 likewise ends with an opening
# quote; presumably xml_serializer.encode_and_escape returns its value with
# the closing quote appended -- confirm before "fixing" the quoting.
# NOTE(review): in this view the docstring is unterminated, `output` is never
# created, the branch for a missing revision id is absent (revid1/revid2
# would be unbound), and the serialize_inventory_flat call plus whatever
# follows it (writing to `f`, returning the lines) is cut off. The bare
# integers look like line-number residue.
def write_inventory(self, inv, f, working=False):
214
"""Write inventory to a file.
216
:param inv: the inventory to write.
217
:param f: the file to write. (May be None if the lines are the desired
219
:param working: If True skip history data - text_sha1, text_size,
220
reference_revision, symlink_target.
221
:return: The inventory as a list of lines.
224
append = output.append
225
if inv.revision_id is not None:
226
revid1 = b' revision_id="'
227
revid2 = xml_serializer.encode_and_escape(inv.revision_id)
231
append(b'<inventory format="%s"%s%s>\n' % (
232
self.format_num, revid1, revid2))
233
append(b'<directory file_id="%s name="%s revision="%s />\n' % (
234
xml_serializer.encode_and_escape(inv.root.file_id),
235
xml_serializer.encode_and_escape(inv.root.name),
236
xml_serializer.encode_and_escape(inv.root.revision)))
237
xml_serializer.serialize_inventory_flat(inv,
239
root_id=None, supported_kinds=self.supported_kinds,
246
# Module-level CHKSerializer instance created with node_size 65536 and the
# b'hash-255-way' search key name.
chk_serializer_255_bigpage = CHKSerializer(65536, b'hash-255-way')
249
# Combines BEncodeRevisionSerializer1 and CHKSerializer through multiple
# inheritance; BEncodeRevisionSerializer1 comes first in the base list.
# NOTE(review): only the docstring is visible in this view and it is not
# indented; confirm whether the class also sets class-level attributes. The
# bare integer looks like line-number residue.
class CHKBEncodeSerializer(BEncodeRevisionSerializer1, CHKSerializer):
250
"""A CHKInventory and BEncode based serializer with 'plain' behaviour."""
255
# Module-level CHKBEncodeSerializer instance created with node_size 65536 and
# the b'hash-255-way' search key name.
chk_bencode_serializer = CHKBEncodeSerializer(65536, b'hash-255-way')