17
17
"""Serializer object for CHK based inventory storage."""
19
from cStringIO import (
19
23
from bzrlib import (
23
28
revision as _mod_revision,
29
def _validate_properties(props, _decode=cache_utf8._utf8_decode):
30
# TODO: we really want an 'isascii' check for key
31
# Cast the utf8 properties into Unicode 'in place'
32
for key, value in props.iteritems():
33
props[key] = _decode(value)[0]
37
def _is_format_10(value):
39
raise ValueError('Format number was not recognized, expected 10 got %d'
44
33
class BEncodeRevisionSerializer1(object):
45
"""Simple revision serializer based around bencode.
34
"""Simple revision serializer based around bencode.
48
squashes_xml_invalid_characters = False
50
# Maps {key:(Revision attribute, bencode_type, validator)}
51
# This tells us what kind we expect bdecode to create, what variable on
52
# Revision we should be using, and a function to call to validate/transform
54
# TODO: add a 'validate_utf8' for things like revision_id and file_id
55
# and a validator for parent-ids
56
_schema = {'format': (None, int, _is_format_10),
57
'committer': ('committer', str, cache_utf8.decode),
58
'timezone': ('timezone', int, None),
59
'timestamp': ('timestamp', str, float),
60
'revision-id': ('revision_id', str, None),
61
'parent-ids': ('parent_ids', list, None),
62
'inventory-sha1': ('inventory_sha1', str, None),
63
'message': ('message', str, cache_utf8.decode),
64
'properties': ('properties', dict, _validate_properties),
67
38
def write_revision_to_string(self, rev):
68
encode_utf8 = cache_utf8._utf8_encode
69
# Use a list of tuples rather than a dict
70
# This lets us control the ordering, so that we are able to create
74
("committer", encode_utf8(rev.committer)[0]),
76
if rev.timezone is not None:
77
ret.append(("timezone", rev.timezone))
78
# For bzr revisions, the most common property is just 'branch-nick'
79
# which changes infrequently.
39
encode_utf8 = cache_utf8.encode
41
"revision-id": rev.revision_id,
42
"timestamp": "%.3f" % rev.timestamp,
43
"parent-ids": rev.parent_ids,
44
"inventory-sha1": rev.inventory_sha1,
45
"committer": encode_utf8(rev.committer),
46
"message": encode_utf8(rev.message),
81
49
for key, value in rev.properties.iteritems():
82
revprops[key] = encode_utf8(value)[0]
83
ret.append(('properties', revprops))
85
("timestamp", "%.3f" % rev.timestamp),
86
("revision-id", rev.revision_id),
87
("parent-ids", rev.parent_ids),
88
("inventory-sha1", rev.inventory_sha1),
89
("message", encode_utf8(rev.message)[0]),
50
revprops[key] = encode_utf8(value)
51
ret["properties"] = revprops
52
if rev.timezone is not None:
53
ret["timezone"] = rev.timezone
91
54
return bencode.bencode(ret)
93
56
def write_revision(self, rev, f):
    """Serialize rev and write the resulting text to f.

    The text is produced by self.write_revision_to_string(rev) and passed
    to f.write() in a single call. Nothing is returned.
    """
    serialized = self.write_revision_to_string(rev)
    f.write(serialized)
96
59
def read_revision_from_string(self, text):
97
# TODO: consider writing a Revision decoder, rather than using the
98
# generic bencode decoder
99
# However, to decode all 25k revisions of bzr takes approx 1.3s
100
# If we remove all extra validation that goes down to about 1.2s.
101
# Of that time, probably 0.6s is spent in bencode.bdecode().
102
# Regardless 'time bzr log' of everything is 7+s, so 1.3s to
103
# extract revision texts isn't a majority of time.
60
decode_utf8 = cache_utf8.decode
104
61
ret = bencode.bdecode(text)
105
if not isinstance(ret, list):
106
raise ValueError("invalid revision text")
107
schema = self._schema
108
# timezone is allowed to be missing, but should be set
109
bits = {'timezone': None}
110
for key, value in ret:
111
# Will raise KeyError if not a valid part of the schema, or an
112
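# entry is given 2 times.
# NOTE(review): schema[key] succeeds for a repeated key too, so a duplicate
# entry appears to overwrite bits[var_name] rather than raise -- confirm.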
113
var_name, expected_type, validator = schema[key]
114
if value.__class__ is not expected_type:
115
raise ValueError('key %s did not conform to the expected type'
117
% (key, expected_type, type(value)))
118
if validator is not None:
119
value = validator(value)
120
bits[var_name] = value
121
if len(bits) != len(schema):
122
missing = [key for key, (var_name, _, _) in schema.iteritems()
123
if var_name not in bits]
124
raise ValueError('Revision text was missing expected keys %s.'
125
' text %r' % (missing, text))
126
del bits[None] # Get rid of 'format' since it doesn't get mapped
127
rev = _mod_revision.Revision(**bits)
62
rev = _mod_revision.Revision(
63
committer=decode_utf8(ret["committer"]),
64
revision_id=ret["revision-id"],
65
parent_ids=ret["parent-ids"],
66
inventory_sha1=ret["inventory-sha1"],
67
timestamp=float(ret["timestamp"]),
68
message=decode_utf8(ret["message"]),
71
rev.timezone = ret["timezone"]
74
for key, value in ret["properties"].iteritems():
75
rev.properties[key] = decode_utf8(value)
130
78
def read_revision(self, f):
    """Read everything from f and parse it as a revision text.

    Returns whatever self.read_revision_from_string() returns for the
    content obtained from f.read().
    """
    text = f.read()
    return self.read_revision_from_string(text)
134
class CHKSerializerSubtree(BEncodeRevisionSerializer1, xml7.Serializer_v7):
82
class CHKSerializerSubtree(BEncodeRevisionSerializer1, xml6.Serializer_v6):
135
83
"""A CHKInventory based serializer that supports tree references"""
137
85
supported_kinds = set(['file', 'directory', 'symlink', 'tree-reference'])