64
68
return "&#%d;" % ord(match.group())
67
_unicode_to_escaped_map = {}
69
def _encode_and_escape(unicode_str, _map=_unicode_to_escaped_map):
71
def _utf8_escape_replace(match, _map=_xml_escape_map):
72
"""Escape utf8 characters into XML safe ones.
74
This uses 2 tricks. It is either escaping "standard" characters, like "&<>,
75
or it is handling characters with the high-bit set. For ascii characters,
76
we just lookup the replacement in the dictionary. For everything else, we
77
decode back into Unicode, and then use the XML escape code.
80
return _map[match.group()]
82
return ''.join('&#%d;' % ord(uni_chr)
83
for uni_chr in match.group().decode('utf8'))
88
def _encode_and_escape(unicode_or_utf8_str, _map=_to_escaped_map):
70
89
"""Encode the string into utf8, and escape invalid XML characters"""
71
90
# We frequently get entities we have not seen before, so it is better
72
91
# to check if None, rather than try/KeyError
73
text = _map.get(unicode_str)
92
text = _map.get(unicode_or_utf8_str)
75
# The alternative policy is to do a regular UTF8 encoding
76
# and then escape only XML meta characters.
77
# Performance is equivalent once you use cache_utf8. *However*
78
# this makes the serialized texts incompatible with old versions
79
# of bzr. So no net gain. (Perhaps the read code would handle utf8
80
# better than entity escapes, but cElementTree seems to do just fine
82
text = str(_utf8_re.sub(_utf8_escape_replace, unicode_str)) + '"'
83
_map[unicode_str] = text
94
if unicode_or_utf8_str.__class__ == unicode:
95
# The alternative policy is to do a regular UTF8 encoding
96
# and then escape only XML meta characters.
97
# Performance is equivalent once you use cache_utf8. *However*
98
# this makes the serialized texts incompatible with old versions
99
# of bzr. So no net gain. (Perhaps the read code would handle utf8
100
# better than entity escapes, but cElementTree seems to do just fine
102
text = str(_unicode_re.sub(_unicode_escape_replace,
103
unicode_or_utf8_str)) + '"'
105
# Plain strings are considered to already be in utf-8 so we do a
106
# slightly different method for escaping.
107
text = _utf8_re.sub(_utf8_escape_replace,
108
unicode_or_utf8_str) + '"'
109
_map[unicode_or_utf8_str] = text
113
def _get_utf8_or_ascii(a_str,
                       _encode_utf8=cache_utf8.encode,
                       _get_cached_ascii=cache_utf8.get_cached_ascii):
    """Return a cached version of the string.

    cElementTree hands back a plain string when the XML is plain ascii and
    only produces Unicode when it has to. We want to work in utf-8 strings,
    so a plain string is simply swapped for its cached version, while a
    Unicode string is encoded first.

    :param a_str: An 8-bit string or Unicode as returned by
        cElementTree.Element.get()
    :return: A utf-8 encoded 8-bit string.
    """
    # Tuned to what cElementTree is known to return; this is not a generic
    # helper, because an arbitrary 8-bit string is not guaranteed to be ascii
    # or valid utf8.
    needs_encoding = a_str.__class__ == unicode
    if needs_encoding:
        return _encode_utf8(a_str)
    return _get_cached_ascii(a_str)
87
136
def _clear_cache():
88
137
"""Clean out the unicode => escaped map"""
89
_unicode_to_escaped_map.clear()
92
class Serializer_v5(Serializer):
93
"""Version 5 serializer
95
Packs objects into XML and vice versa.
138
_to_escaped_map.clear()
141
class Serializer_v8(Serializer):
142
"""This serialiser adds rich roots.
144
Its revision format number matches its inventory number.
100
150
support_altered_by_hack = True
101
151
# This format supports the altered-by hack that reads file ids directly out
102
152
# of the versionedfile, without doing XML parsing.
104
def write_inventory_to_string(self, inv):
105
"""Just call write_inventory with a StringIO and return the value"""
154
supported_kinds = set(['file', 'directory', 'symlink'])
156
revision_format_num = None
158
def _check_revisions(self, inv):
159
"""Extension point for subclasses to check during serialisation.
161
:param inv: An inventory about to be serialised, to be checked.
162
:raises: AssertionError if an error has occured.
164
if inv.revision_id is None:
165
raise AssertionError()
166
if inv.root.revision is None:
167
raise AssertionError()
169
def write_inventory_to_lines(self, inv):
    """Return a list of lines with the encoded inventory.

    Delegates to write_inventory, passing None in place of an output file
    and handing back whatever write_inventory returns.
    """
    no_output_file = None
    return self.write_inventory(inv, no_output_file)
173
def write_inventory_to_string(self, inv, working=False):
174
"""Just call write_inventory with a StringIO and return the value.
176
:param working: If True skip history data - text_sha1, text_size,
177
reference_revision, symlink_target.
106
179
sio = cStringIO.StringIO()
107
self.write_inventory(inv, sio)
180
self.write_inventory(inv, sio, working)
108
181
return sio.getvalue()
110
def write_inventory(self, inv, f):
183
def write_inventory(self, inv, f, working=False):
111
184
"""Write inventory to a file.
113
186
:param inv: the inventory to write.
114
:param f: the file to write.
187
:param f: the file to write. (May be None if the lines are the desired
189
:param working: If True skip history data - text_sha1, text_size,
190
reference_revision, symlink_target.
191
:return: The inventory as a list of lines.
116
193
_ensure_utf8_re()
194
self._check_revisions(inv)
118
196
append = output.append
119
197
self._append_inventory_root(append, inv)
122
200
root_path, root_ie = entries.next()
123
201
for path, ie in entries:
124
self._append_entry(append, ie)
202
if ie.parent_id != self.root_id:
203
parent_str = ' parent_id="'
204
parent_id = _encode_and_escape(ie.parent_id)
208
if ie.kind == 'file':
210
executable = ' executable="yes"'
214
append('<file%s file_id="%s name="%s%s%s revision="%s '
215
'text_sha1="%s" text_size="%d" />\n' % (
216
executable, _encode_and_escape(ie.file_id),
217
_encode_and_escape(ie.name), parent_str, parent_id,
218
_encode_and_escape(ie.revision), ie.text_sha1,
221
append('<file%s file_id="%s name="%s%s%s />\n' % (
222
executable, _encode_and_escape(ie.file_id),
223
_encode_and_escape(ie.name), parent_str, parent_id))
224
elif ie.kind == 'directory':
226
append('<directory file_id="%s name="%s%s%s revision="%s '
228
_encode_and_escape(ie.file_id),
229
_encode_and_escape(ie.name),
230
parent_str, parent_id,
231
_encode_and_escape(ie.revision)))
233
append('<directory file_id="%s name="%s%s%s />\n' % (
234
_encode_and_escape(ie.file_id),
235
_encode_and_escape(ie.name),
236
parent_str, parent_id))
237
elif ie.kind == 'symlink':
239
append('<symlink file_id="%s name="%s%s%s revision="%s '
240
'symlink_target="%s />\n' % (
241
_encode_and_escape(ie.file_id),
242
_encode_and_escape(ie.name),
243
parent_str, parent_id,
244
_encode_and_escape(ie.revision),
245
_encode_and_escape(ie.symlink_target)))
247
append('<symlink file_id="%s name="%s%s%s />\n' % (
248
_encode_and_escape(ie.file_id),
249
_encode_and_escape(ie.name),
250
parent_str, parent_id))
251
elif ie.kind == 'tree-reference':
252
if ie.kind not in self.supported_kinds:
253
raise errors.UnsupportedInventoryKind(ie.kind)
255
append('<tree-reference file_id="%s name="%s%s%s '
256
'revision="%s reference_revision="%s />\n' % (
257
_encode_and_escape(ie.file_id),
258
_encode_and_escape(ie.name),
259
parent_str, parent_id,
260
_encode_and_escape(ie.revision),
261
_encode_and_escape(ie.reference_revision)))
263
append('<tree-reference file_id="%s name="%s%s%s />\n' % (
264
_encode_and_escape(ie.file_id),
265
_encode_and_escape(ie.name),
266
parent_str, parent_id))
268
raise errors.UnsupportedInventoryKind(ie.kind)
125
269
append('</inventory>\n')
127
272
# Just to keep the cache from growing without bounds
128
273
# but we may not actually want to clear the cache
131
277
def _append_inventory_root(self, append, inv):
132
278
"""Append the inventory root to output."""
134
if inv.root.file_id not in (None, ROOT_ID):
136
append(_encode_and_escape(inv.root.file_id))
137
append(' format="5"')
138
279
if inv.revision_id is not None:
139
append(' revision_id="')
140
append(_encode_and_escape(inv.revision_id))
143
def _append_entry(self, append, ie):
144
"""Convert InventoryEntry to XML element and append to output."""
145
# TODO: should just be a plain assertion
146
assert InventoryEntry.versionable_kind(ie.kind), \
147
'unsupported entry kind %s' % ie.kind
152
append(' executable="yes"')
154
append(_encode_and_escape(ie.file_id))
156
append(_encode_and_escape(ie.name))
157
if self._parent_condition(ie):
158
assert isinstance(ie.parent_id, basestring)
159
append(' parent_id="')
160
append(_encode_and_escape(ie.parent_id))
161
if ie.revision is not None:
162
append(' revision="')
163
append(_encode_and_escape(ie.revision))
164
if ie.symlink_target is not None:
165
append(' symlink_target="')
166
append(_encode_and_escape(ie.symlink_target))
167
if ie.text_sha1 is not None:
168
append(' text_sha1="')
171
if ie.text_size is not None:
172
append(' text_size="%d"' % ie.text_size)
176
def _parent_condition(self, ie):
    """Return True when the entry's parent_id differs from ROOT_ID.

    :param ie: The inventory entry whose parent_id is inspected.
    """
    has_non_root_parent = ie.parent_id != ROOT_ID
    return has_non_root_parent
280
revid1 = ' revision_id="'
281
revid2 = _encode_and_escape(inv.revision_id)
285
append('<inventory format="%s"%s%s>\n' % (
286
self.format_num, revid1, revid2))
287
append('<directory file_id="%s name="%s revision="%s />\n' % (
288
_encode_and_escape(inv.root.file_id),
289
_encode_and_escape(inv.root.name),
290
_encode_and_escape(inv.root.revision)))
179
292
def _pack_revision(self, rev):
180
293
"""Revision object -> xml tree"""
294
# For the XML format, we need to write them as Unicode rather than as
295
# utf-8 strings. So that cElementTree can handle properly escaping
297
decode_utf8 = cache_utf8.decode
298
revision_id = rev.revision_id
299
if isinstance(revision_id, str):
300
revision_id = decode_utf8(revision_id)
301
format_num = self.format_num
302
if self.revision_format_num is not None:
303
format_num = self.revision_format_num
181
304
root = Element('revision',
182
305
committer = rev.committer,
183
timestamp = '%.9f' % rev.timestamp,
184
revision_id = rev.revision_id,
306
timestamp = '%.3f' % rev.timestamp,
307
revision_id = revision_id,
185
308
inventory_sha1 = rev.inventory_sha1,
188
311
if rev.timezone is not None:
189
312
root.set('timezone', str(rev.timezone))
206
331
def _pack_revision_properties(self, rev, under_element):
207
332
top_elt = SubElement(under_element, 'properties')
208
333
for prop_name, prop_value in sorted(rev.properties.items()):
209
assert isinstance(prop_name, basestring)
210
assert isinstance(prop_value, basestring)
211
334
prop_elt = SubElement(top_elt, 'property')
212
335
prop_elt.set('name', prop_name)
213
336
prop_elt.text = prop_value
214
337
prop_elt.tail = '\n'
215
338
top_elt.tail = '\n'
217
def _unpack_inventory(self, elt):
218
"""Construct from XML Element
220
assert elt.tag == 'inventory'
221
root_id = elt.get('file_id') or ROOT_ID
340
def _unpack_inventory(self, elt, revision_id=None):
341
"""Construct from XML Element"""
342
if elt.tag != 'inventory':
343
raise errors.UnexpectedInventoryFormat('Root tag is %r' % elt.tag)
222
344
format = elt.get('format')
223
if format is not None:
225
raise BzrError("invalid format version %r on inventory"
345
if format != self.format_num:
346
raise errors.UnexpectedInventoryFormat('Invalid format version %r'
227
348
revision_id = elt.get('revision_id')
228
349
if revision_id is not None:
229
revision_id = cache_utf8.get_cached_unicode(revision_id)
230
inv = Inventory(root_id, revision_id=revision_id)
350
revision_id = cache_utf8.encode(revision_id)
351
inv = inventory.Inventory(root_id=None, revision_id=revision_id)
232
353
ie = self._unpack_entry(e)
233
if ie.parent_id == ROOT_ID:
234
ie.parent_id = root_id
238
def _unpack_entry(self, elt, none_parents=False):
357
def _unpack_entry(self, elt):
240
359
if not InventoryEntry.versionable_kind(kind):
241
360
raise AssertionError('unsupported entry kind %s' % kind)
243
get_cached = cache_utf8.get_cached_unicode
362
get_cached = _get_utf8_or_ascii
245
364
parent_id = elt.get('parent_id')
246
if parent_id is None and not none_parents:
248
# TODO: jam 20060817 At present, caching file ids costs us too
249
# much time. It slows down overall read performances from
250
# approx 500ms to 700ms. And doesn't improve future reads.
251
# it might be because revision ids and file ids are mixing.
252
# Consider caching *just* the file ids, for a limited period
254
#parent_id = get_cached(parent_id)
255
#file_id = get_cached(elt.get('file_id'))
256
file_id = elt.get('file_id')
365
if parent_id is not None:
366
parent_id = get_cached(parent_id)
367
file_id = get_cached(elt.get('file_id'))
258
369
if kind == 'directory':
259
370
ie = inventory.InventoryDirectory(file_id,