1
# Copyright (C) 2005, 2006 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24
from bzrlib.xml_serializer import SubElement, Element, Serializer
25
from bzrlib.inventory import ROOT_ID, Inventory, InventoryEntry
26
from bzrlib.revision import Revision
27
from bzrlib.errors import BzrError
33
"'":"'", # FIXME: overkill
40
def _ensure_utf8_re():
    """Make sure the _utf8_re regex has been compiled.

    The module-level ``_utf8_re`` is compiled on first use to a pattern that
    matches any single one of the characters ``& < > ' "``.  If it is already
    set, this function returns without doing anything.
    """
    # The name is rebound below, so it has to be declared global; without
    # the declaration the guard would read an unbound local.
    global _utf8_re
    if _utf8_re is not None:
        return
    _utf8_re = re.compile('[&<>\'\"]')
48
def _utf8_escape_replace(match, _map=_utf8_escape_map):
    """Return the escaped replacement for one regex match.

    This is the replacement callback handed to ``_utf8_re.sub``: the whole
    matched text is looked up in the escape map and the mapped value is
    returned.

    :param match: the regex match object; ``match.group()`` is the lookup key.
    :param _map: mapping from matched text to its escape.  It is bound as a
        default argument at definition time and is not meant to be supplied
        by callers.
    :return: the entry of ``_map`` for the matched text.
    :raises KeyError: if the matched text has no entry in ``_map``.
    """
    # NOTE(review): earlier documentation said non-ascii characters are
    # escaped here as well (for compatibility with what ElementTree and
    # older versions of bzr produced).  This lookup only covers the keys of
    # _map -- confirm which policy is intended.
    #
    # jam 20060816 Benchmarks show that try/KeyError style direct indexing
    # is faster if you expect the lookup to rarely miss (about a 10%
    # difference in overall time), while a None check is much faster if
    # misses are frequent.  Revision ids, file ids and path names rarely
    # miss, so index directly.
    matched_text = match.group()
    return _map[matched_text]
64
# Cache from input string to its utf8-encoded, XML-escaped form (with the
# trailing double quote already appended); filled by _encode_and_escape.
_unicode_to_escaped_map = {}


def _encode_and_escape(unicode_str, _map=_unicode_to_escaped_map,
                       _encode=cache_utf8.encode):
    """Encode the string into utf8, and escape invalid XML characters.

    The returned text ends with a closing double quote, so a caller writes
    the opening half of an attribute (``name="``) and then appends this
    value directly.  Results are remembered in ``_map`` and reused on later
    calls with the same input.

    ``_utf8_re`` is used without any check, so it must already have been
    compiled (see ``_ensure_utf8_re``) before this is called.

    :param unicode_str: the string to encode and escape.
    :param _map: cache of earlier results.  Bound at definition time; not
        meant to be supplied by callers.
    :param _encode: encoder applied to ``unicode_str`` before escaping.
        Bound at definition time.
    :return: the encoded, escaped text followed by ``"``.
    """
    # We frequently get entities we have not seen before, so it is better
    # to check if None, rather than try/KeyError
    text = _map.get(unicode_str)
    if text is None:
        # The alternative policy is to do a regular UTF8 encoding
        # and then escape only XML meta characters. This could take
        # advantage of cache_utf8 since a lot of the revision ids
        # and file ids would already be cached.
        text = _utf8_re.sub(_utf8_escape_replace, _encode(unicode_str)) + '"'
        _map[unicode_str] = text
    return text
83
"""Clean out the unicode => escaped map"""
84
_unicode_to_escaped_map.clear()
87
class Serializer_v5(Serializer):
    """Version 5 serializer

    Packs objects into XML and vice versa.

    Inventories can be written straight to text (``write_inventory``) or
    packed into an element tree (``_pack_inventory``).  Revisions are packed
    into, and unpacked from, element trees.
    """

    def write_inventory_to_string(self, inv):
        """Just call write_inventory with a StringIO and return the value.

        :param inv: the inventory to write.
        :return: everything ``write_inventory`` wrote, as one string.
        """
        sio = cStringIO.StringIO()
        self.write_inventory(inv, sio)
        return sio.getvalue()

    def write_inventory(self, inv, f):
        """Write inventory to a file.

        :param inv: the inventory to write.
        :param f: the file to write.
        """
        # _encode_and_escape uses the module-level regex without checking
        # it, so it has to be compiled before any value is escaped.
        _ensure_utf8_re()
        output = []
        append = output.append
        self._append_inventory_root(append, inv)
        entries = inv.iter_entries()
        # The first entry yielded is the root; it has already been written
        # as the <inventory> tag itself, so skip it.
        entries.next()
        for path, ie in entries:
            self._append_entry(append, ie)
        append('</inventory>\n')
        f.writelines(output)
        # The escape cache is deliberately left alone here: clearing it
        # would keep it from growing without bounds, but we may actually
        # not want to clear the cache.

    def _append_inventory_root(self, append, inv):
        """Append the inventory root to output.

        Writes the opening ``<inventory ...>`` tag.  ``file_id`` is only
        written when the root id is neither None nor ROOT_ID, and
        ``revision_id`` only when the inventory has one.

        :param append: callable taking one string fragment.
        :param inv: the inventory whose root is being written.
        """
        append('<inventory')
        if inv.root.file_id not in (None, ROOT_ID):
            # _encode_and_escape supplies the closing quote.
            append(' file_id="')
            append(_encode_and_escape(inv.root.file_id))
        append(' format="5"')
        if inv.revision_id is not None:
            append(' revision_id="')
            append(_encode_and_escape(inv.revision_id))
        append('>\n')

    def _append_entry(self, append, ie):
        """Convert InventoryEntry to XML element and append to output.

        Optional attributes (parent_id, revision, symlink_target, text_sha1,
        text_size) are skipped when unset; ``parent_id`` is also skipped
        when it equals ROOT_ID.

        :param append: callable taking one string fragment.
        :param ie: the inventory entry to write.
        :raises AssertionError: if ``ie.kind`` is not a versionable kind.
        """
        # Raised explicitly (as in _pack_entry) so the check is not
        # stripped when running with -O.
        if not InventoryEntry.versionable_kind(ie.kind):
            raise AssertionError('unsupported entry kind %s' % ie.kind)

        append("<")
        append(ie.kind)
        if ie.executable:
            append(' executable="yes"')
        # _encode_and_escape returns the value followed by its closing
        # quote, so only the opening half of each attribute is written here.
        append(' file_id="')
        append(_encode_and_escape(ie.file_id))
        append(' name="')
        append(_encode_and_escape(ie.name))
        if ie.parent_id != ROOT_ID:
            assert isinstance(ie.parent_id, basestring)
            append(' parent_id="')
            append(_encode_and_escape(ie.parent_id))
        if ie.revision is not None:
            append(' revision="')
            append(_encode_and_escape(ie.revision))
        if ie.symlink_target is not None:
            append(' symlink_target="')
            append(_encode_and_escape(ie.symlink_target))
        if ie.text_sha1 is not None:
            # Written under its own attribute name so that _unpack_entry,
            # which reads 'text_sha1', gets it back and 'text_size' is not
            # emitted twice.  The sha1 is not passed through the escaper,
            # so the closing quote is written explicitly.
            append(' text_sha1="')
            append(ie.text_sha1)
            append('"')
        if ie.text_size is not None:
            append(' text_size="%d"' % ie.text_size)
        append(" />\n")

    def _pack_inventory(self, inv):
        """Convert to XML Element.

        :param inv: the inventory to pack.
        :return: an ``inventory`` element with one child per non-root entry.
        """
        entries = inv.iter_entries()
        e = Element('inventory', format='5')
        e.text = '\n'
        # The first entry is the root; it is represented by the attributes
        # of the <inventory> element rather than by a child element.
        path, root = entries.next()
        if root.file_id not in (None, ROOT_ID):
            e.set('file_id', root.file_id)
        if inv.revision_id is not None:
            e.set('revision_id', inv.revision_id)
        for path, ie in entries:
            e.append(self._pack_entry(ie))
        return e

    def _pack_entry(self, ie):
        """Convert InventoryEntry to XML element.

        :param ie: the inventory entry to pack.
        :return: an element whose tag is ``ie.kind``.
        :raises AssertionError: if ``ie.kind`` is not a versionable kind.
        """
        if not InventoryEntry.versionable_kind(ie.kind):
            raise AssertionError('unsupported entry kind %s' % ie.kind)
        e = Element(ie.kind)
        e.set('name', ie.name)
        e.set('file_id', ie.file_id)

        if ie.text_size is not None:
            e.set('text_size', '%d' % ie.text_size)

        # Optional string attributes: only set the ones that have a value.
        for attr_name in ['text_sha1', 'revision', 'symlink_target']:
            value = getattr(ie, attr_name)
            if value is not None:
                e.set(attr_name, value)

        if ie.executable:
            e.set('executable', 'yes')

        # to be conservative, we don't externalize the root pointers
        # for now, leaving them as null in the xml form.  in a future
        # version it will be implied by nested elements.
        if ie.parent_id != ROOT_ID:
            assert isinstance(ie.parent_id, basestring)
            e.set('parent_id', ie.parent_id)
        e.tail = '\n'
        return e

    def _pack_revision(self, rev):
        """Revision object -> xml tree.

        :param rev: the revision to pack.
        :return: a ``revision`` element with a ``message`` child and, when
            the revision has them, ``parents`` and ``properties`` children.
        """
        root = Element('revision',
                       committer=rev.committer,
                       timestamp='%.9f' % rev.timestamp,
                       revision_id=rev.revision_id,
                       inventory_sha1=rev.inventory_sha1,
                       format='5',
                       )
        if rev.timezone is not None:
            root.set('timezone', str(rev.timezone))
        root.text = '\n'
        msg = SubElement(root, 'message')
        msg.text = rev.message
        msg.tail = '\n'
        if rev.parent_ids:
            pelts = SubElement(root, 'parents')
            pelts.tail = pelts.text = '\n'
            for parent_id in rev.parent_ids:
                assert isinstance(parent_id, basestring)
                p = SubElement(pelts, 'revision_ref')
                p.tail = '\n'
                p.set('revision_id', parent_id)
        if rev.properties:
            self._pack_revision_properties(rev, root)
        return root

    def _pack_revision_properties(self, rev, under_element):
        """Add a ``properties`` child holding the revision's properties.

        One ``property`` element is written per item of ``rev.properties``,
        in sorted order, with the name as an attribute and the value as the
        element text.

        :param rev: the revision whose properties are packed.
        :param under_element: the element that receives the new child.
        """
        top_elt = SubElement(under_element, 'properties')
        for prop_name, prop_value in sorted(rev.properties.items()):
            assert isinstance(prop_name, basestring)
            assert isinstance(prop_value, basestring)
            prop_elt = SubElement(top_elt, 'property')
            prop_elt.set('name', prop_name)
            prop_elt.text = prop_value
            prop_elt.tail = '\n'
        top_elt.tail = '\n'

    def _unpack_inventory(self, elt):
        """Construct from XML Element.

        :param elt: an ``inventory`` element.
        :return: an Inventory holding one entry per child element.
        :raises BzrError: if the element carries a format other than '5'.
        """
        assert elt.tag == 'inventory'
        root_id = elt.get('file_id') or ROOT_ID
        format_num = elt.get('format')
        # A missing format attribute is accepted; a present one must be 5.
        if format_num is not None and format_num != '5':
            raise BzrError("invalid format version %r on inventory"
                           % format_num)
        revision_id = elt.get('revision_id')
        if revision_id is not None:
            revision_id = cache_utf8.get_cached_unicode(revision_id)
        inv = Inventory(root_id, revision_id=revision_id)
        for child in elt:
            ie = self._unpack_entry(child)
            # Entries directly under the root are stored without an
            # explicit parent; point them at this inventory's actual root.
            if ie.parent_id == ROOT_ID:
                ie.parent_id = root_id
            inv.add(ie)
        return inv

    def _unpack_entry(self, elt):
        """Construct an inventory entry from one child of ``<inventory>``.

        The element tag selects the entry kind (directory, file or symlink).

        :param elt: the element to unpack.
        :return: the new inventory entry, with ``revision`` set (possibly
            to None).
        :raises AssertionError: if the tag is not a versionable kind.
        :raises BzrError: if the kind is versionable but not handled here.
        """
        kind = elt.tag
        if not InventoryEntry.versionable_kind(kind):
            raise AssertionError('unsupported entry kind %s' % kind)

        get_cached = cache_utf8.get_cached_unicode

        # The writers omit parent_id for children of the root, so a missing
        # attribute means the parent is the root.
        parent_id = elt.get('parent_id')
        if parent_id is None:
            parent_id = ROOT_ID
        parent_id = get_cached(parent_id)
        file_id = get_cached(elt.get('file_id'))
        name = elt.get('name')

        if kind == 'directory':
            ie = inventory.InventoryDirectory(file_id, name, parent_id)
        elif kind == 'file':
            ie = inventory.InventoryFile(file_id, name, parent_id)
            ie.text_sha1 = elt.get('text_sha1')
            if elt.get('executable') == 'yes':
                ie.executable = True
            v = elt.get('text_size')
            # Leaves a missing (None) size untouched, converts otherwise.
            ie.text_size = v and int(v)
        elif kind == 'symlink':
            ie = inventory.InventoryLink(file_id, name, parent_id)
            ie.symlink_target = elt.get('symlink_target')
        else:
            raise BzrError("unknown kind %r" % kind)
        revision = elt.get('revision')
        if revision is not None:
            revision = get_cached(revision)
        ie.revision = revision

        return ie

    def _unpack_revision(self, elt):
        """XML Element -> Revision object.

        :param elt: a ``revision`` element.
        :return: the Revision built from it.
        :raises BzrError: if the element carries a format other than '5'.
        """
        assert elt.tag == 'revision'
        format_num = elt.get('format')
        if format_num is not None and format_num != '5':
            raise BzrError("invalid format version %r on revision"
                           % format_num)
        get_cached = cache_utf8.get_cached_unicode
        rev = Revision(committer=elt.get('committer'),
                       timestamp=float(elt.get('timestamp')),
                       revision_id=get_cached(elt.get('revision_id')),
                       inventory_sha1=elt.get('inventory_sha1'),
                       )
        # A revision without parents may have no <parents> element at all.
        parents = elt.find('parents')
        if parents is not None:
            for p in parents:
                assert p.tag == 'revision_ref', \
                    "bad parent node tag %r" % p.tag
                rev.parent_ids.append(get_cached(p.get('revision_id')))
        self._unpack_revision_properties(elt, rev)
        v = elt.get('timezone')
        if v is None:
            # NOTE(review): a missing timezone attribute is read back as
            # offset 0 -- confirm this is what callers expect.
            rev.timezone = 0
        else:
            rev.timezone = int(v)
        rev.message = elt.findtext('message')  # text of <message>
        return rev

    def _unpack_revision_properties(self, elt, rev):
        """Unpack properties onto a revision.

        :param elt: the ``revision`` element; its ``properties`` child, if
            any, is read.
        :param rev: the revision to update; its ``properties`` must be empty.
        """
        props_elt = elt.find('properties')
        assert len(rev.properties) == 0
        if props_elt is None:
            return
        for prop_elt in props_elt:
            assert prop_elt.tag == 'property', \
                "bad tag under properties list: %r" % prop_elt.tag
            name = prop_elt.get('name')
            value = prop_elt.text
            # If a property had an empty value ('') cElementTree reads
            # that back as None, convert it back to '', so that all
            # properties have string values
            if value is None:
                value = ''
            assert name not in rev.properties, \
                "repeated property %r" % name
            rev.properties[name] = value
364
# Module-level instance of the version 5 serializer class.
serializer_v5 = Serializer_v5()