22
23
# importing this module is fairly slow because it has to load several
25
from bzrlib.serializer import Serializer
26
from bzrlib.trace import mutter
30
# it's in this package in python2.5
31
from xml.etree.cElementTree import (ElementTree, SubElement, Element,
32
XMLTreeBuilder, fromstring, tostring)
33
import xml.etree as elementtree
35
from cElementTree import (ElementTree, SubElement, Element,
36
XMLTreeBuilder, fromstring, tostring)
37
import elementtree.ElementTree
38
ParseError = SyntaxError
27
from util.cElementTree import ElementTree, SubElement, Element
39
28
except ImportError:
40
mutter('WARNING: using slower ElementTree; consider installing cElementTree'
41
" and make sure it's on your PYTHONPATH")
42
# this copy is shipped with bzr
43
from util.elementtree.ElementTree import (ElementTree, SubElement,
44
Element, XMLTreeBuilder,
46
import util.elementtree as elementtree
47
from xml.parsers.expat import ExpatError as ParseError
49
from bzrlib import errors
52
class XMLSerializer(Serializer):
53
"""Abstract XML object serialize/deserialize"""
55
squashes_xml_invalid_characters = True
57
def read_inventory_from_string(self, xml_string, revision_id=None,
58
entry_cache=None, return_from_cache=False):
59
"""Read xml_string into an inventory object.
61
:param xml_string: The xml to read.
62
:param revision_id: If not-None, the expected revision id of the
63
inventory. Some serialisers use this to set the results' root
64
revision. This should be supplied for deserialising all
65
from-repository inventories so that xml5 inventories that were
66
serialised without a revision identifier can be given the right
67
revision id (but not for working tree inventories where users can
68
edit the data without triggering checksum errors or anything).
69
:param entry_cache: An optional cache of InventoryEntry objects. If
70
supplied we will look up entries via (file_id, revision_id) which
71
should map to a valid InventoryEntry (File/Directory/etc) object.
72
:param return_from_cache: Return entries directly from the cache,
73
rather than copying them first. This is only safe if the caller
74
promises not to mutate the returned inventory entries, but it can
75
make some operations significantly faster.
78
return self._unpack_inventory(fromstring(xml_string), revision_id,
79
entry_cache=entry_cache,
80
return_from_cache=return_from_cache)
82
raise errors.UnexpectedInventoryFormat(e)
84
def read_inventory(self, f, revision_id=None):
86
return self._unpack_inventory(self._read_element(f),
89
raise errors.UnexpectedInventoryFormat(e)
29
from util.elementtree.ElementTree import ElementTree, SubElement, Element
31
from bzrlib.inventory import ROOT_ID, Inventory, InventoryEntry
32
from bzrlib.revision import Revision, RevisionReference
33
from bzrlib.errors import BzrError
36
class Serializer(object):
37
"""Abstract object serialize/deserialize"""
38
def write_inventory(self, inv, f):
    """Pack an inventory into an XML element and write it to a file.

    :param inv: The inventory handed to ``_pack_inventory``.
    :param f: The file object handed to ``_write_element``.
    """
    self._write_element(self._pack_inventory(inv), f)
43
def read_inventory(self, f):
    """Read an XML element from a file and unpack it as an inventory.

    :param f: The file object handed to ``_read_element``.
    :return: The result of ``_unpack_inventory`` on the element read.
    """
    root_element = self._read_element(f)
    return self._unpack_inventory(root_element)
91
46
def write_revision(self, rev, f):
    """Pack a revision into an XML element and write it to a file.

    :param rev: The revision handed to ``_pack_revision``.
    :param f: The file object handed to ``_write_element``.
    """
    packed = self._pack_revision(rev)
    self._write_element(packed, f)
94
def write_revision_to_string(self, rev):
    """Serialize a revision to a string with a trailing newline.

    :param rev: The revision handed to ``_pack_revision``.
    :return: ``tostring`` of the packed element, plus ``'\\n'``.
    """
    element = self._pack_revision(rev)
    return tostring(element) + '\n'
97
49
def read_revision(self, f):
    """Read an XML element from a file and unpack it as a revision.

    :param f: The file object handed to ``_read_element``.
    :return: The result of ``_unpack_revision`` on the element read.
    """
    root_element = self._read_element(f)
    return self._unpack_revision(root_element)
100
def read_revision_from_string(self, xml_string):
    """Parse an XML string and unpack it as a revision.

    :param xml_string: The XML text handed to ``fromstring``.
    :return: The result of ``_unpack_revision`` on the parsed element.
    """
    parsed = fromstring(xml_string)
    return self._unpack_revision(parsed)
103
52
def _write_element(self, elt, f):
104
53
ElementTree(elt).write(f, 'utf-8')
108
57
return ElementTree().parse(f)
111
# performance tuning for elementtree's serialiser. This should be
112
# sent upstream - RBC 20060523.
113
# the functions here are patched into elementtree at runtime.
115
escape_re = re.compile("[&'\"<>]")
118
"'":"'", # FIXME: overkill
123
def _escape_replace(match, map=escape_map):
    """Look up the replacement text for one regex match.

    Passed as the substitution callback to ``escape_re.sub``.

    :param match: Match object; its whole matched text is the lookup key.
    :param map: Mapping from matched text to replacement text. The name
        shadows the builtin but is part of the existing signature.
    :return: ``map`` entry for the matched text.
    """
    matched_text = match.group(0)
    return map[matched_text]
126
def _escape_attrib(text, encoding=None, replace=None):
127
# escape attribute value
131
text = elementtree.ElementTree._encode(text, encoding)
133
return elementtree.ElementTree._encode_entity(text)
135
return escape_re.sub(_escape_replace, text)
137
text = replace(text, "&", "&")
138
text = replace(text, "'", "'") # FIXME: overkill
139
text = replace(text, "\"", """)
140
text = replace(text, "<", "<")
141
text = replace(text, ">", ">")
143
except (TypeError, AttributeError):
144
elementtree.ElementTree._raise_serialization_error(text)
146
elementtree.ElementTree._escape_attrib = _escape_attrib
148
escape_cdata_re = re.compile("[&<>]")
154
def _escape_cdata_replace(match, map=escape_cdata_map):
    """Look up the replacement text for one regex match in character data.

    Passed as the substitution callback to ``escape_cdata_re.sub``.

    :param match: Match object; its whole matched text is the lookup key.
    :param map: Mapping from matched text to replacement text. The name
        shadows the builtin but is part of the existing signature.
    :return: ``map`` entry for the matched text.
    """
    matched_text = match.group(0)
    return map[matched_text]
157
def _escape_cdata(text, encoding=None, replace=None):
158
# escape character data
162
text = elementtree.ElementTree._encode(text, encoding)
164
return elementtree.ElementTree._encode_entity(text)
166
return escape_cdata_re.sub(_escape_cdata_replace, text)
168
text = replace(text, "&", "&")
169
text = replace(text, "<", "<")
170
text = replace(text, ">", ">")
172
except (TypeError, AttributeError):
173
elementtree.ElementTree._raise_serialization_error(text)
175
elementtree.ElementTree._escape_cdata = _escape_cdata
178
def escape_invalid_chars(message):
179
"""Escape the XML-invalid characters in a commit message.
181
:param message: Commit message to escape
182
:return: tuple with escaped message and number of characters escaped
186
# Python strings can include characters that can't be
187
# represented in well-formed XML; escape characters that
188
# aren't listed in the XML specification
189
# (http://www.w3.org/TR/REC-xml/#NT-Char).
190
return re.subn(u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
191
lambda match: match.group(0).encode('unicode_escape'),
61
class _Serializer_v4(Serializer):
62
"""Version 0.0.4 serializer
64
You should use the serializer_v4 singleton."""
68
def _pack_inventory(self, inv):
69
"""Convert to XML Element"""
70
e = Element('inventory')
72
if inv.root.file_id not in (None, ROOT_ID):
73
e.set('file_id', inv.root.file_id)
74
for path, ie in inv.iter_entries():
75
e.append(self._pack_entry(ie))
79
def _pack_entry(self, ie):
80
"""Convert InventoryEntry to XML element"""
82
e.set('name', ie.name)
83
e.set('file_id', ie.file_id)
84
e.set('kind', ie.kind)
86
if ie.text_size != None:
87
e.set('text_size', '%d' % ie.text_size)
89
for f in ['text_id', 'text_sha1']:
94
# to be conservative, we don't externalize the root pointers
95
# for now, leaving them as null in the xml form. in a future
96
# version it will be implied by nested elements.
97
if ie.parent_id != ROOT_ID:
98
assert isinstance(ie.parent_id, basestring)
99
e.set('parent_id', ie.parent_id)
106
def _unpack_inventory(self, elt):
107
"""Construct from XML Element
109
assert elt.tag == 'inventory'
110
root_id = elt.get('file_id') or ROOT_ID
111
inv = Inventory(root_id)
113
ie = self._unpack_entry(e)
114
if ie.parent_id == ROOT_ID:
115
ie.parent_id = root_id
120
def _unpack_entry(self, elt):
121
assert elt.tag == 'entry'
123
## original format inventories don't have a parent_id for
124
## nodes in the root directory, but it's cleaner to use one
126
parent_id = elt.get('parent_id')
127
if parent_id == None:
130
ie = InventoryEntry(elt.get('file_id'),
134
ie.text_id = elt.get('text_id')
135
ie.text_sha1 = elt.get('text_sha1')
137
## mutter("read inventoryentry: %r" % (elt.attrib))
139
v = elt.get('text_size')
140
ie.text_size = v and int(v)
145
def _pack_revision(self, rev):
146
"""Revision object -> xml tree"""
147
root = Element('revision',
148
committer = rev.committer,
149
timestamp = '%.9f' % rev.timestamp,
150
revision_id = rev.revision_id,
151
inventory_id = rev.inventory_id,
152
inventory_sha1 = rev.inventory_sha1,
155
root.set('timezone', str(rev.timezone))
158
msg = SubElement(root, 'message')
159
msg.text = rev.message
163
pelts = SubElement(root, 'parents')
164
pelts.tail = pelts.text = '\n'
165
for rr in rev.parents:
166
assert isinstance(rr, RevisionReference)
167
p = SubElement(pelts, 'revision_ref')
169
assert rr.revision_id
170
p.set('revision_id', rr.revision_id)
172
p.set('revision_sha1', rr.revision_sha1)
177
def _unpack_revision(self, elt):
178
"""XML Element -> Revision object"""
180
# <changeset> is deprecated...
181
if elt.tag not in ('revision', 'changeset'):
182
raise BzrError("unexpected tag in revision file: %r" % elt)
184
rev = Revision(committer = elt.get('committer'),
185
timestamp = float(elt.get('timestamp')),
186
revision_id = elt.get('revision_id'),
187
inventory_id = elt.get('inventory_id'),
188
inventory_sha1 = elt.get('inventory_sha1')
191
precursor = elt.get('precursor')
192
precursor_sha1 = elt.get('precursor_sha1')
194
pelts = elt.find('parents')
198
assert p.tag == 'revision_ref', \
199
"bad parent node tag %r" % p.tag
200
rev_ref = RevisionReference(p.get('revision_id'),
201
p.get('revision_sha1'))
202
rev.parents.append(rev_ref)
206
prec_parent = rev.parents[0].revision_id
207
assert prec_parent == precursor
209
# revisions written prior to 0.0.5 have a single precursor
210
# given as an attribute
211
rev_ref = RevisionReference(precursor, precursor_sha1)
212
rev.parents.append(rev_ref)
214
v = elt.get('timezone')
215
rev.timezone = v and int(v)
217
rev.message = elt.findtext('message') # text of <message>
222
class _Serializer_v5(Serializer):
223
"""Version 5 serializer
225
Packs objects into XML and vice versa.
227
You should use the serializer_v5 singleton."""
231
def _pack_inventory(self, inv):
232
"""Convert to XML Element"""
233
e = Element('inventory')
235
if inv.root.file_id not in (None, ROOT_ID):
236
e.set('file_id', inv.root.file_id)
237
for path, ie in inv.iter_entries():
238
e.append(self._pack_entry(ie))
242
def _pack_entry(self, ie):
243
"""Convert InventoryEntry to XML element"""
244
assert ie.kind == 'directory' or ie.kind == 'file'
246
e.set('name', ie.name)
247
e.set('file_id', ie.file_id)
249
if ie.text_size != None:
250
e.set('text_size', '%d' % ie.text_size)
252
for f in ['text_version', 'text_sha1', 'entry_version']:
257
# to be conservative, we don't externalize the root pointers
258
# for now, leaving them as null in the xml form. in a future
259
# version it will be implied by nested elements.
260
if ie.parent_id != ROOT_ID:
261
assert isinstance(ie.parent_id, basestring)
262
e.set('parent_id', ie.parent_id)
269
def _pack_revision(self, rev):
270
"""Revision object -> xml tree"""
271
root = Element('revision',
272
committer = rev.committer,
273
timestamp = '%.9f' % rev.timestamp,
274
revision_id = rev.revision_id,
275
inventory_id = rev.inventory_id,
276
inventory_sha1 = rev.inventory_sha1,
279
root.set('timezone', str(rev.timezone))
282
msg = SubElement(root, 'message')
283
msg.text = rev.message
287
pelts = SubElement(root, 'parents')
288
pelts.tail = pelts.text = '\n'
289
for rr in rev.parents:
290
assert isinstance(rr, RevisionReference)
291
p = SubElement(pelts, 'revision_ref')
293
assert rr.revision_id
294
p.set('revision_id', rr.revision_id)
300
def _unpack_inventory(self, elt):
301
"""Construct from XML Element
303
assert elt.tag == 'inventory'
304
root_id = elt.get('file_id') or ROOT_ID
305
inv = Inventory(root_id)
307
ie = self._unpack_entry(e)
308
if ie.parent_id == ROOT_ID:
309
ie.parent_id = root_id
314
def _unpack_entry(self, elt):
316
assert kind == 'directory' or kind == 'file'
318
parent_id = elt.get('parent_id')
319
if parent_id == None:
322
ie = InventoryEntry(elt.get('file_id'),
326
ie.text_version = elt.get('text_version')
327
ie.entry_version = elt.get('entry_version')
328
ie.text_sha1 = elt.get('text_sha1')
329
v = elt.get('text_size')
330
ie.text_size = v and int(v)
335
def _unpack_revision(self, elt):
336
"""XML Element -> Revision object"""
337
assert elt.tag == 'revision'
339
rev = Revision(committer = elt.get('committer'),
340
timestamp = float(elt.get('timestamp')),
341
revision_id = elt.get('revision_id'),
342
inventory_id = elt.get('inventory_id'),
343
inventory_sha1 = elt.get('inventory_sha1')
346
for p in elt.find('parents'):
347
assert p.tag == 'revision_ref', \
348
"bad parent node tag %r" % p.tag
349
rev_ref = RevisionReference(p.get('revision_id'))
350
rev.parents.append(rev_ref)
352
v = elt.get('timezone')
353
rev.timezone = v and int(v)
355
rev.message = elt.findtext('message') # text of <message>
360
"""singleton instance"""
361
# Module-level instance of _Serializer_v4 (the "Version 0.0.4" serializer).
serializer_v4 = _Serializer_v4()
363
# Module-level instance of _Serializer_v5 (the "Version 5" serializer).
serializer_v5 = _Serializer_v5()