bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
4763.2.4
by John Arbash Meinel
 merge bzr.2.1 in preparation for NEWS entry.  | 
1  | 
# Copyright (C) 2005-2010 Canonical Ltd
 | 
| 
1887.1.1
by Adeodato Simó
 Do not separate paragraphs in the copyright statement with blank lines,  | 
2  | 
#
 | 
| 
1
by mbp at sourcefrog
 import from baz patch-364  | 
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
| 
1887.1.1
by Adeodato Simó
 Do not separate paragraphs in the copyright statement with blank lines,  | 
7  | 
#
 | 
| 
1
by mbp at sourcefrog
 import from baz patch-364  | 
8  | 
# This program is distributed in the hope that it will be useful,
 | 
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
| 
1887.1.1
by Adeodato Simó
 Do not separate paragraphs in the copyright statement with blank lines,  | 
12  | 
#
 | 
| 
1
by mbp at sourcefrog
 import from baz patch-364  | 
13  | 
# You should have received a copy of the GNU General Public License
 | 
14  | 
# along with this program; if not, write to the Free Software
 | 
|
| 
4183.7.1
by Sabin Iacob
 update FSF mailing address  | 
15  | 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
| 
1
by mbp at sourcefrog
 import from baz patch-364  | 
16  | 
|
17  | 
"""XML externalization support."""
 | 
|
18  | 
||
| 
48
by Martin Pool
 witty comment  | 
19  | 
# "XML is like violence: if it doesn't solve your problem, you aren't
 | 
20  | 
# using enough of it." -- various
 | 
|
21  | 
||
| 
1180
by Martin Pool
 - start splitting code for xml (de)serialization away from objects  | 
22  | 
# importing this module is fairly slow because it has to load several
 | 
23  | 
# ElementTree bits
 | 
|
24  | 
||
| 
5340.11.1
by Martin
 Remove monkey patching of private ElementTree escaping functions entirely  | 
25  | 
import re  | 
26  | 
||
| 
4237.3.1
by Jelmer Vernooij
 Add new module with generic serializer information; keep XML-specific bits in  | 
27  | 
from bzrlib.serializer import Serializer  | 
| 
5121.2.4
by Jelmer Vernooij
 Remove more unused imports.  | 
28  | 
from bzrlib.trace import mutter  | 
| 
1248
by Martin Pool
 - new weave based cleanup [broken]  | 
29  | 
|
| 
802
by Martin Pool
 - Remove XMLMixin class in favour of simple pack_xml, unpack_xml functions  | 
30  | 
try:  | 
| 
2039.2.1
by Martin Pool
 Load python2.5's ElementTree if present  | 
31  | 
try:  | 
32  | 
        # it's in this package in python2.5
 | 
|
33  | 
from xml.etree.cElementTree import (ElementTree, SubElement, Element,  | 
|
34  | 
XMLTreeBuilder, fromstring, tostring)  | 
|
35  | 
import xml.etree as elementtree  | 
|
| 
4797.66.1
by Martin
 Prevent AttributeError in xml_serializer on certain cElementTree setups  | 
36  | 
        # Also import ElementTree module so monkey-patching below always works
 | 
37  | 
import xml.etree.ElementTree  | 
|
| 
2039.2.1
by Martin Pool
 Load python2.5's ElementTree if present  | 
38  | 
except ImportError:  | 
39  | 
from cElementTree import (ElementTree, SubElement, Element,  | 
|
40  | 
XMLTreeBuilder, fromstring, tostring)  | 
|
| 
3475.1.2
by John Arbash Meinel
 Fix missing import  | 
41  | 
import elementtree.ElementTree  | 
| 
2029.2.1
by Marien Zwart
 Handle the different exception (non-c)ElementTree raises.  | 
42  | 
ParseError = SyntaxError  | 
| 
802
by Martin Pool
 - Remove XMLMixin class in favour of simple pack_xml, unpack_xml functions  | 
43  | 
except ImportError:  | 
| 
1185.33.68
by Martin Pool
 Emit warning to trace file only if using cElementTree.  | 
44  | 
mutter('WARNING: using slower ElementTree; consider installing cElementTree'  | 
45  | 
" and make sure it's on your PYTHONPATH")  | 
|
| 
2039.2.1
by Martin Pool
 Load python2.5's ElementTree if present  | 
46  | 
    # this copy is shipped with bzr
 | 
| 
1227
by Martin Pool
 - methods to deserialize objects from strings  | 
47  | 
from util.elementtree.ElementTree import (ElementTree, SubElement,  | 
| 
1248
by Martin Pool
 - new weave based cleanup [broken]  | 
48  | 
Element, XMLTreeBuilder,  | 
49  | 
fromstring, tostring)  | 
|
| 
1772.1.1
by mbp at sourcefrog
 Fix up loading of fallback ElementTree  | 
50  | 
import util.elementtree as elementtree  | 
| 
2029.2.1
by Marien Zwart
 Handle the different exception (non-c)ElementTree raises.  | 
51  | 
from xml.parsers.expat import ExpatError as ParseError  | 
| 
802
by Martin Pool
 - Remove XMLMixin class in favour of simple pack_xml, unpack_xml functions  | 
52  | 
|
| 
6355.1.1
by Jelmer Vernooij
 Move some utility functions to xml_serializer.  | 
53  | 
from bzrlib import (  | 
54  | 
cache_utf8,  | 
|
| 
6355.1.3
by Jelmer Vernooij
 Split out more stuff.  | 
55  | 
inventory,  | 
| 
6355.1.1
by Jelmer Vernooij
 Move some utility functions to xml_serializer.  | 
56  | 
lazy_regex,  | 
57  | 
errors,  | 
|
58  | 
    )
 | 
|
| 
1180
by Martin Pool
 - start splitting code for xml (de)serialization away from objects  | 
59  | 
|
60  | 
||
| 
4237.3.1
by Jelmer Vernooij
 Add new module with generic serializer information; keep XML-specific bits in  | 
61  | 
class XMLSerializer(Serializer):
    """Abstract XML object serialize/deserialize.

    Concrete subclasses supply ``_unpack_inventory``, ``_pack_revision`` and
    ``_unpack_revision``; this class only wires them to ElementTree parsing
    and writing.
    """

    squashes_xml_invalid_characters = True

    def read_inventory_from_string(self, xml_string, revision_id=None,
                                   entry_cache=None, return_from_cache=False):
        """Read xml_string into an inventory object.

        :param xml_string: The xml to read.
        :param revision_id: If not-None, the expected revision id of the
            inventory. Some serialisers use this to set the results' root
            revision. This should be supplied for deserialising all
            from-repository inventories so that xml5 inventories that were
            serialised without a revision identifier can be given the right
            revision id (but not for working tree inventories where users can
            edit the data without triggering checksum errors or anything).
        :param entry_cache: An optional cache of InventoryEntry objects. If
            supplied we will look up entries via (file_id, revision_id) which
            should map to a valid InventoryEntry (File/Directory/etc) object.
        :param return_from_cache: Return entries directly from the cache,
            rather than copying them first. This is only safe if the caller
            promises not to mutate the returned inventory entries, but it can
            make some operations significantly faster.
        :raise UnexpectedInventoryFormat: If parsing or unpacking raises
            ParseError.
        """
        try:
            root = fromstring(xml_string)
            return self._unpack_inventory(
                root, revision_id,
                entry_cache=entry_cache,
                return_from_cache=return_from_cache)
        except ParseError as err:
            raise errors.UnexpectedInventoryFormat(err)

    def read_inventory(self, f, revision_id=None):
        """Read an inventory from the file object f, then close f.

        :param f: File-like object holding the XML; it is always closed
            before this method returns or raises.
        :param revision_id: Accepted but not forwarded, see the note below.
        :raise UnexpectedInventoryFormat: If parsing or unpacking raises
            ParseError.
        """
        try:
            try:
                root = self._read_element(f)
                # NOTE(review): the revision_id argument is ignored and None
                # is always passed on. Presumably deliberate for working
                # tree inventories - confirm before changing.
                return self._unpack_inventory(root, revision_id=None)
            finally:
                f.close()
        except ParseError as err:
            raise errors.UnexpectedInventoryFormat(err)

    def write_revision(self, rev, f):
        """Pack rev into an element and write it to the file object f."""
        packed = self._pack_revision(rev)
        self._write_element(packed, f)

    def write_revision_to_string(self, rev):
        """Return rev serialised as a string with a trailing newline."""
        packed = self._pack_revision(rev)
        return tostring(packed) + '\n'

    def read_revision(self, f):
        """Parse the file object f and unpack the result as a revision."""
        root = self._read_element(f)
        return self._unpack_revision(root)

    def read_revision_from_string(self, xml_string):
        """Parse xml_string and unpack the result as a revision."""
        root = fromstring(xml_string)
        return self._unpack_revision(root)

    def _write_element(self, elt, f):
        """Write elt to f as utf-8 encoded XML followed by a newline."""
        tree = ElementTree(elt)
        tree.write(f, 'utf-8')
        f.write('\n')

    def _read_element(self, f):
        """Parse the file object f and return what ElementTree.parse gives."""
        tree = ElementTree()
        return tree.parse(f)
|
| 
1713.1.12
by Robert Collins
 Improve serialisation of xml performance by overriding elementree's escape routines.  | 
121  | 
|
122  | 
||
| 
4222.1.1
by Jelmer Vernooij
 Make function for escaping invalid XML characters public.  | 
123  | 
def escape_invalid_chars(message):
    """Escape the XML-invalid characters in a commit message.

    :param message: Commit message to escape, or None
    :return: tuple with escaped message and number of characters escaped;
        ``(None, 0)`` when message is None
    """
    if message is None:
        return None, 0

    def _escape_run(match):
        # Replace a whole run of invalid characters by its unicode_escape
        # encoded form.
        return match.group(0).encode('unicode_escape')

    # Python strings can include characters that can't be
    # represented in well-formed XML; escape characters that
    # aren't listed in the XML specification
    # (http://www.w3.org/TR/REC-xml/#NT-Char).
    return re.subn(u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
                   _escape_run,
                   message)
|
| 
6355.1.1
by Jelmer Vernooij
 Move some utility functions to xml_serializer.  | 
138  | 
|
139  | 
||
| 
6355.1.3
by Jelmer Vernooij
 Split out more stuff.  | 
140  | 
def get_utf8_or_ascii(a_str, _encode_utf8=cache_utf8.encode):
    """Return a cached version of the string.

    cElementTree will return a plain string if the XML is plain ascii. It only
    returns Unicode when it needs to. We want to work in utf-8 strings. So a
    plain string is simply interned, while a Unicode string is passed
    through the utf-8 encoder.

    :param a_str: An 8-bit string or Unicode as returned by
                  cElementTree.Element.get()
    :param _encode_utf8: Encoder applied to Unicode input; bound as a default
                  argument, not meant to be supplied by callers.
    :return: A utf-8 encoded 8-bit string.
    """
    # This is fairly optimized because we know what cElementTree does, this is
    # not meant as a generic function for all cases. Because it is possible for
    # an 8-bit string to not be ascii or valid utf8.
    if a_str.__class__ is not unicode:
        return intern(a_str)
    return _encode_utf8(a_str)
|
159  | 
||
160  | 
||
161  | 
# Pattern applied to plain (8-bit) strings by encode_and_escape(): matches a
# single XML metacharacter (& < > ' ") or a run of bytes with the high bit set.
_utf8_re = lazy_regex.lazy_compile('[&<>\'\"]|[\x80-\xff]+')
# Pattern applied to unicode strings by encode_and_escape(): matches a single
# XML metacharacter or a single character in the range U+0080-U+FFFF.
_unicode_re = lazy_regex.lazy_compile(u'[&<>\'\"\u0080-\uffff]')
|
163  | 
||
164  | 
||
165  | 
# Maps each XML metacharacter to its predefined entity reference. The values
# must be the escaped entity text, not the literal character, otherwise the
# escape functions that look characters up here would emit them unescaped.
_xml_escape_map = {
    "&": '&amp;',
    "'": "&apos;", # FIXME: overkill
    "\"": "&quot;",
    "<": "&lt;",
    ">": "&gt;",
    }
 | 
|
172  | 
||
173  | 
||
174  | 
def _unicode_escape_replace(match, _map=_xml_escape_map):
    """Replace a string of non-ascii, non XML safe characters with their escape

    This will escape both Standard XML escapes, like <>"', etc.
    As well as escaping non ascii characters, because ElementTree did.
    This helps us remain compatible to older versions of bzr. We may change
    our policy in the future, though.

    :param match: Regex match object for the text to replace.
    :param _map: Table of standard XML escapes; bound as a default argument.
    :return: The entry from _map, or a numeric character reference
        ``&#N;`` when the matched text is not in _map.
    """
    # jam 20060816 Benchmarks show that try/KeyError is faster if you
    # expect the entity to rarely miss. There is about a 10% difference
    # in overall time. But if you miss frequently, then if None is much
    # faster. For our use case, we *rarely* have a revision id, file id
    # or path name that is unicode. So use try/KeyError.
    matched = match.group()
    try:
        return _map[matched]
    except KeyError:
        return "&#%d;" % ord(matched)
|
191  | 
||
192  | 
||
193  | 
def _utf8_escape_replace(match, _map=_xml_escape_map):
    """Escape utf8 characters into XML safe ones.

    This uses 2 tricks. It is either escaping "standard" characters, like "&<>,
    or it is handling characters with the high-bit set. For ascii characters,
    we just lookup the replacement in the dictionary. For everything else, we
    decode back into Unicode, and then use the XML escape code.

    :param match: Regex match object for the text to replace.
    :param _map: Table of standard XML escapes; bound as a default argument.
    :return: The entry from _map, or the matched bytes decoded as utf8 with
        every character written as a numeric reference ``&#N;``.
    """
    matched = match.group()
    try:
        return _map[matched]
    except KeyError:
        references = ['&#%d;' % ord(uni_chr)
                      for uni_chr in matched.decode('utf8')]
        return ''.join(references)
|
206  | 
||
207  | 
||
208  | 
# Module-level cache: input string => escaped text (with trailing quote).
_to_escaped_map = {}


def encode_and_escape(unicode_or_utf8_str, _map=_to_escaped_map):
    """Encode the string into utf8, and escape invalid XML characters

    The returned text has a closing double quote appended, so it can be
    dropped straight after an opening ``attr="`` in a template. Results are
    remembered in _map and returned from there on later calls.

    :param unicode_or_utf8_str: Unicode string, or plain string assumed to
        already be utf-8.
    :param _map: Cache of earlier results; bound as a default argument.
    :return: The escaped text followed by '"'.
    """
    # We frequently get entities we have not seen before, so it is better
    # to check if None, rather than try/KeyError
    cached = _map.get(unicode_or_utf8_str)
    if cached is not None:
        return cached

    if unicode_or_utf8_str.__class__ is unicode:
        # The alternative policy is to do a regular UTF8 encoding
        # and then escape only XML meta characters.
        # Performance is equivalent once you use cache_utf8. *However*
        # this makes the serialized texts incompatible with old versions
        # of bzr. So no net gain. (Perhaps the read code would handle utf8
        # better than entity escapes, but cElementTree seems to do just fine
        # either way)
        body = _unicode_re.sub(_unicode_escape_replace, unicode_or_utf8_str)
        escaped = str(body) + '"'
    else:
        # Plain strings are considered to already be in utf-8 so we do a
        # slightly different method for escaping.
        body = _utf8_re.sub(_utf8_escape_replace, unicode_or_utf8_str)
        escaped = body + '"'

    _map[unicode_or_utf8_str] = escaped
    return escaped
|
233  | 
||
234  | 
||
235  | 
def _clear_cache():
    """Clean out the unicode => escaped map"""
    # Empties the module-level dict in place, so encode_and_escape(), which
    # holds the same dict object as its default _map argument, sees the
    # cleared cache too.
    _to_escaped_map.clear()
|
| 
6355.1.3
by Jelmer Vernooij
 Split out more stuff.  | 
238  | 
|
239  | 
||
| 
6355.1.6
by Jelmer Vernooij
 Move core inventory code to xml_serializer.  | 
240  | 
def unpack_inventory_entry(elt, entry_cache=None, return_from_cache=False):
    """Build an inventory entry from one XML element.

    :param elt: XML element; its tag is the entry kind and its attributes
        carry file_id, revision, parent_id, name and kind-specific data.
    :param entry_cache: Optional mapping keyed by (file_id, revision). It is
        consulted before decoding and updated with a copy of each newly
        decoded entry. Only used when the element has a revision attribute.
    :param return_from_cache: If True, a cached non-directory entry is
        returned as-is instead of being copied. Directory entries are always
        copied.
    :return: The inventory entry.
    :raise AssertionError: If the kind is not a versionable kind.
    :raise UnsupportedInventoryKind: If the kind is versionable but is not
        directory, file or symlink.
    """
    elt_get = elt.get
    file_id = elt_get('file_id')
    revision = elt_get('revision')
    # Check and see if we have already unpacked this exact entry
    # Some timings for "repo.revision_trees(last_100_revs)"
    #               bzr     mysql
    #   unmodified  4.1s    40.8s
    #   using lru   3.5s
    #   using fifo  2.83s   29.1s
    #   lru._cache  2.8s
    #   dict        2.75s   26.8s
    #   inv.add     2.5s    26.0s
    #   no_copy     2.00s   20.5s
    #   no_c,dict   1.95s   18.0s
    # Note that a cache of 10k nodes is more than sufficient to hold all of
    # the inventory for the last 100 revs for bzr, but not for mysql (20k
    # is enough for mysql, which saves the same 2s as using a dict)

    # Breakdown of mysql using time.clock()
    #   4.1s    2 calls to element.get for file_id, revision_id
    #   4.5s    cache_hit lookup
    #   7.1s    InventoryFile.copy()
    #   2.4s    InventoryDirectory.copy()
    #   0.4s    decoding unique entries
    #   1.6s    decoding entries after FIFO fills up
    #   0.8s    Adding nodes to FIFO (including flushes)
    #   0.1s    cache miss lookups
    # Using an LRU cache
    #   4.1s    2 calls to element.get for file_id, revision_id
    #   9.9s    cache_hit lookup
    #   10.8s   InventoryEntry.copy()
    #   0.3s    cache miss lookups
    #   1.2s    decoding entries
    #   1.0s    adding nodes to LRU
    if entry_cache is not None and revision is not None:
        # The key uses the attribute values exactly as read from the element.
        key = (file_id, revision)
        try:
            cached_ie = entry_cache[key]
        except KeyError:
            pass
        else:
            # Only copying directory entries drops us 2.85s => 2.35s
            if return_from_cache and cached_ie.kind != 'directory':
                return cached_ie
            # We copy it, because some operations may mutate it
            return cached_ie.copy()

    kind = elt.tag
    if not inventory.InventoryEntry.versionable_kind(kind):
        raise AssertionError('unsupported entry kind %s' % kind)

    file_id = get_utf8_or_ascii(file_id)
    if revision is not None:
        revision = get_utf8_or_ascii(revision)
    parent_id = elt_get('parent_id')
    if parent_id is not None:
        parent_id = get_utf8_or_ascii(parent_id)

    if kind == 'directory':
        ie = inventory.InventoryDirectory(file_id, elt_get('name'), parent_id)
    elif kind == 'file':
        ie = inventory.InventoryFile(file_id, elt_get('name'), parent_id)
        ie.text_sha1 = elt_get('text_sha1')
        if elt_get('executable') == 'yes':
            ie.executable = True
        raw_size = elt_get('text_size')
        # A missing or empty attribute is stored unchanged, anything else
        # is converted to an int.
        ie.text_size = int(raw_size) if raw_size else raw_size
    elif kind == 'symlink':
        ie = inventory.InventoryLink(file_id, elt_get('name'), parent_id)
        ie.symlink_target = elt_get('symlink_target')
    else:
        raise errors.UnsupportedInventoryKind(kind)
    ie.revision = revision

    if revision is not None and entry_cache is not None:
        # We cache a copy() because callers like to mutate objects, and
        # that would cause the item in cache to mutate as well.
        # This has a small effect on many-inventory performance, because
        # the majority fraction is spent in cache hits, not misses.
        entry_cache[key] = ie.copy()

    return ie
|
| 
6355.1.6
by Jelmer Vernooij
 Move core inventory code to xml_serializer.  | 
330  | 
|
331  | 
||
| 
6355.1.9
by Jelmer Vernooij
 Review feedback - pass entry_cache and_return_from_cache to unpack_inventory_flat.  | 
332  | 
def unpack_inventory_flat(elt, format_num, unpack_entry,
                          entry_cache=None, return_from_cache=False):
    """Unpack a flat XML inventory.

    :param elt: XML element for the inventory
    :param format_num: Expected format number
    :param unpack_entry: Function for unpacking inventory entries; called
        as ``unpack_entry(child, entry_cache, return_from_cache)``
    :param entry_cache: Passed through to unpack_entry
    :param return_from_cache: Passed through to unpack_entry
    :return: An inventory
    :raise UnexpectedInventoryFormat: When unexpected elements or data is
        encountered
    """
    root_tag = elt.tag
    if root_tag != 'inventory':
        raise errors.UnexpectedInventoryFormat('Root tag is %r' % root_tag)

    found_format = elt.get('format')
    if found_format != format_num:
        raise errors.UnexpectedInventoryFormat('Invalid format version %r'
                                               % found_format)

    revision_id = elt.get('revision_id')
    if revision_id is not None:
        revision_id = cache_utf8.encode(revision_id)

    inv = inventory.Inventory(root_id=None, revision_id=revision_id)
    for child in elt:
        inv.add(unpack_entry(child, entry_cache, return_from_cache))
    return inv
|
357  | 
||
358  | 
||
| 
6355.1.7
by Jelmer Vernooij
 Fix tests.  | 
359  | 
def serialize_inventory_flat(inv, append, root_id, supported_kinds, working):
    """Serialize an inventory to a flat XML file.

    Only the non-root entries and the closing ``</inventory>`` tag are
    written; this function does not emit an opening tag.

    :param inv: Inventory to serialize
    :param append: Function for writing a line of output
    :param root_id: Entries whose parent_id equals this value are written
        without a parent_id attribute
    :param supported_kinds: Kinds the caller supports; only consulted for
        tree-reference entries
    :param working: If True skip history data - revision, text_sha1,
        text_size, reference_revision, symlink_target.
    :raise UnsupportedInventoryKind: For a tree-reference entry when that
        kind is not in supported_kinds, or for any kind other than file,
        directory, symlink or tree-reference.
    """
    entries = inv.iter_entries()
    # Skip the root
    _root_path, _root_ie = next(entries)
    for path, ie in entries:
        # encode_and_escape() returns its value with the closing '"' already
        # appended, which is why the templates below never close the quote
        # after an escaped "%s".
        if ie.parent_id != root_id:
            parent_str = ' parent_id="'
            parent_id = encode_and_escape(ie.parent_id)
        else:
            parent_str = ''
            parent_id = ''
        if ie.kind == 'file':
            if ie.executable:
                executable = ' executable="yes"'
            else:
                executable = ''
            if not working:
                append('<file%s file_id="%s name="%s%s%s revision="%s '
                       'text_sha1="%s" text_size="%d" />\n' % (
                           executable, encode_and_escape(ie.file_id),
                           encode_and_escape(ie.name), parent_str, parent_id,
                           encode_and_escape(ie.revision), ie.text_sha1,
                           ie.text_size))
            else:
                append('<file%s file_id="%s name="%s%s%s />\n' % (
                    executable, encode_and_escape(ie.file_id),
                    encode_and_escape(ie.name), parent_str, parent_id))
        elif ie.kind == 'directory':
            if not working:
                append('<directory file_id="%s name="%s%s%s revision="%s '
                       '/>\n' % (
                           encode_and_escape(ie.file_id),
                           encode_and_escape(ie.name),
                           parent_str, parent_id,
                           encode_and_escape(ie.revision)))
            else:
                append('<directory file_id="%s name="%s%s%s />\n' % (
                    encode_and_escape(ie.file_id),
                    encode_and_escape(ie.name),
                    parent_str, parent_id))
        elif ie.kind == 'symlink':
            if not working:
                append('<symlink file_id="%s name="%s%s%s revision="%s '
                       'symlink_target="%s />\n' % (
                           encode_and_escape(ie.file_id),
                           encode_and_escape(ie.name),
                           parent_str, parent_id,
                           encode_and_escape(ie.revision),
                           encode_and_escape(ie.symlink_target)))
            else:
                append('<symlink file_id="%s name="%s%s%s />\n' % (
                    encode_and_escape(ie.file_id),
                    encode_and_escape(ie.name),
                    parent_str, parent_id))
        elif ie.kind == 'tree-reference':
            if ie.kind not in supported_kinds:
                raise errors.UnsupportedInventoryKind(ie.kind)
            if not working:
                append('<tree-reference file_id="%s name="%s%s%s '
                       'revision="%s reference_revision="%s />\n' % (
                           encode_and_escape(ie.file_id),
                           encode_and_escape(ie.name),
                           parent_str, parent_id,
                           encode_and_escape(ie.revision),
                           encode_and_escape(ie.reference_revision)))
            else:
                append('<tree-reference file_id="%s name="%s%s%s />\n' % (
                    encode_and_escape(ie.file_id),
                    encode_and_escape(ie.name),
                    parent_str, parent_id))
        else:
            raise errors.UnsupportedInventoryKind(ie.kind)
    append('</inventory>\n')