/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2005-2010 Canonical Ltd
1773.4.1 by Martin Pool
Add pyflakes makefile target; fix many warnings
2
#
1189 by Martin Pool
- BROKEN: partial support for commit into weave
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
7
#
1189 by Martin Pool
- BROKEN: partial support for commit into weave
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
12
#
1189 by Martin Pool
- BROKEN: partial support for commit into weave
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1189 by Martin Pool
- BROKEN: partial support for commit into weave
16
1934.1.3 by John Arbash Meinel
[merge] robert's custom XML serializer, and cleanup for benchmarks and iter_entries() differences
17
import cStringIO
1189 by Martin Pool
- BROKEN: partial support for commit into weave
18
1911.2.6 by John Arbash Meinel
Cache revision ids and file ids as part of xml processing. A custom xml parser could just call decode/encode directly.
19
from bzrlib import (
20
    cache_utf8,
2100.3.1 by Aaron Bentley
Start roundtripping tree-reference entries
21
    errors,
1934.1.3 by John Arbash Meinel
[merge] robert's custom XML serializer, and cleanup for benchmarks and iter_entries() differences
22
    inventory,
5671.2.3 by Jelmer Vernooij
Move Repository._find_text_key_references_from_xml_inventory_lines onto the serializer.
23
    lazy_regex,
2598.5.2 by Aaron Bentley
Got all tests passing with Branch returning 'null:' for null revision
24
    revision as _mod_revision,
3882.6.3 by John Arbash Meinel
If we are going to thrash the inventory entry cache, increase its size.
25
    trace,
1911.2.6 by John Arbash Meinel
Cache revision ids and file ids as part of xml processing. A custom xml parser could just call decode/encode directly.
26
    )
4237.3.1 by Jelmer Vernooij
Add new module with generic serializer information; keep XML-specific bits in
27
from bzrlib.xml_serializer import (
28
    Element,
29
    SubElement,
30
    XMLSerializer,
6355.1.1 by Jelmer Vernooij
Move some utility functions to xml_serializer.
31
    encode_and_escape,
4416.5.1 by Jelmer Vernooij
Move squashing of XML-invalid characters to XMLSerializer.
32
    escape_invalid_chars,
6355.1.1 by Jelmer Vernooij
Move some utility functions to xml_serializer.
33
    get_utf8_or_ascii,
4237.3.1 by Jelmer Vernooij
Add new module with generic serializer information; keep XML-specific bits in
34
    )
5121.2.4 by Jelmer Vernooij
Remove more unused imports.
35
from bzrlib.inventory import InventoryEntry
1773.4.1 by Martin Pool
Add pyflakes makefile target; fix many warnings
36
from bzrlib.revision import Revision
1189 by Martin Pool
- BROKEN: partial support for commit into weave
37
from bzrlib.errors import BzrError
38
39
1934.1.4 by John Arbash Meinel
rewrite escaper to use xml numerical entities, rather than using encode('utf8')
40
_utf8_re = None
2249.5.10 by John Arbash Meinel
Make sure xml5 can handle unicode or utf8 strings
41
_unicode_re = None
5671.2.3 by Jelmer Vernooij
Move Repository._find_text_key_references_from_xml_inventory_lines onto the serializer.
42
_xml_unescape_map = {
43
    'apos':"'",
44
    'quot':'"',
45
    'amp':'&',
46
    'lt':'<',
47
    'gt':'>'
48
}
49
50
51
def _unescaper(match, _map=_xml_unescape_map):
    """Return the replacement text for a single ``&...;`` entity match.

    :param match: A regex match whose group 1 is the text between the ``&``
        and the ``;``.
    :param _map: Lookup table from predefined entity name to replacement
        text. It is bound as a default argument so that it is a local name
        when the function runs.
    :return: The mapped text for a predefined entity, or the UTF-8 encoding
        of the code point for a numeric character reference. Both decimal
        (``&#65;``) and hexadecimal (``&#x41;``) references are accepted.
    :raises KeyError: if the entity is neither predefined nor numeric.
    :raises ValueError: if a numeric reference does not parse as a number.
    """
    code = match.group(1)
    try:
        return _map[code]
    except KeyError:
        # Anything that is not a predefined name must be a '#...' numeric
        # reference; other names are reported to the caller unchanged.
        if not code.startswith('#'):
            raise
    number = code[1:]
    if number.startswith('x'):
        # XML spells hexadecimal references with a lowercase 'x' only.
        codepoint = int(number[1:], 16)
    else:
        codepoint = int(number)
    return unichr(codepoint).encode('utf8')
59
60
6355.1.1 by Jelmer Vernooij
Move some utility functions to xml_serializer.
61
# Matches one '&...;' entity; group 1 is the text between the '&' and the
# ';'.  Written as a raw string so the backslash reaches the regex engine
# as-is instead of depending on Python preserving an unknown string escape.
_unescape_re = lazy_regex.lazy_compile(r'\&([^;]*);')
5671.2.3 by Jelmer Vernooij
Move Repository._find_text_key_references_from_xml_inventory_lines onto the serializer.
62
63
def _unescape_xml(data):
    """Replace every ``&...;`` entity in data with the text it stands for.

    Each entity matched by _unescape_re is handed to _unescaper, which
    resolves predefined entity names and numeric character references.

    :param data: The string to unescape.
    :return: The string with all matched entities substituted.
    """
    unescaped = _unescape_re.sub(_unescaper, data)
    return unescaped
66
1934.1.4 by John Arbash Meinel
rewrite escaper to use xml numerical entities, rather than using encode('utf8')
67
4237.3.1 by Jelmer Vernooij
Add new module with generic serializer information; keep XML-specific bits in
68
class Serializer_v8(XMLSerializer):
3311.3.4 by Aaron Bentley
Have xml5 inherit from xml6 from xml8
69
    """This serialiser adds rich roots.
1189 by Martin Pool
- BROKEN: partial support for commit into weave
70
3311.3.4 by Aaron Bentley
Have xml5 inherit from xml6 from xml8
71
    Its revision format number matches its inventory number.
1189 by Martin Pool
- BROKEN: partial support for commit into weave
72
    """
3311.3.4 by Aaron Bentley
Have xml5 inherit from xml6 from xml8
73
3882.6.22 by John Arbash Meinel
Start moving things around so that the entry cache is passed in.
74
    __slots__ = []
1934.1.3 by John Arbash Meinel
[merge] robert's custom XML serializer, and cleanup for benchmarks and iter_entries() differences
75
3311.3.4 by Aaron Bentley
Have xml5 inherit from xml6 from xml8
76
    root_id = None
1910.2.48 by Aaron Bentley
Update from review comments
77
    support_altered_by_hack = True
78
    # This format supports the altered-by hack that reads file ids directly out
79
    # of the versionedfile, without doing XML parsing.
80
2100.3.1 by Aaron Bentley
Start roundtripping tree-reference entries
81
    supported_kinds = set(['file', 'directory', 'symlink'])
3311.3.4 by Aaron Bentley
Have xml5 inherit from xml6 from xml8
82
    format_num = '8'
3311.3.3 by Aaron Bentley
Handle format 5 revision
83
    revision_format_num = None
2100.3.1 by Aaron Bentley
Start roundtripping tree-reference entries
84
5671.2.3 by Jelmer Vernooij
Move Repository._find_text_key_references_from_xml_inventory_lines onto the serializer.
85
    # The search regex used by xml based repositories to determine what things
86
    # were changed in a single commit.
87
    _file_ids_altered_regex = lazy_regex.lazy_compile(
88
        r'file_id="(?P<file_id>[^"]+)"'
89
        r'.* revision="(?P<revision_id>[^"]+)"'
90
        )
91
2889.1.1 by Robert Collins
* The class ``bzrlib.repofmt.knitrepo.KnitRepository3`` has been folded into
92
    def _check_revisions(self, inv):
93
        """Extension point for subclasses to check during serialisation.
94
95
        :param inv: An inventory about to be serialised, to be checked.
4031.3.1 by Frank Aspell
Fixing various typos
96
        :raises: AssertionError if an error has occurred.
2889.1.1 by Robert Collins
* The class ``bzrlib.repofmt.knitrepo.KnitRepository3`` has been folded into
97
        """
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
98
        if inv.revision_id is None:
4505.5.2 by Robert Collins
More informative assertions in xml serialisation.
99
            raise AssertionError("inv.revision_id is None")
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
100
        if inv.root.revision is None:
4505.5.2 by Robert Collins
More informative assertions in xml serialisation.
101
            raise AssertionError("inv.root.revision is None")
2889.1.1 by Robert Collins
* The class ``bzrlib.repofmt.knitrepo.KnitRepository3`` has been folded into
102
3882.6.22 by John Arbash Meinel
Start moving things around so that the entry cache is passed in.
103
    def _check_cache_size(self, inv_size, entry_cache):
104
        """Check that the entry_cache is large enough.
3882.6.12 by John Arbash Meinel
Use resize logic to ensure our inventory entry cache is at an optimal size.
105
106
        We want the cache to be ~2x the size of an inventory. The reason is
107
        because we use a FIFO cache, and how Inventory records are likely to
108
        change. In general, you have a small number of records which change
109
        often, and a lot of records which do not change at all. So when the
110
        cache gets full, you actually flush out a lot of the records you are
111
        interested in, which means you need to recreate all of those records.
112
        An LRU Cache would be better, but the overhead negates the cache
113
        coherency benefit.
114
115
        One way to look at it, only the size of the cache > len(inv) is your
116
        'working' set. And in general, it shouldn't be a problem to hold 2
117
        inventories in memory anyway.
118
119
        :param inv_size: The number of entries in an inventory.
120
        """
3882.6.22 by John Arbash Meinel
Start moving things around so that the entry cache is passed in.
121
        if entry_cache is None:
122
            return
3882.6.12 by John Arbash Meinel
Use resize logic to ensure our inventory entry cache is at an optimal size.
123
        # 1.5 times might also be reasonable.
3882.6.22 by John Arbash Meinel
Start moving things around so that the entry cache is passed in.
124
        recommended_min_cache_size = inv_size * 1.5
125
        if entry_cache.cache_size() < recommended_min_cache_size:
126
            recommended_cache_size = inv_size * 2
127
            trace.mutter('Resizing the inventory entry cache from %d to %d',
128
                         entry_cache.cache_size(), recommended_cache_size)
129
            entry_cache.resize(recommended_cache_size)
3882.6.12 by John Arbash Meinel
Use resize logic to ensure our inventory entry cache is at an optimal size.
130
2817.2.1 by Robert Collins
* Inventory serialisation no longer double-sha's the content.
131
    def write_inventory_to_lines(self, inv):
        """Serialise inv and return it as a list of lines.

        No file is written: write_inventory is called with None as the file.
        """
        lines = self.write_inventory(inv, None)
        return lines
134
135
    def write_inventory_to_string(self, inv, working=False):
        """Serialise inv and return the result as a single string.

        The inventory is written into an in-memory StringIO by
        write_inventory, and the buffer's contents are returned.

        :param working: If True skip history data - text_sha1, text_size,
            reference_revision, symlink_target.
        """
        buf = cStringIO.StringIO()
        self.write_inventory(inv, buf, working)
        return buf.getvalue()
144
2817.2.1 by Robert Collins
* Inventory serialisation no longer double-sha's the content.
145
    def write_inventory(self, inv, f, working=False):
        """Serialise an inventory, optionally writing it to a file.

        :param inv: the inventory to write.
        :param f: the file to write to, or None if only the returned lines
            are wanted.
        :param working: If True skip history data - text_sha1, text_size,
            reference_revision, symlink_target.
        :return: The inventory as a list of lines.
        """
        lines = serialize_inventory_flat(
            inv, self._append_inventory_root, self.root_id,
            self.supported_kinds, working)
        if f is not None:
            f.writelines(lines)
        # The escaping cache is deliberately left alone here: clearing it
        # would stop it growing without bound, but may not be desirable.
        return lines
1934.1.3 by John Arbash Meinel
[merge] robert's custom XML serializer, and cleanup for benchmarks and iter_entries() differences
163
1934.1.8 by John Arbash Meinel
Passing around the append function rather than the list shaves off another 10%, down to 400ms
164
    def _append_inventory_root(self, append, inv):
        """Emit the opening ``<inventory>`` tag and the root directory entry.

        :param append: Callable that receives each output line.
        :param inv: The inventory being serialised; its revision_id (if any)
            and its root entry's file_id, name and revision are written.
        """
        # NOTE(review): the format strings below never close the attribute
        # quotes themselves -- presumably encode_and_escape returns its text
        # with the closing '"' already attached; confirm against that helper.
        if inv.revision_id is None:
            revid_prefix = ""
            revid_value = ""
        else:
            revid_prefix = ' revision_id="'
            revid_value = encode_and_escape(inv.revision_id)
        inventory_tag = '<inventory format="%s"%s%s>\n' % (
            self.format_num, revid_prefix, revid_value)
        append(inventory_tag)
        root = inv.root
        root_attrs = (
            encode_and_escape(root.file_id),
            encode_and_escape(root.name),
            encode_and_escape(root.revision),
            )
        append('<directory file_id="%s name="%s revision="%s />\n'
               % root_attrs)
3311.3.4 by Aaron Bentley
Have xml5 inherit from xml6 from xml8
178
1189 by Martin Pool
- BROKEN: partial support for commit into weave
179
    def _pack_revision(self, rev):
        """Convert a Revision object into an XML element tree.

        :param rev: The revision to serialise.
        :return: The ``<revision>`` root element, holding a ``<message>``
            child plus ``<parents>`` and ``<properties>`` children when the
            revision has any.
        """
        # The XML layer wants Unicode rather than utf-8 byte strings, so that
        # cElementTree can take care of escaping them properly.
        to_unicode = cache_utf8.decode
        revision_id = rev.revision_id
        if isinstance(revision_id, str):
            revision_id = to_unicode(revision_id)
        # A dedicated revision format number, when set, wins over format_num.
        format_num = self.format_num
        if self.revision_format_num is not None:
            format_num = self.revision_format_num
        root = Element(
            'revision',
            committer=rev.committer,
            timestamp='%.3f' % rev.timestamp,
            revision_id=revision_id,
            inventory_sha1=rev.inventory_sha1,
            format=format_num,
            )
        if rev.timezone is not None:
            root.set('timezone', str(rev.timezone))
        root.text = '\n'

        message_elt = SubElement(root, 'message')
        message_elt.text = escape_invalid_chars(rev.message)[0]
        message_elt.tail = '\n'

        if rev.parent_ids:
            parents_elt = SubElement(root, 'parents')
            parents_elt.tail = parents_elt.text = '\n'
            for parent_id in rev.parent_ids:
                _mod_revision.check_not_reserved_id(parent_id)
                ref_elt = SubElement(parents_elt, 'revision_ref')
                ref_elt.tail = '\n'
                if isinstance(parent_id, str):
                    parent_id = to_unicode(parent_id)
                ref_elt.set('revision_id', parent_id)

        if rev.properties:
            self._pack_revision_properties(rev, root)
        return root
1185.16.36 by Martin Pool
- store revision properties in revision xml
217
218
    def _pack_revision_properties(self, rev, under_element):
219
        top_elt = SubElement(under_element, 'properties')
220
        for prop_name, prop_value in sorted(rev.properties.items()):
221
            prop_elt = SubElement(top_elt, 'property')
222
            prop_elt.set('name', prop_name)
223
            prop_elt.text = prop_value
224
            prop_elt.tail = '\n'
225
        top_elt.tail = '\n'
226
4849.4.2 by John Arbash Meinel
Change from being a per-serializer attribute to being a per-repo attribute.
227
    def _unpack_inventory(self, elt, revision_id=None, entry_cache=None,
                          return_from_cache=False):
        """Build an Inventory from an ``<inventory>`` XML element.

        :param elt: The root element. Its tag must be 'inventory' and its
            'format' attribute must equal self.format_num.
        :param revision_id: Not used by this implementation: the inventory's
            revision id is always taken from the element's own 'revision_id'
            attribute.
        :param entry_cache: Optional entry cache handed to _unpack_entry and
            afterwards checked by _check_cache_size.
        :param return_from_cache: Passed through to _unpack_entry.
        :return: The populated Inventory.
        :raises errors.UnexpectedInventoryFormat: if the root tag or the
            format number is not the expected one.
        """
        root_tag = elt.tag
        if root_tag != 'inventory':
            raise errors.UnexpectedInventoryFormat(
                'Root tag is %r' % root_tag)
        found_format = elt.get('format')
        if found_format != self.format_num:
            raise errors.UnexpectedInventoryFormat(
                'Invalid format version %r' % found_format)
        xml_revision_id = elt.get('revision_id')
        if xml_revision_id is None:
            inv_revision_id = None
        else:
            inv_revision_id = cache_utf8.encode(xml_revision_id)
        inv = inventory.Inventory(root_id=None, revision_id=inv_revision_id)
        for child in elt:
            entry = self._unpack_entry(child, entry_cache=entry_cache,
                                       return_from_cache=return_from_cache)
            inv.add(entry)
        self._check_cache_size(len(inv), entry_cache)
        return inv
246
4849.4.2 by John Arbash Meinel
Change from being a per-serializer attribute to being a per-repo attribute.
247
    def _unpack_entry(self, elt, entry_cache=None, return_from_cache=False):
        """Build an InventoryEntry from one child element of an inventory.

        :param elt: The XML element; its tag names the entry kind
            ('directory', 'file' or 'symlink').
        :param entry_cache: Optional mapping from the (file_id, revision)
            attribute pair, as read from the element, to a previously built
            entry. It is only consulted and updated when the element has a
            'revision' attribute.
        :param return_from_cache: If True, a cache hit for a non-directory
            entry is returned as the cached object itself instead of a copy.
        :return: The InventoryEntry for this element.
        :raises AssertionError: if the kind is not a versionable kind.
        :raises errors.UnsupportedInventoryKind: if the kind is versionable
            but is not one of the kinds handled here.
        """
        elt_get = elt.get
        file_id = elt_get('file_id')
        revision = elt_get('revision')
        # First see whether this exact entry has been unpacked before.
        # Some timings for "repo.revision_trees(last_100_revs)"
        #               bzr     mysql
        #   unmodified  4.1s    40.8s
        #   using lru   3.5s
        #   using fifo  2.83s   29.1s
        #   lru._cache  2.8s
        #   dict        2.75s   26.8s
        #   inv.add     2.5s    26.0s
        #   no_copy     2.00s   20.5s
        #   no_c,dict   1.95s   18.0s
        # Note that a cache of 10k nodes is more than sufficient to hold all
        # of the inventory for the last 100 revs for bzr, but not for mysql
        # (20k is enough for mysql, which saves the same 2s as using a dict)

        # Breakdown of mysql using time.clock()
        #   4.1s    2 calls to element.get for file_id, revision_id
        #   4.5s    cache_hit lookup
        #   7.1s    InventoryFile.copy()
        #   2.4s    InventoryDirectory.copy()
        #   0.4s    decoding unique entries
        #   1.6s    decoding entries after FIFO fills up
        #   0.8s    Adding nodes to FIFO (including flushes)
        #   0.1s    cache miss lookups
        # Using an LRU cache
        #   4.1s    2 calls to element.get for file_id, revision_id
        #   9.9s    cache_hit lookup
        #   10.8s   InventoryEntry.copy()
        #   0.3s    cache miss lookups
        #   1.2s    decoding entries
        #   1.0s    adding nodes to LRU
        key = None
        if entry_cache is not None and revision is not None:
            key = (file_id, revision)
            try:
                cached_ie = entry_cache[key]
            except KeyError:
                pass
            else:
                # Hits are normally copied, because some operations may
                # mutate the entry.  With return_from_cache only directory
                # entries are copied (that drops us 2.85s => 2.35s).
                if return_from_cache and cached_ie.kind != 'directory':
                    return cached_ie
                return cached_ie.copy()

        kind = elt.tag
        if not InventoryEntry.versionable_kind(kind):
            raise AssertionError('unsupported entry kind %s' % kind)

        # Only pay for the id conversions once we know we had a cache miss.
        get_cached = get_utf8_or_ascii
        file_id = get_cached(file_id)
        if revision is not None:
            revision = get_cached(revision)
        parent_id = elt_get('parent_id')
        if parent_id is not None:
            parent_id = get_cached(parent_id)

        name = elt_get('name')
        if kind == 'directory':
            ie = inventory.InventoryDirectory(file_id, name, parent_id)
        elif kind == 'file':
            ie = inventory.InventoryFile(file_id, name, parent_id)
            ie.text_sha1 = elt_get('text_sha1')
            if elt_get('executable') == 'yes':
                ie.executable = True
            size_text = elt_get('text_size')
            # A missing or empty text_size attribute is stored unchanged.
            if size_text:
                ie.text_size = int(size_text)
            else:
                ie.text_size = size_text
        elif kind == 'symlink':
            ie = inventory.InventoryLink(file_id, name, parent_id)
            ie.symlink_target = elt_get('symlink_target')
        else:
            raise errors.UnsupportedInventoryKind(kind)
        ie.revision = revision

        if key is not None and revision is not None:
            # Store a copy() rather than the entry being returned: callers
            # like to mutate these objects, and that would mutate the cached
            # item too.  The cost is small for many-inventory workloads,
            # because most of the time goes into cache hits, not misses.
            entry_cache[key] = ie.copy()

        return ie
339
340
    def _unpack_revision(self, elt):
        """Build a Revision object from a ``<revision>`` XML element.

        :param elt: The revision element. Its 'committer', 'timestamp',
            'revision_id' and 'inventory_sha1' attributes are read, along
            with the optional 'format' and 'timezone' attributes and the
            ``<parents>``, ``<properties>`` and ``<message>`` children.
        :return: The populated Revision. The timezone defaults to 0 when the
            element has no 'timezone' attribute.
        :raises BzrError: if the element carries a 'format' attribute that
            does not match the expected format number.
        """
        found_format = elt.get('format')
        # A dedicated revision format number, when set, wins over format_num.
        expected_format = self.format_num
        if self.revision_format_num is not None:
            expected_format = self.revision_format_num
        # A missing format attribute is accepted; only a mismatch is fatal.
        if found_format is not None and found_format != expected_format:
            raise BzrError("invalid format version %r on revision"
                            % found_format)
        get_cached = get_utf8_or_ascii
        rev = Revision(committer=elt.get('committer'),
                       timestamp=float(elt.get('timestamp')),
                       revision_id=get_cached(elt.get('revision_id')),
                       inventory_sha1=elt.get('inventory_sha1'),
                       )
        # Test for absence explicitly: an Element's truth value reflects
        # whether it has children, and relying on it is deprecated.
        parents = elt.find('parents')
        if parents is not None:
            for ref in parents:
                rev.parent_ids.append(get_cached(ref.get('revision_id')))
        self._unpack_revision_properties(elt, rev)
        timezone = elt.get('timezone')
        if timezone is None:
            rev.timezone = 0
        else:
            rev.timezone = int(timezone)
        rev.message = elt.findtext('message')  # text of <message>
        return rev
367
1185.16.37 by Martin Pool
- properties are retrieved when revisions are loaded
368
    def _unpack_revision_properties(self, elt, rev):
369
        """Unpack properties onto a revision."""
370
        props_elt = elt.find('properties')
371
        if not props_elt:
372
            return
373
        for prop_elt in props_elt:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
374
            if prop_elt.tag != 'property':
375
                raise AssertionError(
376
                    "bad tag under properties list: %r" % prop_elt.tag)
1185.16.37 by Martin Pool
- properties are retrieved when revisions are loaded
377
            name = prop_elt.get('name')
378
            value = prop_elt.text
1886.1.1 by John Arbash Meinel
Fix bug #47782,
379
            # If a property had an empty value ('') cElementTree reads
380
            # that back as None, convert it back to '', so that all
381
            # properties have string values
382
            if value is None:
383
                value = ''
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
384
            if name in rev.properties:
385
                raise AssertionError("repeated property %r" % name)
1185.16.37 by Martin Pool
- properties are retrieved when revisions are loaded
386
            rev.properties[name] = value
387
5671.2.3 by Jelmer Vernooij
Move Repository._find_text_key_references_from_xml_inventory_lines onto the serializer.
388
    def _find_text_key_references(self, line_iterator):
        """Core routine for extracting references to texts from inventories.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of (line, line_key) pairs. Each
            line is searched with self._file_ids_altered_regex; the last
            element of line_key is compared with the revision id found in
            the line.
        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. Note that if that revision_id was
            not part of the line_iterator's output then False will be given -
            even though it may actually refer to that key.
        :raises AssertionError: If self.support_altered_by_hack is false.
        """
        if not self.support_altered_by_hack:
            raise AssertionError(
                "_find_text_key_references only "
                "supported for branches which store inventory as unnested xml"
                ", not on %r" % self)
        result = {}

        # this code needs to read every new line in every inventory for the
        # inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
        # not present in one of those inventories is unnecessary but not
        # harmful because we are filtering by the revision id marker in the
        # inventory lines : we only select file ids altered in one of those
        # revisions. We don't need to see all lines in the inventory because
        # only those added in an inventory in rev X can contain a revision=X
        # line.
        # Per-call caches mapping the escaped id text found in a line to its
        # unescaped form.
        unescape_revid_cache = {}
        unescape_fileid_cache = {}

        # jam 20061218 In a big fetch, this handles hundreds of thousands
        # of lines, so it has had a lot of inlining and optimizing done.
        # Sorry that it is a little bit messy.
        # Move several functions to be local variables, since this is a long
        # running loop.
        search = self._file_ids_altered_regex.search
        unescape = _unescape_xml
        setdefault = result.setdefault
        for line, line_key in line_iterator:
            match = search(line)
            if match is None:
                # Lines that do not match the regex contribute nothing.
                continue
            # One call to match.group() returning multiple items is quite a
            # bit faster than 2 calls to match.group() each returning 1
            file_id, revision_id = match.group('file_id', 'revision_id')

            # Inlining the cache lookups helps a lot when you make 170,000
            # lines and 350k ids, versus 8.4 unique ids.
            # Using a cache helps in 2 ways:
            #   1) Avoids unnecessary decoding calls
            #   2) Re-uses cached strings, which helps in future set and
            #      equality checks.
            # (2) is enough that removing encoding entirely along with
            # the cache (so we are using plain strings) results in no
            # performance improvement.
            try:
                revision_id = unescape_revid_cache[revision_id]
            except KeyError:
                unescaped = unescape(revision_id)
                unescape_revid_cache[revision_id] = unescaped
                revision_id = unescaped

            # Note that unconditionally unescaping means that we deserialise
            # every fileid, which for general 'pull' is not great, but we don't
            # really want to have so many fulltexts that this matters anyway.
            # RBC 20071114.
            try:
                file_id = unescape_fileid_cache[file_id]
            except KeyError:
                unescaped = unescape(file_id)
                unescape_fileid_cache[file_id] = unescaped
                file_id = unescaped

            key = (file_id, revision_id)
            # Record the key as False unless it is already present, then
            # set it to True when the line's own key ends with the same
            # revision id as the one found in the line.
            setdefault(key, False)
            if revision_id == line_key[-1]:
                result[key] = True
        return result
466
1185.16.37 by Martin Pool
- properties are retrieved when revisions are loaded
467
3311.3.4 by Aaron Bentley
Have xml5 inherit from xml6 from xml8
468
# Module-level instance of Serializer_v8, created once at import time.
serializer_v8 = Serializer_v8()
6355.1.2 by Jelmer Vernooij
Factor out serializing of inventory in xml8.
469
470
471
def serialize_inventory_flat(inv, append_inventory_root, root_id,
                             supported_kinds, working):
    """Serialize an inventory to a flat XML file.

    :param inv: Inventory to serialize
    :param append_inventory_root: Callable invoked once, as
        ``append_inventory_root(append, inv)``, before any entry is written;
        ``append`` adds a string to the output list.
    :param root_id: Entries whose ``parent_id`` equals this value are written
        without a ``parent_id`` attribute.
    :param supported_kinds: Container of entry kinds; it is only consulted
        for 'tree-reference' entries.
    :param working: If True skip history data - revision, text_sha1,
        text_size, reference_revision, symlink_target.
    :return: A list of strings: whatever ``append_inventory_root`` appended,
        one line per non-root entry, and a closing ``</inventory>`` line.
    :raises errors.UnsupportedInventoryKind: If an entry is a
        'tree-reference' that is not in ``supported_kinds``, or has a kind
        other than file, directory, symlink or tree-reference.
    :raises StopIteration: If ``inv.iter_entries()`` yields nothing.
    """
    output = []
    append = output.append
    append_inventory_root(append, inv)
    entries = inv.iter_entries()
    # Skip the root: the first entry yielded is discarded and not written
    # by the loop below.  The next() builtin is used rather than the
    # Python 2 only entries.next() method.
    next(entries)
    # NOTE(review): the templates below carry no closing quote after values
    # passed through encode_and_escape(), while text_sha1 (inserted directly)
    # does have one; presumably encode_and_escape() appends the closing quote
    # itself - confirm before editing any template.
    for path, ie in entries:
        if ie.parent_id != root_id:
            parent_str = ' parent_id="'
            parent_id = encode_and_escape(ie.parent_id)
        else:
            # Direct children of the root get no parent_id attribute.
            parent_str = ''
            parent_id = ''
        if ie.kind == 'file':
            if ie.executable:
                executable = ' executable="yes"'
            else:
                executable = ''
            if not working:
                append('<file%s file_id="%s name="%s%s%s revision="%s '
                    'text_sha1="%s" text_size="%d" />\n' % (
                    executable, encode_and_escape(ie.file_id),
                    encode_and_escape(ie.name), parent_str, parent_id,
                    encode_and_escape(ie.revision), ie.text_sha1,
                    ie.text_size))
            else:
                append('<file%s file_id="%s name="%s%s%s />\n' % (
                    executable, encode_and_escape(ie.file_id),
                    encode_and_escape(ie.name), parent_str, parent_id))
        elif ie.kind == 'directory':
            if not working:
                append('<directory file_id="%s name="%s%s%s revision="%s '
                    '/>\n' % (
                    encode_and_escape(ie.file_id),
                    encode_and_escape(ie.name),
                    parent_str, parent_id,
                    encode_and_escape(ie.revision)))
            else:
                append('<directory file_id="%s name="%s%s%s />\n' % (
                    encode_and_escape(ie.file_id),
                    encode_and_escape(ie.name),
                    parent_str, parent_id))
        elif ie.kind == 'symlink':
            if not working:
                append('<symlink file_id="%s name="%s%s%s revision="%s '
                    'symlink_target="%s />\n' % (
                    encode_and_escape(ie.file_id),
                    encode_and_escape(ie.name),
                    parent_str, parent_id,
                    encode_and_escape(ie.revision),
                    encode_and_escape(ie.symlink_target)))
            else:
                append('<symlink file_id="%s name="%s%s%s />\n' % (
                    encode_and_escape(ie.file_id),
                    encode_and_escape(ie.name),
                    parent_str, parent_id))
        elif ie.kind == 'tree-reference':
            # Only this kind is checked against supported_kinds.
            if ie.kind not in supported_kinds:
                raise errors.UnsupportedInventoryKind(ie.kind)
            if not working:
                append('<tree-reference file_id="%s name="%s%s%s '
                    'revision="%s reference_revision="%s />\n' % (
                    encode_and_escape(ie.file_id),
                    encode_and_escape(ie.name),
                    parent_str, parent_id,
                    encode_and_escape(ie.revision),
                    encode_and_escape(ie.reference_revision)))
            else:
                append('<tree-reference file_id="%s name="%s%s%s />\n' % (
                    encode_and_escape(ie.file_id),
                    encode_and_escape(ie.name),
                    parent_str, parent_id))
        else:
            raise errors.UnsupportedInventoryKind(ie.kind)
    append('</inventory>\n')
    return output