/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4763.2.4 by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry.
1
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
4241.6.1 by Ian Clatworthy
chk_map code from brisbane-core
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
6379.6.7 by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear.
17
"""Serializer object for CHK based inventory storage."""
18
6379.6.1 by Jelmer Vernooij
Import absolute_import in a few places.
19
from __future__ import absolute_import
20
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
21
from . import lazy_import
6355.1.5 by Jelmer Vernooij
Use lazy imports.
22
lazy_import.lazy_import(globals(),
23
"""
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
24
from breezy import (
6355.1.5 by Jelmer Vernooij
Use lazy imports.
25
    xml_serializer,
26
    )
27
""")
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
28
from . import (
4398.5.2 by John Arbash Meinel
Merge the chk serializer, and update it for the new bencode locations.
29
    bencode,
4290.1.1 by Jelmer Vernooij
Add simple revision serializer based on RIO.
30
    cache_utf8,
6355.1.3 by Jelmer Vernooij
Split out more stuff.
31
    errors,
4290.1.1 by Jelmer Vernooij
Add simple revision serializer based on RIO.
32
    revision as _mod_revision,
6355.1.3 by Jelmer Vernooij
Split out more stuff.
33
    serializer,
4241.6.1 by Ian Clatworthy
chk_map code from brisbane-core
34
    )
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
35
from .sixish import (
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
36
    BytesIO,
37
    )
38
4241.6.1 by Ian Clatworthy
chk_map code from brisbane-core
39
4398.5.9 by John Arbash Meinel
it seems that codecs.utf_8_decode is quite a bit faster than codecs.get_decoder('utf-8')
40
41
def _validate_properties(props, _decode=cache_utf8._utf8_decode):
    """Decode every value of ``props`` in place and return the same dict.

    :param props: A dict whose values are each passed to ``_decode``. The
        dict is mutated: every value is replaced by its decoded form.
    :param _decode: Callable returning a sequence whose first item is the
        decoded value. Defaults to ``cache_utf8._utf8_decode``.
    :return: ``props`` itself (not a copy).
    """
    # TODO: we really want an 'isascii' check for key
    # Only existing keys are reassigned, so the dict never changes size while
    # it is being walked.
    for name in props:
        props[name] = _decode(props[name])[0]
    return props
4398.5.5 by John Arbash Meinel
Update the CHK Serializer to do lots more validation.
47
48
49
def _is_format_10(value):
50
    if value != 10:
51
        raise ValueError('Format number was not recognized, expected 10 got %d'
52
                         % (value,))
53
    return 10
54
55
4290.1.12 by Jelmer Vernooij
Use bencode rather than rio in the new revision serialiszer.
56
class BEncodeRevisionSerializer1(object):
    """Revision serializer that stores a Revision as a bencoded list.

    The serialized form is a bencoded list of (key, value) pairs. ``_schema``
    lists the keys that are understood when reading a revision back.
    """

    squashes_xml_invalid_characters = False

    # Maps {key:(Revision attribute, bencode_type, validator)}
    # For each key this gives the Revision attribute the value is stored
    # under, the exact type bdecode is expected to have produced, and an
    # optional callable used to validate/transform the decoded value.
    # TODO: add a 'validate_utf8' for things like revision_id and file_id
    #       and a validator for parent-ids
    _schema = {
        'format': (None, int, _is_format_10),
        'committer': ('committer', str, cache_utf8.decode),
        'timezone': ('timezone', int, None),
        'timestamp': ('timestamp', str, float),
        'revision-id': ('revision_id', str, None),
        'parent-ids': ('parent_ids', list, None),
        'inventory-sha1': ('inventory_sha1', str, None),
        'message': ('message', str, cache_utf8.decode),
        'properties': ('properties', dict, _validate_properties),
    }

    def write_revision_to_string(self, rev):
        """Return the bencoded serialization of ``rev``.

        :param rev: The revision to serialize. Its committer, timezone,
            properties, timestamp, revision_id, parent_ids, inventory_sha1
            and message attributes are read.
        :return: The result of ``bencode.bencode`` on the list of fields.
        """
        to_utf8 = cache_utf8._utf8_encode
        # A list of pairs (rather than a dict) fixes the field order, which
        # helps produce smaller deltas between revisions.
        fields = [
            ("format", 10),
            ("committer", to_utf8(rev.committer)[0]),
        ]
        # timezone is optional and is left out entirely when unset.
        if rev.timezone is not None:
            fields.append(("timezone", rev.timezone))
        # For bzr revisions, the most common property is just 'branch-nick'
        # which changes infrequently.
        encoded_props = {name: to_utf8(text)[0]
                         for name, text in rev.properties.items()}
        fields.append(('properties', encoded_props))
        fields.append(("timestamp", "%.3f" % rev.timestamp))
        fields.append(("revision-id", rev.revision_id))
        fields.append(("parent-ids", rev.parent_ids))
        fields.append(("inventory-sha1", rev.inventory_sha1))
        fields.append(("message", to_utf8(rev.message)[0]))
        return bencode.bencode(fields)

    def write_revision(self, rev, f):
        """Serialize ``rev`` and write the result to ``f``.

        :param rev: The revision to serialize.
        :param f: An object with a ``write`` method.
        """
        serialized = self.write_revision_to_string(rev)
        f.write(serialized)

    def read_revision_from_string(self, text):
        """Decode ``text`` and build a Revision from it.

        :param text: Bencoded revision text.
        :return: A ``_mod_revision.Revision`` built from the decoded fields.
        :raises ValueError: If the decoded text is not a list, a value does
            not have exactly the type given in ``_schema``, or an expected
            key is missing.
        :raises KeyError: If the text contains a key that is not in
            ``_schema``.
        """
        # TODO: consider writing a Revision decoder, rather than using the
        #       generic bencode decoder
        #       However, to decode all 25k revisions of bzr takes approx 1.3s
        #       If we remove all extra validation that goes down to about 1.2s.
        #       Of that time, probably 0.6s is spent in bencode.bdecode().
        #       Regardless 'time brz log' of everything is 7+s, so 1.3s to
        #       extract revision texts isn't a majority of time.
        decoded = bencode.bdecode(text)
        if not isinstance(decoded, list):
            raise ValueError("invalid revision text")
        schema = self._schema
        # timezone may be absent from the text, but the Revision should
        # still have it set, so it starts out as None.
        collected = {'timezone': None}
        for key, raw in decoded:
            # A key that is not part of the schema raises KeyError here.
            # NOTE(review): a repeated key is not rejected by this loop; the
            # later value simply overwrites the earlier one.
            attr_name, wanted_type, check = schema[key]
            if raw.__class__ is not wanted_type:
                raise ValueError('key %s did not conform to the expected type'
                                 ' %s, but was %s'
                                 % (key, wanted_type, type(raw)))
            collected[attr_name] = raw if check is None else check(raw)
        if len(collected) != len(schema):
            missing = [key for key, (var_name, _, _) in schema.items()
                       if var_name not in collected]
            raise ValueError('Revision text was missing expected keys %s.'
                             ' text %r' % (missing, text))
        # 'format' is stored under None because it has no Revision attribute;
        # drop it before passing the rest as keyword arguments.
        del collected[None]
        return _mod_revision.Revision(**collected)

    def read_revision(self, f):
        """Read all of ``f`` and decode it as a revision.

        :param f: An object with a ``read`` method.
        :return: The Revision produced by ``read_revision_from_string``.
        """
        text = f.read()
        return self.read_revision_from_string(text)
144
145
6355.1.3 by Jelmer Vernooij
Split out more stuff.
146
class CHKSerializer(serializer.Serializer):
    """A CHKInventory based serializer with 'plain' behaviour."""

    format_num = '9'
    revision_format_num = None
    support_altered_by_hack = False
    supported_kinds = {'file', 'directory', 'symlink', 'tree-reference'}

    def __init__(self, node_size, search_key_name):
        """Record the parameters of this serializer.

        :param node_size: Stored as ``self.maximum_size``.
        :param search_key_name: Stored as ``self.search_key_name``.
        """
        self.maximum_size = node_size
        self.search_key_name = search_key_name

    def _unpack_inventory(self, elt, revision_id=None, entry_cache=None,
                          return_from_cache=False):
        """Construct an inventory from an XML Element.

        :param elt: The element handed to
            ``xml_serializer.unpack_inventory_flat``.
        :param revision_id: Not used by this implementation.
        :param entry_cache: Passed through to ``unpack_inventory_flat``.
        :param return_from_cache: Passed through to ``unpack_inventory_flat``.
        :return: Whatever ``unpack_inventory_flat`` returns.
        """
        return xml_serializer.unpack_inventory_flat(
            elt, self.format_num, xml_serializer.unpack_inventory_entry,
            entry_cache, return_from_cache)

    def read_inventory_from_string(self, xml_string, revision_id=None,
                                   entry_cache=None, return_from_cache=False):
        """Read xml_string into an inventory object.

        :param xml_string: The xml to read.
        :param revision_id: If not-None, the expected revision id of the
            inventory.
        :param entry_cache: An optional cache of InventoryEntry objects. If
            supplied we will look up entries via (file_id, revision_id) which
            should map to a valid InventoryEntry (File/Directory/etc) object.
        :param return_from_cache: Return entries directly from the cache,
            rather than copying them first. This is only safe if the caller
            promises not to mutate the returned inventory entries, but it can
            make some operations significantly faster.
        :raises errors.UnexpectedInventoryFormat: If parsing raises
            ``xml_serializer.ParseError``.
        """
        try:
            return self._unpack_inventory(
                xml_serializer.fromstring(xml_string), revision_id,
                entry_cache=entry_cache,
                return_from_cache=return_from_cache)
        except xml_serializer.ParseError as e:
            raise errors.UnexpectedInventoryFormat(e)

    def read_inventory(self, f, revision_id=None):
        """Read an inventory from a file-like object.

        :param f: The file-like object to read from. It is closed before this
            method returns or raises.
        :param revision_id: If not-None, the expected revision id of the
            inventory. Forwarded to ``_unpack_inventory``.
        :raises errors.UnexpectedInventoryFormat: If parsing raises
            ``xml_serializer.ParseError``.
        """
        # NOTE(review): _read_element is not defined in this class; presumably
        # it is provided by a base class -- confirm.
        try:
            try:
                # Forward the caller's revision_id instead of discarding it.
                return self._unpack_inventory(self._read_element(f),
                                              revision_id=revision_id)
            finally:
                f.close()
        except xml_serializer.ParseError as e:
            raise errors.UnexpectedInventoryFormat(e)

    def write_inventory_to_lines(self, inv):
        """Return a list of lines with the encoded inventory."""
        return self.write_inventory(inv, None)

    def write_inventory_to_string(self, inv, working=False):
        """Just call write_inventory with a BytesIO and return the value.

        :param working: If True skip history data - text_sha1, text_size,
            reference_revision, symlink_target.
        """
        sio = BytesIO()
        self.write_inventory(inv, sio, working)
        return sio.getvalue()

    def write_inventory(self, inv, f, working=False):
        """Write inventory to a file.

        :param inv: the inventory to write.
        :param f: the file to write. (May be None if the lines are the desired
            output).
        :param working: If True skip history data - text_sha1, text_size,
            reference_revision, symlink_target.
        :return: The inventory as a list of lines.
        """
        output = []
        append = output.append
        # NOTE(review): the format strings below open each attribute value
        # with a quote but never close it; presumably encode_and_escape
        # returns its result with the closing quote already appended --
        # confirm against xml_serializer.
        if inv.revision_id is not None:
            revid1 = ' revision_id="'
            revid2 = xml_serializer.encode_and_escape(inv.revision_id)
        else:
            revid1 = ""
            revid2 = ""
        append('<inventory format="%s"%s%s>\n' % (
            self.format_num, revid1, revid2))
        append('<directory file_id="%s name="%s revision="%s />\n' % (
            xml_serializer.encode_and_escape(inv.root.file_id),
            xml_serializer.encode_and_escape(inv.root.name),
            xml_serializer.encode_and_escape(inv.root.revision)))
        # The root entry was written by hand above, hence root_id=None here.
        xml_serializer.serialize_inventory_flat(
            inv, append,
            root_id=None, supported_kinds=self.supported_kinds,
            working=working)
        if f is not None:
            f.writelines(output)
        return output
6355.1.6 by Jelmer Vernooij
Move core inventory code to xml_serializer.
245
4241.6.1 by Ian Clatworthy
chk_map code from brisbane-core
246
247
# Shared CHKSerializer instance: node_size 65536, search key 'hash-255-way'.
chk_serializer_255_bigpage = CHKSerializer(65536, 'hash-255-way')
4290.1.7 by Jelmer Vernooij
Add development7-rich-root format that uses the RIO Serializer.
248
249
4290.1.12 by Jelmer Vernooij
Use bencode rather than rio in the new revision serialiszer.
250
class CHKBEncodeSerializer(BEncodeRevisionSerializer1, CHKSerializer):
    """A CHKSerializer that serializes revisions with bencode.

    BEncodeRevisionSerializer1 comes first in the bases, so its revision
    read/write methods take precedence; the class reports format number '10'.
    """

    format_num = '10'
254
255
4290.1.12 by Jelmer Vernooij
Use bencode rather than rio in the new revision serialiszer.
256
# Shared CHKBEncodeSerializer instance: node_size 65536, search key
# 'hash-255-way'.
chk_bencode_serializer = CHKBEncodeSerializer(65536, 'hash-255-way')