/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
4241.6.1 by Ian Clatworthy
chk_map code from brisbane-core
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17
"""Serializer object for CHK based inventory storage."""
18
4290.1.1 by Jelmer Vernooij
Add simple revision serializer based on RIO.
19
from cStringIO import (
20
    StringIO,
21
    )
22
4241.6.1 by Ian Clatworthy
chk_map code from brisbane-core
23
from bzrlib import (
4398.5.2 by John Arbash Meinel
Merge the chk serializer, and update it for the new bencode locations.
24
    bencode,
4290.1.1 by Jelmer Vernooij
Add simple revision serializer based on RIO.
25
    cache_utf8,
4241.6.1 by Ian Clatworthy
chk_map code from brisbane-core
26
    inventory,
4290.1.1 by Jelmer Vernooij
Add simple revision serializer based on RIO.
27
    osutils,
28
    revision as _mod_revision,
4241.6.1 by Ian Clatworthy
chk_map code from brisbane-core
29
    xml5,
30
    xml6,
31
    )
32
4398.5.9 by John Arbash Meinel
it seems that codecs.utf_8_decode is quite a bit faster than codecs.get_decoder('utf-8')
33
34
def _validate_properties(props, _decode=cache_utf8._utf8_decode):
    """Decode every value of a revision-properties mapping.

    :param props: Mapping of property name to encoded value, as found under
        the 'properties' key of a decoded revision text.
    :param _decode: Decoder callable; it must return a sequence whose first
        item is the decoded value.  Bound as a default argument so the
        attribute lookup happens once, at definition time.
    :return: A new dict with the same keys and the decoded values.
    """
    # TODO: we really want an 'isascii' check for key
    # items() rather than iteritems(): works on Python 2 and Python 3 alike.
    unicode_props = dict([(key, _decode(value)[0])
                          for key, value in props.items()])
    return unicode_props
39
40
41
def _is_format_10(value):
42
    if value != 10:
43
        raise ValueError('Format number was not recognized, expected 10 got %d'
44
                         % (value,))
45
    return 10
46
47
4290.1.12 by Jelmer Vernooij
Use bencode rather than rio in the new revision serialiszer.
48
class BEncodeRevisionSerializer1(object):
    """Simple revision serializer based around bencode.

    A revision is written as a bencoded list of (key, value) pairs.  On
    reading, every pair is checked against ``_schema`` before a Revision
    object is built from the collected values.
    """

    # Maps {key:(Revision attribute, bencode_type, validator)}
    # This tells us what kind we expect bdecode to create, what variable on
    # Revision we should be using, and a function to call to validate/transform
    # the type.
    # TODO: add a 'validate_utf8' for things like revision_id and file_id
    #       and a validator for parent-ids
    _schema = {'format': (None, int, _is_format_10),
               'committer': ('committer', str, cache_utf8.decode),
               'timezone': ('timezone', int, None),
               'timestamp': ('timestamp', str, float),
               'revision-id': ('revision_id', str, None),
               'parent-ids': ('parent_ids', list, tuple),
               'inventory-sha1': ('inventory_sha1', str, None),
               'message': ('message', str, cache_utf8.decode),
               'properties': ('properties', dict, _validate_properties),
    }

    def write_revision_to_string(self, rev):
        """Serialize rev to a bencoded string.

        :param rev: Object providing the committer, timezone, properties,
            timestamp, revision_id, parent_ids, inventory_sha1 and message
            attributes.
        :return: The result of bencode.bencode() on the list of
            (key, value) pairs.
        """
        encode_utf8 = cache_utf8._utf8_encode
        # Use a list of tuples rather than a dict
        # This lets us control the ordering, so that we are able to create
        # smaller deltas
        ret = [
            ("format", 10),
            ("committer", encode_utf8(rev.committer)[0]),
        ]
        # timezone is optional: it is only written when it is set.
        if rev.timezone is not None:
            ret.append(("timezone", rev.timezone))
        # For bzr revisions, the most common property is just 'branch-nick'
        # which changes infrequently.
        revprops = {}
        # items() rather than iteritems(): works on Python 2 and Python 3.
        for key, value in rev.properties.items():
            revprops[key] = encode_utf8(value)[0]
        ret.append(('properties', revprops))
        ret.extend([
            ("timestamp", "%.3f" % rev.timestamp),
            ("revision-id", rev.revision_id),
            ("parent-ids", rev.parent_ids),
            ("inventory-sha1", rev.inventory_sha1),
            ("message", encode_utf8(rev.message)[0]),
        ])
        return bencode.bencode(ret)

    def write_revision(self, rev, f):
        """Write the serialized form of rev to the file-like object f."""
        f.write(self.write_revision_to_string(rev))

    def read_revision_from_string(self, text):
        """Build a Revision from its bencoded text.

        :param text: A string as produced by write_revision_to_string().
        :return: A Revision constructed from the validated values.
        :raises ValueError: If the decoded text is not a list, if a value
            does not have exactly the type named in ``_schema``, or if any
            key other than 'timezone' is missing.
        :raises KeyError: If a key is not part of ``_schema`` or is given
            more than once.
        """
        # TODO: consider writing a Revision decoder, rather than using the
        #       generic bencode decoder
        #       However, to decode all 25k revisions of bzr takes approx 1.3s
        #       If we remove all extra validation that goes down to about 1.2s.
        #       Of that time, probably 0.6s is spend in bencode.bdecode().
        #       Regardless 'time bzr log' of everything is 7+s, so 1.3s to
        #       extract revision texts isn't a majority of time.
        ret = bencode.bdecode(text)
        if not isinstance(ret, list):
            raise ValueError("invalid revision text")
        # Work on a copy: entries are popped as they are seen, so whatever is
        # left at the end is the set of keys missing from the text.
        schema = dict(self._schema)
        schema_pop = schema.pop
        # timezone is allowed to be missing, but should be set
        bits = {'timezone': None}
        for key, value in ret:
            # Will raise KeyError if not a valid part of the schema, or an
            # entry is given 2 times.
            var_name, expected_type, validator = schema_pop(key)
            if value.__class__ is not expected_type:
                raise ValueError('key %s did not conform to the expected type'
                                 ' %s, but was %s'
                                 % (key, expected_type, type(value)))
            if validator is not None:
                value = validator(value)
            bits[var_name] = value
        # sorted() gives a real list whether keys() returns a list or a view,
        # and makes the error message deterministic.
        missing = sorted(schema)
        if missing and missing != ['timezone']:
            raise ValueError('Revision text was missing expected keys %s.'
                             ' text %r' % (missing, text))
        # 'format' maps to the attribute name None.  It was necessarily seen
        # (otherwise the check above raised), so the entry exists.
        del bits[None]  # Get rid of bits that don't get mapped
        rev = _mod_revision.Revision(**bits)
        return rev

    def read_revision(self, f):
        """Read all of the file-like object f and decode it as a Revision."""
        return self.read_revision_from_string(f.read())
134
135
136
class CHKSerializerSubtree(BEncodeRevisionSerializer1, xml6.Serializer_v6):
    """A CHKInventory based serializer that supports tree references"""

    # Entry kinds _unpack_entry accepts; anything else is rejected.
    supported_kinds = set(['file', 'directory', 'symlink', 'tree-reference'])
    format_num = '9'
    revision_format_num = None
    support_altered_by_hack = False

    def _unpack_entry(self, elt):
        """Convert an inventory element into an inventory entry.

        :param elt: Element whose ``tag`` names the entry kind.
        :return: An inventory.TreeReference for 'tree-reference' elements,
            otherwise whatever xml6.Serializer_v6._unpack_entry returns.
        :raises AssertionError: If the kind is not in ``supported_kinds``.
        """
        kind = elt.tag
        if kind not in self.supported_kinds:
            raise AssertionError('unsupported entry kind %s' % kind)
        if kind == 'tree-reference':
            # file_id, name and parent_id are looked up with [] and so must
            # be present; the two revision attributes are read with get().
            file_id = elt.attrib['file_id']
            name = elt.attrib['name']
            parent_id = elt.attrib['parent_id']
            revision = elt.get('revision')
            reference_revision = elt.get('reference_revision')
            return inventory.TreeReference(file_id, name, parent_id, revision,
                                           reference_revision)
        else:
            return xml6.Serializer_v6._unpack_entry(self, elt)

    def __init__(self, node_size, search_key_name):
        """Record the CHK parameters for this serializer.

        :param node_size: Stored as ``maximum_size``.
        :param search_key_name: Stored as ``search_key_name``.
        """
        self.maximum_size = node_size
        self.search_key_name = search_key_name
162
163
4290.1.7 by Jelmer Vernooij
Add development7-rich-root format that uses the RIO Serializer.
164
class CHKSerializer(xml5.Serializer_v5):
    """A CHKInventory based serializer with 'plain' behaviour."""

    format_num = '9'
    revision_format_num = None
    support_altered_by_hack = False

    def __init__(self, node_size, search_key_name):
        """Record the CHK parameters for this serializer.

        :param node_size: Stored as ``maximum_size``.
        :param search_key_name: Stored as ``search_key_name``.
        """
        self.maximum_size = node_size
        self.search_key_name = search_key_name
174
175
176
# Shared CHKSerializer instance: 65536 node size, 'hash-255-way' search key.
chk_serializer_255_bigpage = CHKSerializer(
    node_size=65536, search_key_name='hash-255-way')
4290.1.7 by Jelmer Vernooij
Add development7-rich-root format that uses the RIO Serializer.
177
178
4290.1.12 by Jelmer Vernooij
Use bencode rather than rio in the new revision serialiszer.
179
class CHKBEncodeSerializer(BEncodeRevisionSerializer1, CHKSerializer):
    """A CHKInventory and BEncode based serializer with 'plain' behaviour."""

    # Overrides the '9' inherited from CHKSerializer.
    format_num = '10'
183
184
4290.1.12 by Jelmer Vernooij
Use bencode rather than rio in the new revision serialiszer.
185
# Shared CHKBEncodeSerializer instance, same parameters as the plain one.
chk_bencode_serializer = CHKBEncodeSerializer(
    node_size=65536, search_key_name='hash-255-way')