bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
4241.6.1
by Ian Clatworthy
 chk_map code from brisbane-core  | 
1  | 
# Copyright (C) 2008 Canonical Ltd
 | 
2  | 
#
 | 
|
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
15  | 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
|
16  | 
||
17  | 
"""Serializer object for CHK based inventory storage."""
 | 
|
18  | 
||
19  | 
from bzrlib import (  | 
|
| 
4398.5.2
by John Arbash Meinel
 Merge the chk serializer, and update it for the new bencode locations.  | 
20  | 
bencode,  | 
| 
4290.1.1
by Jelmer Vernooij
 Add simple revision serializer based on RIO.  | 
21  | 
cache_utf8,  | 
| 
4241.6.1
by Ian Clatworthy
 chk_map code from brisbane-core  | 
22  | 
inventory,  | 
| 
4290.1.1
by Jelmer Vernooij
 Add simple revision serializer based on RIO.  | 
23  | 
revision as _mod_revision,  | 
| 
4241.6.1
by Ian Clatworthy
 chk_map code from brisbane-core  | 
24  | 
xml5,  | 
25  | 
xml6,  | 
|
26  | 
    )
 | 
|
27  | 
||
| 
4398.5.9
by John Arbash Meinel
 it seems that codecs.utf_8_decode is quite a bit faster than codecs.get_decoder('utf-8')  | 
28  | 
|
29  | 
def _validate_properties(props, _decode=cache_utf8._utf8_decode):
    """Decode every value of ``props`` in place and hand the dict back.

    :param props: dict whose values are each passed through ``_decode``.
        Presumably the utf8-encoded revision properties -- confirm with
        the caller.
    :param _decode: callable returning a sequence whose first item is the
        decoded text. Bound as a default argument so the attribute lookup
        happens once, at definition time.
    :return: the same ``props`` object, with each value replaced.
    """
    # TODO: we really want an 'isascii' check for key
    # Only values are rebound (the key set never changes), so updating the
    # dict while walking over it is safe.
    for name in props:
        props[name] = _decode(props[name])[0]
    return props
|
| 
4398.5.5
by John Arbash Meinel
 Update the CHK Serializer to do lots more validation.  | 
35  | 
|
36  | 
||
37  | 
def _is_format_10(value):  | 
|
38  | 
if value != 10:  | 
|
39  | 
raise ValueError('Format number was not recognized, expected 10 got %d'  | 
|
40  | 
% (value,))  | 
|
41  | 
return 10  | 
|
42  | 
||
43  | 
||
| 
4290.1.12
by Jelmer Vernooij
 Use bencode rather than rio in the new revision serialiszer.  | 
44  | 
class BEncodeRevisionSerializer1(object):
    """Revision serializer that stores a revision as a bencoded list.

    The serialized form is a bencoded list of ``(key, value)`` pairs; see
    ``_schema`` for the keys that are understood when reading.
    """

    # NOTE(review): read by code outside this class; presumably signals that
    # this serializer does not squash XML-invalid characters -- confirm.
    squashes_xml_invalid_characters = False

    # Maps {key: (Revision attribute, bencode_type, validator)}
    # For each serialized key this records the keyword passed to Revision,
    # the exact type bdecode is expected to produce, and an optional callable
    # used to validate/transform the decoded value (None means "use as-is").
    # TODO: add a 'validate_utf8' for things like revision_id and file_id
    #       and a validator for parent-ids
    _schema = {
        'format': (None, int, _is_format_10),
        'committer': ('committer', str, cache_utf8.decode),
        'timezone': ('timezone', int, None),
        'timestamp': ('timestamp', str, float),
        'revision-id': ('revision_id', str, None),
        'parent-ids': ('parent_ids', list, None),
        'inventory-sha1': ('inventory_sha1', str, None),
        'message': ('message', str, cache_utf8.decode),
        'properties': ('properties', dict, _validate_properties),
    }

    def write_revision_to_string(self, rev):
        """Return the bencoded text for ``rev``.

        :param rev: object providing ``committer``, ``timezone``,
            ``properties``, ``timestamp``, ``revision_id``, ``parent_ids``,
            ``inventory_sha1`` and ``message`` attributes.
        :return: the result of ``bencode.bencode`` on the list of pairs.
        """
        to_utf8 = cache_utf8._utf8_encode
        # A list of pairs (not a dict) fixes the order of the fields, which
        # lets us create smaller deltas.
        fields = [("format", 10), ("committer", to_utf8(rev.committer)[0])]
        # The timezone entry is omitted entirely when it is unset.
        if rev.timezone is not None:
            fields.append(("timezone", rev.timezone))
        # For bzr revisions, the most common property is just 'branch-nick'
        # which changes infrequently.
        encoded_props = dict(
            (name, to_utf8(text)[0])
            for name, text in rev.properties.iteritems())
        fields.append(('properties', encoded_props))
        fields += [
            # The timestamp is stored as text with millisecond precision.
            ("timestamp", "%.3f" % rev.timestamp),
            ("revision-id", rev.revision_id),
            ("parent-ids", rev.parent_ids),
            ("inventory-sha1", rev.inventory_sha1),
            ("message", to_utf8(rev.message)[0]),
        ]
        return bencode.bencode(fields)

    def write_revision(self, rev, f):
        """Serialize ``rev`` and write the text to ``f`` via ``f.write``."""
        serialized = self.write_revision_to_string(rev)
        f.write(serialized)

    def read_revision_from_string(self, text):
        """Decode ``text`` and build a Revision from it.

        :param text: bencoded revision text.
        :return: ``_mod_revision.Revision`` constructed from the decoded
            fields as keyword arguments.
        :raises ValueError: if the decoded text is not a list, if a value is
            not exactly of the type the schema expects, or if expected keys
            are missing.
        :raises KeyError: if a key is not part of the schema.
        """
        # TODO: consider writing a Revision decoder, rather than using the
        #       generic bencode decoder
        #       However, to decode all 25k revisions of bzr takes approx 1.3s
        #       If we remove all extra validation that goes down to about 1.2s.
        #       Of that time, probably 0.6s is spent in bencode.bdecode().
        #       Regardless 'time bzr log' of everything is 7+s, so 1.3s to
        #       extract revision texts isn't a majority of time.
        decoded = bencode.bdecode(text)
        if not isinstance(decoded, list):
            raise ValueError("invalid revision text")
        schema = self._schema
        # timezone is allowed to be missing, but should be set
        attrs = {'timezone': None}
        for key, value in decoded:
            # Raises KeyError for a key that is not in the schema.  A key
            # that appears more than once simply overwrites the earlier value.
            attr_name, wanted_type, convert = schema[key]
            # Exact class match: subclasses of the expected type are rejected.
            if value.__class__ is not wanted_type:
                raise ValueError('key %s did not conform to the expected type'
                                 ' %s, but was %s'
                                 % (key, wanted_type, type(value)))
            if convert is not None:
                value = convert(value)
            attrs[attr_name] = value
        # Every schema key maps to a distinct attribute name, so equal sizes
        # mean that nothing is missing.
        if len(attrs) != len(schema):
            absent = [key for key, (attr_name, _, _) in schema.iteritems()
                      if attr_name not in attrs]
            raise ValueError('Revision text was missing expected keys %s.'
                             ' text %r' % (absent, text))
        del attrs[None]  # Get rid of 'format' since it doesn't get mapped
        return _mod_revision.Revision(**attrs)

    def read_revision(self, f):
        """Read all of ``f`` via ``f.read()`` and decode it as a revision."""
        text = f.read()
        return self.read_revision_from_string(text)
132  | 
||
133  | 
||
134  | 
class CHKSerializerSubtree(BEncodeRevisionSerializer1, xml6.Serializer_v6):
    """A CHKInventory based serializer that supports tree references"""

    # Element tags accepted by _unpack_entry.
    supported_kinds = set(['file', 'directory', 'symlink', 'tree-reference'])
    format_num = '9'
    revision_format_num = None
    support_altered_by_hack = False

    def __init__(self, node_size, search_key_name):
        """Record the node size and search key name on the instance.

        :param node_size: stored as ``self.maximum_size``.
        :param search_key_name: stored as ``self.search_key_name``.
        """
        self.maximum_size = node_size
        self.search_key_name = search_key_name

    def _unpack_entry(self, elt):
        """Turn an element into an inventory entry.

        :param elt: element providing ``tag``, ``attrib`` and ``get``.
        :return: an ``inventory.TreeReference`` for 'tree-reference'
            elements, otherwise whatever ``xml6.Serializer_v6._unpack_entry``
            returns.
        :raises AssertionError: if the tag is not in ``supported_kinds``.
        """
        tag = elt.tag
        if tag not in self.supported_kinds:
            raise AssertionError('unsupported entry kind %s' % tag)
        if tag != 'tree-reference':
            # Every other kind is handled by the v6 XML serializer.
            return xml6.Serializer_v6._unpack_entry(self, elt)
        attrs = elt.attrib
        # These three are looked up by index (KeyError if absent) ...
        entry_id = attrs['file_id']
        entry_name = attrs['name']
        entry_parent = attrs['parent_id']
        # ... while the revisions are fetched with get(), so they may be
        # absent.
        entry_revision = elt.get('revision')
        referenced = elt.get('reference_revision')
        return inventory.TreeReference(entry_id, entry_name, entry_parent,
                                       entry_revision, referenced)
|
160  | 
||
161  | 
||
| 
4290.1.7
by Jelmer Vernooij
 Add development7-rich-root format that uses the RIO Serializer.  | 
162  | 
class CHKSerializer(xml5.Serializer_v5):
    """A CHKInventory based serializer with 'plain' behaviour."""

    format_num = '9'
    revision_format_num = None
    support_altered_by_hack = False

    def __init__(self, node_size, search_key_name):
        """Record the node size and search key name on the instance.

        :param node_size: stored as ``self.maximum_size``.
        :param search_key_name: stored as ``self.search_key_name``.
        """
        self.search_key_name = search_key_name
        self.maximum_size = node_size
|
172  | 
||
173  | 
||
174  | 
# Shared module-level instance: 65536 node size, 'hash-255-way' search key.
chk_serializer_255_bigpage = CHKSerializer(
    node_size=65536, search_key_name='hash-255-way')
|
| 
4290.1.7
by Jelmer Vernooij
 Add development7-rich-root format that uses the RIO Serializer.  | 
175  | 
|
176  | 
||
| 
4290.1.12
by Jelmer Vernooij
 Use bencode rather than rio in the new revision serialiszer.  | 
177  | 
class CHKBEncodeSerializer(BEncodeRevisionSerializer1, CHKSerializer):
    """A CHKInventory and BEncode based serializer with 'plain' behaviour."""

    # Overrides the '9' inherited from CHKSerializer.
    format_num = '10'
|
181  | 
||
182  | 
||
| 
4290.1.12
by Jelmer Vernooij
 Use bencode rather than rio in the new revision serialiszer.  | 
183  | 
# Shared module-level instance: 65536 node size, 'hash-255-way' search key.
chk_bencode_serializer = CHKBEncodeSerializer(
    node_size=65536, search_key_name='hash-255-way')