bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
4763.2.4
by John Arbash Meinel
 merge bzr.2.1 in preparation for NEWS entry.  | 
1  | 
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
 | 
| 
4241.6.1
by Ian Clatworthy
 chk_map code from brisbane-core  | 
2  | 
#
 | 
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
15  | 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
|
16  | 
||
| 
6379.6.7
by Jelmer Vernooij
 Move importing from future until after doc string, otherwise the doc string will disappear.  | 
17  | 
"""Serializer object for CHK based inventory storage."""
 | 
18  | 
||
| 
6379.6.1
by Jelmer Vernooij
 Import absolute_import in a few places.  | 
19  | 
from __future__ import absolute_import  | 
20  | 
||
| 
6355.1.5
by Jelmer Vernooij
 Use lazy imports.  | 
21  | 
from cStringIO import StringIO  | 
22  | 
||
23  | 
from bzrlib import lazy_import  | 
|
24  | 
lazy_import.lazy_import(globals(),  | 
|
25  | 
"""
 | 
|
26  | 
from bzrlib import (
 | 
|
27  | 
    xml_serializer,
 | 
|
28  | 
    )
 | 
|
29  | 
""")  | 
|
| 
4241.6.1
by Ian Clatworthy
 chk_map code from brisbane-core  | 
30  | 
from bzrlib import (  | 
| 
4398.5.2
by John Arbash Meinel
 Merge the chk serializer, and update it for the new bencode locations.  | 
31  | 
bencode,  | 
| 
4290.1.1
by Jelmer Vernooij
 Add simple revision serializer based on RIO.  | 
32  | 
cache_utf8,  | 
| 
6355.1.3
by Jelmer Vernooij
 Split out more stuff.  | 
33  | 
errors,  | 
| 
4290.1.1
by Jelmer Vernooij
 Add simple revision serializer based on RIO.  | 
34  | 
revision as _mod_revision,  | 
| 
6355.1.3
by Jelmer Vernooij
 Split out more stuff.  | 
35  | 
serializer,  | 
| 
4241.6.1
by Ian Clatworthy
 chk_map code from brisbane-core  | 
36  | 
    )
 | 
37  | 
||
| 
4398.5.9
by John Arbash Meinel
 it seems that codecs.utf_8_decode is quite a bit faster than codecs.get_decoder('utf-8')  | 
38  | 
|
39  | 
def _validate_properties(props, _decode=cache_utf8._utf8_decode):
    """Replace every value of ``props`` with its decoded form, in place.

    :param props: A dict. Each value is handed to ``_decode``.
    :param _decode: Callable returning a sequence whose first item is the
        decoded value. It is a default argument, so the attribute lookup on
        ``cache_utf8`` happens once, when the function is defined.
    :return: The very same ``props`` object that was passed in.
    """
    # TODO: we really want an 'isascii' check for key
    # Only existing keys are re-assigned, so the dict keeps its size while
    # it is being iterated.
    for name, raw in props.iteritems():
        decoded = _decode(raw)
        props[name] = decoded[0]
    return props
|
| 
4398.5.5
by John Arbash Meinel
 Update the CHK Serializer to do lots more validation.  | 
45  | 
|
46  | 
||
47  | 
def _is_format_10(value):  | 
|
48  | 
if value != 10:  | 
|
49  | 
raise ValueError('Format number was not recognized, expected 10 got %d'  | 
|
50  | 
% (value,))  | 
|
51  | 
return 10  | 
|
52  | 
||
53  | 
||
| 
4290.1.12
by Jelmer Vernooij
 Use bencode rather than rio in the new revision serialiszer.  | 
54  | 
class BEncodeRevisionSerializer1(object):
    """Revision serializer that stores a revision as a bencoded list.

    The encoded form is a list of (key, value) pairs; ``_schema`` describes
    the keys that are accepted when reading such a list back.
    """

    squashes_xml_invalid_characters = False

    # Maps {key: (Revision attribute, bencode_type, validator)}
    # For every key this records the Revision keyword argument it feeds, the
    # exact type bdecode is expected to have produced, and an optional
    # callable used to validate/transform the decoded value.
    # TODO: add a 'validate_utf8' for things like revision_id and file_id
    #       and a validator for parent-ids
    _schema = {
        'format': (None, int, _is_format_10),
        'committer': ('committer', str, cache_utf8.decode),
        'timezone': ('timezone', int, None),
        'timestamp': ('timestamp', str, float),
        'revision-id': ('revision_id', str, None),
        'parent-ids': ('parent_ids', list, None),
        'inventory-sha1': ('inventory_sha1', str, None),
        'message': ('message', str, cache_utf8.decode),
        'properties': ('properties', dict, _validate_properties),
        }

    def write_revision_to_string(self, rev):
        """Return ``rev`` encoded as a bencoded list of (key, value) pairs.

        The committer, the message and every property value are passed
        through ``cache_utf8._utf8_encode``. The timezone pair is left out
        when ``rev.timezone`` is None, and the timestamp is written as a
        string with three decimals.
        """
        to_utf8 = cache_utf8._utf8_encode
        # A list of pairs (not a dict) keeps the ordering under our control,
        # so that we are able to create smaller deltas.
        pairs = [("format", 10)]
        pairs.append(("committer", to_utf8(rev.committer)[0]))
        if rev.timezone is not None:
            pairs.append(("timezone", rev.timezone))
        # For bzr revisions, the most common property is just 'branch-nick'
        # which changes infrequently.
        encoded_props = dict((name, to_utf8(text)[0])
                             for name, text in rev.properties.iteritems())
        pairs.append(('properties', encoded_props))
        pairs.append(("timestamp", "%.3f" % rev.timestamp))
        pairs.append(("revision-id", rev.revision_id))
        pairs.append(("parent-ids", rev.parent_ids))
        pairs.append(("inventory-sha1", rev.inventory_sha1))
        pairs.append(("message", to_utf8(rev.message)[0]))
        return bencode.bencode(pairs)

    def write_revision(self, rev, f):
        """Write the encoded form of ``rev`` to ``f`` with one write() call."""
        encoded = self.write_revision_to_string(rev)
        f.write(encoded)

    def read_revision_from_string(self, text):
        """Decode ``text`` and build a Revision from it.

        :param text: The bencoded revision.
        :return: The result of ``_mod_revision.Revision(**fields)``.
        :raises ValueError: If the decoded text is not a list, a value does
            not have exactly the type named in ``_schema``, or a schema key
            is missing.
        :raises KeyError: If a key is not present in ``_schema``.
        """
        # TODO: consider writing a Revision decoder, rather than using the
        #       generic bencode decoder
        #       However, to decode all 25k revisions of bzr takes approx 1.3s
        #       If we remove all extra validation that goes down to about 1.2s.
        #       Of that time, probably 0.6s is spent in bencode.bdecode().
        #       Regardless 'time bzr log' of everything is 7+s, so 1.3s to
        #       extract revision texts isn't a majority of time.
        decoded = bencode.bdecode(text)
        if not isinstance(decoded, list):
            raise ValueError("invalid revision text")
        schema = self._schema
        # timezone is allowed to be missing, but should be set
        fields = {'timezone': None}
        for key, value in decoded:
            # Raises KeyError if the key is not part of the schema.
            # NOTE: a repeated key is not detected here; the later value
            # simply overwrites the earlier one.
            attr_name, wanted_type, validator = schema[key]
            if value.__class__ is not wanted_type:
                raise ValueError('key %s did not conform to the expected type'
                                 ' %s, but was %s'
                                 % (key, wanted_type, type(value)))
            fields[attr_name] = (value if validator is None
                                 else validator(value))
        if len(fields) != len(schema):
            missing = [key for key, (attr_name, _, _) in schema.iteritems()
                       if attr_name not in fields]
            raise ValueError('Revision text was missing expected keys %s.'
                             ' text %r' % (missing, text))
        # 'format' is stored under the None key because it has no matching
        # Revision attribute; drop it before building the Revision.
        fields.pop(None)
        return _mod_revision.Revision(**fields)

    def read_revision(self, f):
        """Read all of ``f`` and decode it as a revision."""
        text = f.read()
        return self.read_revision_from_string(text)
142  | 
||
143  | 
||
| 
6355.1.3
by Jelmer Vernooij
 Split out more stuff.  | 
144  | 
class CHKSerializer(serializer.Serializer):
    """A CHKInventory based serializer with 'plain' behaviour."""

    format_num = '9'
    revision_format_num = None
    support_altered_by_hack = False
    supported_kinds = set(['file', 'directory', 'symlink'])

    def __init__(self, node_size, search_key_name):
        """Remember the node size and search key name.

        :param node_size: Stored as ``self.maximum_size``.
        :param search_key_name: Stored as ``self.search_key_name``.
        """
        self.maximum_size = node_size
        self.search_key_name = search_key_name

    def _unpack_inventory(self, elt, revision_id=None, entry_cache=None,
                          return_from_cache=False):
        """Construct an inventory from an XML Element.

        :param elt: The element handed to
            ``xml_serializer.unpack_inventory_flat``.
        :param revision_id: Accepted for interface compatibility; it is not
            used by this implementation.
        :param entry_cache: Forwarded to ``unpack_inventory_flat``.
        :param return_from_cache: Forwarded to ``unpack_inventory_flat``.
        :return: Whatever ``unpack_inventory_flat`` returns.
        """
        inv = xml_serializer.unpack_inventory_flat(
            elt, self.format_num, xml_serializer.unpack_inventory_entry,
            entry_cache, return_from_cache)
        return inv

    def read_inventory_from_string(self, xml_string, revision_id=None,
                                   entry_cache=None, return_from_cache=False):
        """Read xml_string into an inventory object.

        :param xml_string: The xml to read.
        :param revision_id: If not-None, the expected revision id of the
            inventory.
        :param entry_cache: An optional cache of InventoryEntry objects. If
            supplied we will look up entries via (file_id, revision_id) which
            should map to a valid InventoryEntry (File/Directory/etc) object.
        :param return_from_cache: Return entries directly from the cache,
            rather than copying them first. This is only safe if the caller
            promises not to mutate the returned inventory entries, but it can
            make some operations significantly faster.
        :raises errors.UnexpectedInventoryFormat: If parsing raises
            ``xml_serializer.ParseError``.
        """
        try:
            return self._unpack_inventory(
                xml_serializer.fromstring(xml_string), revision_id,
                entry_cache=entry_cache,
                return_from_cache=return_from_cache)
        except xml_serializer.ParseError as e:
            raise errors.UnexpectedInventoryFormat(e)

    def read_inventory(self, f, revision_id=None):
        """Read an inventory from a file-like object.

        ``f`` is always closed before this method returns or raises.

        :param f: File-like object handed to ``self._read_element``
            (NOTE(review): ``_read_element`` is not defined in this class;
            presumably inherited from ``serializer.Serializer`` - confirm).
        :param revision_id: Forwarded to ``_unpack_inventory``, matching
            ``read_inventory_from_string``.
        :raises errors.UnexpectedInventoryFormat: If parsing raises
            ``xml_serializer.ParseError``.
        """
        try:
            try:
                # Forward the caller's revision_id instead of a hard-coded
                # None so the parameter is not silently discarded.
                return self._unpack_inventory(self._read_element(f),
                                              revision_id=revision_id)
            finally:
                f.close()
        except xml_serializer.ParseError as e:
            raise errors.UnexpectedInventoryFormat(e)

    def write_inventory_to_lines(self, inv):
        """Return a list of lines with the encoded inventory."""
        return self.write_inventory(inv, None)

    def write_inventory_to_string(self, inv, working=False):
        """Just call write_inventory with a StringIO and return the value.

        :param working: If True skip history data - text_sha1, text_size,
            reference_revision, symlink_target.
        """
        sio = StringIO()
        self.write_inventory(inv, sio, working)
        return sio.getvalue()

    def write_inventory(self, inv, f, working=False):
        """Write inventory to a file.

        :param inv: the inventory to write.
        :param f: the file to write. (May be None if the lines are the desired
            output).
        :param working: If True skip history data - text_sha1, text_size,
            reference_revision, symlink_target.
        :return: The inventory as a list of lines.
        """
        output = []
        append = output.append
        # NOTE(review): none of the templates below closes the attribute
        # quotes itself; presumably xml_serializer.encode_and_escape()
        # returns its result with the closing '"' already appended - confirm
        # before "fixing" the format strings.
        if inv.revision_id is not None:
            revid1 = ' revision_id="'
            revid2 = xml_serializer.encode_and_escape(inv.revision_id)
        else:
            revid1 = ""
            revid2 = ""
        append('<inventory format="%s"%s%s>\n' % (
            self.format_num, revid1, revid2))
        append('<directory file_id="%s name="%s revision="%s />\n' % (
            xml_serializer.encode_and_escape(inv.root.file_id),
            xml_serializer.encode_and_escape(inv.root.name),
            xml_serializer.encode_and_escape(inv.root.revision)))
        # The root entry was written above; the remaining entries are
        # emitted through the same append callback.
        xml_serializer.serialize_inventory_flat(
            inv, append,
            root_id=None, supported_kinds=self.supported_kinds,
            working=working)
        if f is not None:
            f.writelines(output)
        return output
|
| 
6355.1.6
by Jelmer Vernooij
 Move core inventory code to xml_serializer.  | 
243  | 
|
| 
4241.6.1
by Ian Clatworthy
 chk_map code from brisbane-core  | 
244  | 
|
245  | 
# Module-level CHKSerializer instance: node size 65536, search key
# 'hash-255-way'.
chk_serializer_255_bigpage = CHKSerializer(
    node_size=65536, search_key_name='hash-255-way')
|
| 
4290.1.7
by Jelmer Vernooij
 Add development7-rich-root format that uses the RIO Serializer.  | 
246  | 
|
247  | 
||
| 
4290.1.12
by Jelmer Vernooij
 Use bencode rather than rio in the new revision serialiszer.  | 
248  | 
class CHKBEncodeSerializer(BEncodeRevisionSerializer1, CHKSerializer):
    """A CHKInventory and BEncode based serializer with 'plain' behaviour.

    BEncodeRevisionSerializer1 is listed first among the bases, so its
    revision read/write methods take precedence; the constructor and the
    inventory methods come from CHKSerializer.
    """

    # Replaces the '9' declared on CHKSerializer.
    format_num = '10'
|
252  | 
||
253  | 
||
| 
4290.1.12
by Jelmer Vernooij
 Use bencode rather than rio in the new revision serialiszer.  | 
254  | 
# Module-level CHKBEncodeSerializer instance: node size 65536, search key
# 'hash-255-way'.
chk_bencode_serializer = CHKBEncodeSerializer(
    node_size=65536, search_key_name='hash-255-way')