/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to breezy/bzr/chk_serializer.py

  • Committer: Jelmer Vernooij
  • Date: 2020-05-06 02:13:25 UTC
  • mfrom: (7490.7.21 work)
  • mto: This revision was merged to the branch mainline in revision 7501.
  • Revision ID: jelmer@jelmer.uk-20200506021325-awbmmqu1zyorz7sj
Merge 3.1 branch.

Show diffs side-by-side

added added

removed removed

Lines of Context:
16
16
 
17
17
"""Serializer object for CHK based inventory storage."""
18
18
 
19
 
from __future__ import absolute_import
 
19
from io import (
 
20
    BytesIO,
 
21
    )
20
22
 
21
23
from .. import lazy_import
22
24
lazy_import.lazy_import(globals(),
23
 
"""
 
25
                        """
24
26
from breezy.bzr import (
25
27
    xml_serializer,
26
28
    )
34
36
from . import (
35
37
    serializer,
36
38
    )
37
 
from ..sixish import (
38
 
    BytesIO,
39
 
    )
40
 
 
41
39
 
42
40
 
43
41
def _validate_properties(props, _decode=cache_utf8._utf8_decode):
44
42
    # TODO: we really want an 'isascii' check for key
45
43
    # Cast the utf8 properties into Unicode 'in place'
46
 
    for key, value in props.items():
47
 
        props[key] = _decode(value)[0]
48
 
    return props
 
44
    return {_decode(key)[0]: _decode(value)[0] for key, value in props.items()}
49
45
 
50
46
 
51
47
def _is_format_10(value):
67
63
    # the type.
68
64
    # TODO: add a 'validate_utf8' for things like revision_id and file_id
69
65
    #       and a validator for parent-ids
70
 
    _schema = {'format': (None, int, _is_format_10),
71
 
               'committer': ('committer', str, cache_utf8.decode),
72
 
               'timezone': ('timezone', int, None),
73
 
               'timestamp': ('timestamp', str, float),
74
 
               'revision-id': ('revision_id', str, None),
75
 
               'parent-ids': ('parent_ids', list, None),
76
 
               'inventory-sha1': ('inventory_sha1', str, None),
77
 
               'message': ('message', str, cache_utf8.decode),
78
 
               'properties': ('properties', dict, _validate_properties),
79
 
    }
 
66
    _schema = {b'format': (None, int, _is_format_10),
 
67
               b'committer': ('committer', bytes, cache_utf8.decode),
 
68
               b'timezone': ('timezone', int, None),
 
69
               b'timestamp': ('timestamp', bytes, float),
 
70
               b'revision-id': ('revision_id', bytes, None),
 
71
               b'parent-ids': ('parent_ids', list, None),
 
72
               b'inventory-sha1': ('inventory_sha1', bytes, None),
 
73
               b'message': ('message', bytes, cache_utf8.decode),
 
74
               b'properties': ('properties', dict, _validate_properties),
 
75
               }
80
76
 
81
77
    def write_revision_to_string(self, rev):
82
78
        encode_utf8 = cache_utf8._utf8_encode
84
80
        # This lets us control the ordering, so that we are able to create
85
81
        # smaller deltas
86
82
        ret = [
87
 
            ("format", 10),
88
 
            ("committer", encode_utf8(rev.committer)[0]),
 
83
            (b"format", 10),
 
84
            (b"committer", encode_utf8(rev.committer)[0]),
89
85
        ]
90
86
        if rev.timezone is not None:
91
 
            ret.append(("timezone", rev.timezone))
 
87
            ret.append((b"timezone", rev.timezone))
92
88
        # For bzr revisions, the most common property is just 'branch-nick'
93
89
        # which changes infrequently.
94
90
        revprops = {}
95
91
        for key, value in rev.properties.items():
96
 
            revprops[key] = encode_utf8(value)[0]
97
 
        ret.append(('properties', revprops))
 
92
            revprops[encode_utf8(key)[0]] = encode_utf8(value)[0]
 
93
        ret.append((b'properties', revprops))
98
94
        ret.extend([
99
 
            ("timestamp", "%.3f" % rev.timestamp),
100
 
            ("revision-id", rev.revision_id),
101
 
            ("parent-ids", rev.parent_ids),
102
 
            ("inventory-sha1", rev.inventory_sha1),
103
 
            ("message", encode_utf8(rev.message)[0]),
 
95
            (b"timestamp", b"%.3f" % rev.timestamp),
 
96
            (b"revision-id", rev.revision_id),
 
97
            (b"parent-ids", rev.parent_ids),
 
98
            (b"inventory-sha1", rev.inventory_sha1),
 
99
            (b"message", encode_utf8(rev.message)[0]),
104
100
        ])
105
101
        return bencode.bencode(ret)
106
102
 
107
 
    def write_revision(self, rev, f):
108
 
        f.write(self.write_revision_to_string(rev))
 
103
    def write_revision_to_lines(self, rev):
 
104
        return self.write_revision_to_string(rev).splitlines(True)
109
105
 
110
106
    def read_revision_from_string(self, text):
111
107
        # TODO: consider writing a Revision decoder, rather than using the
148
144
class CHKSerializer(serializer.Serializer):
149
145
    """A CHKInventory based serializer with 'plain' behaviour."""
150
146
 
151
 
    format_num = '9'
 
147
    format_num = b'9'
152
148
    revision_format_num = None
153
149
    support_altered_by_hack = False
154
150
    supported_kinds = {'file', 'directory', 'symlink', 'tree-reference'}
161
157
                          return_from_cache=False):
162
158
        """Construct from XML Element"""
163
159
        inv = xml_serializer.unpack_inventory_flat(elt, self.format_num,
164
 
            xml_serializer.unpack_inventory_entry, entry_cache,
165
 
            return_from_cache)
 
160
                                                   xml_serializer.unpack_inventory_entry, entry_cache,
 
161
                                                   return_from_cache)
166
162
        return inv
167
163
 
168
 
    def read_inventory_from_string(self, xml_string, revision_id=None,
169
 
                                   entry_cache=None, return_from_cache=False):
 
164
    def read_inventory_from_lines(self, xml_lines, revision_id=None,
 
165
                                  entry_cache=None, return_from_cache=False):
170
166
        """Read xml_string into an inventory object.
171
167
 
172
168
        :param xml_string: The xml to read.
182
178
        """
183
179
        try:
184
180
            return self._unpack_inventory(
185
 
                xml_serializer.fromstring(xml_string), revision_id,
 
181
                xml_serializer.fromstringlist(xml_lines), revision_id,
186
182
                entry_cache=entry_cache,
187
183
                return_from_cache=return_from_cache)
188
184
        except xml_serializer.ParseError as e:
193
189
        try:
194
190
            try:
195
191
                return self._unpack_inventory(self._read_element(f),
196
 
                    revision_id=None)
 
192
                                              revision_id=None)
197
193
            finally:
198
194
                f.close()
199
195
        except xml_serializer.ParseError as e:
203
199
        """Return a list of lines with the encoded inventory."""
204
200
        return self.write_inventory(inv, None)
205
201
 
206
 
    def write_inventory_to_string(self, inv, working=False):
207
 
        """Just call write_inventory with a BytesIO and return the value.
208
 
 
209
 
        :param working: If True skip history data - text_sha1, text_size,
210
 
            reference_revision, symlink_target.
211
 
        """
212
 
        sio = BytesIO()
213
 
        self.write_inventory(inv, sio, working)
214
 
        return sio.getvalue()
 
202
    def write_inventory_to_chunks(self, inv):
 
203
        """Return a list of lines with the encoded inventory."""
 
204
        return self.write_inventory(inv, None)
215
205
 
216
206
    def write_inventory(self, inv, f, working=False):
217
207
        """Write inventory to a file.
226
216
        output = []
227
217
        append = output.append
228
218
        if inv.revision_id is not None:
229
 
            revid1 = ' revision_id="'
230
 
            revid2 = xml_serializer.encode_and_escape(inv.revision_id)
 
219
            revid = b''.join(
 
220
                [b' revision_id="',
 
221
                 xml_serializer.encode_and_escape(inv.revision_id), b'"'])
231
222
        else:
232
 
            revid1 = ""
233
 
            revid2 = ""
234
 
        append('<inventory format="%s"%s%s>\n' % (
235
 
            self.format_num, revid1, revid2))
236
 
        append('<directory file_id="%s name="%s revision="%s />\n' % (
 
223
            revid = b""
 
224
        append(b'<inventory format="%s"%s>\n' % (
 
225
            self.format_num, revid))
 
226
        append(b'<directory file_id="%s" name="%s" revision="%s" />\n' % (
237
227
            xml_serializer.encode_and_escape(inv.root.file_id),
238
228
            xml_serializer.encode_and_escape(inv.root.name),
239
229
            xml_serializer.encode_and_escape(inv.root.revision)))
240
230
        xml_serializer.serialize_inventory_flat(inv,
241
 
            append,
242
 
            root_id=None, supported_kinds=self.supported_kinds, 
243
 
            working=working)
 
231
                                                append,
 
232
                                                root_id=None, supported_kinds=self.supported_kinds,
 
233
                                                working=working)
244
234
        if f is not None:
245
235
            f.writelines(output)
246
236
        return output
247
237
 
248
238
 
249
 
chk_serializer_255_bigpage = CHKSerializer(65536, 'hash-255-way')
 
239
chk_serializer_255_bigpage = CHKSerializer(65536, b'hash-255-way')
250
240
 
251
241
 
252
242
class CHKBEncodeSerializer(BEncodeRevisionSerializer1, CHKSerializer):
253
243
    """A CHKInventory and BEncode based serializer with 'plain' behaviour."""
254
244
 
255
 
    format_num = '10'
256
 
 
257
 
 
258
 
chk_bencode_serializer = CHKBEncodeSerializer(65536, 'hash-255-way')
 
245
    format_num = b'10'
 
246
 
 
247
 
 
248
chk_bencode_serializer = CHKBEncodeSerializer(65536, b'hash-255-way')