/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
2506.2.1 by Andrew Bennetts
Start implementing container format reading and writing.
1
# Copyright (C) 2007 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Container format for Bazaar data.
18
19
"Containers" and "records" are described in doc/developers/container-format.txt.
20
"""
21
2506.5.2 by Andrew Bennetts
Raise InvalidRecordError on invalid names.
22
import re
23
2506.2.1 by Andrew Bennetts
Start implementing container format reading and writing.
24
from bzrlib import errors
25
26
2506.5.3 by Andrew Bennetts
Change format marker to use the word 'Bazaar' rather than 'bzr'.
27
# Format marker identifying version 1 of the container format.
# ContainerWriter.begin writes it followed by a newline, and
# ContainerReader._read_format compares the first line of a stream against it.
FORMAT_ONE = "Bazaar pack format 1"
2506.2.1 by Andrew Bennetts
Start implementing container format reading and writing.
28
29
2506.5.2 by Andrew Bennetts
Raise InvalidRecordError on invalid names.
30
_whitespace_re = re.compile('[\t\n\x0b\x0c\r ]')
31
32
33
def _check_name(name):
34
    """Do some basic checking of 'name'.
35
    
36
    At the moment, this just checks that there are no whitespace characters in a
37
    name.
38
39
    :raises InvalidRecordError: if name is not valid.
2506.6.1 by Andrew Bennetts
Return a callable instead of a str from read, and add more validation.
40
    :seealso: _check_name_encoding
2506.5.2 by Andrew Bennetts
Raise InvalidRecordError on invalid names.
41
    """
42
    if _whitespace_re.search(name) is not None:
43
        raise errors.InvalidRecordError("%r is not a valid name." % (name,))
44
45
2506.6.1 by Andrew Bennetts
Return a callable instead of a str from read, and add more validation.
46
def _check_name_encoding(name):
47
    """Check that 'name' is valid UTF-8.
48
    
49
    This is separate from _check_name because UTF-8 decoding is relatively
50
    expensive, and we usually want to avoid it.
51
52
    :raises InvalidRecordError: if name is not valid UTF-8.
53
    """
54
    try:
55
        name.decode('utf-8')
56
    except UnicodeDecodeError, e:
57
        raise errors.InvalidRecordError(str(e))
58
59
2506.3.1 by Andrew Bennetts
More progress:
60
class ContainerWriter(object):
    """A class for writing containers.

    Every piece of output is passed straight to the ``write_func`` callable
    given to the constructor.
    """

    def __init__(self, write_func):
        """Constructor.

        :param write_func: a callable that will be called when this
            ContainerWriter needs to write some bytes.
        """
        self.write_func = write_func

    def begin(self):
        """Begin writing a container."""
        self.write_func(FORMAT_ONE + "\n")

    def end(self):
        """Finish writing a container."""
        self.write_func("E")

    def add_bytes_record(self, bytes, names):
        """Add a Bytes record with the given names.

        All names are validated before anything is written, so an invalid
        name never leaves a half-written record in the output.

        :param bytes: the content of the record.
        :param names: an iterable of names for the record.
        :raises InvalidRecordError: if any name is not valid; in that case
            nothing has been written.
        """
        # Materialise the names first: they are walked twice (once to
        # validate, once to write), which a one-shot iterator would not
        # survive.
        names = list(names)
        for name in names:
            _check_name(name)
        # Kind marker
        self.write_func("B")
        # Length
        self.write_func(str(len(bytes)) + "\n")
        # Names
        for name in names:
            self.write_func(name + "\n")
        # End of headers
        self.write_func("\n")
        # Finally, the contents.
        self.write_func(bytes)
95
96
97
class BaseReader(object):
    """Common base for the reader classes: holds the byte source and knows
    how to read one line from it.
    """

    def __init__(self, reader_func):
        """Constructor.

        :param reader_func: a callable that takes one optional argument,
            ``size``, and returns at most that many bytes.  When the callable
            returns less than the requested number of bytes, then the end of
            the file/stream has been reached.
        """
        self.reader_func = reader_func

    def _read_line(self):
        """Read a single newline-terminated line from the input stream.

        The stream is consumed one byte at a time.  That is simple rather
        than fast, but lines are expected to be short.

        :returns: the line, with its trailing newline removed.
        :raises UnexpectedEndOfContainerError: if the stream ends before a
            newline is seen.
        """
        # XXX: Have a maximum line length, to prevent malicious input from
        # consuming an unreasonable amount of resources?
        #   -- Andrew Bennetts, 2007-05-07.
        collected = ''
        while True:
            next_byte = self.reader_func(1)
            if next_byte == '':
                # The stream ran dry before the line was terminated.
                raise errors.UnexpectedEndOfContainerError()
            collected += next_byte
            if collected.endswith('\n'):
                return collected[:-1]
128
129
130
class ContainerReader(BaseReader):
    """A class for reading Bazaar's container format."""

    def iter_records(self):
        """Iterate over the container, yielding each record as it is read.

        Each yielded record is a 2-tuple of (names, callable), where names
        is a ``list`` and callable is a function that takes one argument,
        ``max_length``, and returns bytes of the record's content.

        You **must not** call the callable after advancing the iterator to
        the next record.  That is, this code is invalid::

            record_iter = container.iter_records()
            names1, callable1 = record_iter.next()
            names2, callable2 = record_iter.next()
            bytes1 = callable1(None)

        It will give incorrect results and invalidate the state of the
        ContainerReader.

        The format line is read immediately, before the iterator is
        returned; the records themselves are read lazily.

        :raises ContainerError: if any sort of container corruption is
            detected, e.g. UnknownContainerFormatError if the format of the
            container is unrecognised.
        :seealso: BytesRecordReader.read
        """
        self._read_format()
        return self._iter_records()

    def iter_record_objects(self):
        """Iterate over the container, yielding each record as it is read.

        Each yielded record is an object with ``read`` and ``validate``
        methods.  As with iter_records, it is not safe to use a record
        object after advancing the iterator to yield the next record.

        The format line is read immediately, before the iterator is
        returned; the records themselves are read lazily.

        :raises ContainerError: if any sort of container corruption is
            detected, e.g. UnknownContainerFormatError if the format of the
            container is unrecognised.
        :seealso: iter_records
        """
        self._read_format()
        return self._iter_record_objects()

    def _iter_records(self):
        """Yield the result of ``read()`` for each record object in turn."""
        for record_object in self._iter_record_objects():
            yield record_object.read()

    def _iter_record_objects(self):
        """Yield a record reader for every record until the end marker.

        :raises UnexpectedEndOfContainerError: if the stream ends without an
            End Marker record having been seen.
        :raises UnknownRecordTypeError: if a record kind other than 'B' or
            'E' is encountered.
        """
        while True:
            kind_marker = self.reader_func(1)
            if kind_marker == 'E':
                # End marker.  There are no more records.
                return
            if kind_marker == '':
                # End of stream encountered, but no End Marker record seen,
                # so this container is incomplete.
                raise errors.UnexpectedEndOfContainerError()
            if kind_marker != 'B':
                # Unknown record type.
                raise errors.UnknownRecordTypeError(kind_marker)
            # Bytes record.
            yield BytesRecordReader(self.reader_func)

    def _read_format(self):
        """Read the first line of the stream and check the format marker.

        :raises UnknownContainerFormatError: if the line is not FORMAT_ONE.
        """
        format_line = self._read_line()
        if format_line == FORMAT_ONE:
            return
        raise errors.UnknownContainerFormatError(format_line)

    def validate(self):
        """Validate this container and its records.

        Validating consumes the data stream just like iter_records and
        iter_record_objects, so you cannot call it after
        iter_records/iter_record_objects.

        :raises ContainerError: if something is invalid.
        """
        seen_names = set()
        for names, content_reader in self.iter_records():
            # Consume the whole content so the stream is positioned at the
            # start of the next record.
            content_reader(None)
            for record_name in names:
                _check_name_encoding(record_name)
                # Check that the name is unique.  Note that Python will
                # refuse to decode non-shortest forms of UTF-8 encoding, so
                # there is no risk that the same unicode string has been
                # encoded two different ways.
                if record_name in seen_names:
                    raise errors.DuplicateRecordNameError(record_name)
                seen_names.add(record_name)
        # Nothing may follow the end marker.
        trailing = self.reader_func(1)
        if trailing != '':
            raise errors.ContainerHasExcessDataError(trailing)
225
2506.3.1 by Andrew Bennetts
More progress:
226
227
class BytesRecordReader(BaseReader):
    """Reader for a single Bytes record.

    Reading starts at the record's length line; the kind marker is not read
    by this class.
    """

    def read(self):
        """Read this record.

        You can either validate or read a record, you can't do both.

        :returns: A tuple of (names, callable).  The callable can be called
            repeatedly to obtain the bytes for the record, with a max_length
            argument.  If max_length is None, returns all the bytes.  Because
            records can be arbitrarily large, using None is not recommended
            unless you have reason to believe the content will fit in memory.
        :raises InvalidRecordError: if the length line is not a non-negative
            integer, or if a name is not valid.
        :raises UnexpectedEndOfContainerError: if the stream ends in the
            middle of the record headers.
        """
        # Read the content length.
        length_line = self._read_line()
        try:
            length = int(length_line)
        except ValueError:
            length = None
        # A negative length would later be passed to reader_func as a
        # negative size, which file-like readers treat as "read everything";
        # reject it here along with non-numeric lengths.
        if length is None or length < 0:
            raise errors.InvalidRecordError(
                "%r is not a valid length." % (length_line,))

        # Read the list of names; an empty line terminates the headers.
        names = []
        for name in iter(self._read_line, ''):
            _check_name(name)
            names.append(name)

        self._remaining_length = length
        return names, self._content_reader

    def _content_reader(self, max_length):
        """Return the next chunk of this record's content.

        :param max_length: the maximum number of bytes to return, or None to
            return everything that remains.
        :returns: the bytes read; empty once the content is exhausted.
        :raises UnexpectedEndOfContainerError: if the stream ends before the
            requested bytes could be read.
        """
        if max_length is None:
            length_to_read = self._remaining_length
        else:
            length_to_read = min(max_length, self._remaining_length)
        self._remaining_length -= length_to_read
        bytes = self.reader_func(length_to_read)
        if len(bytes) != length_to_read:
            raise errors.UnexpectedEndOfContainerError()
        return bytes

    def validate(self):
        """Validate this record.

        You can either validate or read, you can't do both.

        :raises ContainerError: if this record is invalid.
        """
        names, read_bytes = self.read()
        for name in names:
            _check_name_encoding(name)
        # Consume the content so that truncated records are detected.
        read_bytes(None)
282