/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
5089.1.1 by Martin Pool
Fix typo in ReadVFile.readline (thanks mnordhoff)
1
# Copyright (C) 2007, 2009, 2010 Canonical Ltd
2506.2.1 by Andrew Bennetts
Start implementing container format reading and writing.
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2506.2.1 by Andrew Bennetts
Start implementing container format reading and writing.
16
17
"""Container format for Bazaar data.
18
2916.2.13 by Andrew Bennetts
Improve some docstrings.
19
"Containers" and "records" are described in
20
doc/developers/container-format.txt.
2506.2.1 by Andrew Bennetts
Start implementing container format reading and writing.
21
"""
22
6379.6.7 by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear.
23
from __future__ import absolute_import
24
2506.5.2 by Andrew Bennetts
Raise InvalidRecordError on invalid names.
25
import re
26
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
27
from . import errors
28
from .sixish import (
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
29
    BytesIO,
30
    )
2506.2.1 by Andrew Bennetts
Start implementing container format reading and writing.
31
32
2535.3.26 by Andrew Bennetts
Revert merge of container-format changes rejected for bzr.dev (i.e. undo andrew.bennetts@canonical.com-20070717044423-cetp5spep142xsr4).
33
# Signature line identifying version 1 of the container format.  It is stored
# here without its terminating newline.
FORMAT_ONE = "Bazaar pack format 1 (introduced in 0.18)"


# Matches a single whitespace character; names containing one are rejected.
_whitespace_re = re.compile('[\t\n\x0b\x0c\r ]')
2506.5.2 by Andrew Bennetts
Raise InvalidRecordError on invalid names.
37
38
39
def _check_name(name):
    """Run a cheap sanity check over a record name.

    For now the only rule enforced is that the name contains no whitespace
    characters.

    :raises InvalidRecordError: if name is not valid.
    :seealso: _check_name_encoding
    """
    if _whitespace_re.search(name) is None:
        # No whitespace anywhere in the name: nothing to complain about.
        return
    raise errors.InvalidRecordError("%r is not a valid name." % (name,))
50
51
2506.6.1 by Andrew Bennetts
Return a callable instead of a str from read, and add more validation.
52
def _check_name_encoding(name):
    """Verify that 'name' decodes cleanly as UTF-8.

    This check lives apart from _check_name because UTF-8 decoding is
    relatively expensive, and most callers want to skip it.

    :raises InvalidRecordError: if name is not valid UTF-8.
    """
    try:
        # Only the success or failure of the decode matters; the decoded
        # text itself is discarded.
        name.decode('utf-8')
    except UnicodeDecodeError as decode_error:
        message = str(decode_error)
        raise errors.InvalidRecordError(message)
64
65
2916.2.5 by Andrew Bennetts
Extract a ContainerSerialiser class from ContainerWriter.
66
class ContainerSerialiser(object):
    """Stateless helper that produces the serialised pieces of a container.

    Every method simply returns the bytes for one piece ('begin', 'end',
    'bytes_header', 'bytes_record'); nothing is written anywhere.  You may
    find ContainerWriter to be a more convenient interface.
    """

    def begin(self):
        """Return the bytes that open a container (the format line)."""
        return FORMAT_ONE + "\n"

    def end(self):
        """Return the bytes that close a container (the end marker)."""
        return "E"

    def bytes_header(self, length, names):
        """Return the header for a Bytes record.

        :param length: length of the record body that will follow the header.
        :param names: iterable of name tuples; every element of every tuple
            is validated with _check_name.
        :raises InvalidRecordError: if any name fails validation.
        """
        # Kind marker, followed by the body length on the same line.
        header_parts = ["B", str(length) + "\n"]
        # One line per name tuple, its elements separated by NUL.
        for name_tuple in names:
            # Make sure we're writing valid names.  Note that we will leave a
            # half-written record if a name is bad!
            for name in name_tuple:
                _check_name(name)
            header_parts.append('\x00'.join(name_tuple) + "\n")
        # An empty line terminates the headers.
        header_parts.append("\n")
        return ''.join(header_parts)

    def bytes_record(self, bytes, names):
        """Return the complete serialised form of a Bytes record.

        If the content may be large, construct the header separately with
        bytes_header and then stream out the contents instead.

        :param bytes: the record body.
        :param names: the name tuples for the record, as for bytes_header.
        """
        header = self.bytes_header(len(bytes), names)
        return header + bytes
107
2916.2.5 by Andrew Bennetts
Extract a ContainerSerialiser class from ContainerWriter.
108
2506.3.1 by Andrew Bennetts
More progress:
109
class ContainerWriter(object):
    """Write a container through a caller-supplied write callable.

    :attribute records_written: The number of user records added to the
        container. This does not count the prelude or suffix of the container
        introduced by the begin() and end() methods.
    :attribute current_offset: Running total of the bytes passed to the
        write callable so far.
    """

    # Join up headers with the body if writing fewer than this many bytes:
    # trades off memory usage and copying to do less IO ops.
    _JOIN_WRITES_THRESHOLD = 100000

    def __init__(self, write_func):
        """Constructor.

        :param write_func: a callable that will be called when this
            ContainerWriter needs to write some bytes.
        """
        self._serialiser = ContainerSerialiser()
        self._write_func = write_func
        self.records_written = 0
        self.current_offset = 0

    def begin(self):
        """Begin writing a container."""
        prelude = self._serialiser.begin()
        self.write_func(prelude)

    def write_func(self, bytes):
        """Pass 'bytes' to the underlying callable and advance the offset."""
        self._write_func(bytes)
        self.current_offset += len(bytes)

    def end(self):
        """Finish writing a container."""
        suffix = self._serialiser.end()
        self.write_func(suffix)

    def add_bytes_record(self, bytes, names):
        """Add a Bytes record with the given names.

        :param bytes: The bytes to insert.
        :param names: The names to give the inserted bytes. Each name is
            a tuple of bytestrings. The bytestrings may not contain
            whitespace.
        :return: An offset, length tuple. The offset is the offset
            of the record within the container, and the length is the
            length of data that will need to be read to reconstitute the
            record. These offset and length can only be used with the pack
            interface - they might be offset by headers or other such details
            and thus are only suitable for use by a ContainerReader.
        """
        start_offset = self.current_offset
        body_length = len(bytes)
        header = self._serialiser.bytes_header(body_length, names)
        if body_length < self._JOIN_WRITES_THRESHOLD:
            # Small record: one combined write.
            self.write_func(header + bytes)
        else:
            # Large record: avoid copying the body just to prepend a header.
            self.write_func(header)
            self.write_func(bytes)
        self.records_written += 1
        # return a memo of where we wrote data to allow random access.
        return start_offset, self.current_offset - start_offset
169
2506.3.1 by Andrew Bennetts
More progress:
170
2661.2.2 by Robert Collins
* ``bzrlib.pack.make_readv_reader`` allows readv based access to pack
171
class ReadVFile(object):
    """Adapt a readv result iterator to a file like protocol.

    The readv result must support the iterator protocol returning (offset,
    data_bytes) pairs.  Each pair is treated as one independent hunk: a
    single ``read`` or ``readline`` call never crosses from one hunk into
    the next.
    """

    # XXX: This could be a generic transport class, as other code may want to
    # gradually consume the readv result.

    def __init__(self, readv_result):
        """Construct a new ReadVFile wrapper.

        :seealso: make_readv_reader

        :param readv_result: the most recent readv result - list or generator
        """
        # readv can return a sequence or an iterator, but we require an
        # iterator to know how much has been consumed.
        self.readv_result = iter(readv_result)
        # BytesIO over the current hunk; None until the first hunk is loaded.
        self._string = None

    def _next(self):
        """Load the next hunk if there is no current one or it is used up.

        StopIteration from the underlying iterator is deliberately allowed
        to propagate when no hunks remain.
        """
        if (self._string is None or
            self._string.tell() == self._string_length):
            offset, data = next(self.readv_result)
            self._string_length = len(data)
            self._string = BytesIO(data)

    def read(self, length):
        """Read exactly 'length' bytes from the current hunk.

        :param length: number of bytes wanted; None reads the rest of the
            current hunk.
        :raises BzrError: if the current hunk holds fewer than 'length' bytes.
        """
        self._next()
        result = self._string.read(length)
        # Comparing an int with None is a TypeError on Python 3, so only
        # check for a short read when an explicit length was requested.
        if length is not None and len(result) < length:
            raise errors.BzrError('wanted %d bytes but next '
                'hunk only contains %d: %r...' %
                (length, len(result), result[:20]))
        return result

    def readline(self):
        """Read one line, including its newline, from the current hunk.

        Note that readline will not cross readv segments.

        :raises BzrError: if the hunk ends before a newline is found.
        """
        self._next()
        result = self._string.readline()
        # Slice rather than index: indexing bytes yields an int on Python 3
        # (which never equals a string), and indexing an empty result raises
        # IndexError instead of reporting the short line.
        if (self._string.tell() == self._string_length
                and result[-1:] != b'\n'):
            raise errors.BzrError('short readline in the readvfile hunk: %r'
                % (result, ))
        return result
218
219
220
def make_readv_reader(transport, filename, requested_records):
    """Build a ContainerReader that reads only the selected records.

    :param transport: The transport the pack file is located on.
    :param filename: The filename of the pack file.
    :param requested_records: The record offset, length tuples as returned
        by add_bytes_record for the desired records.
    """
    # The first block always covers the format line plus its newline, which
    # the reader consumes before any record.
    format_line_block = (0, len(FORMAT_ONE)+1)
    readv_blocks = [format_line_block]
    readv_blocks.extend(requested_records)
    readv_stream = transport.readv(filename, readv_blocks)
    return ContainerReader(ReadVFile(readv_stream))
233
234
2506.3.1 by Andrew Bennetts
More progress:
235
class BaseReader(object):
    """Shared plumbing for readers that pull data from a file-like source."""

    def __init__(self, source_file):
        """Constructor.

        :param source_file: a file-like object with `read` and `readline`
            methods.
        """
        self._source = source_file

    def reader_func(self, length=None):
        """Read 'length' bytes from the source by delegating to its read()."""
        source = self._source
        return source.read(length)

    def _read_line(self):
        """Read one newline-terminated line and return it without the newline.

        :raises UnexpectedEndOfContainerError: if the source ends before a
            newline is seen.
        """
        raw_line = self._source.readline()
        if raw_line.endswith('\n'):
            return raw_line.rstrip('\n')
        raise errors.UnexpectedEndOfContainerError()
2506.3.1 by Andrew Bennetts
More progress:
253
254
255
class ContainerReader(BaseReader):
    """A class for reading Bazaar's container format."""

    def iter_records(self):
        """Iterate over the container, yielding each record as it is read.

        Each yielded record will be a 2-tuple of (names, callable), where names
        is a ``list`` and bytes is a function that takes one argument,
        ``max_length``.

        You **must not** call the callable after advancing the iterator to the
        next record.  That is, this code is invalid::

            record_iter = container.iter_records()
            names1, callable1 = next(record_iter)
            names2, callable2 = next(record_iter)
            bytes1 = callable1(None)

        As it will give incorrect results and invalidate the state of the
        ContainerReader.

        :raises ContainerError: if any sort of container corruption is
            detected, e.g. UnknownContainerFormatError if the format of the
            container is unrecognised.
        :seealso: ContainerReader.read
        """
        # The format line is checked eagerly, before the generator is
        # returned, so an unknown format fails at call time.
        self._read_format()
        return self._iter_records()

    def iter_record_objects(self):
        """Iterate over the container, yielding each record as it is read.

        Each yielded record will be an object with ``read`` and ``validate``
        methods.  Like with iter_records, it is not safe to use a record object
        after advancing the iterator to yield next record.

        :raises ContainerError: if any sort of container corruption is
            detected, e.g. UnknownContainerFormatError if the format of the
            container is unrecognised.
        :seealso: iter_records
        """
        self._read_format()
        return self._iter_record_objects()

    def _iter_records(self):
        """Yield the result of ``read()`` for each record object in turn."""
        for record in self._iter_record_objects():
            yield record.read()

    def _iter_record_objects(self):
        """Yield a record reader for every record up to the end marker.

        :raises UnexpectedEndOfContainerError: if the stream ends without an
            end marker.
        :raises UnknownRecordTypeError: if a record kind is not recognised.
        """
        while True:
            try:
                record_kind = self.reader_func(1)
            except StopIteration:
                # A source such as ReadVFile signals that its hunks are
                # exhausted by letting StopIteration escape from read().
                # Finish the iteration explicitly: under PEP 479 a
                # StopIteration leaking out of a generator body is turned
                # into RuntimeError instead of ending the generator.
                return
            if record_kind == 'B':
                # Bytes record.
                reader = BytesRecordReader(self._source)
                yield reader
            elif record_kind == 'E':
                # End marker.  There are no more records.
                return
            elif record_kind == '':
                # End of stream encountered, but no End Marker record seen, so
                # this container is incomplete.
                raise errors.UnexpectedEndOfContainerError()
            else:
                # Unknown record type.
                raise errors.UnknownRecordTypeError(record_kind)

    def _read_format(self):
        """Consume the format line and check that it is a known format.

        :raises UnknownContainerFormatError: if the line is not FORMAT_ONE.
        """
        format = self._read_line()
        if format != FORMAT_ONE:
            raise errors.UnknownContainerFormatError(format)

    def validate(self):
        """Validate this container and its records.

        Validating consumes the data stream just like iter_records and
        iter_record_objects, so you cannot call it after
        iter_records/iter_record_objects.

        :raises ContainerError: if something is invalid.
        """
        all_names = set()
        for record_names, read_bytes in self.iter_records():
            # Consume the whole body so the stream is positioned at the next
            # record (and so truncated bodies are detected).
            read_bytes(None)
            for name_tuple in record_names:
                for name in name_tuple:
                    _check_name_encoding(name)
                # Check that the name is unique.  Note that Python will refuse
                # to decode non-shortest forms of UTF-8 encoding, so there is no
                # risk that the same unicode string has been encoded two
                # different ways.
                if name_tuple in all_names:
                    raise errors.DuplicateRecordNameError(name_tuple[0])
                all_names.add(name_tuple)
        # Nothing may follow the end marker.
        excess_bytes = self.reader_func(1)
        if excess_bytes != '':
            raise errors.ContainerHasExcessDataError(excess_bytes)
351
2506.3.1 by Andrew Bennetts
More progress:
352
353
class BytesRecordReader(BaseReader):
    """Reader for a single Bytes record, positioned just after its kind byte."""

    def read(self):
        """Read this record.

        You can either validate or read a record, you can't do both.

        :returns: A tuple of (names, callable).  The callable can be called
            repeatedly to obtain the bytes for the record, with a max_length
            argument.  If max_length is None, returns all the bytes.  Because
            records can be arbitrarily large, using None is not recommended
            unless you have reason to believe the content will fit in memory.
        :raises InvalidRecordError: if the length line is not an integer or
            a name fails _check_name.
        """
        # First header line: the content length.
        length_line = self._read_line()
        try:
            length = int(length_line)
        except ValueError:
            raise errors.InvalidRecordError(
                "%r is not a valid length." % (length_line,))

        # Following lines: one name tuple per line, up to an empty line.
        names = []
        name_line = self._read_line()
        while name_line != '':
            name_tuple = tuple(name_line.split('\x00'))
            for name in name_tuple:
                _check_name(name)
            names.append(name_tuple)
            name_line = self._read_line()

        self._remaining_length = length
        return names, self._content_reader

    def _content_reader(self, max_length):
        """Return up to 'max_length' bytes of the not-yet-read record body.

        :param max_length: upper bound on the bytes returned, or None for
            everything that remains.
        :raises UnexpectedEndOfContainerError: if the source yields fewer
            bytes than requested.
        """
        remaining = self._remaining_length
        if max_length is None:
            wanted = remaining
        else:
            wanted = min(max_length, remaining)
        # The bookkeeping is updated before the read is attempted.
        self._remaining_length = remaining - wanted
        data = self.reader_func(wanted)
        if len(data) != wanted:
            raise errors.UnexpectedEndOfContainerError()
        return data

    def validate(self):
        """Validate this record.

        You can either validate or read, you can't do both.

        :raises ContainerError: if this record is invalid.
        """
        names, read_bytes = self.read()
        for name_tuple in names:
            for name in name_tuple:
                _check_name_encoding(name)
        # Pull the whole body through so a truncated record is detected.
        read_bytes(None)
411
2916.2.1 by Andrew Bennetts
Initial implementation of a 'push' parser for the container format.
412
413
class ContainerPushParser(object):
    """A "push" parser for container format 1.

    Feed it data with ``accept_bytes``; completed records accumulate
    internally and are collected with ``read_pending_records``.  Each record
    is a (names, body_bytes) tuple.  ``finished`` becomes True once the end
    marker has been parsed.
    """

    def __init__(self):
        self._buffer = ''
        self._parsed_records = []
        self.finished = False
        # The parser is a state machine; the handler is the current state.
        self._state_handler = self._state_expecting_format_line
        self._reset_current_record()

    def _reset_current_record(self):
        """Forget the length and names of the record being parsed."""
        self._current_record_names = []
        self._current_record_length = None

    def accept_bytes(self, bytes):
        """Add 'bytes' to the buffer and parse as much of it as possible."""
        self._buffer += bytes
        # Keep iterating the state machine until it stops consuming bytes from
        # the buffer.  A change of state also counts as progress, so that
        # zero-length pieces (e.g. an empty record body) are still handled.
        previous_length = None
        previous_handler = None
        current_length = len(self._buffer)
        while (current_length != previous_length
               or previous_handler != self._state_handler):
            previous_length = current_length
            previous_handler = self._state_handler
            self._state_handler()
            current_length = len(self._buffer)

    def read_pending_records(self, max=None):
        """Remove and return parsed records.

        :param max: if true, return at most this many records; otherwise
            return every pending record.
        """
        if max:
            taken = self._parsed_records[:max]
            del self._parsed_records[:max]
        else:
            taken = self._parsed_records
            self._parsed_records = []
        return taken

    def _consume_line(self):
        """Take a line out of the buffer, and return the line.

        The returned line excludes its newline.  If a newline byte is not
        found in the buffer, the buffer is unchanged and this returns None
        instead.
        """
        line, newline, rest = self._buffer.partition('\n')
        if not newline:
            return None
        self._buffer = rest
        return line

    def _state_expecting_format_line(self):
        """State: waiting for the complete format line."""
        line = self._consume_line()
        if line is None:
            return
        if line != FORMAT_ONE:
            raise errors.UnknownContainerFormatError(line)
        self._state_handler = self._state_expecting_record_type

    def _state_expecting_record_type(self):
        """State: waiting for the one-character record kind marker."""
        if len(self._buffer) < 1:
            return
        record_type = self._buffer[0]
        self._buffer = self._buffer[1:]
        if record_type == 'B':
            self._state_handler = self._state_expecting_length
        elif record_type == 'E':
            self.finished = True
            self._state_handler = self._state_expecting_nothing
        else:
            raise errors.UnknownRecordTypeError(record_type)

    def _state_expecting_length(self):
        """State: waiting for the line holding the record body length."""
        line = self._consume_line()
        if line is None:
            return
        try:
            self._current_record_length = int(line)
        except ValueError:
            raise errors.InvalidRecordError(
                "%r is not a valid length." % (line,))
        self._state_handler = self._state_expecting_name

    def _state_expecting_name(self):
        """State: waiting for a name line, or the empty line ending them."""
        encoded_name_parts = self._consume_line()
        if encoded_name_parts is None:
            # No complete line buffered yet.
            return
        if encoded_name_parts == '':
            self._state_handler = self._state_expecting_body
            return
        name_parts = tuple(encoded_name_parts.split('\x00'))
        for name_part in name_parts:
            _check_name(name_part)
        self._current_record_names.append(name_parts)

    def _state_expecting_body(self):
        """State: waiting until the whole record body is buffered."""
        body_length = self._current_record_length
        if len(self._buffer) < body_length:
            return
        body_bytes = self._buffer[:body_length]
        self._buffer = self._buffer[body_length:]
        self._parsed_records.append((self._current_record_names, body_bytes))
        self._reset_current_record()
        self._state_handler = self._state_expecting_record_type

    def _state_expecting_nothing(self):
        """State: the end marker has been seen; further input is ignored."""
        pass

    def read_size_hint(self):
        """Suggest how many bytes the caller should read next.

        Returns 16384, or more when the body of the record currently being
        parsed still needs more than that.
        """
        hint = 16384
        if self._state_handler != self._state_expecting_body:
            return hint
        remaining = self._current_record_length - len(self._buffer)
        if remaining < 0:
            remaining = 0
        return max(hint, remaining)
2916.2.8 by Andrew Bennetts
Add bzrlib.pack.iter_records_from_file.
528
529
530
def iter_records_from_file(source_file):
    """Yield each record of the container read from 'source_file'.

    The file is read in chunks sized by the parser's read_size_hint() and
    fed to a ContainerPushParser; records are yielded as (names, bytes)
    tuples as soon as they are complete.

    :param source_file: a file-like object with a `read` method.
    :raises UnexpectedEndOfContainerError: if the file ends before the
        container's end marker has been seen.
    """
    parser = ContainerPushParser()
    while True:
        chunk = source_file.read(parser.read_size_hint())
        parser.accept_bytes(chunk)
        for record in parser.read_pending_records():
            yield record
        if parser.finished:
            break
        if not chunk:
            # An empty read means end of file.  Without this check a
            # truncated container would make this loop spin forever, since
            # the parser can never reach the finished state.
            raise errors.UnexpectedEndOfContainerError()
2916.2.1 by Andrew Bennetts
Initial implementation of a 'push' parser for the container format.
539