bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
2506.2.1
by Andrew Bennetts
 Start implementing container format reading and writing.  | 
1  | 
# Copyright (C) 2007 Canonical Ltd
 | 
2  | 
#
 | 
|
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
15  | 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
|
16  | 
||
17  | 
"""Container format for Bazaar data.
 | 
|
18  | 
||
| 
2916.2.13
by Andrew Bennetts
 Improve some docstrings.  | 
19  | 
"Containers" and "records" are described in
 | 
20  | 
doc/developers/container-format.txt.
 | 
|
| 
2506.2.1
by Andrew Bennetts
 Start implementing container format reading and writing.  | 
21  | 
"""
 | 
22  | 
||
| 
2661.2.2
by Robert Collins
 * ``bzrlib.pack.make_readv_reader`` allows readv based access to pack  | 
23  | 
from cStringIO import StringIO  | 
| 
2506.5.2
by Andrew Bennetts
 Raise InvalidRecordError on invalid names.  | 
24  | 
import re  | 
25  | 
||
| 
2506.2.1
by Andrew Bennetts
 Start implementing container format reading and writing.  | 
26  | 
from bzrlib import errors  | 
27  | 
||
28  | 
||
| 
2535.3.26
by Andrew Bennetts
 Revert merge of container-format changes rejected for bzr.dev (i.e. undo andrew.bennetts@canonical.com-20070717044423-cetp5spep142xsr4).  | 
29  | 
# First line of a format 1 container.  The serialiser emits it followed by a
# newline, and the readers compare the first line of their input against it.
FORMAT_ONE = "Bazaar pack format 1 (introduced in 0.18)"


# Matches any one of the whitespace characters that may not occur in a name.
_whitespace_re = re.compile('[\t\n\x0b\x0c\r ]')
|
| 
2506.5.2
by Andrew Bennetts
 Raise InvalidRecordError on invalid names.  | 
33  | 
|
34  | 
||
35  | 
def _check_name(name):
    """Perform a cheap sanity check on 'name'.

    The only rule enforced at the moment is that the name contains none of
    the whitespace characters matched by _whitespace_re.

    :raises InvalidRecordError: if name is not valid.
    :seealso: _check_name_encoding
    """
    whitespace_match = _whitespace_re.search(name)
    if whitespace_match is None:
        # No whitespace anywhere in the name: nothing to complain about.
        return
    raise errors.InvalidRecordError("%r is not a valid name." % (name,))
46  | 
||
47  | 
||
| 
2506.6.1
by Andrew Bennetts
 Return a callable instead of a str from read, and add more validation.  | 
48  | 
def _check_name_encoding(name):
    """Check that 'name' is valid UTF-8.

    This is separate from _check_name because UTF-8 decoding is relatively
    expensive, and we usually want to avoid it.

    :param name: the encoded name; it must provide a ``decode`` method (i.e.
        be a bytestring).
    :raises InvalidRecordError: if name is not valid UTF-8.
    """
    try:
        # Only whether decoding succeeds matters; the result is discarded.
        name.decode('utf-8')
    except UnicodeDecodeError as e:
        # "except X as e" parses on Python 2.6+ and on Python 3, whereas the
        # older "except X, e" spelling is a syntax error on Python 3.
        raise errors.InvalidRecordError(str(e))
|
60  | 
||
61  | 
||
| 
2916.2.5
by Andrew Bennetts
 Extract a ContainerSerialiser class from ContainerWriter.  | 
62  | 
class ContainerSerialiser(object):
    """Helper that builds the byte strings a container is made of.

    The 'begin', 'end' and 'bytes_record' methods only return bytes; nothing
    is written anywhere.  You may find ContainerWriter to be a more
    convenient interface.
    """

    def begin(self):
        """Return the bytes to begin a container."""
        return ''.join([FORMAT_ONE, "\n"])

    def end(self):
        """Return the bytes to finish a container."""
        end_marker = "E"
        return end_marker

    def bytes_record(self, bytes, names):
        """Return the serialised form of a Bytes record.

        :param bytes: the contents of the record.
        :param names: an iterable of names for the record.  Each name is a
            sequence of name parts; every part is passed to _check_name and
            the parts of one name are joined with NUL bytes.
        :return: the kind marker, length line, one line per name, a blank
            line and finally the contents, as a single string.
        """
        # Kind marker, then the length of the contents on a line of its own.
        sections = ["B", str(len(bytes)) + "\n"]
        for name_tuple in names:
            # Every part is checked before the name is added, so a bad name
            # raises before any serialised bytes are handed back.
            for part in name_tuple:
                _check_name(part)
            sections.append('\x00'.join(name_tuple) + "\n")
        # A blank line ends the headers; the contents follow it directly.
        sections.extend(["\n", bytes])
        # XXX: Joining copies the contents once more.  That is usually faster
        # than two separate write calls (12 vs 13 seconds to output a gig of
        # 1k records), but may cause extreme memory pressure for very large
        # records such as .iso's.  Those should be rare; one possible
        # improvement is to check the size of the content before deciding to
        # join here vs writing twice.
        return ''.join(sections)
|
106  | 
||
107  | 
||
| 
2506.3.1
by Andrew Bennetts
 More progress:  | 
108  | 
class ContainerWriter(object):
    """Writes a container through a caller-supplied write callable.

    :attribute records_written: The number of user records added to the
        container. This does not count the prelude or suffix of the container
        introduced by the begin() and end() methods.
    :attribute current_offset: The total length of everything passed to
        write_func so far.
    """

    def __init__(self, write_func):
        """Constructor.

        :param write_func: a callable that will be called when this
            ContainerWriter needs to write some bytes.
        """
        self._serialiser = ContainerSerialiser()
        self._write_func = write_func
        self.records_written = 0
        self.current_offset = 0

    def begin(self):
        """Begin writing a container."""
        prelude = self._serialiser.begin()
        self.write_func(prelude)

    def write_func(self, bytes):
        """Write 'bytes' with the wrapped callable and advance the offset."""
        self._write_func(bytes)
        self.current_offset = self.current_offset + len(bytes)

    def end(self):
        """Finish writing a container."""
        suffix = self._serialiser.end()
        self.write_func(suffix)

    def add_bytes_record(self, bytes, names):
        """Add a Bytes record with the given names.

        :param bytes: The bytes to insert.
        :param names: The names to give the inserted bytes. Each name is
            a tuple of bytestrings. The bytestrings may not contain
            whitespace.
        :return: An offset, length tuple. The offset is the offset
            of the record within the container, and the length is the
            length of data that will need to be read to reconstitute the
            record. These offset and length can only be used with the pack
            interface - they might be offset by headers or other such details
            and thus are only suitable for use by a ContainerReader.
        """
        # Remember where this record starts before anything is written.
        start = self.current_offset
        self.write_func(self._serialiser.bytes_record(bytes, names))
        self.records_written += 1
        # Return a memo of where the data went, to allow random access.
        return start, self.current_offset - start
|
159  | 
||
| 
2506.3.1
by Andrew Bennetts
 More progress:  | 
160  | 
|
| 
2661.2.2
by Robert Collins
 * ``bzrlib.pack.make_readv_reader`` allows readv based access to pack  | 
161  | 
class ReadVFile(object):
    """Adapt a readv result iterator to a file like protocol."""

    def __init__(self, readv_result):
        """Constructor.

        :param readv_result: an iterator yielding 2-tuples whose second item
            is the data of one hunk.  The first item is not used here.
        """
        self.readv_result = readv_result
        # The most recent readv result block wrapped in a StringIO, and the
        # length of its data.  Both are replaced whenever _next() advances.
        self._string = None
        self._string_length = None

    def _next(self):
        """Move on to the next hunk if none is loaded or the current is spent.

        StopIteration raised by the underlying iterator is not caught here
        and propagates to the caller.
        """
        if (self._string is None or
            self._string.tell() == self._string_length):
            # next() works with both the Python 2 and Python 3 iterator
            # protocols, unlike calling the .next() method directly.
            _unused, data = next(self.readv_result)
            self._string_length = len(data)
            self._string = StringIO(data)

    def read(self, length):
        """Read exactly 'length' bytes from the current hunk.

        :raises BzrError: if the current hunk holds fewer than 'length' more
            bytes; reads never continue into the next hunk.
        """
        self._next()
        result = self._string.read(length)
        if len(result) < length:
            raise errors.BzrError('request for too much data from a readv hunk.')
        return result

    def readline(self):
        """Read one line from the current hunk.

        Note that readline will not cross readv segments.

        :raises BzrError: if the hunk ends before a newline is found.
        """
        self._next()
        result = self._string.readline()
        # endswith() rather than result[-1]: an empty hunk gives an empty
        # result, and indexing that would raise IndexError instead of the
        # intended BzrError.
        if (self._string.tell() == self._string_length
            and not result.endswith('\n')):
            raise errors.BzrError('short readline in the readvfile hunk.')
        return result
|
190  | 
||
191  | 
||
192  | 
def make_readv_reader(transport, filename, requested_records):
    """Create a ContainerReader that will read selected records only.

    :param transport: The transport the pack file is located on.
    :param filename: The filename of the pack file.
    :param requested_records: The record offset, length tuples as returned
        by add_bytes_record for the desired records.
    :return: a ContainerReader wrapping a ReadVFile over the readv results.
    """
    # The format line (FORMAT_ONE plus its newline) is always requested
    # first, ahead of the records the caller asked for.
    format_line_block = (0, len(FORMAT_ONE) + 1)
    blocks = [format_line_block]
    blocks += requested_records
    return ContainerReader(ReadVFile(transport.readv(filename, blocks)))
|
205  | 
||
206  | 
||
| 
2506.3.1
by Andrew Bennetts
 More progress:  | 
207  | 
class BaseReader(object):
    """Shared plumbing for readers that pull data from a file-like object."""

    def __init__(self, source_file):
        """Constructor.

        :param source_file: a file-like object with `read` and `readline`
            methods.
        """
        self._source = source_file

    def reader_func(self, length=None):
        """Read from the source, passing 'length' straight to its `read`."""
        data = self._source.read(length)
        return data

    def _read_line(self):
        """Read one line from the source and return it without its newline.

        :raises UnexpectedEndOfContainerError: if what `readline` returned
            does not end with a newline.
        """
        line = self._source.readline()
        if line.endswith('\n'):
            return line.rstrip('\n')
        raise errors.UnexpectedEndOfContainerError()
|
| 
2506.3.1
by Andrew Bennetts
 More progress:  | 
225  | 
|
226  | 
||
227  | 
class ContainerReader(BaseReader):
    """A class for reading Bazaar's container format."""

    def iter_records(self):
        """Iterate over the container, yielding each record as it is read.

        Each yielded record is whatever the record object's ``read`` method
        returns.  For Bytes records that is a 2-tuple of (names, callable),
        where names is a ``list`` of name tuples and callable is a function
        that takes one argument, ``max_length``.

        You **must not** call the callable after advancing the iterator to
        the next record.  That is, this code is invalid::

            record_iter = container.iter_records()
            names1, callable1 = record_iter.next()
            names2, callable2 = record_iter.next()
            bytes1 = callable1(None)

        As it will give incorrect results and invalidate the state of the
        ContainerReader.

        :raises ContainerError: if any sort of container corruption is
            detected, e.g. UnknownContainerFormatError if the format of the
            container is unrecognised.
        :seealso: BytesRecordReader.read
        """
        # The format line is checked eagerly, before the generator is handed
        # back to the caller.
        self._read_format()
        return self._iter_records()

    def iter_record_objects(self):
        """Iterate over the container, yielding each record as it is read.

        Each yielded record will be an object with ``read`` and ``validate``
        methods.  Like with iter_records, it is not safe to use a record
        object after advancing the iterator to yield the next record.

        :raises ContainerError: if any sort of container corruption is
            detected, e.g. UnknownContainerFormatError if the format of the
            container is unrecognised.
        :seealso: iter_records
        """
        self._read_format()
        return self._iter_record_objects()

    def _iter_records(self):
        """Yield the result of ``read()`` for each record object in turn."""
        for record_object in self._iter_record_objects():
            yield record_object.read()

    def _iter_record_objects(self):
        """Yield a reader object per record until the end marker is seen."""
        while True:
            kind = self.reader_func(1)
            if kind == 'E':
                # End marker.  There are no more records.
                return
            if kind == 'B':
                # Bytes record.
                yield BytesRecordReader(self._source)
                continue
            if kind == '':
                # End of stream encountered, but no End Marker record seen,
                # so this container is incomplete.
                raise errors.UnexpectedEndOfContainerError()
            # Unknown record type.
            raise errors.UnknownRecordTypeError(kind)

    def _read_format(self):
        """Read the first line and check that it names a known format.

        :raises UnknownContainerFormatError: if the line is not FORMAT_ONE.
        """
        format_line = self._read_line()
        if format_line == FORMAT_ONE:
            return
        raise errors.UnknownContainerFormatError(format_line)

    def validate(self):
        """Validate this container and its records.

        Validating consumes the data stream just like iter_records and
        iter_record_objects, so you cannot call it after
        iter_records/iter_record_objects.

        :raises ContainerError: if something is invalid.
        """
        seen_names = set()
        for record_names, read_bytes in self.iter_records():
            # Consume the whole body of the record.
            read_bytes(None)
            for name_tuple in record_names:
                for name in name_tuple:
                    _check_name_encoding(name)
                # Check that the name is unique.  Note that Python will
                # refuse to decode non-shortest forms of UTF-8 encoding, so
                # there is no risk that the same unicode string has been
                # encoded two different ways.
                if name_tuple in seen_names:
                    raise errors.DuplicateRecordNameError(name_tuple)
                seen_names.add(name_tuple)
        # Nothing may follow the end marker.
        excess_bytes = self.reader_func(1)
        if excess_bytes == '':
            return
        raise errors.ContainerHasExcessDataError(excess_bytes)
|
323  | 
||
| 
2506.3.1
by Andrew Bennetts
 More progress:  | 
324  | 
|
325  | 
class BytesRecordReader(BaseReader):
    """Reader for one Bytes record.

    As parsed by ``read``, a record consists of a length line, zero or more
    name lines, a blank line and then the contents.
    """

    def read(self):
        """Read this record.

        You can either validate or read a record, you can't do both.

        :returns: A tuple of (names, callable).  The callable can be called
            repeatedly to obtain the bytes for the record, with a max_length
            argument.  If max_length is None, returns all the bytes.  Because
            records can be arbitrarily large, using None is not recommended
            unless you have reason to believe the content will fit in memory.
        :raises InvalidRecordError: if the length line is not a non-negative
            integer, or if a name fails _check_name.
        :raises UnexpectedEndOfContainerError: if a header line is cut short.
        """
        # Read the content length.
        length_line = self._read_line()
        try:
            length = int(length_line)
        except ValueError:
            raise errors.InvalidRecordError(
                "%r is not a valid length." % (length_line,))
        if length < 0:
            # int() happily parses "-5", but a negative size passed to a
            # file-like read() means "read everything", which would slurp the
            # remainder of the stream before the record was rejected.
            raise errors.InvalidRecordError(
                "%r is not a valid length." % (length_line,))

        # Read the list of names.  A blank line ends the headers.
        names = []
        while True:
            name_line = self._read_line()
            if name_line == '':
                break
            name_tuple = tuple(name_line.split('\x00'))
            for name in name_tuple:
                _check_name(name)
            names.append(name_tuple)

        self._remaining_length = length
        return names, self._content_reader

    def _content_reader(self, max_length):
        """Return the next chunk of this record's contents.

        :param max_length: the most bytes to return, or None for everything
            that remains of the record.
        :raises UnexpectedEndOfContainerError: if the source yields fewer
            bytes than were requested.
        """
        if max_length is None:
            length_to_read = self._remaining_length
        else:
            length_to_read = min(max_length, self._remaining_length)
        self._remaining_length -= length_to_read
        bytes = self.reader_func(length_to_read)
        if len(bytes) != length_to_read:
            raise errors.UnexpectedEndOfContainerError()
        return bytes

    def validate(self):
        """Validate this record.

        You can either validate or read, you can't do both.

        :raises ContainerError: if this record is invalid.
        """
        names, read_bytes = self.read()
        for name_tuple in names:
            for name in name_tuple:
                _check_name_encoding(name)
        # Consume the contents so that a truncated body is detected.
        read_bytes(None)
383  | 
||
| 
2916.2.1
by Andrew Bennetts
 Initial implementation of a 'push' parser for the container format.  | 
384  | 
|
385  | 
class ContainerPushParser(object):
    """A "push" parser for container format 1.

    It accepts bytes via the ``accept_bytes`` method, and parses them into
    records which can be retrieved via the ``read_pending_records`` method.

    :attribute finished: True once the end marker has been parsed.
    """

    def __init__(self):
        # Bytes received but not yet consumed by the state machine.
        self._buffer = ''
        # Bound method implementing the current state of the parser.
        self._state_handler = self._state_expecting_format_line
        # Fully parsed (names, body) records not yet handed to the caller.
        self._parsed_records = []
        self._reset_current_record()
        self.finished = False

    def _reset_current_record(self):
        """Forget the length and names of the record being parsed."""
        self._current_record_length = None
        self._current_record_names = []

    def accept_bytes(self, bytes):
        """Add 'bytes' to the buffer and parse as much of it as possible.

        :raises ContainerError: if the buffered data is not a valid
            container, e.g. UnknownContainerFormatError,
            UnknownRecordTypeError or InvalidRecordError.
        """
        self._buffer += bytes
        # Keep running the state machine until a step neither consumes bytes
        # nor changes state.  Looking at the buffer length alone is not
        # enough: a zero-length record body consumes nothing but still
        # completes a record and changes state, and stopping there would
        # leave data that is already buffered unparsed.
        while True:
            handler = self._state_handler
            buffer_length = len(self._buffer)
            handler()
            if (len(self._buffer) == buffer_length
                and self._state_handler == handler):
                break

    def read_pending_records(self, max=None):
        """Remove and return the records parsed so far.

        :param max: the maximum number of records to return.  None (or any
            other false value, such as 0) returns every pending record.
        :return: a list of (names, body) tuples.
        """
        if max:
            records = self._parsed_records[:max]
            del self._parsed_records[:max]
            return records
        else:
            records = self._parsed_records
            self._parsed_records = []
            return records

    def _consume_line(self):
        """Take a line out of the buffer, and return the line.

        If a newline byte is not found in the buffer, the buffer is
        unchanged and this returns None instead.
        """
        newline_pos = self._buffer.find('\n')
        if newline_pos != -1:
            line = self._buffer[:newline_pos]
            self._buffer = self._buffer[newline_pos+1:]
            return line
        else:
            return None

    def _state_expecting_format_line(self):
        """Parse the format line once a complete line is buffered."""
        line = self._consume_line()
        if line is not None:
            if line != FORMAT_ONE:
                raise errors.UnknownContainerFormatError(line)
            self._state_handler = self._state_expecting_record_type

    def _state_expecting_record_type(self):
        """Parse the one-byte record kind marker ('B' or 'E')."""
        if len(self._buffer) >= 1:
            record_type = self._buffer[0]
            self._buffer = self._buffer[1:]
            if record_type == 'B':
                self._state_handler = self._state_expecting_length
            elif record_type == 'E':
                self.finished = True
                self._state_handler = self._state_expecting_nothing
            else:
                raise errors.UnknownRecordTypeError(record_type)

    def _state_expecting_length(self):
        """Parse the length line of a Bytes record.

        :raises InvalidRecordError: if the line is not a non-negative
            integer.
        """
        line = self._consume_line()
        if line is not None:
            try:
                length = int(line)
            except ValueError:
                raise errors.InvalidRecordError(
                    "%r is not a valid length." % (line,))
            if length < 0:
                # int() parses "-5", but a negative length would make the
                # body state slice the buffer from the wrong end and
                # silently return a bogus record.
                raise errors.InvalidRecordError(
                    "%r is not a valid length." % (line,))
            self._current_record_length = length
            self._state_handler = self._state_expecting_name

    def _state_expecting_name(self):
        """Parse one name line; a blank line ends the list of names."""
        encoded_name_parts = self._consume_line()
        if encoded_name_parts == '':
            self._state_handler = self._state_expecting_body
        elif encoded_name_parts:
            name_parts = tuple(encoded_name_parts.split('\x00'))
            for name_part in name_parts:
                _check_name(name_part)
            self._current_record_names.append(name_parts)

    def _state_expecting_body(self):
        """Complete the current record once its whole body is buffered."""
        if len(self._buffer) >= self._current_record_length:
            body_bytes = self._buffer[:self._current_record_length]
            self._buffer = self._buffer[self._current_record_length:]
            record = (self._current_record_names, body_bytes)
            self._parsed_records.append(record)
            self._reset_current_record()
            self._state_handler = self._state_expecting_record_type

    def _state_expecting_nothing(self):
        """State after the end marker: leave any further bytes untouched."""
        pass

    def read_size_hint(self):
        """Suggest how many bytes the caller should read next.

        :return: 16384, or the number of bytes still missing from the body
            of the current record if that is larger.
        """
        hint = 16384
        if self._state_handler == self._state_expecting_body:
            remaining = self._current_record_length - len(self._buffer)
            if remaining < 0:
                remaining = 0
            return max(hint, remaining)
        return hint
|
| 
2916.2.8
by Andrew Bennetts
 Add bzrlib.pack.iter_records_from_file.  | 
497  | 
|
498  | 
||
499  | 
def iter_records_from_file(source_file):
    """Yield the records of the container read from 'source_file'.

    The file is read in chunks sized by the parser's read_size_hint() and
    fed to a ContainerPushParser; records are yielded as soon as they have
    been parsed.

    :param source_file: a file-like object with a `read` method that
        returns an empty string at end of file.
    :return: a generator of the records produced by the parser, i.e.
        (names, body) tuples.
    :raises UnexpectedEndOfContainerError: if the file ends before the end
        marker of the container has been seen.
    :raises ContainerError: if the parser rejects the data.
    """
    parser = ContainerPushParser()
    while True:
        bytes = source_file.read(parser.read_size_hint())
        parser.accept_bytes(bytes)
        records = parser.read_pending_records()
        for record in records:
            yield record
        if parser.finished:
            break
        if not bytes and not records:
            # End of file, and the parser got nothing more out of what it
            # already had buffered: the container is truncated.  Without
            # this check the loop would keep reading '' forever.  (While the
            # parser still produces records from buffered data we keep going,
            # so that a parser which pauses between records is drained.)
            raise errors.UnexpectedEndOfContainerError()
 | 
|
| 
2916.2.1
by Andrew Bennetts
 Initial implementation of a 'push' parser for the container format.  | 
508  |