bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
5089.1.1
by Martin Pool
 Fix typo in ReadVFile.readline (thanks mnordhoff)  | 
1  | 
# Copyright (C) 2007, 2009, 2010 Canonical Ltd
 | 
| 
2506.2.1
by Andrew Bennetts
 Start implementing container format reading and writing.  | 
2  | 
#
 | 
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
| 
4183.7.1
by Sabin Iacob
 update FSF mailing address  | 
15  | 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
| 
2506.2.1
by Andrew Bennetts
 Start implementing container format reading and writing.  | 
16  | 
|
17  | 
"""Container format for Bazaar data.
 | 
|
18  | 
||
| 
2916.2.13
by Andrew Bennetts
 Improve some docstrings.  | 
19  | 
"Containers" and "records" are described in
 | 
20  | 
doc/developers/container-format.txt.
 | 
|
| 
2506.2.1
by Andrew Bennetts
 Start implementing container format reading and writing.  | 
21  | 
"""
 | 
22  | 
||
| 
2661.2.2
by Robert Collins
 * ``bzrlib.pack.make_readv_reader`` allows readv based access to pack  | 
23  | 
from cStringIO import StringIO  | 
| 
2506.5.2
by Andrew Bennetts
 Raise InvalidRecordError on invalid names.  | 
24  | 
import re  | 
25  | 
||
| 
2506.2.1
by Andrew Bennetts
 Start implementing container format reading and writing.  | 
26  | 
from bzrlib import errors  | 
27  | 
||
28  | 
||
| 
2535.3.26
by Andrew Bennetts
 Revert merge of container-format changes rejected for bzr.dev (i.e. undo andrew.bennetts@canonical.com-20070717044423-cetp5spep142xsr4).  | 
29  | 
# Format marker for container format 1.  ContainerSerialiser.begin emits this
# string followed by a newline, and ContainerReader._read_format compares the
# first line of a container against it.
FORMAT_ONE = "Bazaar pack format 1 (introduced in 0.18)"  | 
30  | 
||
31  | 
||
32  | 
# Matches a single tab, newline, vertical tab, form feed, carriage return or
# space.  _check_name searches names with this pattern to reject whitespace.
_whitespace_re = re.compile('[\t\n\x0b\x0c\r ]')  | 
|
| 
2506.5.2
by Andrew Bennetts
 Raise InvalidRecordError on invalid names.  | 
33  | 
|
34  | 
||
35  | 
def _check_name(name):
    """Reject record names that contain whitespace.

    Only a cheap structural check is made: the name is searched for any
    character matched by ``_whitespace_re``.  Whether the name is valid UTF-8
    is checked separately.

    :param name: the record name to check.
    :raises InvalidRecordError: if name contains a whitespace character.
    :seealso: _check_name_encoding
    """
    whitespace_match = _whitespace_re.search(name)
    if whitespace_match is None:
        return
    raise errors.InvalidRecordError("%r is not a valid name." % (name,))
46  | 
||
47  | 
||
| 
2506.6.1
by Andrew Bennetts
 Return a callable instead of a str from read, and add more validation.  | 
48  | 
def _check_name_encoding(name):  | 
49  | 
"""Check that 'name' is valid UTF-8.  | 
|
| 
3943.8.1
by Marius Kruger
 remove all trailing whitespace from bzr source  | 
50  | 
|
| 
2506.6.1
by Andrew Bennetts
 Return a callable instead of a str from read, and add more validation.  | 
51  | 
    This is separate from _check_name because UTF-8 decoding is relatively
 | 
52  | 
    expensive, and we usually want to avoid it.
 | 
|
53  | 
||
54  | 
    :raises InvalidRecordError: if name is not valid UTF-8.
 | 
|
55  | 
    """
 | 
|
56  | 
try:  | 
|
57  | 
name.decode('utf-8')  | 
|
58  | 
except UnicodeDecodeError, e:  | 
|
59  | 
raise errors.InvalidRecordError(str(e))  | 
|
60  | 
||
61  | 
||
| 
2916.2.5
by Andrew Bennetts
 Extract a ContainerSerialiser class from ContainerWriter.  | 
62  | 
class ContainerSerialiser(object):
    """Helper that renders the pieces of a container as byte strings.

    The 'begin', 'end' and 'bytes_record' methods only return bytes; nothing
    is written anywhere.  ContainerWriter offers a more convenient interface
    for writing a container out.
    """

    def begin(self):
        """Return the bytes that open a container (the format line)."""
        return ''.join((FORMAT_ONE, "\n"))

    def end(self):
        """Return the bytes that close a container (the End marker)."""
        end_marker = "E"
        return end_marker

    def bytes_record(self, bytes, names):
        """Return the serialised form of one Bytes record.

        The result consists of the kind marker "B", the content length in
        decimal followed by a newline, one newline-terminated line per name
        tuple (the tuple's elements joined with NUL), an empty line ending
        the headers, and finally the content itself.

        :param bytes: the content of the record.
        :param names: an iterable of name tuples.  Every element of every
            tuple is checked with _check_name.
        :raises InvalidRecordError: (from _check_name) if a name is invalid.
        :return: the complete record as one string.
        """
        kind_marker = "B"
        length_line = str(len(bytes)) + "\n"
        name_lines = []
        for name_tuple in names:
            # Validate each element before serialising its tuple.  A bad
            # name aborts with an exception, so no partial record is ever
            # returned to the caller.
            for name in name_tuple:
                _check_name(name)
            name_lines.append('\x00'.join(name_tuple) + "\n")
        end_of_headers = "\n"
        # XXX: Joining copies the record content once, but is usually faster
        # than issuing two write calls (12 vs 13 seconds to output a gig of
        # 1k records).  Results may differ for much larger records such as
        # .iso images, but those should be rare; the main concern there is
        # memory pressure from the copy.  One possible improvement is to
        # check the size of the content before deciding whether to join here
        # or to call write twice.
        pieces = [kind_marker, length_line]
        pieces.extend(name_lines)
        pieces.append(end_of_headers)
        pieces.append(bytes)
        return ''.join(pieces)
|
106  | 
||
107  | 
||
| 
2506.3.1
by Andrew Bennetts
 More progress:  | 
108  | 
class ContainerWriter(object):
    """Writes a container through a caller-supplied write callable.

    :attribute records_written: The number of user records added to the
        container. This does not count the prelude or suffix of the container
        introduced by the begin() and end() methods.
    :attribute current_offset: The total number of bytes passed to the write
        callable so far.
    """

    def __init__(self, write_func):
        """Constructor.

        :param write_func: a callable that will be called when this
            ContainerWriter needs to write some bytes.
        """
        self._serialiser = ContainerSerialiser()
        self._write_func = write_func
        self.records_written = 0
        self.current_offset = 0

    def begin(self):
        """Write the opening bytes of a container."""
        prelude = self._serialiser.begin()
        self.write_func(prelude)

    def write_func(self, bytes):
        """Write ``bytes`` via the wrapped callable and advance the offset.

        The offset is only advanced after the wrapped callable returns.
        """
        self._write_func(bytes)
        self.current_offset = self.current_offset + len(bytes)

    def end(self):
        """Write the closing bytes of a container."""
        suffix = self._serialiser.end()
        self.write_func(suffix)

    def add_bytes_record(self, bytes, names):
        """Serialise and write one Bytes record.

        :param bytes: The bytes to insert.
        :param names: The names to give the inserted bytes. Each name is
            a tuple of bytestrings. The bytestrings may not contain
            whitespace.
        :return: An (offset, length) tuple. The offset is where the record
            starts within the container, and the length is how much data
            must be read to reconstitute the record. Both values are only
            meaningful to the pack interface - they might be offset by
            headers or other such details - and so are only suitable for
            use by a ContainerReader.
        """
        record_start = self.current_offset
        self.write_func(self._serialiser.bytes_record(bytes, names))
        self.records_written += 1
        # Hand back a memo of where the record landed, for random access.
        record_length = self.current_offset - record_start
        return record_start, record_length
|
159  | 
||
| 
2506.3.1
by Andrew Bennetts
 More progress:  | 
160  | 
|
| 
2661.2.2
by Robert Collins
 * ``bzrlib.pack.make_readv_reader`` allows readv based access to pack  | 
161  | 
class ReadVFile(object):
    """Adapt a readv result iterator to a file like protocol.

    The readv result must support the iterator protocol returning (offset,
    data_bytes) pairs.  The data of each pair is treated as one hunk; a
    single ``read`` or ``readline`` call never spans two hunks.
    """

    # XXX: This could be a generic transport class, as other code may want to
    # gradually consume the readv result.

    def __init__(self, readv_result):
        """Construct a new ReadVFile wrapper.

        :seealso: make_readv_reader

        :param readv_result: the most recent readv result - list or generator
        """
        # readv can return a sequence or an iterator, but we require an
        # iterator to know how much has been consumed.
        self.readv_result = iter(readv_result)
        # The current hunk wrapped in a StringIO, or None before the first
        # hunk has been fetched.
        self._string = None
        self._string_length = 0

    def _next(self):
        """Move on to the next hunk if the current one is fully consumed.

        Nothing happens while unread data remains in the current hunk.  A
        StopIteration from the underlying iterator is not caught here.
        """
        if (self._string is None or
                self._string.tell() == self._string_length):
            # The builtin next() works on Python 2.6+ and 3.x; the iterator
            # method .next() only exists on Python 2.
            offset, data = next(self.readv_result)
            self._string_length = len(data)
            self._string = StringIO(data)

    def read(self, length):
        """Read ``length`` bytes from the current hunk.

        :param length: the number of bytes wanted.
        :raises BzrError: if the hunk holds fewer than ``length`` more bytes.
        :return: the bytes read.
        """
        self._next()
        result = self._string.read(length)
        if len(result) < length:
            raise errors.BzrError('wanted %d bytes but next '
                'hunk only contains %d: %r...' %
                (length, len(result), result[:20]))
        return result

    def readline(self):
        """Read one line from the current hunk.

        Note that readline will not cross readv segments.

        :raises BzrError: if the hunk ends before a newline is seen.
        :return: the line, including its trailing newline.
        """
        self._next()
        result = self._string.readline()
        # Slice rather than index: an empty hunk yields an empty result, and
        # result[-1] would raise IndexError instead of the intended error.
        if (self._string.tell() == self._string_length
                and result[-1:] != '\n'):
            raise errors.BzrError('short readline in the readvfile hunk: %r'
                % (result, ))
        return result
208  | 
||
209  | 
||
210  | 
def make_readv_reader(transport, filename, requested_records):
    """Build a ContainerReader that only reads the selected records.

    The format line at the start of the pack file is always requested ahead
    of the records, since the reader checks it before yielding anything.

    :param transport: The transport the pack file is located on.
    :param filename: The filename of the pack file.
    :param requested_records: The (offset, length) tuples, as returned by
        add_bytes_record, of the desired records.
    :return: a ContainerReader wrapping the readv result.
    """
    # The format marker plus its trailing newline.
    format_line_length = len(FORMAT_ONE) + 1
    blocks_to_read = [(0, format_line_length)] + list(requested_records)
    readv_file = ReadVFile(transport.readv(filename, blocks_to_read))
    return ContainerReader(readv_file)
|
223  | 
||
224  | 
||
| 
2506.3.1
by Andrew Bennetts
 More progress:  | 
225  | 
class BaseReader(object):
    """Shared plumbing for readers that pull data from a file-like object."""

    def __init__(self, source_file):
        """Constructor.

        :param source_file: a file-like object with `read` and `readline`
            methods.
        """
        self._source = source_file

    def reader_func(self, length=None):
        """Read from the source; ``length`` is passed on to its ``read``."""
        source = self._source
        return source.read(length)

    def _read_line(self):
        """Read one line from the source and return it without its newline.

        :raises UnexpectedEndOfContainerError: if the line read does not end
            with a newline.
        """
        raw_line = self._source.readline()
        if raw_line.endswith('\n'):
            return raw_line.rstrip('\n')
        raise errors.UnexpectedEndOfContainerError()
|
| 
2506.3.1
by Andrew Bennetts
 More progress:  | 
243  | 
|
244  | 
||
245  | 
class ContainerReader(BaseReader):
    """Reads records out of a stream in Bazaar's container format."""

    def iter_records(self):
        """Iterate over the container, yielding each record as it is read.

        The format line is read and checked immediately, when this method is
        called; the records themselves are read lazily.

        Each yielded record is a 2-tuple of (names, callable), where names
        is a ``list`` of name tuples and callable is a function that takes
        one argument, ``max_length``, and returns bytes of the record.

        You **must not** call the callable after advancing the iterator to
        the next record.  That is, this code is invalid::

            record_iter = container.iter_records()
            names1, callable1 = next(record_iter)
            names2, callable2 = next(record_iter)
            bytes1 = callable1(None)

        As it will give incorrect results and invalidate the state of the
        ContainerReader.

        :raises ContainerError: if any sort of container corruption is
            detected, e.g. UnknownContainerFormatError if the format of the
            container is unrecognised.
        :seealso: BytesRecordReader.read
        """
        self._read_format()
        return self._iter_records()

    def iter_record_objects(self):
        """Iterate over the container, yielding each record as it is read.

        The format line is read and checked immediately, when this method is
        called.  Each yielded record is an object with ``read`` and
        ``validate`` methods.  As with iter_records, it is not safe to use a
        record object after advancing the iterator to the next record.

        :raises ContainerError: if any sort of container corruption is
            detected, e.g. UnknownContainerFormatError if the format of the
            container is unrecognised.
        :seealso: iter_records
        """
        self._read_format()
        return self._iter_record_objects()

    def _iter_records(self):
        """Lazily yield the result of ``read()`` for each record object."""
        return (record.read() for record in self._iter_record_objects())

    def _iter_record_objects(self):
        """Yield a record reader for each record until the End marker.

        :raises UnexpectedEndOfContainerError: if the stream ends before an
            End marker is seen.
        :raises UnknownRecordTypeError: if a record kind other than 'B' or
            'E' is found.
        """
        while True:
            kind = self.reader_func(1)
            if kind == 'E':
                # End marker.  There are no more records.
                return
            if kind == 'B':
                # Bytes record.
                yield BytesRecordReader(self._source)
                continue
            if kind == '':
                # The stream ran out without an End marker record, so this
                # container is incomplete.
                raise errors.UnexpectedEndOfContainerError()
            # Any other kind byte is a record type we do not know.
            raise errors.UnknownRecordTypeError(kind)

    def _read_format(self):
        """Read the first line and check that it is the known format marker.

        :raises UnknownContainerFormatError: if the line is not FORMAT_ONE.
        """
        format_line = self._read_line()
        if format_line == FORMAT_ONE:
            return
        raise errors.UnknownContainerFormatError(format_line)

    def validate(self):
        """Validate this container and its records.

        Validating consumes the data stream just like iter_records and
        iter_record_objects, so you cannot call it after
        iter_records/iter_record_objects.

        :raises ContainerError: if something is invalid.
        """
        seen_names = set()
        for names, content_reader in self.iter_records():
            # Consume the whole content so the stream sits at the next
            # record.
            content_reader(None)
            for name_tuple in names:
                for part in name_tuple:
                    _check_name_encoding(part)
                # Check that the name is unique.  Note that Python will
                # refuse to decode non-shortest forms of UTF-8 encoding, so
                # there is no risk that the same unicode string has been
                # encoded two different ways.
                if name_tuple in seen_names:
                    raise errors.DuplicateRecordNameError(name_tuple)
                seen_names.add(name_tuple)
        # Nothing may follow the End marker.
        trailing = self.reader_func(1)
        if trailing == '':
            return
        raise errors.ContainerHasExcessDataError(trailing)
|
341  | 
||
| 
2506.3.1
by Andrew Bennetts
 More progress:  | 
342  | 
|
343  | 
class BytesRecordReader(BaseReader):
    """Reads one Bytes record: its length line, its names and its content."""

    def read(self):
        """Read this record.

        You can either validate or read a record, you can't do both.

        :raises InvalidRecordError: if the length line is not a non-negative
            integer, or if a name fails _check_name.
        :returns: A tuple of (names, callable).  names is a list of name
            tuples.  The callable can be called repeatedly to obtain the
            bytes for the record, with a max_length argument.  If max_length
            is None, returns all the bytes.  Because records can be
            arbitrarily large, using None is not recommended unless you have
            reason to believe the content will fit in memory.
        """
        # Read the content length.
        length_line = self._read_line()
        try:
            length = int(length_line)
        except ValueError:
            raise errors.InvalidRecordError(
                "%r is not a valid length." % (length_line,))
        if length < 0:
            # int() accepts a leading minus sign, but a negative size handed
            # to a file-like read() conventionally means "read until EOF",
            # so it must never be used as a record length.
            raise errors.InvalidRecordError(
                "%r is not a valid length." % (length_line,))

        # Read the list of names; an empty line ends the headers.
        names = []
        while True:
            name_line = self._read_line()
            if name_line == '':
                break
            name_tuple = tuple(name_line.split('\x00'))
            for name in name_tuple:
                _check_name(name)
            names.append(name_tuple)

        self._remaining_length = length
        return names, self._content_reader

    def _content_reader(self, max_length):
        """Return up to ``max_length`` more bytes of this record's content.

        :param max_length: the most bytes to return, or None for all of the
            content that has not been returned yet.
        :raises UnexpectedEndOfContainerError: if the source yields fewer
            bytes than were asked of it.
        """
        if max_length is None:
            length_to_read = self._remaining_length
        else:
            length_to_read = min(max_length, self._remaining_length)
        self._remaining_length -= length_to_read
        bytes = self.reader_func(length_to_read)
        if len(bytes) != length_to_read:
            raise errors.UnexpectedEndOfContainerError()
        return bytes

    def validate(self):
        """Validate this record.

        You can either validate or read, you can't do both.

        :raises ContainerError: if this record is invalid.
        """
        names, read_bytes = self.read()
        for name_tuple in names:
            for name in name_tuple:
                _check_name_encoding(name)
        # Consume the content so that a truncated record is detected.
        read_bytes(None)
401  | 
||
| 
2916.2.1
by Andrew Bennetts
 Initial implementation of a 'push' parser for the container format.  | 
402  | 
|
403  | 
class ContainerPushParser(object):
    """A "push" parser for container format 1.

    It accepts bytes via the ``accept_bytes`` method, and parses them into
    records which can be retrieved via the ``read_pending_records`` method.

    Internally this is a small state machine: ``_state_handler`` always
    refers to the ``_state_*`` method that should look at the buffer next.
    Each handler either consumes data from the front of ``_buffer`` and
    selects the next handler, or leaves everything untouched when the
    buffer does not yet hold enough data.

    The ``finished`` attribute becomes True once the end marker has been
    parsed.
    """

    def __init__(self):
        # Data accepted but not yet consumed by a state handler.
        self._buffer = ''
        self._state_handler = self._state_expecting_format_line
        # Completed (names, body) records not yet handed to the caller.
        self._parsed_records = []
        self._reset_current_record()
        self.finished = False

    def _reset_current_record(self):
        """Forget the length and names of the record being parsed."""
        self._current_record_length = None
        self._current_record_names = []

    def accept_bytes(self, bytes):
        """Append ``bytes`` to the buffer and parse as much as possible.

        :param bytes: the next chunk of the container stream.
        :raises: whatever the state handlers raise for malformed input.
        """
        self._buffer += bytes
        # Keep iterating the state machine until it stops consuming bytes from
        # the buffer.
        last_buffer_length = None
        cur_buffer_length = len(self._buffer)
        last_state_handler = None
        # A change of handler also counts as progress: a zero-length record
        # body moves the machine forward without consuming any bytes.
        while (cur_buffer_length != last_buffer_length
               or last_state_handler != self._state_handler):
            last_buffer_length = cur_buffer_length
            last_state_handler = self._state_handler
            self._state_handler()
            cur_buffer_length = len(self._buffer)

    def read_pending_records(self, max=None):
        """Return parsed records and remove them from the pending list.

        :param max: if true, return at most this many records; otherwise
            (None or 0) return every pending record.
        :return: a list of (names, body) tuples, oldest first.
        """
        if max:
            records = self._parsed_records[:max]
            del self._parsed_records[:max]
            return records
        else:
            records = self._parsed_records
            self._parsed_records = []
            return records

    def _consume_line(self):
        """Take a line out of the buffer, and return the line.

        If a newline byte is not found in the buffer, the buffer is
        unchanged and this returns None instead.
        """
        newline_pos = self._buffer.find('\n')
        if newline_pos != -1:
            line = self._buffer[:newline_pos]
            self._buffer = self._buffer[newline_pos + 1:]
            return line
        else:
            return None

    def _state_expecting_format_line(self):
        """Consume the format line; anything but FORMAT_ONE is an error."""
        line = self._consume_line()
        if line is not None:
            if line != FORMAT_ONE:
                raise errors.UnknownContainerFormatError(line)
            self._state_handler = self._state_expecting_record_type

    def _state_expecting_record_type(self):
        """Consume the one-character record type.

        'B' starts a bytes record and 'E' is the end marker; any other
        character raises UnknownRecordTypeError.
        """
        if len(self._buffer) >= 1:
            record_type = self._buffer[0]
            self._buffer = self._buffer[1:]
            if record_type == 'B':
                self._state_handler = self._state_expecting_length
            elif record_type == 'E':
                self.finished = True
                self._state_handler = self._state_expecting_nothing
            else:
                raise errors.UnknownRecordTypeError(record_type)

    def _state_expecting_length(self):
        """Consume the line holding the body length of the current record.

        :raises InvalidRecordError: if the line is not an integer, or is a
            negative integer.
        """
        line = self._consume_line()
        if line is not None:
            try:
                length = int(line)
            except ValueError:
                length = -1
            # int() happily parses "-5", but a negative length would make
            # the body handler slice relative to the END of the buffer and
            # emit a bogus record, so reject it like any other bad length.
            if length < 0:
                raise errors.InvalidRecordError(
                    "%r is not a valid length." % (line,))
            self._current_record_length = length
            self._state_handler = self._state_expecting_name

    def _state_expecting_name(self):
        """Consume one name line, or the empty line that ends the names.

        A non-empty line is split on NUL into a tuple of name parts, each
        of which is passed to ``_check_name``.
        """
        encoded_name_parts = self._consume_line()
        if encoded_name_parts == '':
            # Empty line: the name list is over and the body follows.
            self._state_handler = self._state_expecting_body
        elif encoded_name_parts:
            name_parts = tuple(encoded_name_parts.split('\x00'))
            for name_part in name_parts:
                _check_name(name_part)
            self._current_record_names.append(name_parts)
        # Otherwise (None): no complete line buffered yet; wait for more.

    def _state_expecting_body(self):
        """Consume the record body once all of it has been buffered."""
        if len(self._buffer) >= self._current_record_length:
            body_bytes = self._buffer[:self._current_record_length]
            self._buffer = self._buffer[self._current_record_length:]
            record = (self._current_record_names, body_bytes)
            self._parsed_records.append(record)
            self._reset_current_record()
            self._state_handler = self._state_expecting_record_type

    def _state_expecting_nothing(self):
        """Terminal state entered after the end marker; consumes nothing."""
        pass

    def read_size_hint(self):
        """Suggest how many bytes to pass to the next ``accept_bytes`` call.

        :return: 16384, or the number of bytes still missing from the
            current record body when that is larger.
        """
        hint = 16384
        if self._state_handler == self._state_expecting_body:
            remaining = self._current_record_length - len(self._buffer)
            if remaining < 0:
                remaining = 0
            return max(hint, remaining)
        return hint
|
| 
2916.2.8
by Andrew Bennetts
 Add bzrlib.pack.iter_records_from_file.  | 
518  | 
|
519  | 
||
520  | 
def iter_records_from_file(source_file):
    """Iterate over the records of a container read from ``source_file``.

    The file is read in chunks sized by the parser's ``read_size_hint`` and
    fed to a ``ContainerPushParser``; records are yielded as soon as they
    have been parsed. Iteration stops once the parser reports that it is
    finished.

    :param source_file: an object whose ``read(size)`` returns the next
        chunk of the stream and an empty string at end of file.
    :return: a generator of (names, body) records.
    :raises UnexpectedEndOfContainerError: if ``source_file`` is exhausted
        before the parser has finished.
    """
    parser = ContainerPushParser()
    while True:
        data = source_file.read(parser.read_size_hint())
        parser.accept_bytes(data)
        for record in parser.read_pending_records():
            yield record
        if parser.finished:
            break
        if not data:
            # End of file without an end marker: nothing more will ever
            # arrive, so fail instead of looping forever on empty reads.
            raise errors.UnexpectedEndOfContainerError()
 | 
|
| 
2916.2.1
by Andrew Bennetts
 Initial implementation of a 'push' parser for the container format.  | 
529  |