20
20
doc/developers/container-format.txt.
23
from cStringIO import StringIO
23
from __future__ import absolute_import
26
from bzrlib import errors
29
FORMAT_ONE = "Bazaar pack format 1 (introduced in 0.18)"
32
_whitespace_re = re.compile('[\t\n\x0b\x0c\r ]')
28
from ..sixish import (
33
FORMAT_ONE = b"Bazaar pack format 1 (introduced in 0.18)"
36
_whitespace_re = re.compile(b'[\t\n\x0b\x0c\r ]')
35
39
def _check_name(name):
71
75
"""Return the bytes to begin a container."""
72
return FORMAT_ONE + "\n"
76
return FORMAT_ONE + b"\n"
75
79
"""Return the bytes to finish a container."""
78
def bytes_record(self, bytes, names):
79
"""Return the bytes for a Bytes record with the given name and
82
def bytes_header(self, length, names):
83
"""Return the header for a Bytes record."""
85
byte_sections = [b"B"]
85
byte_sections.append(str(len(bytes)) + "\n")
87
byte_sections.append(b"%d\n" % (length,))
87
89
for name_tuple in names:
88
90
# Make sure we're writing valid names. Note that we will leave a
89
91
# half-written record if a name is bad!
90
92
for name in name_tuple:
92
byte_sections.append('\x00'.join(name_tuple) + "\n")
94
byte_sections.append(b'\x00'.join(name_tuple) + b"\n")
94
byte_sections.append("\n")
95
# Finally, the contents.
96
byte_sections.append(bytes)
97
# XXX: This causes a memory copy of bytes in size, but is usually
98
# faster than two write calls (12 vs 13 seconds to output a gig of
99
# 1k records.) - results may differ on significantly larger records
100
# like .iso's but as they should be rare in any case and thus not
101
# likely to be the common case. The biggest issue is causing extreme
102
# memory pressure in that case. One possibly improvement here is to
103
# check the size of the content before deciding to join here vs call
105
return ''.join(byte_sections)
96
byte_sections.append(b"\n")
97
return b''.join(byte_sections)
99
def bytes_record(self, bytes, names):
100
"""Return the bytes for a Bytes record with the given name and
103
If the content may be large, construct the header separately and then
104
stream out the contents.
106
return self.bytes_header(len(bytes), names) + bytes
108
109
class ContainerWriter(object):
113
114
introduced by the begin() and end() methods.
117
# Join up headers with the body if writing fewer than this many bytes:
118
# trades off memory usage and copying to do less IO ops.
119
_JOIN_WRITES_THRESHOLD = 100000
116
121
def __init__(self, write_func):
151
156
and thus are only suitable for use by a ContainerReader.
153
158
current_offset = self.current_offset
154
serialised_record = self._serialiser.bytes_record(bytes, names)
155
self.write_func(serialised_record)
160
if length < self._JOIN_WRITES_THRESHOLD:
161
self.write_func(self._serialiser.bytes_header(length, names)
164
self.write_func(self._serialiser.bytes_header(length, names))
165
self.write_func(bytes)
156
166
self.records_written += 1
157
167
# return a memo of where we wrote data to allow random access.
158
168
return current_offset, self.current_offset - current_offset
185
195
if (self._string is None or
186
self._string.tell() == self._string_length):
187
offset, data = self.readv_result.next()
196
self._string.tell() == self._string_length):
197
offset, data = next(self.readv_result)
188
198
self._string_length = len(data)
189
self._string = StringIO(data)
199
self._string = BytesIO(data)
191
201
def read(self, length):
193
203
result = self._string.read(length)
194
204
if len(result) < length:
195
205
raise errors.BzrError('wanted %d bytes but next '
196
'hunk only contains %d: %r...' %
197
(length, len(result), result[:20]))
206
'hunk only contains %d: %r...' %
207
(length, len(result), result[:20]))
200
210
def readline(self):
201
211
"""Note that readline will not cross readv segments."""
203
213
result = self._string.readline()
204
if self._string.tell() == self._string_length and result[-1] != '\n':
214
if self._string.tell() == self._string_length and result[-1:] != b'\n':
205
215
raise errors.BzrError('short readline in the readvfile hunk: %r'
215
225
:param requested_records: The record offset, length tuples as returned
216
226
by add_bytes_record for the desired records.
218
readv_blocks = [(0, len(FORMAT_ONE)+1)]
228
readv_blocks = [(0, len(FORMAT_ONE) + 1)]
219
229
readv_blocks.extend(requested_records)
220
230
result = ContainerReader(ReadVFile(
221
231
transport.readv(filename, readv_blocks)))
238
248
def _read_line(self):
239
249
line = self._source.readline()
240
if not line.endswith('\n'):
250
if not line.endswith(b'\n'):
241
251
raise errors.UnexpectedEndOfContainerError()
242
return line.rstrip('\n')
252
return line.rstrip(b'\n')
245
255
class ContainerReader(BaseReader):
293
303
def _iter_record_objects(self):
295
record_kind = self.reader_func(1)
296
if record_kind == 'B':
306
record_kind = self.reader_func(1)
307
except StopIteration:
309
if record_kind == b'B':
298
311
reader = BytesRecordReader(self._source)
300
elif record_kind == 'E':
313
elif record_kind == b'E':
301
314
# End marker. There are no more records.
303
elif record_kind == '':
316
elif record_kind == b'':
304
317
# End of stream encountered, but no End Marker record seen, so
305
318
# this container is incomplete.
306
319
raise errors.UnexpectedEndOfContainerError()
333
346
# risk that the same unicode string has been encoded two
334
347
# different ways.
335
348
if name_tuple in all_names:
336
raise errors.DuplicateRecordNameError(name_tuple)
349
raise errors.DuplicateRecordNameError(name_tuple[0])
337
350
all_names.add(name_tuple)
338
351
excess_bytes = self.reader_func(1)
339
if excess_bytes != '':
352
if excess_bytes != b'':
340
353
raise errors.ContainerHasExcessDataError(excess_bytes)
448
461
If a newline byte is not found in the buffer, the buffer is
449
462
unchanged and this returns None instead.
451
newline_pos = self._buffer.find('\n')
464
newline_pos = self._buffer.find(b'\n')
452
465
if newline_pos != -1:
453
466
line = self._buffer[:newline_pos]
454
self._buffer = self._buffer[newline_pos+1:]
467
self._buffer = self._buffer[newline_pos + 1:]
466
479
def _state_expecting_record_type(self):
467
480
if len(self._buffer) >= 1:
468
record_type = self._buffer[0]
481
record_type = self._buffer[:1]
469
482
self._buffer = self._buffer[1:]
470
if record_type == 'B':
483
if record_type == b'B':
471
484
self._state_handler = self._state_expecting_length
472
elif record_type == 'E':
485
elif record_type == b'E':
473
486
self.finished = True
474
487
self._state_handler = self._state_expecting_nothing
488
501
def _state_expecting_name(self):
489
502
encoded_name_parts = self._consume_line()
490
if encoded_name_parts == '':
503
if encoded_name_parts == b'':
491
504
self._state_handler = self._state_expecting_body
492
505
elif encoded_name_parts:
493
name_parts = tuple(encoded_name_parts.split('\x00'))
506
name_parts = tuple(encoded_name_parts.split(b'\x00'))
494
507
for name_part in name_parts:
495
508
_check_name(name_part)
496
509
self._current_record_names.append(name_parts)