20
20
doc/developers/container-format.txt.
23
from __future__ import absolute_import
23
from cStringIO import StringIO
28
from ..sixish import (
33
FORMAT_ONE = b"Bazaar pack format 1 (introduced in 0.18)"
36
_whitespace_re = re.compile(b'[\t\n\x0b\x0c\r ]')
26
from bzrlib import errors
29
FORMAT_ONE = "Bazaar pack format 1 (introduced in 0.18)"
32
_whitespace_re = re.compile('[\t\n\x0b\x0c\r ]')
39
35
def _check_name(name):
75
71
"""Return the bytes to begin a container."""
76
return FORMAT_ONE + b"\n"
72
return FORMAT_ONE + "\n"
79
75
"""Return the bytes to finish a container."""
82
def bytes_header(self, length, names):
83
"""Return the header for a Bytes record."""
78
def bytes_record(self, bytes, names):
79
"""Return the bytes for a Bytes record with the given name and
85
byte_sections = [b"B"]
87
byte_sections.append(b"%d\n" % (length,))
85
byte_sections.append(str(len(bytes)) + "\n")
89
87
for name_tuple in names:
90
88
# Make sure we're writing valid names. Note that we will leave a
91
89
# half-written record if a name is bad!
92
90
for name in name_tuple:
94
byte_sections.append(b'\x00'.join(name_tuple) + b"\n")
92
byte_sections.append('\x00'.join(name_tuple) + "\n")
96
byte_sections.append(b"\n")
97
return b''.join(byte_sections)
99
def bytes_record(self, bytes, names):
100
"""Return the bytes for a Bytes record with the given name and
103
If the content may be large, construct the header separately and then
104
stream out the contents.
106
return self.bytes_header(len(bytes), names) + bytes
94
byte_sections.append("\n")
95
# Finally, the contents.
96
byte_sections.append(bytes)
97
# XXX: This causes a memory copy of bytes in size, but is usually
98
# faster than two write calls (12 vs 13 seconds to output a gig of
99
# 1k records.) - results may differ on significantly larger records
100
# like .iso's but as they should be rare in any case and thus not
101
# likely to be the common case. The biggest issue is causing extreme
102
# memory pressure in that case. One possibly improvement here is to
103
# check the size of the content before deciding to join here vs call
105
return ''.join(byte_sections)
109
108
class ContainerWriter(object):
114
113
introduced by the begin() and end() methods.
117
# Join up headers with the body if writing fewer than this many bytes:
118
# trades off memory usage and copying to do less IO ops.
119
_JOIN_WRITES_THRESHOLD = 100000
121
116
def __init__(self, write_func):
156
151
and thus are only suitable for use by a ContainerReader.
158
153
current_offset = self.current_offset
160
if length < self._JOIN_WRITES_THRESHOLD:
161
self.write_func(self._serialiser.bytes_header(length, names)
164
self.write_func(self._serialiser.bytes_header(length, names))
165
self.write_func(bytes)
154
serialised_record = self._serialiser.bytes_record(bytes, names)
155
self.write_func(serialised_record)
166
156
self.records_written += 1
167
157
# return a memo of where we wrote data to allow random access.
168
158
return current_offset, self.current_offset - current_offset
195
185
if (self._string is None or
196
186
self._string.tell() == self._string_length):
197
offset, data = next(self.readv_result)
187
offset, data = self.readv_result.next()
198
188
self._string_length = len(data)
199
self._string = BytesIO(data)
189
self._string = StringIO(data)
201
191
def read(self, length):
211
201
"""Note that readline will not cross readv segments."""
213
203
result = self._string.readline()
214
if self._string.tell() == self._string_length and result[-1:] != b'\n':
204
if self._string.tell() == self._string_length and result[-1] != '\n':
215
205
raise errors.BzrError('short readline in the readvfile hunk: %r'
248
238
def _read_line(self):
249
239
line = self._source.readline()
250
if not line.endswith(b'\n'):
240
if not line.endswith('\n'):
251
241
raise errors.UnexpectedEndOfContainerError()
252
return line.rstrip(b'\n')
242
return line.rstrip('\n')
255
245
class ContainerReader(BaseReader):
303
293
def _iter_record_objects(self):
306
record_kind = self.reader_func(1)
307
except StopIteration:
309
if record_kind == b'B':
295
record_kind = self.reader_func(1)
296
if record_kind == 'B':
311
298
reader = BytesRecordReader(self._source)
313
elif record_kind == b'E':
300
elif record_kind == 'E':
314
301
# End marker. There are no more records.
316
elif record_kind == b'':
303
elif record_kind == '':
317
304
# End of stream encountered, but no End Marker record seen, so
318
305
# this container is incomplete.
319
306
raise errors.UnexpectedEndOfContainerError()
346
333
# risk that the same unicode string has been encoded two
347
334
# different ways.
348
335
if name_tuple in all_names:
349
raise errors.DuplicateRecordNameError(name_tuple[0])
336
raise errors.DuplicateRecordNameError(name_tuple)
350
337
all_names.add(name_tuple)
351
338
excess_bytes = self.reader_func(1)
352
if excess_bytes != b'':
339
if excess_bytes != '':
353
340
raise errors.ContainerHasExcessDataError(excess_bytes)
461
448
If a newline byte is not found in the buffer, the buffer is
462
449
unchanged and this returns None instead.
464
newline_pos = self._buffer.find(b'\n')
451
newline_pos = self._buffer.find('\n')
465
452
if newline_pos != -1:
466
453
line = self._buffer[:newline_pos]
467
454
self._buffer = self._buffer[newline_pos+1:]
479
466
def _state_expecting_record_type(self):
480
467
if len(self._buffer) >= 1:
481
record_type = self._buffer[:1]
468
record_type = self._buffer[0]
482
469
self._buffer = self._buffer[1:]
483
if record_type == b'B':
470
if record_type == 'B':
484
471
self._state_handler = self._state_expecting_length
485
elif record_type == b'E':
472
elif record_type == 'E':
486
473
self.finished = True
487
474
self._state_handler = self._state_expecting_nothing
501
488
def _state_expecting_name(self):
502
489
encoded_name_parts = self._consume_line()
503
if encoded_name_parts == b'':
490
if encoded_name_parts == '':
504
491
self._state_handler = self._state_expecting_body
505
492
elif encoded_name_parts:
506
name_parts = tuple(encoded_name_parts.split(b'\x00'))
493
name_parts = tuple(encoded_name_parts.split('\x00'))
507
494
for name_part in name_parts:
508
495
_check_name(name_part)
509
496
self._current_record_names.append(name_parts)