20
20
doc/developers/container-format.txt.
23
from cStringIO import StringIO
23
from __future__ import absolute_import
26
from bzrlib import errors
29
FORMAT_ONE = "Bazaar pack format 1 (introduced in 0.18)"
32
_whitespace_re = re.compile('[\t\n\x0b\x0c\r ]')
28
from ..sixish import (
33
FORMAT_ONE = b"Bazaar pack format 1 (introduced in 0.18)"
36
_whitespace_re = re.compile(b'[\t\n\x0b\x0c\r ]')
35
39
def _check_name(name):
71
75
"""Return the bytes to begin a container."""
72
return FORMAT_ONE + "\n"
76
return FORMAT_ONE + b"\n"
75
79
"""Return the bytes to finish a container."""
78
def bytes_record(self, bytes, names):
79
"""Return the bytes for a Bytes record with the given name and
82
def bytes_header(self, length, names):
83
"""Return the header for a Bytes record."""
85
byte_sections = [b"B"]
85
byte_sections.append(str(len(bytes)) + "\n")
87
byte_sections.append(b"%d\n" % (length,))
87
89
for name_tuple in names:
88
90
# Make sure we're writing valid names. Note that we will leave a
89
91
# half-written record if a name is bad!
90
92
for name in name_tuple:
92
byte_sections.append('\x00'.join(name_tuple) + "\n")
94
byte_sections.append(b'\x00'.join(name_tuple) + b"\n")
94
byte_sections.append("\n")
95
# Finally, the contents.
96
byte_sections.append(bytes)
97
# XXX: This causes a memory copy of bytes in size, but is usually
98
# faster than two write calls (12 vs 13 seconds to output a gig of
99
# 1k records.) - results may differ on significantly larger records
100
# like .iso's but as they should be rare in any case and thus not
101
# likely to be the common case. The biggest issue is causing extreme
102
# memory pressure in that case. One possible improvement here is to
103
# check the size of the content before deciding to join here vs call
105
return ''.join(byte_sections)
96
byte_sections.append(b"\n")
97
return b''.join(byte_sections)
99
def bytes_record(self, bytes, names):
100
"""Return the bytes for a Bytes record with the given name and
103
If the content may be large, construct the header separately and then
104
stream out the contents.
106
return self.bytes_header(len(bytes), names) + bytes
108
109
class ContainerWriter(object):
113
114
introduced by the begin() and end() methods.
117
# Join up headers with the body if writing fewer than this many bytes:
118
# trades off memory usage and copying to do less IO ops.
119
_JOIN_WRITES_THRESHOLD = 100000
116
121
def __init__(self, write_func):
136
141
"""Finish writing a container."""
137
142
self.write_func(self._serialiser.end())
139
def add_bytes_record(self, bytes, names):
144
def add_bytes_record(self, chunks, length, names):
140
145
"""Add a Bytes record with the given names.
142
:param bytes: The bytes to insert.
147
:param chunks: The chunks to insert.
148
:param length: Total length of bytes in chunks
143
149
:param names: The names to give the inserted bytes. Each name is
144
150
a tuple of bytestrings. The bytestrings may not contain
151
157
and thus are only suitable for use by a ContainerReader.
153
159
current_offset = self.current_offset
154
serialised_record = self._serialiser.bytes_record(bytes, names)
155
self.write_func(serialised_record)
160
if length < self._JOIN_WRITES_THRESHOLD:
161
self.write_func(self._serialiser.bytes_header(length, names)
164
self.write_func(self._serialiser.bytes_header(length, names))
166
self.write_func(chunk)
156
167
self.records_written += 1
157
168
# return a memo of where we wrote data to allow random access.
158
169
return current_offset, self.current_offset - current_offset
185
196
if (self._string is None or
186
self._string.tell() == self._string_length):
187
offset, data = self.readv_result.next()
197
self._string.tell() == self._string_length):
198
offset, data = next(self.readv_result)
188
199
self._string_length = len(data)
189
self._string = StringIO(data)
200
self._string = BytesIO(data)
191
202
def read(self, length):
193
204
result = self._string.read(length)
194
205
if len(result) < length:
195
206
raise errors.BzrError('wanted %d bytes but next '
196
'hunk only contains %d: %r...' %
197
(length, len(result), result[:20]))
207
'hunk only contains %d: %r...' %
208
(length, len(result), result[:20]))
200
211
def readline(self):
201
212
"""Note that readline will not cross readv segments."""
203
214
result = self._string.readline()
204
if self._string.tell() == self._string_length and result[-1] != '\n':
215
if self._string.tell() == self._string_length and result[-1:] != b'\n':
205
216
raise errors.BzrError('short readline in the readvfile hunk: %r'
215
226
:param requested_records: The record offset, length tuples as returned
216
227
by add_bytes_record for the desired records.
218
readv_blocks = [(0, len(FORMAT_ONE)+1)]
229
readv_blocks = [(0, len(FORMAT_ONE) + 1)]
219
230
readv_blocks.extend(requested_records)
220
231
result = ContainerReader(ReadVFile(
221
232
transport.readv(filename, readv_blocks)))
238
249
def _read_line(self):
239
250
line = self._source.readline()
240
if not line.endswith('\n'):
251
if not line.endswith(b'\n'):
241
252
raise errors.UnexpectedEndOfContainerError()
242
return line.rstrip('\n')
253
return line.rstrip(b'\n')
245
256
class ContainerReader(BaseReader):
293
304
def _iter_record_objects(self):
295
record_kind = self.reader_func(1)
296
if record_kind == 'B':
307
record_kind = self.reader_func(1)
308
except StopIteration:
310
if record_kind == b'B':
298
312
reader = BytesRecordReader(self._source)
300
elif record_kind == 'E':
314
elif record_kind == b'E':
301
315
# End marker. There are no more records.
303
elif record_kind == '':
317
elif record_kind == b'':
304
318
# End of stream encountered, but no End Marker record seen, so
305
319
# this container is incomplete.
306
320
raise errors.UnexpectedEndOfContainerError()
333
347
# risk that the same unicode string has been encoded two
334
348
# different ways.
335
349
if name_tuple in all_names:
336
raise errors.DuplicateRecordNameError(name_tuple)
350
raise errors.DuplicateRecordNameError(name_tuple[0])
337
351
all_names.add(name_tuple)
338
352
excess_bytes = self.reader_func(1)
339
if excess_bytes != '':
353
if excess_bytes != b'':
340
354
raise errors.ContainerHasExcessDataError(excess_bytes)
448
462
If a newline byte is not found in the buffer, the buffer is
449
463
unchanged and this returns None instead.
451
newline_pos = self._buffer.find('\n')
465
newline_pos = self._buffer.find(b'\n')
452
466
if newline_pos != -1:
453
467
line = self._buffer[:newline_pos]
454
self._buffer = self._buffer[newline_pos+1:]
468
self._buffer = self._buffer[newline_pos + 1:]
466
480
def _state_expecting_record_type(self):
467
481
if len(self._buffer) >= 1:
468
record_type = self._buffer[0]
482
record_type = self._buffer[:1]
469
483
self._buffer = self._buffer[1:]
470
if record_type == 'B':
484
if record_type == b'B':
471
485
self._state_handler = self._state_expecting_length
472
elif record_type == 'E':
486
elif record_type == b'E':
473
487
self.finished = True
474
488
self._state_handler = self._state_expecting_nothing
488
502
def _state_expecting_name(self):
489
503
encoded_name_parts = self._consume_line()
490
if encoded_name_parts == '':
504
if encoded_name_parts == b'':
491
505
self._state_handler = self._state_expecting_body
492
506
elif encoded_name_parts:
493
name_parts = tuple(encoded_name_parts.split('\x00'))
507
name_parts = tuple(encoded_name_parts.split(b'\x00'))
494
508
for name_part in name_parts:
495
509
_check_name(name_part)
496
510
self._current_record_names.append(name_parts)