20
20
doc/developers/container-format.txt.
23
from __future__ import absolute_import
23
from cStringIO import StringIO
28
from ..sixish import (
33
FORMAT_ONE = b"Bazaar pack format 1 (introduced in 0.18)"
36
_whitespace_re = re.compile(b'[\t\n\x0b\x0c\r ]')
26
from bzrlib import errors
29
FORMAT_ONE = "Bazaar pack format 1 (introduced in 0.18)"
32
_whitespace_re = re.compile('[\t\n\x0b\x0c\r ]')
39
35
def _check_name(name):
75
71
"""Return the bytes to begin a container."""
76
return FORMAT_ONE + b"\n"
72
return FORMAT_ONE + "\n"
79
75
"""Return the bytes to finish a container."""
82
def bytes_header(self, length, names):
83
"""Return the header for a Bytes record."""
78
def bytes_record(self, bytes, names):
79
"""Return the bytes for a Bytes record with the given name and
85
byte_sections = [b"B"]
87
byte_sections.append(b"%d\n" % (length,))
85
byte_sections.append(str(len(bytes)) + "\n")
89
87
for name_tuple in names:
90
88
# Make sure we're writing valid names. Note that we will leave a
91
89
# half-written record if a name is bad!
92
90
for name in name_tuple:
94
byte_sections.append(b'\x00'.join(name_tuple) + b"\n")
92
byte_sections.append('\x00'.join(name_tuple) + "\n")
96
byte_sections.append(b"\n")
97
return b''.join(byte_sections)
99
def bytes_record(self, bytes, names):
100
"""Return the bytes for a Bytes record with the given name and
103
If the content may be large, construct the header separately and then
104
stream out the contents.
106
return self.bytes_header(len(bytes), names) + bytes
94
byte_sections.append("\n")
95
# Finally, the contents.
96
byte_sections.append(bytes)
97
# XXX: This causes a memory copy of bytes in size, but is usually
98
# faster than two write calls (12 vs 13 seconds to output a gig of
99
# 1k records.) - results may differ on significantly larger records
100
# like .iso's but as they should be rare in any case and thus not
101
# likely to be the common case. The biggest issue is causing extreme
102
# memory pressure in that case. One possibly improvement here is to
103
# check the size of the content before deciding to join here vs call
105
return ''.join(byte_sections)
109
108
class ContainerWriter(object):
114
113
introduced by the begin() and end() methods.
117
# Join up headers with the body if writing fewer than this many bytes:
118
# trades off memory usage and copying to do less IO ops.
119
_JOIN_WRITES_THRESHOLD = 100000
121
116
def __init__(self, write_func):
141
136
"""Finish writing a container."""
142
137
self.write_func(self._serialiser.end())
144
def add_bytes_record(self, chunks, length, names):
139
def add_bytes_record(self, bytes, names):
145
140
"""Add a Bytes record with the given names.
147
:param bytes: The chunks to insert.
148
:param length: Total length of bytes in chunks
142
:param bytes: The bytes to insert.
149
143
:param names: The names to give the inserted bytes. Each name is
150
144
a tuple of bytestrings. The bytestrings may not contain
157
151
and thus are only suitable for use by a ContainerReader.
159
153
current_offset = self.current_offset
160
if length < self._JOIN_WRITES_THRESHOLD:
161
self.write_func(self._serialiser.bytes_header(length, names)
164
self.write_func(self._serialiser.bytes_header(length, names))
166
self.write_func(chunk)
154
serialised_record = self._serialiser.bytes_record(bytes, names)
155
self.write_func(serialised_record)
167
156
self.records_written += 1
168
157
# return a memo of where we wrote data to allow random access.
169
158
return current_offset, self.current_offset - current_offset
196
185
if (self._string is None or
197
self._string.tell() == self._string_length):
198
offset, data = next(self.readv_result)
186
self._string.tell() == self._string_length):
187
offset, data = self.readv_result.next()
199
188
self._string_length = len(data)
200
self._string = BytesIO(data)
189
self._string = StringIO(data)
202
191
def read(self, length):
204
193
result = self._string.read(length)
205
194
if len(result) < length:
206
195
raise errors.BzrError('wanted %d bytes but next '
207
'hunk only contains %d: %r...' %
208
(length, len(result), result[:20]))
196
'hunk only contains %d: %r...' %
197
(length, len(result), result[:20]))
211
200
def readline(self):
212
201
"""Note that readline will not cross readv segments."""
214
203
result = self._string.readline()
215
if self._string.tell() == self._string_length and result[-1:] != b'\n':
204
if self._string.tell() == self._string_length and result[-1] != '\n':
216
205
raise errors.BzrError('short readline in the readvfile hunk: %r'
226
215
:param requested_records: The record offset, length tuples as returned
227
216
by add_bytes_record for the desired records.
229
readv_blocks = [(0, len(FORMAT_ONE) + 1)]
218
readv_blocks = [(0, len(FORMAT_ONE)+1)]
230
219
readv_blocks.extend(requested_records)
231
220
result = ContainerReader(ReadVFile(
232
221
transport.readv(filename, readv_blocks)))
249
238
def _read_line(self):
250
239
line = self._source.readline()
251
if not line.endswith(b'\n'):
240
if not line.endswith('\n'):
252
241
raise errors.UnexpectedEndOfContainerError()
253
return line.rstrip(b'\n')
242
return line.rstrip('\n')
256
245
class ContainerReader(BaseReader):
304
293
def _iter_record_objects(self):
307
record_kind = self.reader_func(1)
308
except StopIteration:
310
if record_kind == b'B':
295
record_kind = self.reader_func(1)
296
if record_kind == 'B':
312
298
reader = BytesRecordReader(self._source)
314
elif record_kind == b'E':
300
elif record_kind == 'E':
315
301
# End marker. There are no more records.
317
elif record_kind == b'':
303
elif record_kind == '':
318
304
# End of stream encountered, but no End Marker record seen, so
319
305
# this container is incomplete.
320
306
raise errors.UnexpectedEndOfContainerError()
347
333
# risk that the same unicode string has been encoded two
348
334
# different ways.
349
335
if name_tuple in all_names:
350
raise errors.DuplicateRecordNameError(name_tuple[0])
336
raise errors.DuplicateRecordNameError(name_tuple)
351
337
all_names.add(name_tuple)
352
338
excess_bytes = self.reader_func(1)
353
if excess_bytes != b'':
339
if excess_bytes != '':
354
340
raise errors.ContainerHasExcessDataError(excess_bytes)
462
448
If a newline byte is not found in the buffer, the buffer is
463
449
unchanged and this returns None instead.
465
newline_pos = self._buffer.find(b'\n')
451
newline_pos = self._buffer.find('\n')
466
452
if newline_pos != -1:
467
453
line = self._buffer[:newline_pos]
468
self._buffer = self._buffer[newline_pos + 1:]
454
self._buffer = self._buffer[newline_pos+1:]
480
466
def _state_expecting_record_type(self):
481
467
if len(self._buffer) >= 1:
482
record_type = self._buffer[:1]
468
record_type = self._buffer[0]
483
469
self._buffer = self._buffer[1:]
484
if record_type == b'B':
470
if record_type == 'B':
485
471
self._state_handler = self._state_expecting_length
486
elif record_type == b'E':
472
elif record_type == 'E':
487
473
self.finished = True
488
474
self._state_handler = self._state_expecting_nothing
502
488
def _state_expecting_name(self):
503
489
encoded_name_parts = self._consume_line()
504
if encoded_name_parts == b'':
490
if encoded_name_parts == '':
505
491
self._state_handler = self._state_expecting_body
506
492
elif encoded_name_parts:
507
name_parts = tuple(encoded_name_parts.split(b'\x00'))
493
name_parts = tuple(encoded_name_parts.split('\x00'))
508
494
for name_part in name_parts:
509
495
_check_name(name_part)
510
496
self._current_record_names.append(name_parts)