# Copyright (C) 2007, 2009, 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

"""Container format for Bazaar data.

"Containers" and "records" are described in
doc/developers/container-format.txt.
"""

from io import BytesIO

import re

from bzrlib import errors


FORMAT_ONE = b"Bazaar pack format 1 (introduced in 0.18)"


_whitespace_re = re.compile(b'[\t\n\x0b\x0c\r ]')


def _check_name(name):
    """Do some basic checking of 'name'.

    At the moment, this just checks that there are no whitespace characters
    in the name.
    """
    if _whitespace_re.search(name) is not None:
        raise errors.InvalidRecordError("%r is not a valid name." % (name,))


class ContainerSerialiser(object):
    """A helper class for serialising containers."""

    def begin(self):
        """Return the bytes to begin a container."""
        return FORMAT_ONE + b"\n"

    def end(self):
        """Return the bytes to finish a container."""
        return b"E"

    def bytes_header(self, length, names):
        """Return the header for a Bytes record."""
        # Kind marker
        byte_sections = [b"B"]
        # Length
        byte_sections.append(b"%d\n" % (length,))
        # Names
        for name_tuple in names:
            # Make sure we're writing valid names. Note that we will leave a
            # half-written record if a name is bad!
            for name in name_tuple:
                _check_name(name)
            byte_sections.append(b'\x00'.join(name_tuple) + b"\n")
        # End of headers.
        byte_sections.append(b"\n")
        return b''.join(byte_sections)

    def bytes_record(self, bytes, names):
        """Return the bytes for a Bytes record with the given name and
        contents.

        If the content may be large, construct the header separately and then
        stream out the contents.
        """
        return self.bytes_header(len(bytes), names) + bytes
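

# Illustrative sketch, not part of the original module: the byte layout the
# serialiser above produces for a small record with a made-up one-part name
# b"name1".
#
#   ContainerSerialiser().begin()
#       -> b"Bazaar pack format 1 (introduced in 0.18)\n"
#   ContainerSerialiser().bytes_record(b"hello", [(b"name1",)])
#       -> b"B" + b"5\n" + b"name1\n" + b"\n" + b"hello"
#          (kind marker, decimal length, name lines, end of headers, body)
#   ContainerSerialiser().end()
#       -> b"E"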


class ContainerWriter(object):
    """A class for writing containers to a file.

    :attribute records_written: The number of user records added to the
        container. This does not count the prelude or suffix of the container
        introduced by the begin() and end() methods.
    """

    # Join up headers with the body if writing fewer than this many bytes:
    # trades off memory usage and copying to do less IO ops.
    _JOIN_WRITES_THRESHOLD = 100000

    def __init__(self, write_func):
        self._write_func = write_func
        self.current_offset = 0
        self.records_written = 0
        self._serialiser = ContainerSerialiser()

    def write_func(self, bytes):
        # Tracks current_offset so add_bytes_record can return (offset, length) memos.
        self._write_func(bytes)
        self.current_offset += len(bytes)

    def begin(self):
        """Begin writing a container."""
        self.write_func(self._serialiser.begin())

    def end(self):
        """Finish writing a container."""
        self.write_func(self._serialiser.end())

    def add_bytes_record(self, chunks, length, names):
        """Add a Bytes record with the given names.

        :param chunks: The chunks to insert.
        :param length: Total length of bytes in chunks.
        :param names: The names to give the inserted bytes. Each name is
            a tuple of bytestrings. The bytestrings may not contain
            whitespace.
        :return: An offset, length tuple. The offset is the offset
            of the record within the container, and the length is the
            length of data that will need to be read to reconstitute the
            record. These offset and length can only be used with the pack
            interface - they might be offset by headers or other such details
            and thus are only suitable for use by a ContainerReader.
        """
        current_offset = self.current_offset
        if length < self._JOIN_WRITES_THRESHOLD:
            self.write_func(self._serialiser.bytes_header(length, names)
                            + b''.join(chunks))
        else:
            self.write_func(self._serialiser.bytes_header(length, names))
            for chunk in chunks:
                self.write_func(chunk)
        self.records_written += 1
        # return a memo of where we wrote data to allow random access.
        return current_offset, self.current_offset - current_offset
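

# Minimal usage sketch (an assumption for illustration, not from the original
# file): write a container to an in-memory buffer and keep the
# (offset, length) memo that add_bytes_record returns for later selective
# reads.
#
#   from io import BytesIO
#   buf = BytesIO()
#   writer = ContainerWriter(buf.write)
#   writer.begin()
#   memo = writer.add_bytes_record([b'some ', b'chunks'], 11, [(b'rev-1',)])
#   writer.end()
#   # memo is an (offset, length) pair into buf.getvalue(), suitable for
#   # make_readv_reader() below.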


class ReadVFile(object):
    """Adapt a readv result iterator to a file like protocol.

    The readv result must support the iterator protocol returning (offset,
    data_bytes) pairs.
    """

    def _next(self):
        if (self._string is None or
                self._string.tell() == self._string_length):
            offset, data = next(self.readv_result)
            self._string_length = len(data)
            self._string = BytesIO(data)

    def read(self, length):
        self._next()
        result = self._string.read(length)
        if len(result) < length:
            raise errors.BzrError('wanted %d bytes but next '
                                  'hunk only contains %d: %r...' %
                                  (length, len(result), result[:20]))
        return result

    def readline(self):
        """Note that readline will not cross readv segments."""
        self._next()
        result = self._string.readline()
        if self._string.tell() == self._string_length and result[-1:] != b'\n':
            raise errors.BzrError('short readline in the readvfile hunk: %r'
                                  % (result, ))
        return result
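

# Illustrative sketch with made-up data, not from the original file: ReadVFile
# adapts an iterator of (offset, data) pairs, as returned by
# transport.readv(), to the read()/readline() calls the readers make.
# Reads and readlines never cross hunk boundaries:
#
#   f = ReadVFile(iter([(0, b'B5\nname1\n\nhello'), (59, b'E')]))
#   f.read(1)     # -> b'B'
#   f.readline()  # -> b'5\n'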


def make_readv_reader(transport, filename, requested_records):
    """Create a ContainerReader that will read selected records only.

    :param transport: The transport the pack file is located on.
    :param filename: The filename of the pack file.
    :param requested_records: The record offset, length tuples as returned
        by add_bytes_record for the desired records.
    """
    readv_blocks = [(0, len(FORMAT_ONE) + 1)]
    readv_blocks.extend(requested_records)
    result = ContainerReader(ReadVFile(
        transport.readv(filename, readv_blocks)))
    return result
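

# Sketch of the readv request built above, using made-up memos: for
# requested_records = [(59, 31), (112, 18)], the blocks handed to
# transport.readv() are [(0, 42), (59, 31), (112, 18)]. The extra first block
# covers the format line (len(FORMAT_ONE) + 1 == 42 bytes) so the reader can
# still check the container format before reading the selected records.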


class BaseReader(object):

    def _read_line(self):
        line = self._source.readline()
        if not line.endswith(b'\n'):
            raise errors.UnexpectedEndOfContainerError()
        return line.rstrip(b'\n')


class ContainerReader(BaseReader):

    def _iter_record_objects(self):
        while True:
            try:
                record_kind = self.reader_func(1)
            except StopIteration:
                return
            if record_kind == b'B':
                # Bytes record.
                reader = BytesRecordReader(self._source)
                yield reader
            elif record_kind == b'E':
                # End marker. There are no more records.
                return
            elif record_kind == b'':
                # End of stream encountered, but no End Marker record seen, so
                # this container is incomplete.
                raise errors.UnexpectedEndOfContainerError()

                # Check that the name is unique. Note that Python will refuse
                # to decode non-shortest forms of UTF-8 encoding, so there is
                # no risk that the same unicode string has been encoded two
                # different ways.
                if name_tuple in all_names:
                    raise errors.DuplicateRecordNameError(name_tuple[0])
                all_names.add(name_tuple)
        excess_bytes = self.reader_func(1)
        if excess_bytes != b'':
            raise errors.ContainerHasExcessDataError(excess_bytes)


class ContainerPushParser(object):
    """A "push" parser for container format 1.

    It accepts bytes via accept_bytes, and parses them into records which can
    be retrieved via read_pending_records.
    """

    def _consume_line(self):
        """Take a line out of the buffer, and return the line.

        If a newline byte is not found in the buffer, the buffer is
        unchanged and this returns None instead.
        """
        newline_pos = self._buffer.find(b'\n')
        if newline_pos != -1:
            line = self._buffer[:newline_pos]
            self._buffer = self._buffer[newline_pos + 1:]
            return line
        else:
            return None

    def _state_expecting_record_type(self):
        if len(self._buffer) >= 1:
            record_type = self._buffer[:1]
            self._buffer = self._buffer[1:]
            if record_type == b'B':
                self._state_handler = self._state_expecting_length
            elif record_type == b'E':
                self.finished = True
                self._state_handler = self._state_expecting_nothing

    def _state_expecting_name(self):
        encoded_name_parts = self._consume_line()
        if encoded_name_parts == b'':
            self._state_handler = self._state_expecting_body
        elif encoded_name_parts:
            name_parts = tuple(encoded_name_parts.split(b'\x00'))
            for name_part in name_parts:
                _check_name(name_part)
            self._current_record_names.append(name_parts)
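

# Illustrative sketch, not from the original file: how a record name line
# looks on the wire and how _state_expecting_name decodes it. A two-part name
# (b'pack-1', b'rev-id') is serialised as the parts joined by NUL and
# terminated by a newline:
#
#   b'pack-1\x00rev-id\n'  ->  name_parts == (b'pack-1', b'rev-id')
#
# An empty name line (just b'\n') ends the name list and switches the parser
# to _state_expecting_body.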