@@ -20 +20 @@
 doc/developers/container-format.txt.
 """

-from cStringIO import StringIO
+from __future__ import absolute_import
@@ -26 +28 @@
-from bzrlib import errors
+from ..sixish import (
+    BytesIO,
+    )
@@ -29 +33 @@
 FORMAT_ONE = "Bazaar pack format 1 (introduced in 0.18)"
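
The port drops cStringIO, which no longer exists on Python 3, in favour of a BytesIO name supplied by breezy's sixish compatibility module, so the same import works on both interpreter versions. A minimal sketch of what such a shim typically looks like (the real breezy.sixish module may define more names and differ in detail):

import sys

# Hypothetical stand-in for the sixish compatibility shim: pick a
# bytes-capable in-memory file class for the running interpreter.
if sys.version_info[0] >= 3:
    from io import BytesIO
else:
    # cStringIO handles byte strings and was the faster choice on Python 2.
    from cStringIO import StringIO as BytesIO
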
@@ -70 +74 @@
     def begin(self):
         """Return the bytes to begin a container."""
-        return FORMAT_ONE + "\n"
+        return FORMAT_ONE.encode("ascii") + b"\n"

     def end(self):
         """Return the bytes to finish a container."""
@@ -78 +82 @@
-    def bytes_record(self, bytes, names):
-        """Return the bytes for a Bytes record with the given name and
-        contents."""
+    def bytes_header(self, length, names):
+        """Return the header for a Bytes record."""
-        byte_sections = ["B"]
+        byte_sections = [b"B"]
-        byte_sections.append(str(len(bytes)) + "\n")
+        byte_sections.append(b"%d\n" % (length,))
         for name_tuple in names:
             # Make sure we're writing valid names. Note that we will leave a
             # half-written record if a name is bad!
             for name in name_tuple:
                 _check_name(name)
-            byte_sections.append('\x00'.join(name_tuple) + "\n")
+            byte_sections.append(b'\x00'.join(name_tuple) + b"\n")
-        byte_sections.append("\n")
-        # Finally, the contents.
-        byte_sections.append(bytes)
-        # XXX: This causes a memory copy of bytes in size, but is usually
-        # faster than two write calls (12 vs 13 seconds to output a gig of
-        # 1k records.) - results may differ on significantly larger records
-        # like .iso's but as they should be rare in any case and thus not
-        # likely to be the common case. The biggest issue is causing extreme
-        # memory pressure in that case. One possibly improvement here is to
-        # check the size of the content before deciding to join here vs call
-        # write twice.
-        return ''.join(byte_sections)
+        byte_sections.append(b"\n")
+        return b''.join(byte_sections)
+
+    def bytes_record(self, bytes, names):
+        """Return the bytes for a Bytes record with the given name and
+        contents.
+
+        If the content may be large, construct the header separately and then
+        stream out the contents.
+        """
+        return self.bytes_header(len(bytes), names) + bytes
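
Read together, bytes_header() and bytes_record() fix the wire layout of a Bytes record: a "B" kind marker, the decimal content length, one \x00-joined name tuple per line, an empty line closing the headers, then the raw contents. A standalone illustration with made-up values (the names and contents are examples, not taken from the source):

# Build one record by hand, following the layout serialised above.
contents = b"hello"
names = [(b"rev-1", b"file-a")]

header = b"B"                          # kind marker for a Bytes record
header += b"%d\n" % len(contents)      # decimal length of the contents
for name_tuple in names:               # one NUL-joined name tuple per line
    header += b"\x00".join(name_tuple) + b"\n"
header += b"\n"                        # empty line ends the headers

record = header + contents
assert record == b"B5\nrev-1\x00file-a\n\nhello"
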
@@ -108 +109 @@
 class ContainerWriter(object):
@@ -113 +114 @@
         introduced by the begin() and end() methods.
     """

+    # Join up headers with the body if writing fewer than this many bytes:
+    # trades off memory usage and copying to do less IO ops.
+    _JOIN_WRITES_THRESHOLD = 100000

     def __init__(self, write_func):
@@ -151 +156 @@
         and thus are only suitable for use by a ContainerReader.
         """
         current_offset = self.current_offset
-        serialised_record = self._serialiser.bytes_record(bytes, names)
-        self.write_func(serialised_record)
+        length = len(bytes)
+        if length < self._JOIN_WRITES_THRESHOLD:
+            self.write_func(self._serialiser.bytes_header(length, names)
+                + bytes)
+        else:
+            self.write_func(self._serialiser.bytes_header(length, names))
+            self.write_func(bytes)
         self.records_written += 1
         # return a memo of where we wrote data to allow random access.
         return current_offset, self.current_offset - current_offset
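
With the threshold in place, a small record costs a single write() at the price of one in-memory copy, while a large record goes out as two writes with no copy, which is exactly the trade-off the removed XXX comment described. A hedged usage sketch, assuming the ContainerWriter API shown in this diff and that the module is importable as breezy.bzr.pack (the import path is an assumption):

from io import BytesIO

from breezy.bzr.pack import ContainerWriter  # assumed import path

buf = BytesIO()
writer = ContainerWriter(buf.write)
writer.begin()                       # writes the FORMAT_ONE header line
# The returned memo addresses the record inside the container.
offset, length = writer.add_bytes_record(b"hello", names=[(b"rev-1",)])
writer.end()                         # writes the end-of-container marker
# A ContainerReader can later seek to `offset` and read `length` bytes
# to get this record back.
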
@@ -185 +195 @@
         if (self._string is None or
             self._string.tell() == self._string_length):
-            offset, data = self.readv_result.next()
+            offset, data = next(self.readv_result)
             self._string_length = len(data)
-            self._string = StringIO(data)
+            self._string = BytesIO(data)

     def read(self, length):
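
The reading side keeps its buffering pattern across the port: each (offset, data) chunk from the readv iterator is wrapped in an in-memory buffer that read() drains before asking for the next chunk; only the iterator protocol call and the buffer class change. A simplified, self-contained sketch of that pattern (the real ReadVFile also implements readline() and error checks):

from io import BytesIO

class ChunkedReadVFile(object):
    """Hypothetical, simplified version of the buffering shown above."""

    def __init__(self, readv_result):
        self.readv_result = iter(readv_result)
        self._string = None
        self._string_length = None

    def _refill(self):
        # Fetch the next chunk once the current buffer is exhausted.
        if (self._string is None or
                self._string.tell() == self._string_length):
            offset, data = next(self.readv_result)
            self._string_length = len(data)
            self._string = BytesIO(data)

    def read(self, length):
        self._refill()
        return self._string.read(length)

# Two chunks come back as consecutive reads.
f = ChunkedReadVFile([(0, b"abc"), (3, b"def")])
assert f.read(3) == b"abc"
assert f.read(3) == b"def"
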
@@ -333 +343 @@
                 # risk that the same unicode string has been encoded two
                 # different ways.
                 if name_tuple in all_names:
-                    raise errors.DuplicateRecordNameError(name_tuple)
+                    raise errors.DuplicateRecordNameError(name_tuple[0])
                 all_names.add(name_tuple)
         excess_bytes = self.reader_func(1)
         if excess_bytes != '':
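
The uniqueness check works on whole name tuples held in a set, and the ported error reports just the first element of the offending tuple. A standalone sketch of the same check (ValueError stands in for errors.DuplicateRecordNameError so the snippet runs on its own):

def check_unique_names(name_tuples):
    # Reject any name tuple that has been seen before, as validate() does.
    all_names = set()
    for name_tuple in name_tuples:
        if name_tuple in all_names:
            raise ValueError("duplicate record name: %r" % (name_tuple[0],))
        all_names.add(name_tuple)

check_unique_names([(b"a",), (b"b",)])       # distinct names pass
try:
    check_unique_names([(b"a",), (b"a",)])   # repeated name is rejected
except ValueError:
    pass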