class ContainerWriter(object):
    # ... (class docstring elided) ...

    def __init__(self, write_func):
        """Constructor.

        :param write_func: a callable that will be called when this
            ContainerWriter needs to write some bytes.
        """
        self._write_func = write_func
        self.current_offset = 0
        self.records_written = 0
    def begin(self):
        """Begin writing a container."""
        self.write_func(FORMAT_ONE + "\n")

    def write_func(self, bytes):
        # Every write is funnelled through this method so that
        # current_offset always matches the number of bytes emitted;
        # add_bytes_record relies on that to compute record memos.
        self._write_func(bytes)
        self.current_offset += len(bytes)
    def end(self):
        """Finish writing a container."""
        self.write_func("E")
    def add_bytes_record(self, bytes, names):
        """Add a Bytes record with the given names.

        :param bytes: The bytes to insert.
        :param names: The names to give the inserted bytes. Each name is
            a tuple of bytestrings. The bytestrings may not contain
            whitespace.
        :return: An offset, length tuple. The offset is the offset
            of the record within the container, and the length is the
            length of data that will need to be read to reconstitute the
            record. This offset and length can only be used with the pack
            interface - they might be offset by headers or other such
            details and thus are only suitable for use by a
            ContainerReader.
        """
        current_offset = self.current_offset
        # Kind marker.
        byte_sections = ["B"]
        # Length.
        byte_sections.append(str(len(bytes)) + "\n")
        # Names.
        for name_tuple in names:
            # Make sure we're writing valid names.  Note that we will
            # leave a half-written record if a name is bad!
            for name in name_tuple:
                _check_name(name)
            byte_sections.append('\x00'.join(name_tuple) + "\n")
        # A blank line ends the headers.
        byte_sections.append("\n")
        # Finally, the contents.
        byte_sections.append(bytes)
        # XXX: This causes a memory copy of len(bytes) in size, but is
        # usually faster than two write calls (12 vs 13 seconds to output
        # a gig of 1k records).  Results may differ on significantly
        # larger records, like .isos, but those should be rare and are
        # thus unlikely to be the common case.  The biggest issue is the
        # extreme memory pressure they would cause.  One possible
        # improvement is to check the size of the content before deciding
        # between a single join-and-write and two write calls.
        self.write_func(''.join(byte_sections))
        self.records_written += 1
        # Return a memo of where we wrote data, to allow random access.
        return current_offset, self.current_offset - current_offset
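    # For illustration (not part of the module): given the layout built
    # above,
    #     writer.add_bytes_record('hello', names=[('name1', 'name2')])
    # appends these bytes to the container:
    #     'B'                   kind marker
    #     '5\n'                 decimal content length
    #     'name1\x00name2\n'    one line per name tuple, NUL-separated
    #     '\n'                  a blank line ends the headers
    #     'hello'               exactly 5 bytes of content
    # and returns (offset_of_the_kind_marker, bytes_written) as the memo.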
class ReadVFile(object):
    """Adapt a readv result iterator to a file like protocol."""

    def __init__(self, readv_result):
        self.readv_result = readv_result
        # The most recent readv result block.
        self._string = None
    def _next(self):
        # Advance to the next readv hunk once the current buffer is
        # exhausted.
        if (self._string is None or
            self._string.tell() == self._string_length):
            length, data = self.readv_result.next()
            self._string_length = len(data)
            self._string = StringIO(data)
    def read(self, length):
        self._next()
        result = self._string.read(length)
        if len(result) < length:
            raise errors.BzrError(
                'request for too much data from a readv hunk.')
        return result
    def readline(self):
        """Note that readline will not cross readv segments."""
        self._next()
        result = self._string.readline()
        if self._string.tell() == self._string_length and result[-1] != '\n':
            raise errors.BzrError('short readline in the readvfile hunk.')
        return result
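# A quick sketch with hypothetical data: the hunks come from
# transport.readv as (offset, bytes) pairs, and each hunk is buffered in
# turn.
#
#     hunks = iter([(0, 'header line\n'), (12, 'more bytes')])
#     f = ReadVFile(hunks)
#     f.readline()   # -> 'header line\n' (readline stays within one hunk)
#     f.read(10)     # -> 'more bytes' (drawn from the second hunk)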
def make_readv_reader(transport, filename, requested_records):
    """Create a ContainerReader that will read selected records only.

    :param transport: The transport the pack file is located on.
    :param filename: The filename of the pack file.
    :param requested_records: The record offset, length tuples as returned
        by add_bytes_record for the desired records.
    """
    # Always read the format header, then just the requested byte ranges.
    readv_blocks = [(0, len(FORMAT_ONE)+1)]
    readv_blocks.extend(requested_records)
    result = ContainerReader(ReadVFile(
        transport.readv(filename, readv_blocks)))
    return result
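# Sketch of selective reading (transport, file name, and process() are
# illustrative): memos returned by add_bytes_record are passed back as
# requested_records, so only the format header plus the wanted byte
# ranges are fetched from the transport.
#
#     memo_a = writer.add_bytes_record(bytes_a, names=[('rev-a',)])
#     memo_b = writer.add_bytes_record(bytes_b, names=[('rev-b',)])
#     reader = make_readv_reader(transport, 'data.pack', [memo_a, memo_b])
#     for names, read_bytes in reader.iter_records():
#         process(names, read_bytes(None))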
class BaseReader(object):

    # ... (intervening code between this class and validate elided) ...

    def validate(self):
        """Validate this container and its records."""
        all_names = set()
        for record_names, read_bytes in self.iter_records():
            # Consume the record bytes so iteration can continue.
            read_bytes(None)
            for name_tuple in record_names:
                for name in name_tuple:
                    _check_name_encoding(name)
                # Check that the name is unique.  Note that Python will
                # refuse to decode non-shortest forms of UTF-8 encoding,
                # so there is no risk that the same unicode string has
                # been encoded two different ways.
                if name_tuple in all_names:
                    raise errors.DuplicateRecordNameError(name_tuple)
                all_names.add(name_tuple)
        excess_bytes = self.reader_func(1)
        if excess_bytes != '':
            raise errors.ContainerHasExcessDataError(excess_bytes)
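# Validation consumes the stream (it drives iter_records), so a reader
# can be validated or iterated, not both. A sketch (file name
# illustrative; the reader takes a file-like object, as make_readv_reader
# above suggests):
#
#     reader = ContainerReader(open('data.pack', 'rb'))
#     reader.validate()   # raises DuplicateRecordNameError on repeated
#                         # name tuples, ContainerHasExcessDataError if
#                         # bytes follow the end marker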