/brz/remove-bazaar : contents of bzrlib/pack.py at revision 5757.1.2

: (revision 5757.1.2)

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

5089.1.1 by Martin Pool Fix typo in ReadVFile.readline (thanks mnordhoff)	1	# Copyright (C) 2007, 2009, 2010 Canonical Ltd
2506.2.1 by Andrew Bennetts Start implementing container format reading and writing.	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob update FSF mailing address	15	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2506.2.1 by Andrew Bennetts Start implementing container format reading and writing.	16
	17	"""Container format for Bazaar data.
	18
2916.2.13 by Andrew Bennetts Improve some docstrings.	19	"Containers" and "records" are described in
2916.2.13 by Andrew Bennetts Improve some docstrings.	20	doc/developers/container-format.txt.
2506.2.1 by Andrew Bennetts Start implementing container format reading and writing.	21	"""
	22
2661.2.2 by Robert Collins * ``bzrlib.pack.make_readv_reader`` allows readv based access to pack	23	from cStringIO import StringIO
2506.5.2 by Andrew Bennetts Raise InvalidRecordError on invalid names.	24	import re
5757.1.1 by Jelmer Vernooij Move _DirectPackAccess from bzrlib.knit to bzrlib.pack.	25	import sys
2506.5.2 by Andrew Bennetts Raise InvalidRecordError on invalid names.	26
2506.2.1 by Andrew Bennetts Start implementing container format reading and writing.	27	from bzrlib import errors
	28
	29
# Magic first line identifying version 1 of the container format. A
# serialised container starts with this string followed by a newline.
FORMAT_ONE = "Bazaar pack format 1 (introduced in 0.18)"


# Matches any single character that is not allowed inside a record name:
# tab, newline, vertical tab, form feed, carriage return or space.
_whitespace_re = re.compile('[\t\n\x0b\x0c\r ]')
2506.5.2 by Andrew Bennetts Raise InvalidRecordError on invalid names.	34
	35
	36	def _check_name(name):
	37	"""Do some basic checking of 'name'.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	38
2535.3.26 by Andrew Bennetts Revert merge of container-format changes rejected for bzr.dev (i.e. undo andrew.bennetts@canonical.com-20070717044423-cetp5spep142xsr4).	39	At the moment, this just checks that there are no whitespace characters in a
	40	name.
2506.5.2 by Andrew Bennetts Raise InvalidRecordError on invalid names.	41
	42	:raises InvalidRecordError: if name is not valid.
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	43	:seealso: _check_name_encoding
2506.5.2 by Andrew Bennetts Raise InvalidRecordError on invalid names.	44	"""
2535.3.26 by Andrew Bennetts Revert merge of container-format changes rejected for bzr.dev (i.e. undo andrew.bennetts@canonical.com-20070717044423-cetp5spep142xsr4).	45	if _whitespace_re.search(name) is not None:
2506.5.2 by Andrew Bennetts Raise InvalidRecordError on invalid names.	46	raise errors.InvalidRecordError("%r is not a valid name." % (name,))
	47
	48
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	49	def _check_name_encoding(name):
	50	"""Check that 'name' is valid UTF-8.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	51
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	52	This is separate from _check_name because UTF-8 decoding is relatively
	53	expensive, and we usually want to avoid it.
	54
	55	:raises InvalidRecordError: if name is not valid UTF-8.
	56	"""
	57	try:
	58	name.decode('utf-8')
	59	except UnicodeDecodeError, e:
	60	raise errors.InvalidRecordError(str(e))
	61
	62
class ContainerSerialiser(object):
    """Stateless helper that turns container pieces into bytes.

    Each of 'begin', 'end' and 'bytes_record' simply returns the bytes for
    that part of a container; nothing is written anywhere. ContainerWriter
    is usually the more convenient interface.
    """

    def begin(self):
        """Return the bytes that open a container."""
        return FORMAT_ONE + "\n"

    def end(self):
        """Return the bytes that close a container."""
        return "E"

    def bytes_record(self, bytes, names):
        """Return the serialised form of a Bytes record.

        :param bytes: the record body.
        :param names: an iterable of name tuples; every element of every
            tuple is validated with _check_name.
        :raises InvalidRecordError: if any name is not valid.
        """
        # Kind marker, then the body length on a line of its own.
        pieces = ["B", "%d\n" % len(bytes)]
        # One header line per name tuple, its parts separated by NUL.
        for name_tuple in names:
            # Make sure we're writing valid names.
            for name in name_tuple:
                _check_name(name)
            pieces.append('\x00'.join(name_tuple) + "\n")
        # A blank line ends the headers; the body follows immediately.
        pieces.extend(["\n", bytes])
        # XXX: Joining copies the body once more, but is usually faster than
        # issuing two separate writes (12 vs 13 seconds to output a gig of
        # 1k records). Results may differ for very large records such as
        # .iso files, where the extra copy mostly shows up as memory
        # pressure. A possible improvement is to check the size of the
        # content before deciding whether to join or to write twice.
        return ''.join(pieces)
	107
	108
class ContainerWriter(object):
    """Write a container through a caller-supplied write callable.

    :attribute records_written: The number of user records added to the
        container. This does not count the prelude or suffix of the
        container introduced by the begin() and end() methods.
    """

    def __init__(self, write_func):
        """Constructor.

        :param write_func: a callable that will be called whenever this
            ContainerWriter needs to write some bytes.
        """
        self._serialiser = ContainerSerialiser()
        self._write_func = write_func
        self.records_written = 0
        # Running total of bytes handed to write_func so far.
        self.current_offset = 0

    def begin(self):
        """Begin writing a container."""
        prelude = self._serialiser.begin()
        self.write_func(prelude)

    def write_func(self, bytes):
        """Write bytes via the wrapped callable and advance the offset."""
        self._write_func(bytes)
        self.current_offset += len(bytes)

    def end(self):
        """Finish writing a container."""
        suffix = self._serialiser.end()
        self.write_func(suffix)

    def add_bytes_record(self, bytes, names):
        """Add a Bytes record with the given names.

        :param bytes: The bytes to insert.
        :param names: The names to give the inserted bytes. Each name is
            a tuple of bytestrings. The bytestrings may not contain
            whitespace.
        :return: An offset, length tuple. The offset is the offset
            of the record within the container, and the length is the
            length of data that will need to be read to reconstitute the
            record. These offset and length can only be used with the pack
            interface - they might be offset by headers or other such
            details and thus are only suitable for use by a ContainerReader.
        """
        start = self.current_offset
        self.write_func(self._serialiser.bytes_record(bytes, names))
        self.records_written += 1
        # Return a memo of where the data was written, to allow random
        # access later.
        return start, self.current_offset - start
	160
2506.3.1 by Andrew Bennetts More progress:	161
class ReadVFile(object):
    """Adapt a readv result iterator to a file like protocol.

    The readv result must support the iterator protocol returning (offset,
    data_bytes) pairs. Each pair is treated as one "hunk"; neither read()
    nor readline() will cross from one hunk into the next.
    """

    # XXX: This could be a generic transport class, as other code may want to
    # gradually consume the readv result.

    def __init__(self, readv_result):
        """Construct a new ReadVFile wrapper.

        :seealso: make_readv_reader

        :param readv_result: the most recent readv result - list or generator
        """
        # readv can return a sequence or an iterator, but we require an
        # iterator to know how much has been consumed.
        self.readv_result = iter(readv_result)
        # The hunk currently being consumed, and its total length.
        self._string = None
        self._string_length = 0

    def _next(self):
        """Advance to the next hunk if the current one is used up.

        Exhaustion of the readv result surfaces as StopIteration.
        """
        if (self._string is None or
            self._string.tell() == self._string_length):
            # The next() builtin works on Python 2.6+ and Python 3, unlike
            # the iterator's .next() method.
            offset, data = next(self.readv_result)
            self._string_length = len(data)
            self._string = StringIO(data)

    def read(self, length):
        """Read exactly length bytes from the current hunk.

        :raises BzrError: if the current hunk holds fewer than length
            unread bytes.
        """
        self._next()
        result = self._string.read(length)
        if len(result) < length:
            raise errors.BzrError('wanted %d bytes but next '
                'hunk only contains %d: %r...' %
                (length, len(result), result[:20]))
        return result

    def readline(self):
        """Read one line; note that readline will not cross readv segments.

        :raises BzrError: if the hunk ends before a newline is seen,
            including the case of an empty hunk.
        """
        self._next()
        result = self._string.readline()
        # endswith() rather than result[-1]: result is '' for an empty hunk,
        # and indexing it would raise IndexError instead of BzrError.
        if (self._string.tell() == self._string_length
            and not result.endswith('\n')):
            raise errors.BzrError('short readline in the readvfile hunk: %r'
                % (result, ))
        return result
	209
	210
	211	def make_readv_reader(transport, filename, requested_records):
	212	"""Create a ContainerReader that will read selected records only.
	213
	214	:param transport: The transport the pack file is located on.
	215	:param filename: The filename of the pack file.
	216	:param requested_records: The record offset, length tuples as returned
	217	by add_bytes_record for the desired records.
	218	"""
	219	readv_blocks = [(0, len(FORMAT_ONE)+1)]
	220	readv_blocks.extend(requested_records)
	221	result = ContainerReader(ReadVFile(
	222	transport.readv(filename, readv_blocks)))
	223	return result
	224
	225
class BaseReader(object):
    """Shared plumbing for readers that pull from a file-like object."""

    def __init__(self, source_file):
        """Constructor.

        :param source_file: a file-like object with `read` and `readline`
            methods.
        """
        self._source = source_file

    def reader_func(self, length=None):
        """Read length bytes from the source (length is passed straight on)."""
        source = self._source
        return source.read(length)

    def _read_line(self):
        """Read one line from the source and return it without its newline.

        :raises UnexpectedEndOfContainerError: if the line read does not end
            with a newline.
        """
        raw_line = self._source.readline()
        if raw_line.endswith('\n'):
            return raw_line.rstrip('\n')
        raise errors.UnexpectedEndOfContainerError()
2506.3.1 by Andrew Bennetts More progress:	244
	245
	246	class ContainerReader(BaseReader):
	247	"""A class for reading Bazaar's container format."""
	248
2506.2.1 by Andrew Bennetts Start implementing container format reading and writing.	249	def iter_records(self):
	250	"""Iterate over the container, yielding each record as it is read.
	251
2506.6.2 by Andrew Bennetts Docstring improvements.	252	Each yielded record will be a 2-tuple of (names, callable), where names
	253	is a ``list`` and bytes is a function that takes one argument,
	254	``max_length``.
	255
4031.3.1 by Frank Aspell Fixing various typos	256	You must not call the callable after advancing the iterator to the
2506.6.2 by Andrew Bennetts Docstring improvements.	257	next record. That is, this code is invalid::
	258
	259	record_iter = container.iter_records()
	260	names1, callable1 = record_iter.next()
	261	names2, callable2 = record_iter.next()
	262	bytes1 = callable1(None)
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	263
2506.6.2 by Andrew Bennetts Docstring improvements.	264	As it will give incorrect results and invalidate the state of the
2506.6.2 by Andrew Bennetts Docstring improvements.	265	ContainerReader.
2506.3.1 by Andrew Bennetts More progress:	266
4031.3.1 by Frank Aspell Fixing various typos	267	:raises ContainerError: if any sort of container corruption is
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	268	detected, e.g. UnknownContainerFormatError is the format of the
	269	container is unrecognised.
2506.6.2 by Andrew Bennetts Docstring improvements.	270	:seealso: ContainerReader.read
2506.2.1 by Andrew Bennetts Start implementing container format reading and writing.	271	"""
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	272	self._read_format()
2506.2.1 by Andrew Bennetts Start implementing container format reading and writing.	273	return self._iter_records()
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	274
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	275	def iter_record_objects(self):
	276	"""Iterate over the container, yielding each record as it is read.
	277
	278	Each yielded record will be an object with ``read`` and ``validate``
2506.6.2 by Andrew Bennetts Docstring improvements.	279	methods. Like with iter_records, it is not safe to use a record object
2506.6.2 by Andrew Bennetts Docstring improvements.	280	after advancing the iterator to yield next record.
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	281
4031.3.1 by Frank Aspell Fixing various typos	282	:raises ContainerError: if any sort of container corruption is
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	283	detected, e.g. UnknownContainerFormatError is the format of the
	284	container is unrecognised.
2506.6.2 by Andrew Bennetts Docstring improvements.	285	:seealso: iter_records
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	286	"""
	287	self._read_format()
	288	return self._iter_record_objects()
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	289
2506.2.1 by Andrew Bennetts Start implementing container format reading and writing.	290	def _iter_records(self):
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	291	for record in self._iter_record_objects():
	292	yield record.read()
	293
	294	def _iter_record_objects(self):
2506.2.1 by Andrew Bennetts Start implementing container format reading and writing.	295	while True:
	296	record_kind = self.reader_func(1)
	297	if record_kind == 'B':
	298	# Bytes record.
2506.2.9 by Aaron Bentley Use file-like objects as container input, not callables	299	reader = BytesRecordReader(self._source)
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	300	yield reader
2506.2.1 by Andrew Bennetts Start implementing container format reading and writing.	301	elif record_kind == 'E':
	302	# End marker. There are no more records.
	303	return
	304	elif record_kind == '':
	305	# End of stream encountered, but no End Marker record seen, so
	306	# this container is incomplete.
	307	raise errors.UnexpectedEndOfContainerError()
	308	else:
	309	# Unknown record type.
	310	raise errors.UnknownRecordTypeError(record_kind)
	311
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	312	def _read_format(self):
	313	format = self._read_line()
2535.3.26 by Andrew Bennetts Revert merge of container-format changes rejected for bzr.dev (i.e. undo andrew.bennetts@canonical.com-20070717044423-cetp5spep142xsr4).	314	if format != FORMAT_ONE:
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	315	raise errors.UnknownContainerFormatError(format)
	316
2506.2.6 by Andrew Bennetts Add validate method to ContainerReader and BytesRecordReader.	317	def validate(self):
	318	"""Validate this container and its records.
	319
2506.2.7 by Andrew Bennetts Change read/iter_records to return a callable, add more validation, and	320	Validating consumes the data stream just like iter_records and
	321	iter_record_objects, so you cannot call it after
	322	iter_records/iter_record_objects.
2506.2.6 by Andrew Bennetts Add validate method to ContainerReader and BytesRecordReader.	323
	324	:raises ContainerError: if something is invalid.
	325	"""
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	326	all_names = set()
	327	for record_names, read_bytes in self.iter_records():
	328	read_bytes(None)
2682.1.1 by Robert Collins * The ``bzrlib.pack`` interface has changed to use tuples of bytestrings	329	for name_tuple in record_names:
	330	for name in name_tuple:
	331	_check_name_encoding(name)
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	332	# Check that the name is unique. Note that Python will refuse
	333	# to decode non-shortest forms of UTF-8 encoding, so there is no
	334	# risk that the same unicode string has been encoded two
	335	# different ways.
2682.1.1 by Robert Collins * The ``bzrlib.pack`` interface has changed to use tuples of bytestrings	336	if name_tuple in all_names:
	337	raise errors.DuplicateRecordNameError(name_tuple)
	338	all_names.add(name_tuple)
2506.2.6 by Andrew Bennetts Add validate method to ContainerReader and BytesRecordReader.	339	excess_bytes = self.reader_func(1)
	340	if excess_bytes != '':
	341	raise errors.ContainerHasExcessDataError(excess_bytes)
	342
2506.3.1 by Andrew Bennetts More progress:	343
	344	class BytesRecordReader(BaseReader):
	345
	346	def read(self):
2506.2.6 by Andrew Bennetts Add validate method to ContainerReader and BytesRecordReader.	347	"""Read this record.
	348
2506.6.2 by Andrew Bennetts Docstring improvements.	349	You can either validate or read a record, you can't do both.
2506.2.6 by Andrew Bennetts Add validate method to ContainerReader and BytesRecordReader.	350
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	351	:returns: A tuple of (names, callable). The callable can be called
	352	repeatedly to obtain the bytes for the record, with a max_length
	353	argument. If max_length is None, returns all the bytes. Because
	354	records can be arbitrarily large, using None is not recommended
	355	unless you have reason to believe the content will fit in memory.
2506.2.6 by Andrew Bennetts Add validate method to ContainerReader and BytesRecordReader.	356	"""
2506.3.1 by Andrew Bennetts More progress:	357	# Read the content length.
	358	length_line = self._read_line()
	359	try:
	360	length = int(length_line)
	361	except ValueError:
	362	raise errors.InvalidRecordError(
	363	"%r is not a valid length." % (length_line,))
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	364
2506.3.1 by Andrew Bennetts More progress:	365	# Read the list of names.
2506.2.1 by Andrew Bennetts Start implementing container format reading and writing.	366	names = []
	367	while True:
2682.1.1 by Robert Collins * The ``bzrlib.pack`` interface has changed to use tuples of bytestrings	368	name_line = self._read_line()
	369	if name_line == '':
2506.2.1 by Andrew Bennetts Start implementing container format reading and writing.	370	break
2682.1.1 by Robert Collins * The ``bzrlib.pack`` interface has changed to use tuples of bytestrings	371	name_tuple = tuple(name_line.split('\x00'))
	372	for name in name_tuple:
	373	_check_name(name)
	374	names.append(name_tuple)
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	375
	376	self._remaining_length = length
	377	return names, self._content_reader
	378
	379	def _content_reader(self, max_length):
	380	if max_length is None:
	381	length_to_read = self._remaining_length
	382	else:
	383	length_to_read = min(max_length, self._remaining_length)
	384	self._remaining_length -= length_to_read
	385	bytes = self.reader_func(length_to_read)
	386	if len(bytes) != length_to_read:
2506.3.3 by Andrew Bennetts Deal with EOF in the middle of a bytes record.	387	raise errors.UnexpectedEndOfContainerError()
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	388	return bytes
2506.2.1 by Andrew Bennetts Start implementing container format reading and writing.	389
2506.2.6 by Andrew Bennetts Add validate method to ContainerReader and BytesRecordReader.	390	def validate(self):
	391	"""Validate this record.
	392
	393	You can either validate or read, you can't do both.
	394
	395	:raises ContainerError: if this record is invalid.
	396	"""
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	397	names, read_bytes = self.read()
2682.1.1 by Robert Collins * The ``bzrlib.pack`` interface has changed to use tuples of bytestrings	398	for name_tuple in names:
	399	for name in name_tuple:
	400	_check_name_encoding(name)
2506.6.1 by Andrew Bennetts Return a callable instead of a str from read, and add more validation.	401	read_bytes(None)
	402
2916.2.1 by Andrew Bennetts Initial implementation of a 'push' parser for the container format.	403
	404	class ContainerPushParser(object):
2916.2.14 by Andrew Bennetts Add a docstring.	405	"""A "push" parser for container format 1.
	406
	407	It accepts bytes via the ``accept_bytes`` method, and parses them into
	408	records which can be retrieved via the ``read_pending_records`` method.
	409	"""
2916.2.1 by Andrew Bennetts Initial implementation of a 'push' parser for the container format.	410
	411	def __init__(self):
	412	self._buffer = ''
	413	self._state_handler = self._state_expecting_format_line
	414	self._parsed_records = []
	415	self._reset_current_record()
2916.2.10 by Andrew Bennetts Simpler iter_records_from_file implementation.	416	self.finished = False
2916.2.1 by Andrew Bennetts Initial implementation of a 'push' parser for the container format.	417
	418	def _reset_current_record(self):
	419	self._current_record_length = None
	420	self._current_record_names = []
	421
	422	def accept_bytes(self, bytes):
	423	self._buffer += bytes
	424	# Keep iterating the state machine until it stops consuming bytes from
	425	# the buffer.
2916.2.8 by Andrew Bennetts Add bzrlib.pack.iter_records_from_file.	426	last_buffer_length = None
	427	cur_buffer_length = len(self._buffer)
4464.1.1 by Aaron Bentley ContainerPushParser.accept_bytes handles zero-length records correctly.	428	last_state_handler = None
	429	while (cur_buffer_length != last_buffer_length
	430	or last_state_handler != self._state_handler):
2916.2.8 by Andrew Bennetts Add bzrlib.pack.iter_records_from_file.	431	last_buffer_length = cur_buffer_length
4464.1.1 by Aaron Bentley ContainerPushParser.accept_bytes handles zero-length records correctly.	432	last_state_handler = self._state_handler
2916.2.1 by Andrew Bennetts Initial implementation of a 'push' parser for the container format.	433	self._state_handler()
2916.2.8 by Andrew Bennetts Add bzrlib.pack.iter_records_from_file.	434	cur_buffer_length = len(self._buffer)
2916.2.1 by Andrew Bennetts Initial implementation of a 'push' parser for the container format.	435
4060.1.4 by Robert Collins Streaming fetch from remote servers.	436	def read_pending_records(self, max=None):
	437	if max:
	438	records = self._parsed_records[:max]
	439	del self._parsed_records[:max]
	440	return records
	441	else:
	442	records = self._parsed_records
	443	self._parsed_records = []
	444	return records
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	445
2916.2.8 by Andrew Bennetts Add bzrlib.pack.iter_records_from_file.	446	def _consume_line(self):
	447	"""Take a line out of the buffer, and return the line.
2916.2.1 by Andrew Bennetts Initial implementation of a 'push' parser for the container format.	448
2916.2.8 by Andrew Bennetts Add bzrlib.pack.iter_records_from_file.	449	If a newline byte is not found in the buffer, the buffer is
2916.2.1 by Andrew Bennetts Initial implementation of a 'push' parser for the container format.	450	unchanged and this returns None instead.
	451	"""
	452	newline_pos = self._buffer.find('\n')
	453	if newline_pos != -1:
	454	line = self._buffer[:newline_pos]
	455	self._buffer = self._buffer[newline_pos+1:]
	456	return line
	457	else:
	458	return None
	459
	460	def _state_expecting_format_line(self):
	461	line = self._consume_line()
	462	if line is not None:
	463	if line != FORMAT_ONE:
	464	raise errors.UnknownContainerFormatError(line)
	465	self._state_handler = self._state_expecting_record_type
	466
	467	def _state_expecting_record_type(self):
	468	if len(self._buffer) >= 1:
	469	record_type = self._buffer[0]
	470	self._buffer = self._buffer[1:]
2916.2.8 by Andrew Bennetts Add bzrlib.pack.iter_records_from_file.	471	if record_type == 'B':
	472	self._state_handler = self._state_expecting_length
	473	elif record_type == 'E':
2916.2.10 by Andrew Bennetts Simpler iter_records_from_file implementation.	474	self.finished = True
2916.2.8 by Andrew Bennetts Add bzrlib.pack.iter_records_from_file.	475	self._state_handler = self._state_expecting_nothing
	476	else:
	477	raise errors.UnknownRecordTypeError(record_type)
2916.2.1 by Andrew Bennetts Initial implementation of a 'push' parser for the container format.	478
	479	def _state_expecting_length(self):
	480	line = self._consume_line()
	481	if line is not None:
	482	try:
	483	self._current_record_length = int(line)
	484	except ValueError:
	485	raise errors.InvalidRecordError(
	486	"%r is not a valid length." % (line,))
	487	self._state_handler = self._state_expecting_name
	488
	489	def _state_expecting_name(self):
	490	encoded_name_parts = self._consume_line()
2916.2.8 by Andrew Bennetts Add bzrlib.pack.iter_records_from_file.	491	if encoded_name_parts == '':
	492	self._state_handler = self._state_expecting_body
	493	elif encoded_name_parts:
	494	name_parts = tuple(encoded_name_parts.split('\x00'))
	495	for name_part in name_parts:
	496	_check_name(name_part)
	497	self._current_record_names.append(name_parts)
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	498
2916.2.1 by Andrew Bennetts Initial implementation of a 'push' parser for the container format.	499	def _state_expecting_body(self):
	500	if len(self._buffer) >= self._current_record_length:
	501	body_bytes = self._buffer[:self._current_record_length]
	502	self._buffer = self._buffer[self._current_record_length:]
	503	record = (self._current_record_names, body_bytes)
	504	self._parsed_records.append(record)
	505	self._reset_current_record()
	506	self._state_handler = self._state_expecting_record_type
	507
2916.2.8 by Andrew Bennetts Add bzrlib.pack.iter_records_from_file.	508	def _state_expecting_nothing(self):
	509	pass
	510
2916.2.10 by Andrew Bennetts Simpler iter_records_from_file implementation.	511	def read_size_hint(self):
	512	hint = 16384
	513	if self._state_handler == self._state_expecting_body:
2916.2.8 by Andrew Bennetts Add bzrlib.pack.iter_records_from_file.	514	remaining = self._current_record_length - len(self._buffer)
	515	if remaining < 0:
	516	remaining = 0
2916.2.10 by Andrew Bennetts Simpler iter_records_from_file implementation.	517	return max(hint, remaining)
	518	return hint
2916.2.8 by Andrew Bennetts Add bzrlib.pack.iter_records_from_file.	519
	520
	521	def iter_records_from_file(source_file):
	522	parser = ContainerPushParser()
	523	while True:
2916.2.10 by Andrew Bennetts Simpler iter_records_from_file implementation.	524	bytes = source_file.read(parser.read_size_hint())
2916.2.8 by Andrew Bennetts Add bzrlib.pack.iter_records_from_file.	525	parser.accept_bytes(bytes)
	526	for record in parser.read_pending_records():
	527	yield record
2916.2.10 by Andrew Bennetts Simpler iter_records_from_file implementation.	528	if parser.finished:
	529	break
2916.2.1 by Andrew Bennetts Initial implementation of a 'push' parser for the container format.	530
5757.1.1 by Jelmer Vernooij Move _DirectPackAccess from bzrlib.knit to bzrlib.pack.	531
	532	class _DirectPackAccess(object):
	533	"""Access to data in one or more packs with less translation."""
	534
	535	def __init__(self, index_to_packs, reload_func=None, flush_func=None):
	536	"""Create a _DirectPackAccess object.
	537
	538	:param index_to_packs: A dict mapping index objects to the transport
	539	and file names for obtaining data.
	540	:param reload_func: A function to call if we determine that the pack
	541	files have moved and we need to reload our caches. See
	542	bzrlib.repo_fmt.pack_repo.AggregateIndex for more details.
	543	"""
	544	self._container_writer = None
	545	self._write_index = None
	546	self._indices = index_to_packs
	547	self._reload_func = reload_func
	548	self._flush_func = flush_func
	549
	550	def add_raw_records(self, key_sizes, raw_data):
	551	"""Add raw knit bytes to a storage area.
	552
	553	The data is spooled to the container writer in one bytes-record per
	554	raw data item.
	555
	556	:param sizes: An iterable of tuples containing the key and size of each
	557	raw data segment.
	558	:param raw_data: A bytestring containing the data.
	559	:return: A list of memos to retrieve the record later. Each memo is an
	560	opaque index memo. For _DirectPackAccess the memo is (index, pos,
	561	length), where the index field is the write_index object supplied
	562	to the PackAccess object.
	563	"""
	564	if type(raw_data) is not str:
	565	raise AssertionError(
	566	'data must be plain bytes was %s' % type(raw_data))
	567	result = []
	568	offset = 0
	569	for key, size in key_sizes:
	570	p_offset, p_length = self._container_writer.add_bytes_record(
	571	raw_data[offset:offset+size], [])
	572	offset += size
	573	result.append((self._write_index, p_offset, p_length))
	574	return result
	575
	576	def flush(self):
	577	"""Flush pending writes on this access object.
	578
	579	This will flush any buffered writes to a NewPack.
	580	"""
	581	if self._flush_func is not None:
	582	self._flush_func()
	583
	584	def get_raw_records(self, memos_for_retrieval):
	585	"""Get the raw bytes for a records.
	586
	587	:param memos_for_retrieval: An iterable containing the (index, pos,
	588	length) memo for retrieving the bytes. The Pack access method
	589	looks up the pack to use for a given record in its index_to_pack
	590	map.
	591	:return: An iterator over the bytes of the records.
	592	"""
	593	# first pass, group into same-index requests
	594	request_lists = []
595	current_index = None
596	for (index, offset, length) in memos_for_retrieval:
597	if current_index == index:
598	current_list.append((offset, length))
599	else:
600	if current_index is not None:
601	request_lists.append((current_index, current_list))
602	current_index = index
603	current_list = [(offset, length)]
604	# handle the last entry
605	if current_index is not None:
606	request_lists.append((current_index, current_list))
607	for index, offsets in request_lists:
608	try:
609	transport, path = self._indices[index]
610	except KeyError:
611	# A KeyError here indicates that someone has triggered an index
612	# reload, and this index has gone missing, we need to start
613	# over.
614	if self._reload_func is None:
615	# If we don't have a _reload_func there is nothing that can
616	# be done
617	raise
618	raise errors.RetryWithNewPacks(index,
619	reload_occurred=True,
620	exc_info=sys.exc_info())
621	try:
622	reader = make_readv_reader(transport, path, offsets)
623	for names, read_func in reader.iter_records():
624	yield read_func(None)
625	except errors.NoSuchFile:
626	# A NoSuchFile error indicates that a pack file has gone
627	# missing on disk, we need to trigger a reload, and start over.
628	if self._reload_func is None:
629	raise
630	raise errors.RetryWithNewPacks(transport.abspath(path),
631	reload_occurred=False,
632	exc_info=sys.exc_info())
633
634	def set_writer(self, writer, index, transport_packname):
635	"""Set a writer to use for adding data."""
636	if index is not None:
637	self._indices[index] = transport_packname
638	self._container_writer = writer
639	self._write_index = index
640
641	def reload_or_raise(self, retry_exc):
642	"""Try calling the reload function, or re-raise the original exception.
643
644	This should be called after _DirectPackAccess raises a
645	RetryWithNewPacks exception. This function will handle the common logic
646	of determining when the error is fatal versus being temporary.
647	It will also make sure that the original exception is raised, rather
648	than the RetryWithNewPacks exception.
649
650	If this function returns, then the calling function should retry
651	whatever operation was being performed. Otherwise an exception will
652	be raised.
653
654	:param retry_exc: A RetryWithNewPacks exception.
655	"""
656	is_error = False
657	if self._reload_func is None:
658	is_error = True
659	elif not self._reload_func():
660	# The reload claimed that nothing changed
661	if not retry_exc.reload_occurred:
662	# If there wasn't an earlier reload, then we really were
663	# expecting to find changes. We didn't find them, so this is a
664	# hard error
665	is_error = True
666	if is_error:
667	exc_class, exc_value, exc_traceback = retry_exc.exc_info
668	raise exc_class, exc_value, exc_traceback
669
670
671