/brz/remove-bazaar : contents of multiparent/__init_

: (revision 0.9.19)

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

0.9.2 by Aaron Bentley Get single-parent comparison working	1	from difflib import SequenceMatcher
0.9.19 by Aaron Bentley More tweakage	2	import sys
0.9.19 by Aaron Bentley More tweakage	3
0.9.16 by Aaron Bentley More control over snapshotting, disable caching for inventory	4	from bzrlib import patiencediff
0.9.3 by Aaron Bentley Get three-parent comparisions under test	5
class MultiParent(object):
    """A text described as a sequence of hunks relative to parent texts.

    ``hunks`` is a list whose items are either NewText objects (lines that
    are stored literally) or ParentText objects (references to a run of
    lines in one of the parents).
    """

    def __init__(self, hunks=None):
        """Create a MultiParent.

        :param hunks: the list of hunks to use; a fresh empty list is
            created when omitted, so instances never share a default list.
        """
        if hunks is not None:
            self.hunks = hunks
        else:
            self.hunks = []

    def __repr__(self):
        return "MultiParent(%r)" % self.hunks

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (self.hunks == other.hunks)

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        return not self.__eq__(other)

    @staticmethod
    def from_lines(text, parents=()):
        """Produce a MultiParent from a list of lines and parents

        :param text: the list of lines of the text being described
        :param parents: a sequence of line lists, one per parent
        :return: a MultiParent whose hunks cover every line of text
        """
        def compare(parent):
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
                                                           text)
            return matcher.get_matching_blocks()

        # One iterator of (parent_pos, text_pos, length) blocks per parent.
        block_iter = [iter(compare(p)) for p in parents]

        def next_block(p):
            # None marks a parent whose matching blocks are exhausted.
            return next(block_iter[p], None)

        cur_block = [next_block(p) for p in range(len(block_iter))]
        cur_line = 0
        text_len = len(text)
        new_text = NewText([])
        diff = MultiParent([])
        while cur_line < text_len:
            best_match = None
            for p, block in enumerate(cur_block):
                if block is None:
                    continue
                i, j, n = block
                # Skip blocks that end at or before the current line.  Using
                # <= (not <) matters: a block that ends exactly at cur_line
                # has nothing left to offer, while the following block may
                # start right here.
                while j + n <= cur_line:
                    block = cur_block[p] = next_block(p)
                    if block is None:
                        break
                    i, j, n = block
                if block is None:
                    continue
                if j > cur_line:
                    continue
                # Trim the part of the block that lies before cur_line.  The
                # loop above guarantees at least one line remains.
                offset = cur_line - j
                i += offset
                j = cur_line
                n -= offset
                # Prefer the parent offering the longest run; ties keep the
                # earliest parent.
                if best_match is None or n > best_match.num_lines:
                    best_match = ParentText(p, i, j, n)
            if best_match is None:
                new_text.lines.append(text[cur_line])
                cur_line += 1
            else:
                # Flush pending literal lines before the parent reference.
                if len(new_text.lines) > 0:
                    diff.hunks.append(new_text)
                    new_text = NewText([])
                diff.hunks.append(best_match)
                cur_line += best_match.num_lines
        if len(new_text.lines) > 0:
            diff.hunks.append(new_text)
        return diff

    @classmethod
    def from_texts(cls, text, parents=()):
        """Produce a MultiParent from a text and list of parent text"""
        return cls.from_lines(text.splitlines(True),
                              [p.splitlines(True) for p in parents])

    def to_patch(self):
        """Yield text lines for a patch"""
        for hunk in self.hunks:
            for line in hunk.to_patch():
                yield line

    @staticmethod
    def from_patch(lines):
        """Produce a MultiParent from a sequence of lines

        Lines starting with 'i' introduce an insert hunk: the count that
        follows says how many of the next lines belong to it.  The final
        character of the last inserted line is dropped, and a following
        line that starts with a newline character puts a newline back.
        Any other line must start with 'c' and carries the four integers
        of a ParentText.

        :param lines: an iterable of patch lines
        :return: a MultiParent
        """
        line_iter = iter(lines)
        hunks = []
        for cur_line in line_iter:
            if cur_line[0] == 'i':
                num_lines = int(cur_line.split(' ')[1])
                hunk_lines = [next(line_iter) for x in range(num_lines)]
                # An empty insert hunk ("i 0") has no last line to trim.
                if hunk_lines:
                    hunk_lines[-1] = hunk_lines[-1][:-1]
                hunks.append(NewText(hunk_lines))
            elif cur_line[0] == '\n':
                # Terminator of the preceding insert hunk; for an empty
                # hunk there is no line to restore the newline on.
                last_lines = hunks[-1].lines
                if last_lines:
                    last_lines[-1] += '\n'
            else:
                assert cur_line[0] == 'c', cur_line[0]
                parent, parent_pos, child_pos, num_lines =\
                    [int(v) for v in cur_line.split(' ')[1:]]
                hunks.append(ParentText(parent, parent_pos, child_pos,
                                        num_lines))
        return MultiParent(hunks)

    def range_iterator(self):
        """Iterate through the hunks, with range indicated

        kind is "new" or "parent".
        for "new", data is a list of lines.
        for "parent", data is (parent, parent_start, parent_end)
        :return: a generator of (start, end, kind, data)
        """
        start = 0
        for hunk in self.hunks:
            if isinstance(hunk, NewText):
                kind = 'new'
                end = start + len(hunk.lines)
                data = hunk.lines
            else:
                kind = 'parent'
                # Parent hunks record their own position in the child.
                start = hunk.child_pos
                end = start + hunk.num_lines
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
                        hunk.num_lines)
            yield start, end, kind, data
            start = end

    def num_lines(self):
        """Return the number of lines in the text this object describes

        Walks the hunks backwards, counting literal lines, until a
        ParentText (which knows its own child position) is found.
        """
        extra_n = 0
        for hunk in reversed(self.hunks):
            if isinstance(hunk, ParentText):
                return hunk.child_pos + hunk.num_lines + extra_n
            extra_n += len(hunk.lines)
        return extra_n
	145
0.9.1 by Aaron Bentley Get trivial case passing	146
class NewText(object):
    """The contents of text that is introduced by this text"""

    def __init__(self, lines):
        """:param lines: the list of lines held by this hunk"""
        self.lines = lines

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (other.lines == self.lines)

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        return not self.__eq__(other)

    def __repr__(self):
        return 'NewText(%r)' % self.lines

    def to_patch(self):
        """Yield the patch form: an 'i <count>' header, the lines
        themselves, then a single newline as terminator.
        """
        yield 'i %d\n' % len(self.lines)
        for line in self.lines:
            yield line
        yield '\n'
	166
0.9.2 by Aaron Bentley Get single-parent comparison working	167
class ParentText(object):
    """A reference to text present in a parent text"""

    def __init__(self, parent, parent_pos, child_pos, num_lines):
        """
        :param parent: index of the parent being referenced
        :param parent_pos: first referenced line in the parent
        :param child_pos: position of the run in the child text
        :param num_lines: number of lines in the run
        """
        self.parent = parent
        self.parent_pos = parent_pos
        self.child_pos = child_pos
        self.num_lines = num_lines

    def __repr__(self):
        return 'ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'\
            ' %(num_lines)r)' % self.__dict__

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (self.__dict__ == other.__dict__)

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        return not self.__eq__(other)

    def to_patch(self):
        """Yield the single 'c ...' patch line describing this reference"""
        yield 'c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'\
            % self.__dict__
0.9.8 by Aaron Bentley get add_version working	189
	190
	191	class MultiVersionedFile(object):
0.9.10 by Aaron Bentley Text reconstruction seems to work	192	"""VersionedFile skeleton for MultiParent"""
0.9.8 by Aaron Bentley get add_version working	193
0.9.16 by Aaron Bentley More control over snapshotting, disable caching for inventory	194	def __init__(self, snapshot_interval=25, max_snapshots=None):
0.9.8 by Aaron Bentley get add_version working	195	self._diffs = {}
	196	self._lines = {}
	197	self._parents = {}
0.9.16 by Aaron Bentley More control over snapshotting, disable caching for inventory	198	self._snapshots = set()
0.9.12 by Aaron Bentley Make benchmarks for mp	199	self.snapshot_interval = snapshot_interval
0.9.16 by Aaron Bentley More control over snapshotting, disable caching for inventory	200	self.max_snapshots = max_snapshots
0.9.12 by Aaron Bentley Make benchmarks for mp	201
0.9.12 by Aaron Bentley Make benchmarks for mp	202	def do_snapshot(self, version_id, parent_ids):
0.9.16 by Aaron Bentley More control over snapshotting, disable caching for inventory	203	if self.snapshot_interval is None:
	204	return False
	205	if self.max_snapshots is not None and\
	206	len(self._snapshots) == self.max_snapshots:
0.9.14 by Aaron Bentley Temporarily force snapshots to 44	207	return False
0.9.12 by Aaron Bentley Make benchmarks for mp	208	if len(parent_ids) == 0:
0.9.16 by Aaron Bentley More control over snapshotting, disable caching for inventory	209	return True
	210	for ignored in xrange(self.snapshot_interval):
0.9.12 by Aaron Bentley Make benchmarks for mp	211	if len(parent_ids) == 0:
0.9.12 by Aaron Bentley Make benchmarks for mp	212	return False
0.9.17 by Aaron Bentley Dynamically select snapshots based on all parents	213	version_ids = parent_ids
	214	parent_ids = []
	215	for version_id in version_ids:
	216	if version_id not in self._snapshots:
	217	parent_ids.extend(self._parents[version_id])
0.9.16 by Aaron Bentley More control over snapshotting, disable caching for inventory	218	else:
	219	return True
0.9.8 by Aaron Bentley get add_version working	220
0.9.16 by Aaron Bentley More control over snapshotting, disable caching for inventory	221	def add_version(self, lines, version_id, parent_ids,
	222	force_snapshot=None):
	223	if force_snapshot is None:
	224	do_snapshot = self.do_snapshot(version_id, parent_ids)
	225	else:
	226	do_snapshot = force_snapshot
	227	if do_snapshot:
	228	self._snapshots.add(version_id)
0.9.12 by Aaron Bentley Make benchmarks for mp	229	diff = MultiParent([NewText(lines)])
0.9.12 by Aaron Bentley Make benchmarks for mp	230	else:
0.9.16 by Aaron Bentley More control over snapshotting, disable caching for inventory	231	parent_lines = self.get_line_list(parent_ids)
0.9.12 by Aaron Bentley Make benchmarks for mp	232	diff = MultiParent.from_lines(lines, parent_lines)
0.9.8 by Aaron Bentley get add_version working	233	self.add_diff(diff, version_id, parent_ids)
	234	self._lines[version_id] = lines
	235
	236	def add_diff(self, diff, version_id, parent_ids):
	237	self._diffs[version_id] = diff
	238	self._parents[version_id] = parent_ids
	239
0.9.19 by Aaron Bentley More tweakage	240	def import_versionedfile(self, vf, ft_set=None, no_cache=True,):
	241	revisions = set(vf.versions())
	242	total = len(revisions)
	243	while len(revisions) > 0:
	244	added = set()
	245	for revision in revisions:
	246	parents = vf.get_parents(revision)
	247	if [p for p in parents if p not in self._diffs] != []:
	248	continue
	249	lines = [a + ' ' + l for a, l in vf.annotate_iter(revision)]
	250	if ft_set is None:
	251	force_snapshot = None
	252	else:
	253	force_snapshot = (revision in ft_set)
	254	self.add_version(lines, revision, parents, force_snapshot)
	255	added.add(revision)
	256	if no_cache:
	257	self.clear_cache()
	258	revisions = [r for r in revisions if r not in added]
	259	print >> sys.stderr, "%.1f %%" % ((((total - len(revisions))
	260	* 100.0) / total))
	261
0.9.8 by Aaron Bentley get add_version working	262	def clear_cache(self):
0.9.8 by Aaron Bentley get add_version working	263	self._lines.clear()
0.9.9 by Aaron Bentley Much progress on non-naive text reconstruction	264
	265	def get_line_list(self, version_ids):
	266	return [self.cache_version(v) for v in version_ids]
	267
	268	def cache_version(self, version_id):
	269	try:
	270	return self._lines[version_id]
	271	except KeyError:
	272	pass
	273	diff = self._diffs[version_id]
	274	lines = []
0.9.17 by Aaron Bentley Dynamically select snapshots based on all parents	275	reconstructor = _Reconstructor(self._diffs, self._lines,
	276	self._parents)
0.9.11 by Aaron Bentley Implement reconstruct_version, handle all hunks through that	277	reconstructor.reconstruct_version(lines, version_id)
0.9.9 by Aaron Bentley Much progress on non-naive text reconstruction	278	self._lines[version_id] = lines
	279	return lines
	280
	281
	282	class _Reconstructor(object):
0.9.10 by Aaron Bentley Text reconstruction seems to work	283	"""Build a text from the diffs, ancestry graph and cached lines"""
0.9.9 by Aaron Bentley Much progress on non-naive text reconstruction	284
	285	def __init__(self, diffs, lines, parents):
	286	self.diffs = diffs
	287	self.lines = lines
	288	self.parents = parents
	289	self.cursor = {}
	290
	291	def reconstruct(self, lines, parent_text, version_id):
0.9.10 by Aaron Bentley Text reconstruction seems to work	292	"""Append the lines referred to by a ParentText to lines"""
0.9.9 by Aaron Bentley Much progress on non-naive text reconstruction	293	parent_id = self.parents[version_id][parent_text.parent]
	294	end = parent_text.parent_pos + parent_text.num_lines
0.9.17 by Aaron Bentley Dynamically select snapshots based on all parents	295	return self._reconstruct(lines, parent_id, parent_text.parent_pos,
	296	end)
0.9.9 by Aaron Bentley Much progress on non-naive text reconstruction	297
	298	def _reconstruct(self, lines, req_version_id, req_start, req_end):
0.9.10 by Aaron Bentley Text reconstruction seems to work	299	"""Append lines for the requested version_id range"""
0.9.10 by Aaron Bentley Text reconstruction seems to work	300	# stack of pending range requests
0.9.9 by Aaron Bentley Much progress on non-naive text reconstruction	301	pending_reqs = [(req_version_id, req_start, req_end)]
	302	while len(pending_reqs) > 0:
	303	req_version_id, req_start, req_end = pending_reqs.pop()
0.9.10 by Aaron Bentley Text reconstruction seems to work	304	# lazily allocate cursors for versions
0.9.9 by Aaron Bentley Much progress on non-naive text reconstruction	305	try:
	306	start, end, kind, data, iterator = self.cursor[req_version_id]
	307	except KeyError:
	308	iterator = self.diffs[req_version_id].range_iterator()
	309	start, end, kind, data = iterator.next()
0.9.10 by Aaron Bentley Text reconstruction seems to work	310	# find the first hunk relevant to the request
0.9.10 by Aaron Bentley Text reconstruction seems to work	311	while end <= req_start:
0.9.9 by Aaron Bentley Much progress on non-naive text reconstruction	312	start, end, kind, data = iterator.next()
	313	self.cursor[req_version_id] = start, end, kind, data, iterator
0.9.10 by Aaron Bentley Text reconstruction seems to work	314	# if the hunk can't satisfy the whole request, split it in two,
	315	# and leave the second half for later.
	316	if req_end > end:
	317	pending_reqs.append((req_version_id, end, req_end))
	318	req_end = end
0.9.9 by Aaron Bentley Much progress on non-naive text reconstruction	319	if kind == 'new':
	320	lines.extend(data[req_start - start: (req_end - start)])
	321	else:
0.9.10 by Aaron Bentley Text reconstruction seems to work	322	# If the hunk is a ParentText, rewrite it as a range request
0.9.10 by Aaron Bentley Text reconstruction seems to work	323	# for the parent, and make it the next pending request.
0.9.9 by Aaron Bentley Much progress on non-naive text reconstruction	324	parent, parent_start, parent_end = data
0.9.10 by Aaron Bentley Text reconstruction seems to work	325	new_version_id = self.parents[req_version_id][parent]
	326	new_start = parent_start + req_start - start
	327	new_end = parent_end + req_end - end
	328	pending_reqs.append((new_version_id, new_start, new_end))
0.9.11 by Aaron Bentley Implement reconstruct_version, handle all hunks through that	329
	330	def reconstruct_version(self, lines, version_id):
	331	length = self.diffs[version_id].num_lines()
	332	return self._reconstruct(lines, version_id, 0, length)