2762
2768
def __init__(self, knit):
    """Create an annotator that works on ``knit``.

    Every cache starts out empty and is filled in as revisions are
    processed.

    :param knit: The knit to annotate. It is stored as ``self._knit``; the
        other methods reach through it for ``_index``, ``_data`` and
        ``factory``.
    """
    self._knit = knit

    # Unannotated text of various revisions, keyed by revision id. Each
    # entry is the text() of a copy of the content object whose last-line
    # newline has been stripped.
    self._fulltexts = {}
    # Content objects, keyed by revision id. These differ from the
    # fulltexts because of how final newlines are treated by knits: the
    # object cached here is the one handed in, before any stripping.
    self._fulltext_contents = {}

    # Annotated lines of specific revisions
    self._annotated_lines = {}

    # Track the raw data for nodes that we could not process yet.
    # This maps the revision_id of the base to a list of children that will
    # be annotated from it.
    self._pending_children = {}

    # revision_id => (method, index_memo, compression_parent, parents), as
    # reported by the knit index.
    self._all_build_details = {}
    # revision_id => parents
    self._revision_id_graph = {}
2790
def _add_fulltext_content(self, revision_id, content_obj, noeol_flag):
    """Cache the content object and the plain fulltext of a revision.

    :param revision_id: The revision the content belongs to.
    :param content_obj: The content object for that revision. It has to
        provide ``copy()``, ``strip_last_line_newline()`` and ``text()``.
    :param noeol_flag: Accepted from the callers but not consulted here.
    :return: The fulltext taken from a copy of ``content_obj`` after its
        last-line newline was stripped. The same value is stored in
        ``self._fulltexts[revision_id]``.
    """
    self._fulltext_contents[revision_id] = content_obj
    # Strip on a copy so the cached content object stays untouched.
    stripped = content_obj.copy()
    stripped.strip_last_line_newline()
    fulltext = stripped.text()
    self._fulltexts[revision_id] = fulltext
    # XXX: It would probably be good to check the sha1digest here
    # Callers assign the result of this method, so it has to be returned.
    return fulltext
2800
def _check_parents(self, child, nodes_to_annotate):
    """Check if all parents have been processed.

    :param child: A tuple of (rev_id, parents, raw_content)
    :param nodes_to_annotate: If child is ready, add it to
        nodes_to_annotate, otherwise put it back in self._pending_children
        under the first parent that has not been annotated yet.
    """
    for parent_id in child[1]:
        if parent_id not in self._annotated_lines:
            # This parent has not been annotated yet, so park the child
            # under it. Stopping at the first such parent keeps the child
            # queued in exactly one place.
            self._pending_children.setdefault(parent_id, []).append(child)
            break
    else:
        # This one is ready to be processed
        nodes_to_annotate.append(child)
2817
def _add_annotation(self, revision_id, fulltext, parent_ids,
                    left_matching_blocks=None):
    """Add an annotation entry.

    All parents should already have been annotated.

    :param revision_id: The revision being annotated.
    :param fulltext: The text of ``revision_id``, handed to
        ``annotate.reannotate`` as the new text.
    :param parent_ids: The parents of ``revision_id``; each one must
        already be a key of ``self._annotated_lines``.
    :param left_matching_blocks: Passed through to
        ``annotate.reannotate`` unchanged.
    :return: A list of children that now have their parents satisfied.
    """
    annotated = self._annotated_lines
    annotated_parent_lines = [annotated[p] for p in parent_ids]
    annotated_lines = list(annotate.reannotate(annotated_parent_lines,
        fulltext, revision_id, left_matching_blocks))
    annotated[revision_id] = annotated_lines
    # Now that we've added this one, see if there are any pending
    # deltas to be done, certainly this parent is finished
    nodes_to_annotate = []
    for child in self._pending_children.pop(revision_id, []):
        self._check_parents(child, nodes_to_annotate)
    return nodes_to_annotate
2836
def _get_build_graph(self, revision_id):
    """Get the graphs for building texts and annotations.

    The data you need for creating a full text may be different than the
    data you need to annotate that text. (At a minimum, you need both
    parents to create an annotation, but only need 1 parent to generate the
    fulltext.)

    As a side effect this fills in ``self._all_build_details`` and
    ``self._revision_id_graph`` for every revision it visits. A revision
    the index returns no build details for is recorded in the graph with
    no parents and is not searched for again.

    :param revision_id: The revision whose annotation is wanted.
    :return: A list of (revision_id, index_memo) records, suitable for
        passing to read_records_iter to start reading in the raw data.
        The list is in reverse discovery order.
    """
    pending = set([revision_id])
    records = []
    while pending:
        # get all pending nodes
        this_iteration = pending
        build_details = self._knit._index.get_build_details(this_iteration)
        self._all_build_details.update(build_details)
        # Start a fresh set so this_iteration keeps describing what was
        # asked for in this round.
        pending = set()
        for rev_id, details in build_details.iteritems():
            method, index_memo, compression_parent, parents = details
            self._revision_id_graph[rev_id] = parents
            records.append((rev_id, index_memo))
            pending.update(p for p in parents
                           if p not in self._all_build_details)

        missing_versions = this_iteration.difference(build_details.keys())
        for missing_version in missing_versions:
            # add a key, no parents. The key is the single missing
            # revision; the whole set is unhashable and cannot be a key.
            self._revision_id_graph[missing_version] = ()
            pending.discard(missing_version)  # don't look for it
    # Generally we will want to read the records in reverse order, because
    # we find the parent nodes after the children
    records.reverse()
    return records
2874
def _annotate_records(self, records):
    """Build the annotations for the listed records.

    :param records: (revision_id, index_memo) records, handed to
        ``self._knit._data.read_records_iter``. Every revision read back
        must already have an entry in ``self._all_build_details``.
    :return: None. Results are stored through ``_add_fulltext_content``
        and ``_add_annotation``.
    """
    # We iterate in the order read, rather than a strict order requested
    # However, process what we can, and put off to the side things that
    # still need parents, cleaning them up when those parents are processed.
    for (rev_id, raw_content,
         digest) in self._knit._data.read_records_iter(records):
        if rev_id in self._annotated_lines:
            # Already annotated, nothing left to do for this record.
            continue
        details = self._all_build_details[rev_id]
        method, index_memo, compression_parent, parent_ids = details
        # XXX: We don't want to be going back to the index here
        noeol = 'no-eol' in self._knit._index.get_options(rev_id)
        nodes_to_annotate = []
        # TODO: Remove the punning between compression parents, and
        #       parent_ids
        if len(parent_ids) == 0:
            # There are no parents for this node, so just add it
            # TODO: This probably needs to be decoupled
            assert compression_parent is None and method == 'fulltext'
            fulltext_content = self._knit.factory.parse_fulltext(
                raw_content, rev_id)
            fulltext = self._add_fulltext_content(rev_id, fulltext_content,
                                                  noeol)
            nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
                parent_ids, left_matching_blocks=None))
        else:
            child = (rev_id, parent_ids, raw_content)
            # Check if all the parents are present
            self._check_parents(child, nodes_to_annotate)
        while nodes_to_annotate:
            # Should we use a queue here instead of a stack?
            (rev_id, parent_ids, raw_content) = nodes_to_annotate.pop()
            (method, index_memo, compression_parent,
             parent_ids) = self._all_build_details[rev_id]

            noeol = 'no-eol' in self._knit._index.get_options(rev_id)
            if method == 'line-delta':
                base_content = self._fulltext_contents[compression_parent]
                delta = self._knit.factory.parse_line_delta(raw_content,
                                                            rev_id)
                # TODO: only copy when the parent is still needed elsewhere
                fulltext_content = base_content.copy()
                fulltext_content.apply_delta(delta, rev_id)
                fulltext = self._add_fulltext_content(rev_id,
                    fulltext_content, noeol)
                parent_fulltext = self._fulltexts[parent_ids[0]]
                blocks = KnitContent.get_line_delta_blocks(delta,
                        parent_fulltext, fulltext)
            else:
                assert method == 'fulltext'
                fulltext_content = self._knit.factory.parse_fulltext(
                    raw_content, rev_id)
                fulltext = self._add_fulltext_content(rev_id,
                    fulltext_content, noeol)
                # A fulltext record carries no delta to derive blocks from.
                blocks = None
            nodes_to_annotate.extend(
                self._add_annotation(rev_id, fulltext, parent_ids,
                                     left_matching_blocks=blocks))
2765
2937
def get_annotated_lines(self, revision_id):
    """Return the annotated fulltext at the given revision.

    The records needed are looked up with ``_get_build_graph`` and then
    annotated with ``_annotate_records``; the result is read back from
    ``self._annotated_lines``.

    :param revision_id: The revision id for this file
    :return: The entry of ``self._annotated_lines`` for ``revision_id``.
    """
    # NOTE(review): the source held a second, older body here that walked
    # the whole ancestry and returned before these lines could run. Only
    # the implementation built on the build graph is kept - confirm.
    records = self._get_build_graph(revision_id)
    self._annotate_records(records)
    return self._annotated_lines[revision_id]