1037
1101
def _get_record_map(self, version_ids):
    """Produce a dictionary of knit records.

    :param version_ids: The versions to look up; passed straight to
        ``self._get_components_positions``.
    :return: {version_id:(record, record_details, digest, next)}
        record
            data returned from read_records
        record_details
            opaque information to pass to parse_record
        digest
            SHA1 digest of the full text after all steps are done
        next
            build-parent of the version, i.e. the leftmost ancestor.
            Will be None if the record is not a delta.
    """
    position_map = self._get_components_positions(version_ids)
    # c = component_id, r = record_details, i_m = index_memo, n = next
    # Only the index memo is needed to read the raw record.
    records = [(c, i_m) for c, (r, i_m, n)
               in position_map.iteritems()]
    record_map = {}
    for component_id, record, digest in \
            self._data.read_records_iter(records):
        # Re-join what was read with the details from the position map.
        (record_details, index_memo,
         build_parent) = position_map[component_id]
        record_map[component_id] = (record, record_details, digest,
                                    build_parent)

    return record_map
1059
1127
def get_text(self, version_id):
2668
2881
It will work for knits with cached annotations, but this is not
2671
ancestry = knit.get_ancestry(revision_id)
2672
fulltext = dict(zip(ancestry, knit.get_line_list(ancestry)))
2674
for candidate in ancestry:
2675
if candidate in annotations:
2677
parents = knit.get_parents(candidate)
2678
if len(parents) == 0:
2680
elif knit._index.get_method(candidate) != 'line-delta':
2884
annotator = _KnitAnnotator(knit)
2885
return iter(annotator.annotate(revision_id))
2888
class _KnitAnnotator(object):
2889
"""Build up the annotations for a text."""
2891
def __init__(self, knit):
    """Set up empty caches for annotating texts stored in ``knit``.

    :param knit: The knit to annotate; the other methods reach its index,
        data and factory through ``self._knit``.
    """
    self._knit = knit

    # Content objects, kept separately from plain fulltexts because of how
    # final newlines are treated by knits.
    self._fulltext_contents = {}

    # Annotated lines of specific revisions
    self._annotated_lines = {}

    # Track the raw data for nodes that we could not process yet.
    # This maps the revision_id of the base to a list of children that will
    # be annotated from it.
    self._pending_children = {}

    # Nodes which cannot be extracted
    self._ghosts = set()

    # Track which children each node still has, so we know when the data
    # cached for that node can be dropped.
    self._annotate_children = {}
    self._compression_children = {}

    # Build details of every node looked up so far
    self._all_build_details = {}
    # The children => parent revision_id graph
    self._revision_id_graph = {}

    # Created on first use by _get_heads_provider
    self._heads_provider = None

    self._nodes_to_keep_annotations = set()
    self._generations_until_keep = 100
2924
def set_generations_until_keep(self, value):
    """Set the number of generations before caching a node.

    Setting this to -1 will cache every merge node, setting this higher
    will cache fewer nodes.

    :param value: The new generation count, stored as
        ``self._generations_until_keep``.
    """
    self._generations_until_keep = value
2932
def _add_fulltext_content(self, revision_id, content_obj):
    """Cache the content object for a revision and return its text.

    :param revision_id: Key under which the content object is stored.
    :param content_obj: An object exposing ``text()``; stored as-is in
        ``self._fulltext_contents``.
    :return: Whatever ``content_obj.text()`` returns.
    """
    self._fulltext_contents[revision_id] = content_obj
    # TODO: jam 20080305 It might be good to check the sha1digest here
    return content_obj.text()
2937
def _check_parents(self, child, nodes_to_annotate):
    """Check if all parents have been processed.

    :param child: A tuple of (rev_id, parents, raw_content)
    :param nodes_to_annotate: If child is ready, add it to
        nodes_to_annotate, otherwise put it back in self._pending_children
    """
    for parent_id in child[1]:
        if (parent_id not in self._annotated_lines):
            # This parent has not been annotated yet. Queue the child
            # under it and stop: _add_annotation re-checks the child once
            # this parent is done, so one registration is enough.
            self._pending_children.setdefault(parent_id,
                                              []).append(child)
            break
    else:
        # This one is ready to be processed
        nodes_to_annotate.append(child)
2954
def _add_annotation(self, revision_id, fulltext, parent_ids,
                    left_matching_blocks=None):
    """Add an annotation entry.

    All parents should already have been annotated.

    :param revision_id: The revision whose annotation is being recorded.
    :param fulltext: The text of revision_id, as passed on to
        annotate.reannotate.
    :param parent_ids: Parents whose annotated lines are looked up in
        self._annotated_lines.
    :param left_matching_blocks: Passed through to annotate.reannotate.
    :return: A list of children that now have their parents satisfied.
    """
    a = self._annotated_lines
    annotated_parent_lines = [a[p] for p in parent_ids]
    annotated_lines = list(annotate.reannotate(annotated_parent_lines,
        fulltext, revision_id, left_matching_blocks,
        heads_provider=self._get_heads_provider()))
    self._annotated_lines[revision_id] = annotated_lines
    for p in parent_ids:
        ann_children = self._annotate_children[p]
        ann_children.remove(revision_id)
        # Once the last child of a parent has been annotated, drop the
        # parent's cached data unless it was marked to be kept.
        if (not ann_children
            and p not in self._nodes_to_keep_annotations):
            del self._annotated_lines[p]
            del self._all_build_details[p]
            if p in self._fulltext_contents:
                del self._fulltext_contents[p]
    # Now that we've added this one, see if there are any pending
    # deltas to be done, certainly this parent is finished
    nodes_to_annotate = []
    for child in self._pending_children.pop(revision_id, []):
        self._check_parents(child, nodes_to_annotate)
    return nodes_to_annotate
2983
def _get_build_graph(self, revision_id):
2984
"""Get the graphs for building texts and annotations.
2986
The data you need for creating a full text may be different than the
2987
data you need to annotate that text. (At a minimum, you need both
2988
parents to create an annotation, but only need 1 parent to generate the
2991
:return: A list of (revision_id, index_memo) records, suitable for
2992
passing to read_records_iter to start reading in the raw data from
2995
if revision_id in self._annotated_lines:
2998
pending = set([revision_id])
3003
# get all pending nodes
3005
this_iteration = pending
3006
build_details = self._knit._index.get_build_details(this_iteration)
3007
self._all_build_details.update(build_details)
3008
# new_nodes = self._knit._index._get_entries(this_iteration)
3010
for rev_id, details in build_details.iteritems():
3011
(index_memo, compression_parent, parents,
3012
record_details) = details
3013
self._revision_id_graph[rev_id] = parents
3014
records.append((rev_id, index_memo))
3015
# Do we actually need to check _annotated_lines?
3016
pending.update(p for p in parents
3017
if p not in self._all_build_details)
3018
if compression_parent:
3019
self._compression_children.setdefault(compression_parent,
3022
for parent in parents:
3023
self._annotate_children.setdefault(parent,
3025
num_gens = generation - kept_generation
3026
if ((num_gens >= self._generations_until_keep)
3027
and len(parents) > 1):
3028
kept_generation = generation
3029
self._nodes_to_keep_annotations.add(rev_id)
3031
missing_versions = this_iteration.difference(build_details.keys())
3032
self._ghosts.update(missing_versions)
3033
for missing_version in missing_versions:
3034
# add a key, no parents
3035
self._revision_id_graph[missing_version] = ()
3036
pending.discard(missing_version) # don't look for it
3037
# XXX: This should probably be a real exception, as it is a data
3039
assert not self._ghosts.intersection(self._compression_children), \
3040
"We cannot have nodes which have a compression parent of a ghost."
3041
# Cleanout anything that depends on a ghost so that we don't wait for
3042
# the ghost to show up
3043
for node in self._ghosts:
3044
if node in self._annotate_children:
3045
# We won't be building this node
3046
del self._annotate_children[node]
3047
# Generally we will want to read the records in reverse order, because
3048
# we find the parent nodes after the children
3052
def _annotate_records(self, records):
3053
"""Build the annotations for the listed records."""
3054
# We iterate in the order read, rather than a strict order requested
3055
# However, process what we can, and put off to the side things that
3056
# still need parents, cleaning them up when those parents are
3058
for (rev_id, record,
3059
digest) in self._knit._data.read_records_iter(records):
3060
if rev_id in self._annotated_lines:
3062
parent_ids = self._revision_id_graph[rev_id]
3063
parent_ids = [p for p in parent_ids if p not in self._ghosts]
3064
details = self._all_build_details[rev_id]
3065
(index_memo, compression_parent, parents,
3066
record_details) = details
3067
nodes_to_annotate = []
3068
# TODO: Remove the punning between compression parents, and
3069
# parent_ids, we should be able to do this without assuming
3071
if len(parent_ids) == 0:
3072
# There are no parents for this node, so just add it
3073
# TODO: This probably needs to be decoupled
3074
assert compression_parent is None
3075
fulltext_content, delta = self._knit.factory.parse_record(
3076
rev_id, record, record_details, None)
3077
fulltext = self._add_fulltext_content(rev_id, fulltext_content)
3078
nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
3079
parent_ids, left_matching_blocks=None))
3081
child = (rev_id, parent_ids, record)
3082
# Check if all the parents are present
3083
self._check_parents(child, nodes_to_annotate)
3084
while nodes_to_annotate:
3085
# Should we use a queue here instead of a stack?
3086
(rev_id, parent_ids, record) = nodes_to_annotate.pop()
3087
(index_memo, compression_parent, parents,
3088
record_details) = self._all_build_details[rev_id]
3089
if compression_parent is not None:
3090
comp_children = self._compression_children[compression_parent]
3091
assert rev_id in comp_children
3092
# If there is only 1 child, it is safe to reuse this
3094
reuse_content = (len(comp_children) == 1
3095
and compression_parent not in
3096
self._nodes_to_keep_annotations)
3098
# Remove it from the cache since it will be changing
3099
parent_fulltext_content = self._fulltext_contents.pop(compression_parent)
3100
# Make sure to copy the fulltext since it might be
3102
parent_fulltext = list(parent_fulltext_content.text())
3104
parent_fulltext_content = self._fulltext_contents[compression_parent]
3105
parent_fulltext = parent_fulltext_content.text()
3106
comp_children.remove(rev_id)
3107
fulltext_content, delta = self._knit.factory.parse_record(
3108
rev_id, record, record_details,
3109
parent_fulltext_content,
3110
copy_base_content=(not reuse_content))
3111
fulltext = self._add_fulltext_content(rev_id,
3113
blocks = KnitContent.get_line_delta_blocks(delta,
3114
parent_fulltext, fulltext)
3116
fulltext_content = self._knit.factory.parse_fulltext(
3118
fulltext = self._add_fulltext_content(rev_id,
3121
nodes_to_annotate.extend(
3122
self._add_annotation(rev_id, fulltext, parent_ids,
3123
left_matching_blocks=blocks))
3125
def _get_heads_provider(self):
    """Create a heads provider for resolving ancestry issues.

    The provider is built over ``self._revision_id_graph`` on first use and
    cached in ``self._heads_provider``; later calls return the cached one.

    :return: The cached or newly created heads provider.
    """
    if self._heads_provider is not None:
        return self._heads_provider
    parent_provider = _mod_graph.DictParentsProvider(
        self._revision_id_graph)
    graph_obj = _mod_graph.Graph(parent_provider)
    head_cache = _mod_graph.FrozenHeadsCache(graph_obj)
    self._heads_provider = head_cache
    return head_cache
3136
def annotate(self, revision_id):
    """Return the annotated fulltext at the given revision.

    :param revision_id: The revision id for this file
    :return: The entry stored for revision_id in self._annotated_lines
        after the records have been processed.
    :raises errors.RevisionNotPresent: If building the graph recorded
        revision_id as a ghost.
    """
    records = self._get_build_graph(revision_id)
    if revision_id in self._ghosts:
        raise errors.RevisionNotPresent(revision_id, self._knit)
    self._annotate_records(records)
    return self._annotated_lines[revision_id]