INDEX_SUFFIX = '.kndx'


class KnitAdapter(object):
    """Base class for knit record adaption."""

    def __init__(self, basis_vf):
        """Create an adapter which accesses full texts from basis_vf.

        :param basis_vf: A versioned file to access basis texts of deltas from.
            May be None for adapters that do not need to access basis texts.
        """
        self._data = _KnitData(None)
        self._annotate_factory = KnitAnnotateFactory()
        self._plain_factory = KnitPlainFactory()
        self._basis_vf = basis_vf


class FTAnnotatedToUnannotated(KnitAdapter):
    """An adapter from FT annotated knits to unannotated ones."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        rec, contents = \
            self._data._parse_record_unchecked(annotated_compressed_bytes)
        content = self._annotate_factory.parse_fulltext(contents, rec[1])
        size, bytes = self._data._record_to_data(rec[1], rec[3], content.text())
        return bytes


class DeltaAnnotatedToUnannotated(KnitAdapter):
    """An adapter for deltas from annotated to unannotated knits."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        rec, contents = \
            self._data._parse_record_unchecked(annotated_compressed_bytes)
        delta = self._annotate_factory.parse_line_delta(contents, rec[1],
            plain=True)
        contents = self._plain_factory.lower_line_delta(delta)
        size, bytes = self._data._record_to_data(rec[1], rec[3], contents)
        return bytes


class FTAnnotatedToFullText(KnitAdapter):
    """An adapter from FT annotated knits to plain full texts."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        rec, contents = \
            self._data._parse_record_unchecked(annotated_compressed_bytes)
        content, delta = self._annotate_factory.parse_record(factory.key[0],
            contents, factory._build_details, None)
        return ''.join(content.text())


class DeltaAnnotatedToFullText(KnitAdapter):
    """An adapter for deltas from annotated knits to plain full texts."""

    def get_bytes(self, factory, annotated_compressed_bytes):
        rec, contents = \
            self._data._parse_record_unchecked(annotated_compressed_bytes)
        delta = self._annotate_factory.parse_line_delta(contents, rec[1],
            plain=True)
        compression_parent = factory.parents[0][0]
        basis_lines = self._basis_vf.get_lines(compression_parent)
        # Manually apply the delta because we have one annotated content and
        # one plain.
        basis_content = PlainKnitContent(basis_lines, compression_parent)
        basis_content.apply_delta(delta, rec[1])
        basis_content._should_strip_eol = factory._build_details[1]
        return ''.join(basis_content.text())


class FTPlainToFullText(KnitAdapter):
    """An adapter from FT plain knits to plain full texts."""

    def get_bytes(self, factory, compressed_bytes):
        rec, contents = \
            self._data._parse_record_unchecked(compressed_bytes)
        content, delta = self._plain_factory.parse_record(factory.key[0],
            contents, factory._build_details, None)
        return ''.join(content.text())


class DeltaPlainToFullText(KnitAdapter):
    """An adapter for deltas from plain knits to full texts."""

    def get_bytes(self, factory, compressed_bytes):
        rec, contents = \
            self._data._parse_record_unchecked(compressed_bytes)
        delta = self._plain_factory.parse_line_delta(contents, rec[1])
        compression_parent = factory.parents[0][0]
        basis_lines = self._basis_vf.get_lines(compression_parent)
        basis_content = PlainKnitContent(basis_lines, compression_parent)
        # Let the plain factory rebuild the full text from the delta and its
        # basis content.
        content, _ = self._plain_factory.parse_record(rec[1], contents,
            factory._build_details, basis_content)
        return ''.join(content.text())
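

# --- Illustrative sketch (editorial addition, not part of bzrlib). The
# adapters above are selected by (source storage kind, target kind) pairs;
# the kind strings mirror KnitContentFactory below, while the pairing rule
# is an assumption drawn from insert_record_stream further down.
def _example_adapter_key(annotated, is_delta, target_kind):
    """Build a hypothetical (source_kind, target_kind) adapter lookup key."""
    kind = is_delta and 'delta' or 'ft'
    prefix = annotated and 'annotated-' or ''
    source_kind = 'knit-%s%s-gz' % (prefix, kind)
    return (source_kind, target_kind)

# e.g. _example_adapter_key(True, False, 'fulltext')
#     -> ('knit-annotated-ft-gz', 'fulltext'), served by FTAnnotatedToFullText.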


class KnitContentFactory(ContentFactory):
    """Content factory for streaming from knits.

    :seealso ContentFactory:
    """

    def __init__(self, version, parents, build_details, sha1, raw_record,
        annotated, knit=None):
        """Create a KnitContentFactory for version.

        :param version: The version.
        :param parents: The parents.
        :param build_details: The build details as returned from
            get_build_details.
        :param sha1: The sha1 expected from the full text of this object.
        :param raw_record: The bytes of the knit data from disk.
        :param annotated: True if the raw data is annotated.
        """
        ContentFactory.__init__(self)
        self.sha1 = sha1
        self.key = (version,)
        self.parents = tuple((parent,) for parent in parents)
        if build_details[0] == 'line-delta':
            kind = 'delta'
        else:
            kind = 'ft'
        if annotated:
            annotated_kind = 'annotated-'
        else:
            annotated_kind = ''
        self.storage_kind = 'knit-%s%s-gz' % (annotated_kind, kind)
        self._raw_record = raw_record
        self._build_details = build_details
        self._knit = knit

    def get_bytes_as(self, storage_kind):
        if storage_kind == self.storage_kind:
            return self._raw_record
        if storage_kind == 'fulltext' and self._knit is not None:
            return self._knit.get_text(self.key[0])
        else:
            raise errors.UnavailableRepresentation(self.key, storage_kind,
                self.storage_kind)
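
# Hedged usage sketch (editorial): an annotated line-delta record yields
# storage_kind 'knit-annotated-delta-gz'; asking for that kind returns the
# raw bytes unchanged, while 'fulltext' works only when a backing knit was
# supplied. All names and values below are hypothetical.
#
#   factory = KnitContentFactory('rev-2', ['rev-1'], ('line-delta', False),
#       sha1, raw_record, annotated=True, knit=source_knit)
#   factory.storage_kind                        # 'knit-annotated-delta-gz'
#   factory.get_bytes_as(factory.storage_kind)  # raw_record, as stored
#   factory.get_bytes_as('fulltext')            # rebuilt via source_knit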


class KnitContent(object):
    """Content of a knit version to which deltas can be applied.

    This is always stored in memory as a list of lines with \n at the end,
    plus a flag saying if the final ending is really there or not, because that
    corresponds to the on-disk knit representation.
    """

    def __init__(self):
        self._should_strip_eol = False

    def apply_delta(self, delta, new_version_id):
        """Apply delta to this object to become new_version_id."""
        raise NotImplementedError(self.apply_delta)

        out.extend(lines)
        return out

    def annotate(self, knit, version_id):
        annotator = _KnitAnnotator(knit)
        return annotator.annotate(version_id)


def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location."""
    k = make_file_knit(transport, relpath, 'w', KnitPlainFactory)


def make_file_knit(name, transport, file_mode=None, access_mode='w',
    factory=None, delta=True, create=False, create_parent_dir=False,
    delay_create=False, dir_mode=None, get_scope=None):
    """Factory to create a KnitVersionedFile for a .knit/.kndx file pair."""
    if factory is None:
        factory = KnitAnnotateFactory()
    if get_scope is None:
        get_scope = lambda:None
    index = _KnitIndex(transport, name + INDEX_SUFFIX,
        access_mode, create=create, file_mode=file_mode,
        create_parent_dir=create_parent_dir, delay_create=delay_create,
        dir_mode=dir_mode, get_scope=get_scope)
    access = _KnitAccess(transport, name + DATA_SUFFIX, file_mode,
        dir_mode, ((create and not len(index)) and delay_create),
        create_parent_dir)
    return KnitVersionedFile(name, transport, factory=factory,
        create=create, delay_create=delay_create, index=index,
        access_method=access)


def get_suffixes():
    """Return the suffixes used by file based knits."""
    return [DATA_SUFFIX, INDEX_SUFFIX]
make_file_knit.get_suffixes = get_suffixes
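
# Hedged usage sketch (editorial; assumes bzrlib.transport.get_transport,
# which the rest of bzrlib provides):
#
#   from bzrlib.transport import get_transport
#   knit = make_file_knit('example', get_transport('/tmp/knits'),
#       access_mode='w', create=True)
#   knit.add_lines('rev-1', [], ['hello\n'])
#   make_file_knit.get_suffixes()   # ['.knit', '.kndx']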


class KnitVersionedFile(VersionedFile):

            return pseudo_file.read(length)
        return (self.get_format_signature(), result_version_list, read)

    def get_record_stream(self, versions, ordering, include_delta_closure):
        """Get a stream of records for versions.

        :param versions: The versions to include. Each version is a tuple
            (version,).
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the opaque data).
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        if include_delta_closure:
            # Nb: what we should do is plan the data to stream to allow
            # reconstruction of all the texts without excessive buffering,
            # including re-sending common bases as needed. This makes the most
            # sense when we start serialising these streams though, so for now
            # we just fallback to individual text construction behind the
            # abstraction barrier.
            knit = self
        else:
            knit = None
        # We end up doing multiple index lookups here for parents details and
        # disk layout details - we need a unified api ?
        parent_map = self.get_parent_map(versions)
        absent_versions = set(versions) - set(parent_map)
        if ordering == 'topological':
            present_versions = topo_sort(parent_map)
        else:
            # List comprehension to keep the requested order (as that seems
            # marginally useful, at least until we start doing IO optimising
            # here).
            present_versions = [version for version in versions if version in
                parent_map]
        position_map = self._get_components_positions(present_versions)
        records = [(version, position_map[version][1]) for version in
            present_versions]
        for version in absent_versions:
            yield AbsentContentFactory((version,))
        for version, raw_data, sha1 in \
                self._data.read_records_iter_raw(records):
            (record_details, index_memo, _) = position_map[version]
            yield KnitContentFactory(version, parent_map[version],
                record_details, sha1, raw_data, self.factory.annotated, knit)
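
    # Hedged consumption sketch (editorial): each yielded factory is only
    # valid until the iterator advances, so extract its bytes immediately.
    #
    #   stream = knit.get_record_stream(versions, 'topological',
    #       include_delta_closure=True)
    #   for factory in stream:
    #       bytes = factory.get_bytes_as(factory.storage_kind)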

    def _extract_blocks(self, version_id, source, target):
        if self._index.get_method(version_id) != 'line-delta':
            return None

            factory = KnitAnnotateFactory()
        else:
            raise errors.KnitDataStreamUnknown(format)
        index = _StreamIndex(data_list, self._index)
        access = _StreamAccess(reader_callable, index, self, factory)
        return KnitVersionedFile(self.filename, self.transport,
            factory=factory, index=index, access_method=access)

    def insert_record_stream(self, stream):
        """Insert a record stream into this versioned file.

        :param stream: A stream of records to insert.
        :seealso VersionedFile.get_record_stream:
        """
        def get_adapter(adapter_key):
            try:
                return adapters[adapter_key]
            except KeyError:
                adapter_factory = adapter_registry.get(adapter_key)
                adapter = adapter_factory(self)
                adapters[adapter_key] = adapter
                return adapter
        if self.factory.annotated:
            # self is annotated, we need annotated knits to use directly.
            annotated = "annotated-"
            convertibles = []
        else:
            # self is not annotated, but we can strip annotations cheaply.
            annotated = ""
            convertibles = set(["knit-annotated-delta-gz",
                "knit-annotated-ft-gz"])
        # The set of types we can cheaply adapt without needing basis texts.
        native_types = set()
        native_types.add("knit-%sdelta-gz" % annotated)
        native_types.add("knit-%sft-gz" % annotated)
        knit_types = native_types.union(convertibles)
        adapters = {}
        # Buffer all index entries that we can't add immediately because their
        # basis parent is missing. We don't buffer all because generating
        # annotations may require access to some of the new records. However we
        # can't generate annotations from new deltas until their basis parent
        # is present anyway, so we get away with not needing an index that
        # includes the new keys.
        # key = basis_parent, value = index entry to add
        buffered_index_entries = {}
        for record in stream:
            # Raise an error when a record is missing.
            if record.storage_kind == 'absent':
                raise RevisionNotPresent([record.key[0]], self)
            # adapt to non-tuple interface
            parents = [parent[0] for parent in record.parents]
            if record.storage_kind in knit_types:
                if record.storage_kind not in native_types:
                    try:
                        adapter_key = (record.storage_kind, "knit-delta-gz")
                        adapter = get_adapter(adapter_key)
                    except KeyError:
                        adapter_key = (record.storage_kind, "knit-ft-gz")
                        adapter = get_adapter(adapter_key)
                    bytes = adapter.get_bytes(
                        record, record.get_bytes_as(record.storage_kind))
                else:
                    bytes = record.get_bytes_as(record.storage_kind)
                options = [record._build_details[0]]
                if record._build_details[1]:
                    options.append('no-eol')
                # Just blat it across.
                # Note: This does end up adding data on duplicate keys. As
                # modern repositories use atomic insertions this should not
                # lead to excessive growth in the event of interrupted fetches.
                # 'knit' repositories may suffer excessive growth, but as a
                # deprecated format this is tolerable. It can be fixed if
                # needed by the kndx index support raising on a duplicate
                # add with identical parents and options.
                access_memo = self._data.add_raw_records([len(bytes)], bytes)[0]
                index_entry = (record.key[0], options, access_memo, parents)
                buffered = False
                if 'fulltext' not in options:
                    basis_parent = parents[0]
                    if not self.has_version(basis_parent):
                        pending = buffered_index_entries.setdefault(
                            basis_parent, [])
                        pending.append(index_entry)
                        buffered = True
                if not buffered:
                    self._index.add_versions([index_entry])
            elif record.storage_kind == 'fulltext':
                self.add_lines(record.key[0], parents,
                    split_lines(record.get_bytes_as('fulltext')))
            else:
                adapter_key = record.storage_kind, 'fulltext'
                adapter = get_adapter(adapter_key)
                lines = split_lines(adapter.get_bytes(
                    record, record.get_bytes_as(record.storage_kind)))
                try:
                    self.add_lines(record.key[0], parents, lines)
                except errors.RevisionAlreadyPresent:
                    pass
            # Add any records whose basis parent is now available.
            added_keys = [record.key[0]]
            while added_keys:
                key = added_keys.pop(0)
                if key in buffered_index_entries:
                    index_entries = buffered_index_entries[key]
                    self._index.add_versions(index_entries)
                    added_keys.extend(
                        [index_entry[0] for index_entry in index_entries])
                    del buffered_index_entries[key]
        # If there were any deltas which had a missing basis parent, error.
        if buffered_index_entries:
            raise errors.RevisionNotPresent(buffered_index_entries.keys()[0],
                self)
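
    # Hedged pairing sketch (editorial): get_record_stream and
    # insert_record_stream together implement fetch between knits.
    #
    #   versions = source_knit.versions()
    #   stream = source_knit.get_record_stream(versions, 'topological', False)
    #   target_knit.insert_record_stream(stream)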

    def versions(self):
        """See VersionedFile.versions."""
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(2, "versions scales with size of history")
        return self._index.get_versions()

    def _get_record_map(self, version_ids):
        """Produce a dictionary of knit records.

        :return: {version_id:(record, record_details, digest, next)}
            record
                data returned from read_records
            record_details
                opaque information to pass to parse_record
            digest
                SHA1 digest of the full text after all steps are done
            next
                build-parent of the version, i.e. the leftmost ancestor.
                Will be None if the record is not a delta.
        """
        position_map = self._get_components_positions(version_ids)
        # c = component_id, r = record_details, i_m = index_memo, n = next
        records = [(c, i_m) for c, (r, i_m, n)
                             in position_map.iteritems()]
        record_map = {}
        for component_id, record, digest in \
                self._data.read_records_iter(records):
            (record_details, index_memo, next) = position_map[component_id]
            record_map[component_id] = record, record_details, digest, next

        return record_map
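
    # Hedged shape sketch (editorial, values hypothetical): one entry of the
    # returned map for a delta record built on 'rev-1':
    #
    #   record_map['rev-2'] == (raw_record, record_details, sha1_digest,
    #                           'rev-1')   # next is None for fulltexts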

    def get_text(self, version_id):

    def get_build_details(self, version_ids):
        """Get the method, index_memo and compression parent for version_ids.

        Ghosts are omitted from the result.

        :param version_ids: An iterable of version_ids.
        :return: A dict of version_id:(index_memo, compression_parent,
            parents, record_details).
            index_memo
                opaque memo that can be passed to _StreamAccess.read_records
                to extract the raw data; for these classes it is
                (from_backing_knit, index, start, end)
            compression_parent
                Content that this record is built upon, may be None
            parents
                Logical parents of this node
            record_details
                extra information about the content which needs to be passed to
                Factory.parse_record
        """
        result = {}
        for version_id in version_ids:
            try:
                method = self.get_method(version_id)
            except errors.RevisionNotPresent:
                # ghosts are omitted
                continue
            parent_ids = self.get_parents_with_ghosts(version_id)
            noeol = ('no-eol' in self.get_options(version_id))
            index_memo = self.get_position(version_id)
            from_backing_knit = index_memo[0]
            if from_backing_knit:
                # texts retrieved from the backing knit are always full texts
                method = 'fulltext'
            if method == 'fulltext':
                compression_parent = None
            else:
                compression_parent = parent_ids[0]
            result[version_id] = (index_memo, compression_parent,
                parent_ids, (method, noeol))
        return result
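
    # Hedged shape sketch (editorial, values hypothetical): a line-delta
    # entry built on 'rev-1' with the no-eol flag clear:
    #
    #   result['rev-2'] == ((False, 0, 10, 250),     # index_memo
    #                       'rev-1',                 # compression_parent
    #                       ['rev-1', 'rev-0'],      # parents
    #                       ('line-delta', False))   # record_details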

    def get_method(self, version_id):
        """Return compression method of specified version."""
        options = self.get_options(version_id)
        if 'fulltext' in options:
            return 'fulltext'
        elif 'line-delta' in options:
            return 'line-delta'
                              % (version_id, e.__class__.__name__, str(e)))

    def _split_header(self, line):
        rec = line.split()
        if len(rec) != 4:
            raise KnitCorrupt(self._access,
                              'unexpected number of elements in record header')
        return rec

    def _check_header_version(self, rec, version_id):
        if rec[1] != version_id:
            raise KnitCorrupt(self._access,
                              'unexpected version, wanted %r, got %r'
                              % (version_id, rec[1]))

    def _check_header(self, version_id, line):
        rec = self._split_header(line)
        self._check_header_version(rec, version_id)
        return rec

    def _parse_record_unchecked(self, data):
        # profiling notes:
        # 4168 calls in 2880 217 internal
        # 4168 calls to _parse_record_header in 2121
        # 4168 calls to readlines in 330
        df = GzipFile(mode='rb', fileobj=StringIO(data))
        try:
            record_contents = df.readlines()
        except Exception, e:
            raise KnitCorrupt(self._access,
                              "Corrupt compressed record %r, got %s(%s)"
                              % (data, e.__class__.__name__, str(e)))
        header = record_contents.pop(0)
        rec = self._split_header(header)
        last_line = record_contents.pop()
        if len(record_contents) != int(rec[2]):
            raise KnitCorrupt(self._access,
                              'incorrect number of lines %s != %s'
                              ' for version {%s}'
                              % (len(record_contents), int(rec[2]),
                                 rec[1]))
        if last_line != 'end %s\n' % rec[1]:
            raise KnitCorrupt(self._access,
                              'unexpected version end line %r, wanted %r'
                              % (last_line, rec[1]))
        return rec, record_contents

    def _parse_record(self, version_id, data):
        rec, record_contents = self._parse_record_unchecked(data)
        self._check_header_version(rec, version_id)
        return record_contents, rec[3]
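
    # Hedged format sketch (editorial): the gzipped record layout these
    # parsers expect, hand-built for illustration only.
    #
    #   from cStringIO import StringIO
    #   from gzip import GzipFile
    #   sio = StringIO()
    #   df = GzipFile(mode='wb', fileobj=sio)
    #   df.writelines(['version rev-1 1 %s\n' % ('0' * 40),
    #                  'hello\n',
    #                  'end rev-1\n'])
    #   df.close()
    #   data = sio.getvalue()   # acceptable to _parse_record_unchecked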

    def read_records_iter_raw(self, records):
    It will work for knits with cached annotations, but this is not
    recommended.
    """
    annotator = _KnitAnnotator(knit)
    return iter(annotator.annotate(revision_id))


class _KnitAnnotator(object):
    """Build up the annotations for a text."""

    def __init__(self, knit):
        self._knit = knit

        # Content objects, differs from fulltexts because of how final newlines
        # are treated by knits. The content objects here will always have a
        # final newline.
        self._fulltext_contents = {}

        # Annotated lines of specific revisions
        self._annotated_lines = {}

        # Track the raw data for nodes that we could not process yet.
        # This maps the revision_id of the base to a list of children that will
        # be annotated from it.
        self._pending_children = {}

        # Nodes which cannot be extracted
        self._ghosts = set()

        # Track how many children this node has, so we know if we need to keep
        # it.
        self._annotate_children = {}
        self._compression_children = {}

        self._all_build_details = {}
        # The children => parent revision_id graph
        self._revision_id_graph = {}

        self._heads_provider = None

        self._nodes_to_keep_annotations = set()
        self._generations_until_keep = 100

    def set_generations_until_keep(self, value):
        """Set the number of generations before caching a node.

        Setting this to -1 will cache every merge node, setting this higher
        will cache fewer nodes.
        """
        self._generations_until_keep = value
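
    # Hedged tuning sketch (editorial): per the docstring above, -1 caches
    # annotations for every merge node; larger values cache fewer.
    #
    #   annotator = _KnitAnnotator(knit)
    #   annotator.set_generations_until_keep(-1)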

    def _add_fulltext_content(self, revision_id, content_obj):
        self._fulltext_contents[revision_id] = content_obj
        # TODO: jam 20080305 It might be good to check the sha1digest here
        return content_obj.text()

    def _check_parents(self, child, nodes_to_annotate):
        """Check if all parents have been processed.

        :param child: A tuple of (rev_id, parents, raw_content)
        :param nodes_to_annotate: If child is ready, add it to
            nodes_to_annotate, otherwise put it back in self._pending_children
        """
        for parent_id in child[1]:
            if (parent_id not in self._annotated_lines):
                # This parent is not yet annotated, so defer the child until
                # it is.
                self._pending_children.setdefault(parent_id,
                                                  []).append(child)
                break
        else:
            # This one is ready to be processed
            nodes_to_annotate.append(child)

    def _add_annotation(self, revision_id, fulltext, parent_ids,
                        left_matching_blocks=None):
        """Add an annotation entry.

        All parents should already have been annotated.
        :return: A list of children that now have their parents satisfied.
        """
        a = self._annotated_lines
        annotated_parent_lines = [a[p] for p in parent_ids]
        annotated_lines = list(annotate.reannotate(annotated_parent_lines,
            fulltext, revision_id, left_matching_blocks,
            heads_provider=self._get_heads_provider()))
        self._annotated_lines[revision_id] = annotated_lines
        for p in parent_ids:
            ann_children = self._annotate_children[p]
            ann_children.remove(revision_id)
            if (not ann_children
                and p not in self._nodes_to_keep_annotations):
                del self._annotated_lines[p]
                del self._all_build_details[p]
                if p in self._fulltext_contents:
                    del self._fulltext_contents[p]
        # Now that we've added this one, see if there are any pending
        # deltas to be done, certainly this parent is finished
        nodes_to_annotate = []
        for child in self._pending_children.pop(revision_id, []):
            self._check_parents(child, nodes_to_annotate)
        return nodes_to_annotate

    def _get_build_graph(self, revision_id):
        """Get the graphs for building texts and annotations.

        The data you need for creating a full text may be different than the
        data you need to annotate that text. (At a minimum, you need both
        parents to create an annotation, but only need 1 parent to generate
        the fulltext.)

        :return: A list of (revision_id, index_memo) records, suitable for
            passing to read_records_iter to start reading in the raw data from
            the pack file.
        """
        if revision_id in self._annotated_lines:
            # Nothing to do
            return []
        pending = set([revision_id])
        records = []
        generation = 0
        kept_generation = 0
        while pending:
            # get all pending nodes
            generation += 1
            this_iteration = pending
            build_details = self._knit._index.get_build_details(this_iteration)
            self._all_build_details.update(build_details)
            # new_nodes = self._knit._index._get_entries(this_iteration)
            pending = set()
            for rev_id, details in build_details.iteritems():
                (index_memo, compression_parent, parents,
                 record_details) = details
                self._revision_id_graph[rev_id] = parents
                records.append((rev_id, index_memo))
                # Do we actually need to check _annotated_lines?
                pending.update(p for p in parents
                                 if p not in self._all_build_details)
                if compression_parent:
                    self._compression_children.setdefault(compression_parent,
                        []).append(rev_id)
                if parents:
                    for parent in parents:
                        self._annotate_children.setdefault(parent,
                            []).append(rev_id)
                    num_gens = generation - kept_generation
                    if ((num_gens >= self._generations_until_keep)
                        and len(parents) > 1):
                        kept_generation = generation
                        self._nodes_to_keep_annotations.add(rev_id)

            missing_versions = this_iteration.difference(build_details.keys())
            self._ghosts.update(missing_versions)
            for missing_version in missing_versions:
                # add a key, no parents
                self._revision_id_graph[missing_version] = ()
                pending.discard(missing_version) # don't look for it
        if self._ghosts.intersection(self._compression_children):
            raise KnitCorrupt(self._knit.filename,
                "We cannot have nodes which have a ghost compression parent:\n"
                "ghosts: %r\n"
                "compression children: %r"
                % (self._ghosts, self._compression_children))
        # Cleanout anything that depends on a ghost so that we don't wait for
        # the ghost to show up
        for node in self._ghosts:
            if node in self._annotate_children:
                # We won't be building this node
                del self._annotate_children[node]
        # Generally we will want to read the records in reverse order, because
        # we find the parent nodes after the children
        records.reverse()
        return records

    def _annotate_records(self, records):
        """Build the annotations for the listed records."""
        # We iterate in the order read, rather than a strict order requested.
        # However, process what we can, and put off to the side things that
        # still need parents, cleaning them up when those parents are
        # processed.
        for (rev_id, record,
             digest) in self._knit._data.read_records_iter(records):
            if rev_id in self._annotated_lines:
                continue
            parent_ids = self._revision_id_graph[rev_id]
            parent_ids = [p for p in parent_ids if p not in self._ghosts]
            details = self._all_build_details[rev_id]
            (index_memo, compression_parent, parents,
             record_details) = details
            nodes_to_annotate = []
            # TODO: Remove the punning between compression parents and
            #       parent_ids; we should be able to do this without assuming
            #       the compression parent is the first logical parent.
            if len(parent_ids) == 0:
                # There are no parents for this node, so just add it
                # TODO: This probably needs to be decoupled
                fulltext_content, delta = self._knit.factory.parse_record(
                    rev_id, record, record_details, None)
                fulltext = self._add_fulltext_content(rev_id, fulltext_content)
                nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
                    parent_ids, left_matching_blocks=None))
            else:
                child = (rev_id, parent_ids, record)
                # Check if all the parents are present
                self._check_parents(child, nodes_to_annotate)
            while nodes_to_annotate:
                # Should we use a queue here instead of a stack?
                (rev_id, parent_ids, record) = nodes_to_annotate.pop()
                (index_memo, compression_parent, parents,
                 record_details) = self._all_build_details[rev_id]
                if compression_parent is not None:
                    comp_children = self._compression_children[compression_parent]
                    if rev_id not in comp_children:
                        raise AssertionError("%r not in compression children %r"
                            % (rev_id, comp_children))
                    # If there is only 1 child, it is safe to reuse this
                    # content.
                    reuse_content = (len(comp_children) == 1
                        and compression_parent not in
                            self._nodes_to_keep_annotations)
                    if reuse_content:
                        # Remove it from the cache since it will be changing
                        parent_fulltext_content = self._fulltext_contents.pop(compression_parent)
                        # Make sure to copy the fulltext since it might be
                        # modified.
                        parent_fulltext = list(parent_fulltext_content.text())
                    else:
                        parent_fulltext_content = self._fulltext_contents[compression_parent]
                        parent_fulltext = parent_fulltext_content.text()
                    comp_children.remove(rev_id)
                    fulltext_content, delta = self._knit.factory.parse_record(
                        rev_id, record, record_details,
                        parent_fulltext_content,
                        copy_base_content=(not reuse_content))
                    fulltext = self._add_fulltext_content(rev_id,
                                                          fulltext_content)
                    blocks = KnitContent.get_line_delta_blocks(delta,
                            parent_fulltext, fulltext)
                else:
                    fulltext_content = self._knit.factory.parse_fulltext(
                        record, rev_id)
                    fulltext = self._add_fulltext_content(rev_id,
                        fulltext_content)
                    blocks = None
                nodes_to_annotate.extend(
                    self._add_annotation(rev_id, fulltext, parent_ids,
                                         left_matching_blocks=blocks))

    def _get_heads_provider(self):
        """Create a heads provider for resolving ancestry issues."""
        if self._heads_provider is not None:
            return self._heads_provider
        parent_provider = _mod_graph.DictParentsProvider(
            self._revision_id_graph)
        graph_obj = _mod_graph.Graph(parent_provider)
        head_cache = _mod_graph.FrozenHeadsCache(graph_obj)
        self._heads_provider = head_cache
        return head_cache

    def annotate(self, revision_id):
        """Return the annotated fulltext at the given revision.

        :param revision_id: The revision id for this file
        """
        records = self._get_build_graph(revision_id)
        if revision_id in self._ghosts:
            raise errors.RevisionNotPresent(revision_id, self._knit)
        self._annotate_records(records)
        return self._annotated_lines[revision_id]
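
    # Hedged usage sketch (editorial): annotate resolves the whole build
    # graph first, so one call covers every delta chain the revision needs.
    #
    #   annotator = _KnitAnnotator(knit)
    #   for origin, text in annotator.annotate('rev-3'):
    #       print origin, text,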