55
55
PyObject *, PyObject *, int opid)
58
from bzrlib import _annotator_py
58
from bzrlib import errors, graph as _mod_graph, osutils, patiencediff, ui
61
cdef class _NeededTextIterator:
64
cdef object text_cache
67
cdef object stream_len
69
cdef int stream_is_consumed
72
def __init__(self, stream, text_cache, stream_len, ann_keys, pb=None):
75
self.stream_len = stream_len
76
self.text_cache = text_cache
77
self.stream_len = stream_len
78
self.ann_keys = list(ann_keys)
80
self.stream_is_consumed = 0
86
cdef _get_ann_text(self):
87
if self.ann_key_pos >= len(self.ann_keys):
89
key = self.ann_keys[self.ann_key_pos]
90
self.ann_key_pos = self.ann_key_pos + 1
91
lines = self.text_cache[key]
92
num_lines = len(lines)
93
return key, lines, num_lines
96
if self.stream_is_consumed:
97
return self._get_ann_text()
99
record = self.stream.next()
100
except StopIteration:
101
self.stream_is_consumed = 1
102
return self._get_ann_text()
103
if self.pb is not None:
104
self.pb.update('extracting', self.counter, self.stream_len)
105
if record.storage_kind == 'absent':
106
raise errors.RevisionNotPresent(record.key, None)
107
self.counter = self.counter + 1
108
lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
109
num_lines = len(lines)
110
self.text_cache[record.key] = lines
111
return record.key, lines, num_lines
61
114
cdef int _check_annotations_are_lists(annotations,
187
229
Py_INCREF_ptr(par_temp[idx])
188
230
Py_DECREF_ptr(ann_temp[idx])
189
231
ann_temp[idx] = par_temp[idx]
193
cdef int _merge_annotations(this_annotation, annotations, parent_annotations,
194
matching_blocks, ann_cache) except -1:
195
cdef Py_ssize_t parent_idx, ann_idx, lines_idx, match_len, idx
197
cdef PyObject *ann_temp, *par_temp
199
_check_annotations_are_lists(annotations, parent_annotations)
203
for parent_idx, lines_idx, match_len in matching_blocks:
204
_check_match_ranges(parent_annotations, annotations,
205
parent_idx, lines_idx, match_len)
206
# For lines which match this parent, we will now resolve whether
207
# this parent wins over the current annotation
208
for idx from 0 <= idx < match_len:
209
ann_idx = lines_idx + idx
210
ann_temp = PyList_GET_ITEM(annotations, ann_idx)
211
par_temp = PyList_GET_ITEM(parent_annotations, parent_idx + idx)
212
if (ann_temp == par_temp):
213
# This is parent, do nothing
214
# Pointer comparison is fine here. Value comparison would
215
# be ok, but it will be handled in the final if clause by
216
# merging the two tuples into the same tuple
217
# Avoiding the Py_INCREF and function call to
218
# PyObject_RichCompareBool using pointer comparison drops
219
# timing from 215ms => 125ms
221
par_ann = <object>par_temp
222
ann = <object>ann_temp
223
if (ann is this_annotation):
224
# Originally claimed 'this', but it was really in this
227
PyList_SetItem(annotations, ann_idx, par_ann)
229
# Resolve the fact that both sides have a different value for
231
if (ann is last_ann and par_ann is last_parent):
233
PyList_SetItem(annotations, ann_idx, last_res)
235
new_ann = _combine_annotations(ann, par_ann, ann_cache)
237
PyList_SetItem(annotations, ann_idx, new_ann)
239
last_parent = par_ann
244
class Annotator(_annotator_py.Annotator):
245
235
"""Class that drives performing annotations."""
247
def _update_from_first_parent(self, key, annotations, lines, parent_key):
237
def __init__(self, vf):
238
"""Create a new Annotator from a VersionedFile."""
240
self._parent_map = {}
241
self._text_cache = {}
242
# Map from key => number of nexts that will be built from this key
243
self._num_needed_children = {}
244
self._annotations_cache = {}
245
self._heads_provider = None
246
self._ann_tuple_cache = {}
249
def _update_needed_children(self, key, parent_keys):
250
for parent_key in parent_keys:
251
if parent_key in self._num_needed_children:
252
self._num_needed_children[parent_key] += 1
254
self._num_needed_children[parent_key] = 1
256
def _get_needed_keys(self, key):
257
"""Determine the texts we need to get from the backing vf.
259
:return: (vf_keys_needed, ann_keys_needed)
260
vf_keys_needed These are keys that we need to get from the vf
261
ann_keys_needed Texts which we have in self._text_cache but we
262
don't have annotations for. We need to yield these
263
in the proper order so that we can get proper
266
parent_map = self._parent_map
267
# We need 1 extra copy of the node we will be looking at when we are
269
self._num_needed_children[key] = 1
270
vf_keys_needed = set()
271
ann_keys_needed = set()
272
needed_keys = set([key])
276
for key in needed_keys:
277
if key in self._parent_map:
278
# We don't need to lookup this key in the vf
279
if key not in self._text_cache:
280
# Extract this text from the vf
281
vf_keys_needed.add(key)
282
elif key not in self._annotations_cache:
283
# We do need to annotate
284
ann_keys_needed.add(key)
285
next_parent_map[key] = self._parent_map[key]
287
parent_lookup.append(key)
288
vf_keys_needed.add(key)
290
next_parent_map.update(self._vf.get_parent_map(parent_lookup))
291
for key, parent_keys in next_parent_map.iteritems():
292
if parent_keys is None:
294
next_parent_map[key] = ()
295
self._update_needed_children(key, parent_keys)
296
for key in parent_keys:
297
if key not in parent_map:
299
parent_map.update(next_parent_map)
300
# _heads_provider does some graph caching, so it is only valid while
301
# self._parent_map hasn't changed
302
self._heads_provider = None
303
return vf_keys_needed, ann_keys_needed
305
def _get_needed_texts(self, key, pb=None):
306
"""Get the texts we need to properly annotate key.
308
:param key: A Key that is present in self._vf
309
:return: Yield (this_key, text, num_lines)
310
'text' is an opaque object that just has to work with whatever
311
matcher object we are using. Currently it is always 'lines' but
312
future improvements may change this to a simple text string.
314
keys, ann_keys = self._get_needed_keys(key)
316
pb.update('getting stream', 0, len(keys))
317
stream = self._vf.get_record_stream(keys, 'topological', True)
318
iterator = _NeededTextIterator(stream, self._text_cache, len(keys),
322
def _get_parent_annotations_and_matches(self, key, text, parent_key):
323
"""Get the list of annotations for the parent, and the matching lines.
325
:param text: The opaque value given by _get_needed_texts
326
:param parent_key: The key for the parent text
327
:return: (parent_annotations, matching_blocks)
328
parent_annotations is a list as long as the number of lines in
330
matching_blocks is a list of (parent_idx, text_idx, len) tuples
331
indicating which lines match between the two texts
333
parent_lines = self._text_cache[parent_key]
334
parent_annotations = self._annotations_cache[parent_key]
335
# PatienceSequenceMatcher should probably be part of Policy
336
matcher = patiencediff.PatienceSequenceMatcher(None,
338
matching_blocks = matcher.get_matching_blocks()
339
return parent_annotations, matching_blocks
341
def _update_from_one_parent(self, key, annotations, lines, parent_key):
248
342
"""Reannotate this text relative to its first parent."""
250
matching_blocks) = self._get_parent_annotations_and_matches(
251
key, lines, parent_key)
343
parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
344
key, lines, parent_key)
253
346
_apply_parent_annotations(annotations, parent_annotations,
256
349
def _update_from_other_parents(self, key, annotations, lines,
257
350
this_annotation, parent_key):
258
351
"""Reannotate this text relative to a second (or more) parent."""
260
matching_blocks) = self._get_parent_annotations_and_matches(
261
key, lines, parent_key)
262
_merge_annotations(this_annotation, annotations, parent_annotations,
263
matching_blocks, self._ann_tuple_cache)
352
cdef Py_ssize_t parent_idx, ann_idx, lines_idx, match_len, idx
354
cdef PyObject *ann_temp, *par_temp
355
parent_annotations, matching_blocks = self._get_parent_annotations_and_matches(
356
key, lines, parent_key)
357
_check_annotations_are_lists(annotations, parent_annotations)
361
cache = self._ann_tuple_cache
362
for parent_idx, lines_idx, match_len in matching_blocks:
363
_check_match_ranges(parent_annotations, annotations,
364
parent_idx, lines_idx, match_len)
365
# For lines which match this parent, we will now resolve whether
366
# this parent wins over the current annotation
367
for idx from 0 <= idx < match_len:
368
ann_idx = lines_idx + idx
369
ann_temp = PyList_GET_ITEM(annotations, ann_idx)
370
par_temp = PyList_GET_ITEM(parent_annotations, parent_idx + idx)
371
if (ann_temp == par_temp):
372
# This is parent, do nothing
373
# Pointer comparison is fine here. Value comparison would
374
# be ok, but it will be handled in the final if clause by
375
# merging the two tuples into the same tuple
376
# Avoiding the Py_INCREF by using pointer comparison drops
377
# timing from 215ms => 125ms
379
par_ann = <object>par_temp
380
ann = <object>ann_temp
381
if (ann is this_annotation):
382
# Originally claimed 'this', but it was really in this
385
PyList_SetItem(annotations, ann_idx, par_ann)
387
# Resolve the fact that both sides have a different value for
389
if (ann is last_ann and par_ann is last_parent):
391
PyList_SetItem(annotations, ann_idx, last_res)
393
new_ann = _combine_annotations(ann, par_ann, cache)
395
PyList_SetItem(annotations, ann_idx, new_ann)
397
last_parent = par_ann
400
def _record_annotation(self, key, parent_keys, annotations):
401
self._annotations_cache[key] = annotations
402
for parent_key in parent_keys:
403
num = self._num_needed_children[parent_key]
406
del self._text_cache[parent_key]
407
del self._annotations_cache[parent_key]
408
# Do we want to clean up _num_needed_children at this point as
410
self._num_needed_children[parent_key] = num
412
def _annotate_one(self, key, text, num_lines):
413
this_annotation = (key,)
414
# Note: annotations will be mutated by calls to _update_from*
415
annotations = [this_annotation] * num_lines
416
parent_keys = self._parent_map[key]
418
self._update_from_one_parent(key, annotations, text, parent_keys[0])
419
for parent in parent_keys[1:]:
420
self._update_from_other_parents(key, annotations, text,
421
this_annotation, parent)
422
self._record_annotation(key, parent_keys, annotations)
424
def add_special_text(self, key, parent_keys, text):
425
"""Add a specific text to the graph."""
426
self._parent_map[key] = parent_keys
427
self._text_cache[key] = osutils.split_lines(text)
428
self._heads_provider = None
430
def annotate(self, key):
431
"""Return annotated fulltext for the given key."""
432
pb = ui.ui_factory.nested_progress_bar()
434
for text_key, text, num_lines in self._get_needed_texts(key, pb=pb):
435
self._annotate_one(text_key, text, num_lines)
439
annotations = self._annotations_cache[key]
441
raise errors.RevisionNotPresent(key, self._vf)
442
return annotations, self._text_cache[key]
444
def _get_heads_provider(self):
445
if self._heads_provider is None:
446
self._heads_provider = _mod_graph.KnownGraph(self._parent_map)
447
return self._heads_provider
265
449
def annotate_flat(self, key):
266
450
"""Determine the single-best-revision to source for each line.