63
63
cdef object counter
64
64
cdef object text_cache
66
67
cdef object stream_len
69
cdef int stream_is_consumed
69
def __init__(self, stream, text_cache, stream_len, pb=None):
72
def __init__(self, stream, text_cache, stream_len, ann_keys, pb=None):
71
74
self.stream = stream
72
75
self.stream_len = stream_len
73
76
self.text_cache = text_cache
74
77
self.stream_len = stream_len
78
self.ann_keys = list(ann_keys)
80
self.stream_is_consumed = 0
77
83
def __iter__(self):
86
cdef _get_ann_text(self):
87
if self.ann_key_pos >= len(self.ann_keys):
89
key = self.ann_keys[self.ann_key_pos]
90
self.ann_key_pos = self.ann_key_pos + 1
91
lines = self.text_cache[key]
92
num_lines = len(lines)
93
return key, lines, num_lines
80
95
def __next__(self):
81
record = self.stream.next()
96
if self.stream_is_consumed:
97
return self._get_ann_text()
99
record = self.stream.next()
100
except StopIteration:
101
self.stream_is_consumed = 1
102
return self._get_ann_text()
82
103
if self.pb is not None:
83
104
self.pb.update('extracting', self.counter, self.stream_len)
105
if record.storage_kind == 'absent':
106
raise errors.RevisionNotPresent(record.key, None)
84
107
self.counter = self.counter + 1
85
108
lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
86
109
num_lines = len(lines)
222
245
self._heads_provider = None
223
246
self._ann_tuple_cache = {}
249
def _update_needed_children(self, key, parent_keys):
250
for parent_key in parent_keys:
251
if parent_key in self._num_needed_children:
252
self._num_needed_children[parent_key] += 1
254
self._num_needed_children[parent_key] = 1
225
256
def _get_needed_keys(self, key):
226
graph = _mod_graph.Graph(self._vf)
257
"""Determine the texts we need to get from the backing vf.
259
:return: (vf_keys_needed, ann_keys_needed)
260
vf_keys_needed These are keys that we need to get from the vf
261
ann_keys_needed Texts which we have in self._text_cache but we
262
don't have annotations for. We need to yield these
263
in the proper order so that we can get proper
266
parent_map = self._parent_map
228
267
# We need 1 extra copy of the node we will be looking at when we are
230
269
self._num_needed_children[key] = 1
231
for key, parent_keys in graph.iter_ancestry([key]):
232
if parent_keys is None:
234
parent_map[key] = parent_keys
235
for parent_key in parent_keys:
236
if parent_key in self._num_needed_children:
237
self._num_needed_children[parent_key] += 1
270
vf_keys_needed = set()
271
ann_keys_needed = set()
272
needed_keys = set([key])
276
for key in needed_keys:
277
if key in self._parent_map:
278
# We don't need to lookup this key in the vf
279
if key not in self._text_cache:
280
# Extract this text from the vf
281
vf_keys_needed.add(key)
282
elif key not in self._annotations_cache:
283
# We do need to annotate
284
ann_keys_needed.add(key)
285
next_parent_map[key] = self._parent_map[key]
239
self._num_needed_children[parent_key] = 1
240
self._parent_map.update(parent_map)
241
# _heads_provider does some graph caching, so it is only valid while
242
# self._parent_map hasn't changed
243
self._heads_provider = None
244
keys = parent_map.keys()
287
parent_lookup.append(key)
288
vf_keys_needed.add(key)
290
next_parent_map.update(self._vf.get_parent_map(parent_lookup))
291
for key, parent_keys in next_parent_map.iteritems():
292
self._update_needed_children(key, parent_keys)
293
for key in parent_keys:
294
if key not in parent_map:
296
parent_map.update(next_parent_map)
297
# _heads_provider does some graph caching, so it is only valid while
298
# self._parent_map hasn't changed
299
self._heads_provider = None
300
return vf_keys_needed, ann_keys_needed
247
302
def _get_needed_texts(self, key, pb=None):
248
303
"""Get the texts we need to properly annotate key.
253
308
matcher object we are using. Currently it is always 'lines' but
254
309
future improvements may change this to a simple text string.
256
keys = self._get_needed_keys(key)
311
keys, ann_keys = self._get_needed_keys(key)
257
312
if pb is not None:
258
313
pb.update('getting stream', 0, len(keys))
259
314
stream = self._vf.get_record_stream(keys, 'topological', True)
260
iterator = _NeededTextIterator(stream, self._text_cache, len(keys), pb)
315
iterator = _NeededTextIterator(stream, self._text_cache, len(keys),
263
319
def _get_parent_annotations_and_matches(self, key, text, parent_key):
365
421
def add_special_text(self, key, parent_keys, text):
    """Add a specific text to the graph.

    :param key: Key under which the parents and the text are recorded.
    :param parent_keys: Stored unchanged as self._parent_map[key].
    :param text: Text content; it is split with osutils.split_lines and
        the resulting lines are stored in self._text_cache[key].
    """
    self._parent_map[key] = parent_keys
    text_lines = osutils.split_lines(text)
    self._text_cache[key] = text_lines
    # The heads provider caches graph data and is only valid while
    # self._parent_map is unchanged, so drop it after touching the map.
    self._heads_provider = None
368
427
def annotate(self, key):
369
428
"""Return annotated fulltext for the given key."""