3305
3305
return iter(annotator.annotate(revision_id))
3308
class _KnitAnnotator(object):
3308
class _KnitAnnotator(annotate.Annotator):
3309
3309
"""Build up the annotations for a text."""
3311
def __init__(self, knit):
3314
# Content objects, differs from fulltexts because of how final newlines
3315
# are treated by knits. the content objects here will always have a
3317
self._fulltext_contents = {}
3319
# Annotated lines of specific revisions
3320
self._annotated_lines = {}
3322
# Track the raw data for nodes that we could not process yet.
3323
# This maps the revision_id of the base to a list of children that will
3324
# annotated from it.
3325
self._pending_children = {}
3327
# Nodes which cannot be extracted
3328
self._ghosts = set()
3330
# Track how many children this node has, so we know if we need to keep
3332
self._annotate_children = {}
3333
self._compression_children = {}
3311
def __init__(self, vf):
3312
annotate.Annotator.__init__(self, vf)
3314
# TODO: handle Nodes which cannot be extracted
3315
# self._ghosts = set()
3317
# Map from revision_id => left_matching_blocks, should be 'use once'
3318
self._left_matching_blocks = {}
3320
# KnitContent objects
3321
self._content_objects = {}
3322
# The number of children that depend on this fulltext content object
3323
self._num_compression_children = {}
3335
3325
self._all_build_details = {}
3336
# The children => parent revision_id graph
3337
self._revision_id_graph = {}
3339
self._heads_provider = None
3341
self._nodes_to_keep_annotations = set()
3342
self._generations_until_keep = 100
3344
def set_generations_until_keep(self, value):
3345
"""Set the number of generations before caching a node.
3347
Setting this to -1 will cache every merge node, setting this higher
3348
will cache fewer nodes.
3350
self._generations_until_keep = value
3352
def _add_fulltext_content(self, revision_id, content_obj):
3353
self._fulltext_contents[revision_id] = content_obj
3354
# TODO: jam 20080305 It might be good to check the sha1digest here
3355
return content_obj.text()
3357
def _check_parents(self, child, nodes_to_annotate):
3358
"""Check if all parents have been processed.
3360
:param child: A tuple of (rev_id, parents, raw_content)
3361
:param nodes_to_annotate: If child is ready, add it to
3362
nodes_to_annotate, otherwise put it back in self._pending_children
3364
for parent_id in child[1]:
3365
if (parent_id not in self._annotated_lines):
3366
# This parent is present, but another parent is missing
3367
self._pending_children.setdefault(parent_id,
3371
# This one is ready to be processed
3372
nodes_to_annotate.append(child)
3374
def _add_annotation(self, revision_id, fulltext, parent_ids,
3375
left_matching_blocks=None):
3376
"""Add an annotation entry.
3378
All parents should already have been annotated.
3379
:return: A list of children that now have their parents satisfied.
3381
a = self._annotated_lines
3382
annotated_parent_lines = [a[p] for p in parent_ids]
3383
annotated_lines = list(annotate.reannotate(annotated_parent_lines,
3384
fulltext, revision_id, left_matching_blocks,
3385
heads_provider=self._get_heads_provider()))
3386
self._annotated_lines[revision_id] = annotated_lines
3387
for p in parent_ids:
3388
ann_children = self._annotate_children[p]
3389
ann_children.remove(revision_id)
3390
if (not ann_children
3391
and p not in self._nodes_to_keep_annotations):
3392
del self._annotated_lines[p]
3393
del self._all_build_details[p]
3394
if p in self._fulltext_contents:
3395
del self._fulltext_contents[p]
3396
# Now that we've added this one, see if there are any pending
3397
# deltas to be done, certainly this parent is finished
3398
nodes_to_annotate = []
3399
for child in self._pending_children.pop(revision_id, []):
3400
self._check_parents(child, nodes_to_annotate)
3401
return nodes_to_annotate
3403
3327
def _get_build_graph(self, key):
3404
3328
"""Get the graphs for building texts and annotations.
3423
3344
# get all pending nodes
3424
3345
generation += 1
3425
3346
this_iteration = pending
3426
build_details = self._knit._index.get_build_details(this_iteration)
3347
build_details = self._vf._index.get_build_details(this_iteration)
3427
3348
self._all_build_details.update(build_details)
3428
# new_nodes = self._knit._index._get_entries(this_iteration)
3349
# new_nodes = self._vf._index._get_entries(this_iteration)
3429
3350
pending = set()
3430
3351
for key, details in build_details.iteritems():
3431
(index_memo, compression_parent, parents,
3352
(index_memo, compression_parent, parent_keys,
3432
3353
record_details) = details
3433
self._revision_id_graph[key] = parents
3354
self._parent_map[key] = parent_keys
3434
3355
records.append((key, index_memo))
3435
3356
# Do we actually need to check _annotated_lines?
3436
pending.update(p for p in parents
3357
pending.update(p for p in parent_keys
3437
3358
if p not in self._all_build_details)
3438
if compression_parent:
3439
self._compression_children.setdefault(compression_parent,
3442
for parent in parents:
3443
self._annotate_children.setdefault(parent,
3445
num_gens = generation - kept_generation
3446
if ((num_gens >= self._generations_until_keep)
3447
and len(parents) > 1):
3448
kept_generation = generation
3449
self._nodes_to_keep_annotations.add(key)
3360
for parent_key in parent_keys:
3361
if parent_key in self._num_needed_children:
3362
self._num_needed_children[parent_key] += 1
3364
self._num_needed_children[parent_key] = 1
3451
3366
missing_versions = this_iteration.difference(build_details.keys())
3452
self._ghosts.update(missing_versions)
3453
for missing_version in missing_versions:
3454
# add a key, no parents
3455
self._revision_id_graph[missing_version] = ()
3456
pending.discard(missing_version) # don't look for it
3457
if self._ghosts.intersection(self._compression_children):
3459
"We cannot have nodes which have a ghost compression parent:\n"
3461
"compression children: %r"
3462
% (self._ghosts, self._compression_children))
3463
# Cleanout anything that depends on a ghost so that we don't wait for
3464
# the ghost to show up
3465
for node in self._ghosts:
3466
if node in self._annotate_children:
3467
# We won't be building this node
3468
del self._annotate_children[node]
3367
if missing_versions:
3368
raise ValueError('i dont handle ghosts')
3469
3369
# Generally we will want to read the records in reverse order, because
3470
3370
# we find the parent nodes after the children
3471
3371
records.reverse()
3474
def _annotate_records(self, records):
3475
"""Build the annotations for the listed records."""
3374
def _get_needed_texts(self, key, pb=None):
3375
if len(self._vf._fallback_vfs) > 0:
3376
# If we have fallbacks, go to the generic path
3377
for v in super(_KnitAnnotator, self)._get_needed_texts(key, pb=pb):
3381
records = self._get_build_graph(key)
3382
for key, text, num_lines in self._extract_texts(records):
3383
yield key, text, num_lines
3384
except errors.RetryWithNewPacks, e:
3385
self._vf._access.reload_or_raise(e)
3386
# The cached build_details are no longer valid
3387
self._all_build_details.clear()
3389
def _extract_texts(self, records):
3390
"""Extract the various texts needed based on records"""
3476
3391
# We iterate in the order read, rather than a strict order requested
3477
3392
# However, process what we can, and put off to the side things that
3478
3393
# still need parents, cleaning them up when those parents are
3480
for (rev_id, record,
3481
digest) in self._knit._read_records_iter(records):
3482
if rev_id in self._annotated_lines:
3484
parent_ids = self._revision_id_graph[rev_id]
3485
parent_ids = [p for p in parent_ids if p not in self._ghosts]
3486
details = self._all_build_details[rev_id]
3487
(index_memo, compression_parent, parents,
3395
# Children that we want to annotate as soon as we get the parent text
3396
# Map from parent_key => [child_key]
3397
pending_annotation = {}
3398
# Children that are missing their compression parent
3401
digest) in self._vf._read_records_iter(records):
3402
# parent_ids = [p for p in parent_ids if p not in self._ghosts]
3403
details = self._all_build_details[keys]
3404
(index_memo, compression_parent, parent_keys,
3488
3405
record_details) = details
3489
nodes_to_annotate = []
3406
assert parent_keys == self._parent_map[key]
3490
3407
# TODO: Remove the punning between compression parents, and
3491
3408
# parent_ids, we should be able to do this without assuming
3492
3409
# the build order
3493
if len(parent_ids) == 0:
3494
# There are no parents for this node, so just add it
3495
# TODO: This probably needs to be decoupled
3496
fulltext_content, delta = self._knit._factory.parse_record(
3410
if compression_parent:
3411
# This is a delta, do we have the parent fulltext?
3412
if compression_parent not in self._fulltext_contents:
3413
# Waiting for the parent
3414
pending_deltas.setdefault(compression_parent, []).append(
3415
(key, parent_keys, record, record_details))
3417
# We can build this as a fulltext
3418
parent_content = self._content_objects[compression_parent]
3419
content, delta = self._vf._factory.parse_record(
3420
key, record, record_details,
3422
# TODO: track when we can copy the base
3423
copy_base_content=True)
3425
# No compression parent means we have a fulltext
3426
content, delta = self._vf._factory.parse_record(
3497
3427
rev_id, record, record_details, None)
3498
fulltext = self._add_fulltext_content(rev_id, fulltext_content)
3499
nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
3500
parent_ids, left_matching_blocks=None))
3502
child = (rev_id, parent_ids, record)
3503
# Check if all the parents are present
3504
self._check_parents(child, nodes_to_annotate)
3505
while nodes_to_annotate:
3506
# Should we use a queue here instead of a stack?
3507
(rev_id, parent_ids, record) = nodes_to_annotate.pop()
3508
(index_memo, compression_parent, parents,
3509
record_details) = self._all_build_details[rev_id]
3511
if compression_parent is not None:
3512
comp_children = self._compression_children[compression_parent]
3513
if rev_id not in comp_children:
3514
raise AssertionError("%r not in compression children %r"
3515
% (rev_id, comp_children))
3516
# If there is only 1 child, it is safe to reuse this
3518
reuse_content = (len(comp_children) == 1
3519
and compression_parent not in
3520
self._nodes_to_keep_annotations)
3522
# Remove it from the cache since it will be changing
3523
parent_fulltext_content = self._fulltext_contents.pop(compression_parent)
3524
# Make sure to copy the fulltext since it might be
3526
parent_fulltext = list(parent_fulltext_content.text())
3428
self._content_objects[key] = content
3429
to_process = [(key, parent_keys, content)]
3430
# Check for compression children that we can expand
3431
if key in pending_deltas:
3432
children = pending_deltas.pop(key)
3433
for child_key, child_parent_keys, child_record, child_details in children:
3434
child_content, child_delta = self._vf._factory.parse_record(
3435
child_key, child_record, child_details,
3437
# TODO: track when we can copy the base
3438
copy_base_content=True)
3439
self._content_objects[child_key] = child_content
3440
to_process.append((child_key, child_parent_keys, child_content))
3444
for key, parent_keys, content in cur:
3445
# Check if all parents are present
3446
for parent_key in parent_keys:
3447
if parent_key not in self._annotations_cache:
3448
# still waiting on a parent text
3449
pending_annotation.setdefault(parent_key,
3450
[]).append((key, parent_keys, content))
3528
parent_fulltext_content = self._fulltext_contents[compression_parent]
3529
parent_fulltext = parent_fulltext_content.text()
3530
comp_children.remove(rev_id)
3531
fulltext_content, delta = self._knit._factory.parse_record(
3532
rev_id, record, record_details,
3533
parent_fulltext_content,
3534
copy_base_content=(not reuse_content))
3535
fulltext = self._add_fulltext_content(rev_id,
3537
if compression_parent == parent_ids[0]:
3538
# the compression_parent is the left parent, so we can
3540
blocks = KnitContent.get_line_delta_blocks(delta,
3541
parent_fulltext, fulltext)
3543
fulltext_content = self._knit._factory.parse_fulltext(
3545
fulltext = self._add_fulltext_content(rev_id,
3547
nodes_to_annotate.extend(
3548
self._add_annotation(rev_id, fulltext, parent_ids,
3549
left_matching_blocks=blocks))
3551
def _get_heads_provider(self):
3552
"""Create a heads provider for resolving ancestry issues."""
3553
if self._heads_provider is not None:
3554
return self._heads_provider
3555
self._heads_provider = _mod_graph.KnownGraph(self._revision_id_graph)
3556
return self._heads_provider
3453
# All parents present
3454
lines = content.text()
3455
yield key, lines, len(lines)
3456
if key in pending_annotation:
3457
to_process.extend(pending_annotation.pop(key))
3558
3459
def annotate(self, key):
3559
3460
"""Return the annotated fulltext at the given key.
3561
3462
:param key: The key to annotate.
3563
if len(self._knit._fallback_vfs) > 0:
3464
if len(self._vf._fallback_vfs) > 0:
3564
3465
# stacked knits can't use the fast path at present.
3565
3466
return self._simple_annotate(key)
3568
3469
records = self._get_build_graph(key)
3569
3470
if key in self._ghosts:
3570
raise errors.RevisionNotPresent(key, self._knit)
3471
raise errors.RevisionNotPresent(key, self._vf)
3571
3472
self._annotate_records(records)
3572
3473
return self._annotated_lines[key]
3573
3474
except errors.RetryWithNewPacks, e:
3574
self._knit._access.reload_or_raise(e)
3475
self._vf._access.reload_or_raise(e)
3575
3476
# The cached build_details are no longer valid
3576
3477
self._all_build_details.clear()
3578
def _simple_annotate(self, key):
3579
"""Return annotated fulltext, rediffing from the full texts.
3581
This is slow but makes no assumptions about the repository
3582
being able to produce line deltas.
3584
# TODO: this code generates a parent maps of present ancestors; it
3585
# could be split out into a separate method
3586
# -- mbp and robertc 20080704
3587
graph = _mod_graph.Graph(self._knit)
3588
parent_map = dict((k, v) for k, v in graph.iter_ancestry([key])
3591
raise errors.RevisionNotPresent(key, self)
3592
keys = parent_map.keys()
3593
heads_provider = _mod_graph.KnownGraph(parent_map)
3595
reannotate = annotate.reannotate
3596
for record in self._knit.get_record_stream(keys, 'topological', True):
3598
fulltext = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
3599
parents = parent_map[key]
3600
if parents is not None:
3601
parent_lines = [parent_cache[parent] for parent in parent_map[key]]
3604
parent_cache[key] = list(
3605
reannotate(parent_lines, fulltext, key, None, heads_provider))
3607
return parent_cache[key]
3609
raise errors.RevisionNotPresent(key, self._knit)
3613
3481
from bzrlib._knit_load_data_c import _load_data_c as _load_data