/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

  • Committer: John Arbash Meinel
  • Date: 2009-06-18 22:07:42 UTC
  • mto: This revision was merged to the branch mainline in revision 4522.
  • Revision ID: john@arbash-meinel.com-20090618220742-10czy03gxzh337fl
Initial attempt at refactoring _KnitAnnotator to derive from Annotator.

So far it seems possible, by just overriding _get_needed_texts.
However, we'll also want to override the get_matching_blocks stuff based on the delta.

Show diffs side-by-side

added added

removed removed

Lines of Context:
3305
3305
    return iter(annotator.annotate(revision_id))
3306
3306
 
3307
3307
 
3308
 
class _KnitAnnotator(object):
 
3308
class _KnitAnnotator(annotate.Annotator):
3309
3309
    """Build up the annotations for a text."""
3310
3310
 
3311
 
    def __init__(self, knit):
3312
 
        self._knit = knit
3313
 
 
3314
 
        # Content objects, differs from fulltexts because of how final newlines
3315
 
        # are treated by knits. the content objects here will always have a
3316
 
        # final newline
3317
 
        self._fulltext_contents = {}
3318
 
 
3319
 
        # Annotated lines of specific revisions
3320
 
        self._annotated_lines = {}
3321
 
 
3322
 
        # Track the raw data for nodes that we could not process yet.
3323
 
        # This maps the revision_id of the base to a list of children that will
3324
 
        # annotated from it.
3325
 
        self._pending_children = {}
3326
 
 
3327
 
        # Nodes which cannot be extracted
3328
 
        self._ghosts = set()
3329
 
 
3330
 
        # Track how many children this node has, so we know if we need to keep
3331
 
        # it
3332
 
        self._annotate_children = {}
3333
 
        self._compression_children = {}
 
3311
    def __init__(self, vf):
 
3312
        annotate.Annotator.__init__(self, vf)
 
3313
 
 
3314
        # TODO: handle Nodes which cannot be extracted
 
3315
        # self._ghosts = set()
 
3316
 
 
3317
        # Map from revision_id => left_matching_blocks, should be 'use once'
 
3318
        self._left_matching_blocks = {}
 
3319
 
 
3320
        # KnitContent objects
 
3321
        self._content_objects = {}
 
3322
        # The number of children that depend on this fulltext content object
 
3323
        self._num_compression_children = {}
3334
3324
 
3335
3325
        self._all_build_details = {}
3336
 
        # The children => parent revision_id graph
3337
 
        self._revision_id_graph = {}
3338
 
 
3339
 
        self._heads_provider = None
3340
 
 
3341
 
        self._nodes_to_keep_annotations = set()
3342
 
        self._generations_until_keep = 100
3343
 
 
3344
 
    def set_generations_until_keep(self, value):
3345
 
        """Set the number of generations before caching a node.
3346
 
 
3347
 
        Setting this to -1 will cache every merge node, setting this higher
3348
 
        will cache fewer nodes.
3349
 
        """
3350
 
        self._generations_until_keep = value
3351
 
 
3352
 
    def _add_fulltext_content(self, revision_id, content_obj):
3353
 
        self._fulltext_contents[revision_id] = content_obj
3354
 
        # TODO: jam 20080305 It might be good to check the sha1digest here
3355
 
        return content_obj.text()
3356
 
 
3357
 
    def _check_parents(self, child, nodes_to_annotate):
3358
 
        """Check if all parents have been processed.
3359
 
 
3360
 
        :param child: A tuple of (rev_id, parents, raw_content)
3361
 
        :param nodes_to_annotate: If child is ready, add it to
3362
 
            nodes_to_annotate, otherwise put it back in self._pending_children
3363
 
        """
3364
 
        for parent_id in child[1]:
3365
 
            if (parent_id not in self._annotated_lines):
3366
 
                # This parent is present, but another parent is missing
3367
 
                self._pending_children.setdefault(parent_id,
3368
 
                                                  []).append(child)
3369
 
                break
3370
 
        else:
3371
 
            # This one is ready to be processed
3372
 
            nodes_to_annotate.append(child)
3373
 
 
3374
 
    def _add_annotation(self, revision_id, fulltext, parent_ids,
3375
 
                        left_matching_blocks=None):
3376
 
        """Add an annotation entry.
3377
 
 
3378
 
        All parents should already have been annotated.
3379
 
        :return: A list of children that now have their parents satisfied.
3380
 
        """
3381
 
        a = self._annotated_lines
3382
 
        annotated_parent_lines = [a[p] for p in parent_ids]
3383
 
        annotated_lines = list(annotate.reannotate(annotated_parent_lines,
3384
 
            fulltext, revision_id, left_matching_blocks,
3385
 
            heads_provider=self._get_heads_provider()))
3386
 
        self._annotated_lines[revision_id] = annotated_lines
3387
 
        for p in parent_ids:
3388
 
            ann_children = self._annotate_children[p]
3389
 
            ann_children.remove(revision_id)
3390
 
            if (not ann_children
3391
 
                and p not in self._nodes_to_keep_annotations):
3392
 
                del self._annotated_lines[p]
3393
 
                del self._all_build_details[p]
3394
 
                if p in self._fulltext_contents:
3395
 
                    del self._fulltext_contents[p]
3396
 
        # Now that we've added this one, see if there are any pending
3397
 
        # deltas to be done, certainly this parent is finished
3398
 
        nodes_to_annotate = []
3399
 
        for child in self._pending_children.pop(revision_id, []):
3400
 
            self._check_parents(child, nodes_to_annotate)
3401
 
        return nodes_to_annotate
3402
3326
 
3403
3327
    def _get_build_graph(self, key):
3404
3328
        """Get the graphs for building texts and annotations.
3412
3336
            passing to read_records_iter to start reading in the raw data from
3413
3337
            the pack file.
3414
3338
        """
3415
 
        if key in self._annotated_lines:
3416
 
            # Nothing to do
3417
 
            return []
3418
3339
        pending = set([key])
3419
3340
        records = []
3420
3341
        generation = 0
3423
3344
            # get all pending nodes
3424
3345
            generation += 1
3425
3346
            this_iteration = pending
3426
 
            build_details = self._knit._index.get_build_details(this_iteration)
 
3347
            build_details = self._vf._index.get_build_details(this_iteration)
3427
3348
            self._all_build_details.update(build_details)
3428
 
            # new_nodes = self._knit._index._get_entries(this_iteration)
 
3349
            # new_nodes = self._vf._index._get_entries(this_iteration)
3429
3350
            pending = set()
3430
3351
            for key, details in build_details.iteritems():
3431
 
                (index_memo, compression_parent, parents,
 
3352
                (index_memo, compression_parent, parent_keys,
3432
3353
                 record_details) = details
3433
 
                self._revision_id_graph[key] = parents
 
3354
                self._parent_map[key] = parent_keys
3434
3355
                records.append((key, index_memo))
3435
3356
                # Do we actually need to check _annotated_lines?
3436
 
                pending.update(p for p in parents
 
3357
                pending.update(p for p in parent_keys
3437
3358
                                 if p not in self._all_build_details)
3438
 
                if compression_parent:
3439
 
                    self._compression_children.setdefault(compression_parent,
3440
 
                        []).append(key)
3441
 
                if parents:
3442
 
                    for parent in parents:
3443
 
                        self._annotate_children.setdefault(parent,
3444
 
                            []).append(key)
3445
 
                    num_gens = generation - kept_generation
3446
 
                    if ((num_gens >= self._generations_until_keep)
3447
 
                        and len(parents) > 1):
3448
 
                        kept_generation = generation
3449
 
                        self._nodes_to_keep_annotations.add(key)
 
3359
                if parent_keys:
 
3360
                    for parent_key in parent_keys:
 
3361
                        if parent_key in self._num_needed_children:
 
3362
                            self._num_needed_children[parent_key] += 1
 
3363
                        else:
 
3364
                            self._num_needed_children[parent_key] = 1
3450
3365
 
3451
3366
            missing_versions = this_iteration.difference(build_details.keys())
3452
 
            self._ghosts.update(missing_versions)
3453
 
            for missing_version in missing_versions:
3454
 
                # add a key, no parents
3455
 
                self._revision_id_graph[missing_version] = ()
3456
 
                pending.discard(missing_version) # don't look for it
3457
 
        if self._ghosts.intersection(self._compression_children):
3458
 
            raise KnitCorrupt(
3459
 
                "We cannot have nodes which have a ghost compression parent:\n"
3460
 
                "ghosts: %r\n"
3461
 
                "compression children: %r"
3462
 
                % (self._ghosts, self._compression_children))
3463
 
        # Cleanout anything that depends on a ghost so that we don't wait for
3464
 
        # the ghost to show up
3465
 
        for node in self._ghosts:
3466
 
            if node in self._annotate_children:
3467
 
                # We won't be building this node
3468
 
                del self._annotate_children[node]
 
3367
            if missing_versions:
 
3368
                raise ValueError('i dont handle ghosts')
3469
3369
        # Generally we will want to read the records in reverse order, because
3470
3370
        # we find the parent nodes after the children
3471
3371
        records.reverse()
3472
3372
        return records
3473
3373
 
3474
 
    def _annotate_records(self, records):
3475
 
        """Build the annotations for the listed records."""
 
3374
    def _get_needed_texts(self, key, pb=None):
 
3375
        if len(self._vf._fallback_vfs) > 0:
 
3376
            # If we have fallbacks, go to the generic path
 
3377
            for v in super(_KnitAnnotator, self)._get_needed_texts(key, pb=pb):
 
3378
                yield v
 
3379
        while True:
 
3380
            try:
 
3381
                records = self._get_build_graph(key)
 
3382
                for key, text, num_lines in self._extract_texts(records):
 
3383
                    yield key, text, num_lines
 
3384
            except errors.RetryWithNewPacks, e:
 
3385
                self._vf._access.reload_or_raise(e)
 
3386
                # The cached build_details are no longer valid
 
3387
                self._all_build_details.clear()
 
3388
 
 
3389
    def _extract_texts(self, records):
 
3390
        """Extract the various texts needed based on records"""
3476
3391
        # We iterate in the order read, rather than a strict order requested
3477
3392
        # However, process what we can, and put off to the side things that
3478
3393
        # still need parents, cleaning them up when those parents are
3479
3394
        # processed.
3480
 
        for (rev_id, record,
3481
 
             digest) in self._knit._read_records_iter(records):
3482
 
            if rev_id in self._annotated_lines:
3483
 
                continue
3484
 
            parent_ids = self._revision_id_graph[rev_id]
3485
 
            parent_ids = [p for p in parent_ids if p not in self._ghosts]
3486
 
            details = self._all_build_details[rev_id]
3487
 
            (index_memo, compression_parent, parents,
 
3395
        # Children that we want to annotate as soon as we get the parent text
 
3396
        # Map from parent_key => [child_key]
 
3397
        pending_annotation = {}
 
3398
        # Children that are missing their compression parent
 
3399
        pending_deltas = {}
 
3400
        for (key, record,
 
3401
             digest) in self._vf._read_records_iter(records):
 
3402
            # parent_ids = [p for p in parent_ids if p not in self._ghosts]
 
3403
            details = self._all_build_details[keys]
 
3404
            (index_memo, compression_parent, parent_keys,
3488
3405
             record_details) = details
3489
 
            nodes_to_annotate = []
 
3406
            assert parent_keys == self._parent_map[key]
3490
3407
            # TODO: Remove the punning between compression parents, and
3491
3408
            #       parent_ids, we should be able to do this without assuming
3492
3409
            #       the build order
3493
 
            if len(parent_ids) == 0:
3494
 
                # There are no parents for this node, so just add it
3495
 
                # TODO: This probably needs to be decoupled
3496
 
                fulltext_content, delta = self._knit._factory.parse_record(
 
3410
            if compression_parent:
 
3411
                # This is a delta, do we have the parent fulltext?
 
3412
                if compression_parent not in self._fulltext_contents:
 
3413
                    # Waiting for the parent
 
3414
                    pending_deltas.setdefault(compression_parent, []).append(
 
3415
                        (key, parent_keys, record, record_details))
 
3416
                    continue
 
3417
                # We can build this as a fulltext
 
3418
                parent_content = self._content_objects[compression_parent]
 
3419
                content, delta = self._vf._factory.parse_record(
 
3420
                    key, record, record_details,
 
3421
                    parent_content,
 
3422
                    # TODO: track when we can copy the base
 
3423
                    copy_base_content=True)
 
3424
            else:
 
3425
                # No compression parent means we have a fulltext
 
3426
                content, delta = self._vf._factory.parse_record(
3497
3427
                    rev_id, record, record_details, None)
3498
 
                fulltext = self._add_fulltext_content(rev_id, fulltext_content)
3499
 
                nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
3500
 
                    parent_ids, left_matching_blocks=None))
3501
 
            else:
3502
 
                child = (rev_id, parent_ids, record)
3503
 
                # Check if all the parents are present
3504
 
                self._check_parents(child, nodes_to_annotate)
3505
 
            while nodes_to_annotate:
3506
 
                # Should we use a queue here instead of a stack?
3507
 
                (rev_id, parent_ids, record) = nodes_to_annotate.pop()
3508
 
                (index_memo, compression_parent, parents,
3509
 
                 record_details) = self._all_build_details[rev_id]
3510
 
                blocks = None
3511
 
                if compression_parent is not None:
3512
 
                    comp_children = self._compression_children[compression_parent]
3513
 
                    if rev_id not in comp_children:
3514
 
                        raise AssertionError("%r not in compression children %r"
3515
 
                            % (rev_id, comp_children))
3516
 
                    # If there is only 1 child, it is safe to reuse this
3517
 
                    # content
3518
 
                    reuse_content = (len(comp_children) == 1
3519
 
                        and compression_parent not in
3520
 
                            self._nodes_to_keep_annotations)
3521
 
                    if reuse_content:
3522
 
                        # Remove it from the cache since it will be changing
3523
 
                        parent_fulltext_content = self._fulltext_contents.pop(compression_parent)
3524
 
                        # Make sure to copy the fulltext since it might be
3525
 
                        # modified
3526
 
                        parent_fulltext = list(parent_fulltext_content.text())
 
3428
            self._content_objects[key] = content
 
3429
            to_process = [(key, parent_keys, content)]
 
3430
            # Check for compression children that we can expand
 
3431
            if key in pending_deltas:
 
3432
                children = pending_deltas.pop(key)
 
3433
                for child_key, child_parent_keys, child_record, child_details in children:
 
3434
                    child_content, child_delta = self._vf._factory.parse_record(
 
3435
                        child_key, child_record, child_details,
 
3436
                        content,
 
3437
                        # TODO: track when we can copy the base
 
3438
                        copy_base_content=True)
 
3439
                    self._content_objects[child_key] = child_content
 
3440
                    to_process.append((child_key, child_parent_keys, child_content))
 
3441
            while to_process:
 
3442
                cur = to_process
 
3443
                to_process = []
 
3444
                for key, parent_keys, content in cur:
 
3445
                    # Check if all parents are present
 
3446
                    for parent_key in parent_keys:
 
3447
                        if parent_key not in self._annotations_cache:
 
3448
                            # still waiting on a parent text
 
3449
                            pending_annotation.setdefault(parent_key,
 
3450
                                []).append((key, parent_keys, content))
 
3451
                            break
3527
3452
                    else:
3528
 
                        parent_fulltext_content = self._fulltext_contents[compression_parent]
3529
 
                        parent_fulltext = parent_fulltext_content.text()
3530
 
                    comp_children.remove(rev_id)
3531
 
                    fulltext_content, delta = self._knit._factory.parse_record(
3532
 
                        rev_id, record, record_details,
3533
 
                        parent_fulltext_content,
3534
 
                        copy_base_content=(not reuse_content))
3535
 
                    fulltext = self._add_fulltext_content(rev_id,
3536
 
                                                          fulltext_content)
3537
 
                    if compression_parent == parent_ids[0]:
3538
 
                        # the compression_parent is the left parent, so we can
3539
 
                        # re-use the delta
3540
 
                        blocks = KnitContent.get_line_delta_blocks(delta,
3541
 
                                parent_fulltext, fulltext)
3542
 
                else:
3543
 
                    fulltext_content = self._knit._factory.parse_fulltext(
3544
 
                        record, rev_id)
3545
 
                    fulltext = self._add_fulltext_content(rev_id,
3546
 
                        fulltext_content)
3547
 
                nodes_to_annotate.extend(
3548
 
                    self._add_annotation(rev_id, fulltext, parent_ids,
3549
 
                                     left_matching_blocks=blocks))
3550
 
 
3551
 
    def _get_heads_provider(self):
3552
 
        """Create a heads provider for resolving ancestry issues."""
3553
 
        if self._heads_provider is not None:
3554
 
            return self._heads_provider
3555
 
        self._heads_provider = _mod_graph.KnownGraph(self._revision_id_graph)
3556
 
        return self._heads_provider
 
3453
                        # All parents present
 
3454
                        lines = content.text()
 
3455
                        yield key, lines, len(lines)
 
3456
                        if key in pending_annotation:
 
3457
                            to_process.extend(pending_annotation.pop(key))
3557
3458
 
3558
3459
    def annotate(self, key):
3559
3460
        """Return the annotated fulltext at the given key.
3560
3461
 
3561
3462
        :param key: The key to annotate.
3562
3463
        """
3563
 
        if len(self._knit._fallback_vfs) > 0:
 
3464
        if len(self._vf._fallback_vfs) > 0:
3564
3465
            # stacked knits can't use the fast path at present.
3565
3466
            return self._simple_annotate(key)
3566
3467
        while True:
3567
3468
            try:
3568
3469
                records = self._get_build_graph(key)
3569
3470
                if key in self._ghosts:
3570
 
                    raise errors.RevisionNotPresent(key, self._knit)
 
3471
                    raise errors.RevisionNotPresent(key, self._vf)
3571
3472
                self._annotate_records(records)
3572
3473
                return self._annotated_lines[key]
3573
3474
            except errors.RetryWithNewPacks, e:
3574
 
                self._knit._access.reload_or_raise(e)
 
3475
                self._vf._access.reload_or_raise(e)
3575
3476
                # The cached build_details are no longer valid
3576
3477
                self._all_build_details.clear()
3577
3478
 
3578
 
    def _simple_annotate(self, key):
3579
 
        """Return annotated fulltext, rediffing from the full texts.
3580
 
 
3581
 
        This is slow but makes no assumptions about the repository
3582
 
        being able to produce line deltas.
3583
 
        """
3584
 
        # TODO: this code generates a parent maps of present ancestors; it
3585
 
        #       could be split out into a separate method
3586
 
        #       -- mbp and robertc 20080704
3587
 
        graph = _mod_graph.Graph(self._knit)
3588
 
        parent_map = dict((k, v) for k, v in graph.iter_ancestry([key])
3589
 
                          if v is not None)
3590
 
        if not parent_map:
3591
 
            raise errors.RevisionNotPresent(key, self)
3592
 
        keys = parent_map.keys()
3593
 
        heads_provider = _mod_graph.KnownGraph(parent_map)
3594
 
        parent_cache = {}
3595
 
        reannotate = annotate.reannotate
3596
 
        for record in self._knit.get_record_stream(keys, 'topological', True):
3597
 
            key = record.key
3598
 
            fulltext = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
3599
 
            parents = parent_map[key]
3600
 
            if parents is not None:
3601
 
                parent_lines = [parent_cache[parent] for parent in parent_map[key]]
3602
 
            else:
3603
 
                parent_lines = []
3604
 
            parent_cache[key] = list(
3605
 
                reannotate(parent_lines, fulltext, key, None, heads_provider))
3606
 
        try:
3607
 
            return parent_cache[key]
3608
 
        except KeyError, e:
3609
 
            raise errors.RevisionNotPresent(key, self._knit)
3610
 
 
3611
3479
 
3612
3480
try:
3613
3481
    from bzrlib._knit_load_data_c import _load_data_c as _load_data