/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

  • Committer: John Arbash Meinel
  • Date: 2008-02-19 18:51:28 UTC
  • mto: This revision was merged to the branch mainline in revision 3280.
  • Revision ID: john@arbash-meinel.com-20080219185128-isxp2u65v72znzyx
Refactor the annotation logic into a helper class.

Show diffs side-by-side

added added

removed removed

Lines of Context:
888
888
            build_details = self._index.get_build_details(pending_components)
889
889
            pending_components = set()
890
890
            for version_id, details in build_details.items():
891
 
                method, index_memo, compression_parent = details
 
891
                method, index_memo, compression_parent, parents = details
892
892
                if compression_parent is not None:
893
893
                    pending_components.add(compression_parent)
894
894
                component_data[version_id] = details
1045
1045
        """
1046
1046
        position_map = self._get_components_positions(version_ids)
1047
1047
        # c = component_id, m = method, i_m = index_memo, n = next
1048
 
        records = [(c, i_m) for c, (m, i_m, n) in position_map.iteritems()]
 
1048
        # p = parent_ids
 
1049
        records = [(c, i_m) for c, (m, i_m, n, p) in position_map.iteritems()]
1049
1050
        record_map = {}
1050
1051
        for component_id, content, digest in \
1051
1052
                self._data.read_records_iter(records):
1052
 
            method, index_memo, next = position_map[component_id]
 
1053
            method, index_memo, next, parent_ids = position_map[component_id]
1053
1054
            record_map[component_id] = method, content, digest, next
1054
1055
                          
1055
1056
        return record_map
1428
1429
        """Get the method, index_memo and compression parent for version_ids.
1429
1430
 
1430
1431
        :param version_ids: An iterable of version_ids.
1431
 
        :return: A dict of version_id:(method, index_memo, compression_parent).
 
1432
        :return: A dict of version_id:(method, index_memo, compression_parent,
 
1433
            parents).
1432
1434
        """
1433
1435
        result = {}
1434
1436
        for version_id in version_ids:
1435
1437
            method = self.get_method(version_id)
 
1438
            parents = self.get_parents_with_ghosts(version_id)
1436
1439
            if method == 'fulltext':
1437
1440
                compression_parent = None
1438
1441
            else:
1439
 
                compression_parent = self.get_parents_with_ghosts(version_id)[0]
 
1442
                compression_parent = parents[0]
1440
1443
            index_memo = self.get_position(version_id)
1441
 
            result[version_id] = (method, index_memo, compression_parent)
 
1444
            result[version_id] = (method, index_memo, compression_parent,
 
1445
                                  parents)
1442
1446
        return result
1443
1447
 
1444
1448
    def iter_parents(self, version_ids):
1708
1712
        """Get the method, index_memo and compression parent for version_ids.
1709
1713
 
1710
1714
        :param version_ids: An iterable of version_ids.
1711
 
        :return: A dict of version_id:(method, index_memo, compression_parent).
 
1715
        :return: A dict of version_id:(method, index_memo, compression_parent,
 
1716
            parents).
1712
1717
        """
1713
1718
        result = {}
1714
1719
        entries = self._get_entries(self._version_ids_to_keys(version_ids), True)
1715
1720
        for entry in entries:
1716
1721
            version_id = self._keys_to_version_ids((entry[1],))[0]
 
1722
            parents = self._keys_to_version_ids(entry[3][0])
1717
1723
            if not self._deltas:
1718
1724
                compression_parent = None
1719
1725
            else:
1728
1734
            else:
1729
1735
                method = 'fulltext'
1730
1736
            result[version_id] = (method, self._node_to_position(entry),
1731
 
                compression_parent)
 
1737
                compression_parent, parents)
1732
1738
        return result
1733
1739
 
1734
1740
    def _compression_parent(self, an_entry):
2762
2768
    def __init__(self, knit):
2763
2769
        self._knit = knit
2764
2770
 
 
2771
        # unannotated lines of various revisions, this will have the final
 
2772
        # newline correct
 
2773
        self._fulltexts = {}
 
2774
        # Content objects, differs from fulltexts because of how final newlines
 
2775
        # are treated by knits. the content objects here will always have a
 
2776
        # final newline
 
2777
        self._fulltext_contents = {}
 
2778
 
 
2779
        # Annotated lines of specific revisions
 
2780
        self._annotated_lines = {}
 
2781
 
 
2782
        # Track the raw data for nodes that we could not process yet.
 
2783
        # This maps the revision_id of the base to a list of children that will
 
2784
        # annotated from it.
 
2785
        self._pending_children = {}
 
2786
 
 
2787
        self._all_build_details = {}
 
2788
        self._revision_id_graph = {}
 
2789
 
 
2790
    def _add_fulltext_content(self, revision_id, content_obj, noeol_flag):
 
2791
        self._fulltext_contents[revision_id] = content_obj
 
2792
        if noeol_flag:
 
2793
            content_obj = content_obj.copy()
 
2794
            content_obj.strip_last_line_newline()
 
2795
        fulltext = content_obj.text()
 
2796
        self._fulltexts[revision_id] = fulltext
 
2797
        # XXX: It would probably be good to check the sha1digest here
 
2798
        return fulltext
 
2799
 
 
2800
    def _check_parents(self, child, nodes_to_annotate):
 
2801
        """Check if all parents have been processed.
 
2802
 
 
2803
        :param child: A tuple of (rev_id, parents, raw_content)
 
2804
        :param nodes_to_annotate: If child is ready, add it to
 
2805
            nodes_to_annotate, otherwise put it back in self._pending_children
 
2806
        """
 
2807
        for parent_id in child[1]:
 
2808
            if parent_id not in self._annotated_lines:
 
2809
                # This parent is present, but another parent is missing
 
2810
                self._pending_children.setdefault(parent_id,
 
2811
                                                  []).append(child)
 
2812
                break
 
2813
        else:
 
2814
            # This one is ready to be processed
 
2815
            nodes_to_annotate.append(child)
 
2816
 
 
2817
    def _add_annotation(self, revision_id, fulltext, parent_ids,
 
2818
                        left_matching_blocks=None):
 
2819
        """Add an annotation entry.
 
2820
 
 
2821
        All parents should already have been annotated.
 
2822
        :return: A list of children that now have their parents satisfied.
 
2823
        """
 
2824
        a = self._annotated_lines
 
2825
        annotated_parent_lines = [a[p] for p in parent_ids]
 
2826
        annotated_lines = list(annotate.reannotate(annotated_parent_lines,
 
2827
            fulltext, revision_id, left_matching_blocks))
 
2828
        self._annotated_lines[revision_id] = annotated_lines
 
2829
        # Now that we've added this one, see if there are any pending
 
2830
        # deltas to be done, certainly this parent is finished
 
2831
        nodes_to_annotate = []
 
2832
        for child in self._pending_children.pop(revision_id, []):
 
2833
            self._check_parents(child, nodes_to_annotate)
 
2834
        return nodes_to_annotate
 
2835
 
 
2836
    def _get_build_graph(self, revision_id):
 
2837
        """Get the graphs for building texts and annotations.
 
2838
 
 
2839
        The data you need for creating a full text may be different than the
 
2840
        data you need to annotate that text. (At a minimum, you need both
 
2841
        parents to create an annotation, but only need 1 parent to generate the
 
2842
        fulltext.)
 
2843
 
 
2844
        :return: A list of (revision_id, index_memo) records, suitable for
 
2845
            passing to read_records_iter to start reading in the raw data from
 
2846
            the pack file.
 
2847
        """
 
2848
        pending = set([revision_id])
 
2849
        records = []
 
2850
        while pending:
 
2851
            # get all pending nodes
 
2852
            this_iteration = pending
 
2853
            build_details = self._knit._index.get_build_details(this_iteration)
 
2854
            self._all_build_details.update(build_details)
 
2855
            # new_nodes = self._knit._index._get_entries(this_iteration)
 
2856
            pending = set()
 
2857
            for rev_id, details in build_details.iteritems():
 
2858
                method, index_memo, compression_parent, parents = details
 
2859
                self._revision_id_graph[rev_id] = parents
 
2860
                records.append((rev_id, index_memo))
 
2861
                pending.update(p for p in parents
 
2862
                                 if p not in self._all_build_details)
 
2863
 
 
2864
            missing_versions = this_iteration.difference(build_details.keys())
 
2865
            for missing_version in missing_versions:
 
2866
                # add a key, no parents
 
2867
                self._revision_id_graph[missing_versions] = ()
 
2868
                pending.discard(missing_version) # don't look for it
 
2869
        # Generally we will want to read the records in reverse order, because
 
2870
        # we find the parent nodes after the children
 
2871
        records.reverse()
 
2872
        return records
 
2873
 
 
2874
    def _annotate_records(self, records):
 
2875
        """Build the annotations for the listed records."""
 
2876
        # We iterate in the order read, rather than a strict order requested
 
2877
        # However, process what we can, and put off to the side things that still
 
2878
        # need parents, cleaning them up when those parents are processed.
 
2879
        for (rev_id, raw_content,
 
2880
             digest) in self._knit._data.read_records_iter(records):
 
2881
            if rev_id in self._annotated_lines:
 
2882
                continue
 
2883
            parent_ids = self._revision_id_graph[rev_id]
 
2884
            details = self._all_build_details[rev_id]
 
2885
            method, index_memo, compression_parent, parent_ids = details
 
2886
            # XXX: We don't want to be going back to the index here, make it
 
2887
            #      part of details
 
2888
            noeol = 'no-eol' in self._knit._index.get_options(rev_id)
 
2889
            nodes_to_annotate = []
 
2890
            # TODO: Remove the punning between compression parents, and
 
2891
            #       parent_ids, we should be able to do this without assuming
 
2892
            #       the build order
 
2893
            if len(parent_ids) == 0:
 
2894
                # There are no parents for this node, so just add it
 
2895
                # TODO: This probably needs to be decoupled
 
2896
                assert compression_parent is None and method == 'fulltext'
 
2897
                fulltext_content = self._knit.factory.parse_fulltext(
 
2898
                    raw_content, rev_id)
 
2899
                fulltext = self._add_fulltext_content(rev_id, fulltext_content,
 
2900
                                                      noeol)
 
2901
                nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
 
2902
                    parent_ids, left_matching_blocks=None))
 
2903
            else:
 
2904
                child = (rev_id, parent_ids, raw_content)
 
2905
                # Check if all the parents are present
 
2906
                self._check_parents(child, nodes_to_annotate)
 
2907
            while nodes_to_annotate:
 
2908
                # Should we use a queue here instead of a stack?
 
2909
                (rev_id, parent_ids, raw_content) = nodes_to_annotate.pop()
 
2910
                (method, index_memo, compression_parent,
 
2911
                 parent_ids) = self._all_build_details[rev_id]
 
2912
                # XXX
 
2913
                noeol = 'no-eol' in self._knit._index.get_options(rev_id)
 
2914
                if method == 'line-delta':
 
2915
                    parent_fulltext_content = self._fulltext_contents[compression_parent]
 
2916
                    delta = self._knit.factory.parse_line_delta(raw_content,
 
2917
                                                                rev_id)
 
2918
                    # TODO: only copy when the parent is still needed elsewhere
 
2919
                    fulltext_content = parent_fulltext_content.copy()
 
2920
                    fulltext_content.apply_delta(delta, rev_id)
 
2921
                    fulltext = self._add_fulltext_content(rev_id,
 
2922
                        fulltext_content, noeol)
 
2923
                    parent_fulltext = self._fulltexts[parent_ids[0]]
 
2924
                    blocks = KnitContent.get_line_delta_blocks(delta,
 
2925
                            parent_fulltext, fulltext)
 
2926
                else:
 
2927
                    assert method == 'fulltext'
 
2928
                    fulltext_content = self._knit.factory.parse_fulltext(
 
2929
                        raw_content, rev_id)
 
2930
                    fulltext = self._add_fulltext_content(rev_id,
 
2931
                        fulltext_content, noeol)
 
2932
                    blocks = None
 
2933
                nodes_to_annotate.extend(
 
2934
                    self._add_annotation(rev_id, fulltext, parent_ids,
 
2935
                                     left_matching_blocks=blocks))
 
2936
 
2765
2937
    def get_annotated_lines(self, revision_id):
2766
2938
        """Return the annotated fulltext at the given revision.
2767
2939
 
2768
2940
        :param revision_id: The revision id for this file
2769
2941
        """
2770
 
        ancestry = self._knit.get_ancestry(revision_id)
2771
 
        fulltext = dict(zip(ancestry, self._knit.get_line_list(ancestry)))
2772
 
        annotations = {}
2773
 
        for candidate in ancestry:
2774
 
            if candidate in annotations:
2775
 
                continue
2776
 
            parents = self._knit.get_parents(candidate)
2777
 
            if len(parents) == 0:
2778
 
                blocks = None
2779
 
            elif self._knit._index.get_method(candidate) != 'line-delta':
2780
 
                blocks = None
2781
 
            else:
2782
 
                parent, sha1, noeol, delta = self._knit.get_delta(candidate)
2783
 
                blocks = KnitContent.get_line_delta_blocks(delta,
2784
 
                    fulltext[parents[0]], fulltext[candidate])
2785
 
            annotations[candidate] = list(annotate.reannotate([annotations[p]
2786
 
                for p in parents], fulltext[candidate], candidate, blocks))
2787
 
        return annotations[revision_id]
 
2942
        records = self._get_build_graph(revision_id)
 
2943
        self._annotate_records(records)
 
2944
        return self._annotated_lines[revision_id]
2788
2945
 
2789
2946
 
2790
2947
try: