/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

  • Committer: Andrew Bennetts
  • Date: 2008-03-27 06:10:18 UTC
  • mfrom: (3309 +trunk)
  • mto: This revision was merged to the branch mainline in revision 3320.
  • Revision ID: andrew.bennetts@canonical.com-20080327061018-dxztpxyv6yoeg3am
Merge from bzr.dev.

Show diffs side-by-side

added added

removed removed

Lines of Context:
74
74
lazy_import(globals(), """
75
75
from bzrlib import (
76
76
    annotate,
 
77
    graph as _mod_graph,
77
78
    lru_cache,
78
79
    pack,
79
80
    trace,
134
135
class KnitContent(object):
135
136
    """Content of a knit version to which deltas can be applied."""
136
137
 
 
138
    def __init__(self):
 
139
        self._should_strip_eol = False
 
140
 
137
141
    def annotate(self):
138
142
        """Return a list of (origin, text) tuples."""
139
143
        return list(self.annotate_iter())
142
146
        """Apply delta to this object to become new_version_id."""
143
147
        raise NotImplementedError(self.apply_delta)
144
148
 
 
149
    def cleanup_eol(self, copy_on_mutate=True):
 
150
        if self._should_strip_eol:
 
151
            if copy_on_mutate:
 
152
                self._lines = self._lines[:]
 
153
            self.strip_last_line_newline()
 
154
 
145
155
    def line_delta_iter(self, new_lines):
146
156
        """Generate line-based delta from this content to new_lines."""
147
157
        new_texts = new_lines.text()
187
197
    """Annotated content."""
188
198
 
189
199
    def __init__(self, lines):
 
200
        KnitContent.__init__(self)
190
201
        self._lines = lines
191
202
 
192
203
    def annotate_iter(self):
204
215
    def strip_last_line_newline(self):
205
216
        line = self._lines[-1][1].rstrip('\n')
206
217
        self._lines[-1] = (self._lines[-1][0], line)
 
218
        self._should_strip_eol = False
207
219
 
208
220
    def text(self):
209
221
        try:
210
 
            return [text for origin, text in self._lines]
 
222
            lines = [text for origin, text in self._lines]
211
223
        except ValueError, e:
212
224
            # most commonly (only?) caused by the internal form of the knit
213
225
            # missing annotation information because of a bug - see thread
216
228
                "line in annotated knit missing annotation information: %s"
217
229
                % (e,))
218
230
 
 
231
        if self._should_strip_eol:
 
232
            anno, line = lines[-1]
 
233
            lines[-1] = (anno, line.rstrip('\n'))
 
234
        return lines
 
235
 
219
236
    def copy(self):
220
237
        return AnnotatedKnitContent(self._lines[:])
221
238
 
229
246
    """
230
247
 
231
248
    def __init__(self, lines, version_id):
 
249
        KnitContent.__init__(self)
232
250
        self._lines = lines
233
251
        self._version_id = version_id
234
252
 
251
269
 
252
270
    def strip_last_line_newline(self):
253
271
        self._lines[-1] = self._lines[-1].rstrip('\n')
 
272
        self._should_strip_eol = False
254
273
 
255
274
    def text(self):
256
 
        return self._lines
257
 
 
258
 
 
259
 
class KnitAnnotateFactory(object):
 
275
        lines = self._lines
 
276
        if self._should_strip_eol:
 
277
            lines = lines[:]
 
278
            lines[-1] = lines[-1].rstrip('\n')
 
279
        return lines
 
280
 
 
281
 
 
282
class _KnitFactory(object):
 
283
    """Base class for common Factory functions."""
 
284
 
 
285
    def parse_record(self, version_id, record, record_details,
 
286
                     base_content, copy_base_content=True):
 
287
        """Parse a record into a full content object.
 
288
 
 
289
        :param version_id: The official version id for this content
 
290
        :param record: The data returned by read_records_iter()
 
291
        :param record_details: Details about the record returned by
 
292
            get_build_details
 
293
        :param base_content: If get_build_details returns a compression_parent,
 
294
            you must return a base_content here, else use None
 
295
        :param copy_base_content: When building from the base_content, decide
 
296
            you can either copy it and return a new object, or modify it in
 
297
            place.
 
298
        :return: (content, delta) A Content object and possibly a line-delta,
 
299
            delta may be None
 
300
        """
 
301
        method, noeol = record_details
 
302
        if method == 'line-delta':
 
303
            assert base_content is not None
 
304
            if copy_base_content:
 
305
                content = base_content.copy()
 
306
            else:
 
307
                content = base_content
 
308
            delta = self.parse_line_delta(record, version_id)
 
309
            content.apply_delta(delta, version_id)
 
310
        else:
 
311
            content = self.parse_fulltext(record, version_id)
 
312
            delta = None
 
313
        content._should_strip_eol = noeol
 
314
        return (content, delta)
 
315
 
 
316
 
 
317
class KnitAnnotateFactory(_KnitFactory):
260
318
    """Factory for creating annotated Content objects."""
261
319
 
262
320
    annotated = True
368
426
        return content.annotate_iter()
369
427
 
370
428
 
371
 
class KnitPlainFactory(object):
 
429
class KnitPlainFactory(_KnitFactory):
372
430
    """Factory for creating plain Content objects."""
373
431
 
374
432
    annotated = False
426
484
        return out
427
485
 
428
486
    def annotate_iter(self, knit, version_id):
429
 
        return annotate_knit(knit, version_id)
 
487
        annotator = _KnitAnnotator(knit)
 
488
        return iter(annotator.annotate(version_id))
430
489
 
431
490
 
432
491
def make_empty_knit(transport, relpath):
516
575
                fulltext_size = size
517
576
                break
518
577
            delta_size += size
519
 
            delta_parents = self._index.get_parents(parent)
 
578
            delta_parents = self._index.get_parent_map([parent])[parent]
520
579
        else:
521
580
            # We couldn't find a fulltext, so we must create a new one
522
581
            return False
661
720
    def get_delta(self, version_id):
662
721
        """Get a delta for constructing version from some other version."""
663
722
        self.check_not_reserved_id(version_id)
664
 
        parents = self.get_parents(version_id)
 
723
        parents = self.get_parent_map([version_id])[version_id]
665
724
        if len(parents):
666
725
            parent = parents[0]
667
726
        else:
748
807
            if self.has_version(version_id):
749
808
                # First check: the list of parents.
750
809
                my_parents = self.get_parents_with_ghosts(version_id)
751
 
                if my_parents != parents:
 
810
                if tuple(my_parents) != tuple(parents):
752
811
                    # XXX: KnitCorrupt is not quite the right exception here.
753
812
                    raise KnitCorrupt(
754
813
                        self.filename,
807
866
            factory = KnitAnnotateFactory()
808
867
        else:
809
868
            raise errors.KnitDataStreamUnknown(format)
810
 
        index = _StreamIndex(data_list)
 
869
        index = _StreamIndex(data_list, self._index)
811
870
        access = _StreamAccess(reader_callable, index, self, factory)
812
871
        return KnitVersionedFile(self.filename, self.transport,
813
872
            factory=factory, index=index, access_method=access)
874
933
 
875
934
        This data is intended to be used for retrieving the knit records.
876
935
 
877
 
        A dict of version_id to (method, data_pos, data_size, next) is
 
936
        A dict of version_id to (record_details, index_memo, next, parents) is
878
937
        returned.
879
938
        method is the way referenced data should be applied.
880
 
        data_pos is the position of the data in the knit.
881
 
        data_size is the size of the data in the knit.
 
939
        index_memo is the handle to pass to the data access to actually get the
 
940
            data
882
941
        next is the build-parent of the version, or None for fulltexts.
 
942
        parents is the version_ids of the parents of this version
883
943
        """
884
944
        component_data = {}
885
 
        for version_id in version_ids:
886
 
            cursor = version_id
887
 
 
888
 
            while cursor is not None and cursor not in component_data:
889
 
                method = self._index.get_method(cursor)
890
 
                if method == 'fulltext':
891
 
                    next = None
892
 
                else:
893
 
                    next = self.get_parents_with_ghosts(cursor)[0]
894
 
                index_memo = self._index.get_position(cursor)
895
 
                component_data[cursor] = (method, index_memo, next)
896
 
                cursor = next
 
945
        pending_components = version_ids
 
946
        while pending_components:
 
947
            build_details = self._index.get_build_details(pending_components)
 
948
            current_components = set(pending_components)
 
949
            pending_components = set()
 
950
            for version_id, details in build_details.iteritems():
 
951
                (index_memo, compression_parent, parents,
 
952
                 record_details) = details
 
953
                method = record_details[0]
 
954
                if compression_parent is not None:
 
955
                    pending_components.add(compression_parent)
 
956
                component_data[version_id] = (record_details, index_memo,
 
957
                                              compression_parent)
 
958
            missing = current_components.difference(build_details)
 
959
            if missing:
 
960
                raise errors.RevisionNotPresent(missing.pop(), self.filename)
897
961
        return component_data
898
962
       
899
963
    def _get_content(self, version_id, parent_texts={}):
913
977
        self._index.check_versions_present(version_ids)
914
978
 
915
979
    def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
916
 
        nostore_sha, random_id, check_content):
 
980
        nostore_sha, random_id, check_content, left_matching_blocks):
917
981
        """See VersionedFile.add_lines_with_ghosts()."""
918
982
        self._check_add(version_id, lines, random_id, check_content)
919
983
        return self._add(version_id, lines, parents, self.delta,
920
 
            parent_texts, None, nostore_sha, random_id)
 
984
            parent_texts, left_matching_blocks, nostore_sha, random_id)
921
985
 
922
986
    def _add_lines(self, version_id, parents, lines, parent_texts,
923
987
        left_matching_blocks, nostore_sha, random_id, check_content):
1037
1101
    def _get_record_map(self, version_ids):
1038
1102
        """Produce a dictionary of knit records.
1039
1103
        
1040
 
        The keys are version_ids, the values are tuples of (method, content,
1041
 
        digest, next).
1042
 
        method is the way the content should be applied.  
1043
 
        content is a KnitContent object.
1044
 
        digest is the SHA1 digest of this version id after all steps are done
1045
 
        next is the build-parent of the version, i.e. the leftmost ancestor.
1046
 
        If the method is fulltext, next will be None.
 
1104
        :return: {version_id:(record, record_details, digest, next)}
 
1105
            record
 
1106
                data returned from read_records
 
1107
            record_details
 
1108
                opaque information to pass to parse_record
 
1109
            digest
 
1110
                SHA1 digest of the full text after all steps are done
 
1111
            next
 
1112
                build-parent of the version, i.e. the leftmost ancestor.
 
1113
                Will be None if the record is not a delta.
1047
1114
        """
1048
1115
        position_map = self._get_components_positions(version_ids)
1049
 
        # c = component_id, m = method, i_m = index_memo, n = next
1050
 
        records = [(c, i_m) for c, (m, i_m, n) in position_map.iteritems()]
 
1116
        # c = component_id, r = record_details, i_m = index_memo, n = next
 
1117
        records = [(c, i_m) for c, (r, i_m, n)
 
1118
                             in position_map.iteritems()]
1051
1119
        record_map = {}
1052
 
        for component_id, content, digest in \
 
1120
        for component_id, record, digest in \
1053
1121
                self._data.read_records_iter(records):
1054
 
            method, index_memo, next = position_map[component_id]
1055
 
            record_map[component_id] = method, content, digest, next
1056
 
                          
 
1122
            (record_details, index_memo, next) = position_map[component_id]
 
1123
            record_map[component_id] = record, record_details, digest, next
 
1124
 
1057
1125
        return record_map
1058
1126
 
1059
1127
    def get_text(self, version_id):
1094
1162
            components = []
1095
1163
            cursor = version_id
1096
1164
            while cursor is not None:
1097
 
                method, data, digest, next = record_map[cursor]
1098
 
                components.append((cursor, method, data, digest))
 
1165
                record, record_details, digest, next = record_map[cursor]
 
1166
                components.append((cursor, record, record_details, digest))
1099
1167
                if cursor in content_map:
1100
1168
                    break
1101
1169
                cursor = next
1102
1170
 
1103
1171
            content = None
1104
 
            for component_id, method, data, digest in reversed(components):
 
1172
            for (component_id, record, record_details,
 
1173
                 digest) in reversed(components):
1105
1174
                if component_id in content_map:
1106
1175
                    content = content_map[component_id]
1107
1176
                else:
1108
 
                    if method == 'fulltext':
1109
 
                        assert content is None
1110
 
                        content = self.factory.parse_fulltext(data, version_id)
1111
 
                    elif method == 'line-delta':
1112
 
                        delta = self.factory.parse_line_delta(data, version_id)
1113
 
                        if multiple_versions:
1114
 
                            # only doing this when we want multiple versions
1115
 
                            # output avoids list copies - which reference and
1116
 
                            # dereference many strings.
1117
 
                            content = content.copy()
1118
 
                        content.apply_delta(delta, version_id)
 
1177
                    content, delta = self.factory.parse_record(version_id,
 
1178
                        record, record_details, content,
 
1179
                        copy_base_content=multiple_versions)
1119
1180
                    if multiple_versions:
1120
1181
                        content_map[component_id] = content
1121
1182
 
1122
 
            if 'no-eol' in self._index.get_options(version_id):
1123
 
                if multiple_versions:
1124
 
                    content = content.copy()
1125
 
                content.strip_last_line_newline()
 
1183
            content.cleanup_eol(copy_on_mutate=multiple_versions)
1126
1184
            final_content[version_id] = content
1127
1185
 
1128
1186
            # digest here is the digest from the last applied component.
1200
1258
        """See VersionedFile.annotate_iter."""
1201
1259
        return self.factory.annotate_iter(self, version_id)
1202
1260
 
1203
 
    def get_parents(self, version_id):
1204
 
        """See VersionedFile.get_parents."""
1205
 
        # perf notes:
1206
 
        # optimism counts!
1207
 
        # 52554 calls in 1264 872 internal down from 3674
1208
 
        try:
1209
 
            return self._index.get_parents(version_id)
1210
 
        except KeyError:
1211
 
            raise RevisionNotPresent(version_id, self.filename)
1212
 
 
1213
 
    def get_parents_with_ghosts(self, version_id):
1214
 
        """See VersionedFile.get_parents."""
1215
 
        try:
1216
 
            return self._index.get_parents_with_ghosts(version_id)
1217
 
        except KeyError:
1218
 
            raise RevisionNotPresent(version_id, self.filename)
 
1261
    def get_parent_map(self, version_ids):
 
1262
        """See VersionedFile.get_parent_map."""
 
1263
        return self._index.get_parent_map(version_ids)
1219
1264
 
1220
1265
    def get_ancestry(self, versions, topo_sorted=True):
1221
1266
        """See VersionedFile.get_ancestry."""
1426
1471
                graph[version] = parents
1427
1472
        return topo_sort(graph.items())
1428
1473
 
 
1474
    def get_build_details(self, version_ids):
 
1475
        """Get the method, index_memo and compression parent for version_ids.
 
1476
 
 
1477
        Ghosts are omitted from the result.
 
1478
 
 
1479
        :param version_ids: An iterable of version_ids.
 
1480
        :return: A dict of version_id:(index_memo, compression_parent,
 
1481
                                       parents, record_details).
 
1482
            index_memo
 
1483
                opaque structure to pass to read_records to extract the raw
 
1484
                data
 
1485
            compression_parent
 
1486
                Content that this record is built upon, may be None
 
1487
            parents
 
1488
                Logical parents of this node
 
1489
            record_details
 
1490
                extra information about the content which needs to be passed to
 
1491
                Factory.parse_record
 
1492
        """
 
1493
        result = {}
 
1494
        for version_id in version_ids:
 
1495
            if version_id not in self._cache:
 
1496
                # ghosts are omitted
 
1497
                continue
 
1498
            method = self.get_method(version_id)
 
1499
            parents = self.get_parents_with_ghosts(version_id)
 
1500
            if method == 'fulltext':
 
1501
                compression_parent = None
 
1502
            else:
 
1503
                compression_parent = parents[0]
 
1504
            noeol = 'no-eol' in self.get_options(version_id)
 
1505
            index_memo = self.get_position(version_id)
 
1506
            result[version_id] = (index_memo, compression_parent,
 
1507
                                  parents, (method, noeol))
 
1508
        return result
 
1509
 
1429
1510
    def iter_parents(self, version_ids):
1430
1511
        """Iterate through the parents for many version ids.
1431
1512
 
1435
1516
            The order is undefined, allowing for different optimisations in
1436
1517
            the underlying implementation.
1437
1518
        """
1438
 
        for version_id in version_ids:
1439
 
            try:
1440
 
                yield version_id, tuple(self.get_parents(version_id))
1441
 
            except KeyError:
1442
 
                pass
 
1519
        parent_map = self.get_parent_map(version_ids)
 
1520
        parent_map_set = set(parent_map)
 
1521
        unknown_existence = set()
 
1522
        for parents in parent_map.itervalues():
 
1523
            unknown_existence.update(parents)
 
1524
        unknown_existence.difference_update(parent_map_set)
 
1525
        present_parents = set(self.get_parent_map(unknown_existence))
 
1526
        present_parents.update(parent_map_set)
 
1527
        for version_id, parents in parent_map.iteritems():
 
1528
            parents = tuple(parent for parent in parents
 
1529
                if parent in present_parents)
 
1530
            yield version_id, parents
1443
1531
 
1444
1532
    def num_versions(self):
1445
1533
        return len(self._history)
1488
1576
                assert isinstance(line, str), \
1489
1577
                    'content must be utf-8 encoded: %r' % (line,)
1490
1578
                lines.append(line)
1491
 
                self._cache_version(version_id, options, pos, size, parents)
 
1579
                self._cache_version(version_id, options, pos, size, tuple(parents))
1492
1580
            if not self._need_to_create:
1493
1581
                self._transport.append_bytes(self._filename, ''.join(lines))
1494
1582
            else:
1543
1631
        """
1544
1632
        return self._cache[version_id][1]
1545
1633
 
1546
 
    def get_parents(self, version_id):
1547
 
        """Return parents of specified version ignoring ghosts."""
1548
 
        return [parent for parent in self._cache[version_id][4] 
1549
 
                if parent in self._cache]
 
1634
    def get_parent_map(self, version_ids):
 
1635
        """Passed through to by KnitVersionedFile.get_parent_map."""
 
1636
        result = {}
 
1637
        for version_id in version_ids:
 
1638
            try:
 
1639
                result[version_id] = tuple(self._cache[version_id][4])
 
1640
            except KeyError:
 
1641
                pass
 
1642
        return result
1550
1643
 
1551
1644
    def get_parents_with_ghosts(self, version_id):
1552
1645
        """Return parents of specified version with ghosts."""
1553
 
        return self._cache[version_id][4] 
 
1646
        try:
 
1647
            return self.get_parent_map([version_id])[version_id]
 
1648
        except KeyError:
 
1649
            raise RevisionNotPresent(version_id, self)
1554
1650
 
1555
1651
    def check_versions_present(self, version_ids):
1556
1652
        """Check that all specified versions are present."""
1689
1785
        result_keys = topo_sort(graph.items())
1690
1786
        return [key[0] for key in result_keys]
1691
1787
 
 
1788
    def get_build_details(self, version_ids):
 
1789
        """Get the method, index_memo and compression parent for version_ids.
 
1790
 
 
1791
        Ghosts are omitted from the result.
 
1792
 
 
1793
        :param version_ids: An iterable of version_ids.
 
1794
        :return: A dict of version_id:(index_memo, compression_parent,
 
1795
                                       parents, record_details).
 
1796
            index_memo
 
1797
                opaque structure to pass to read_records to extract the raw
 
1798
                data
 
1799
            compression_parent
 
1800
                Content that this record is built upon, may be None
 
1801
            parents
 
1802
                Logical parents of this node
 
1803
            record_details
 
1804
                extra information about the content which needs to be passed to
 
1805
                Factory.parse_record
 
1806
        """
 
1807
        result = {}
 
1808
        entries = self._get_entries(self._version_ids_to_keys(version_ids), True)
 
1809
        for entry in entries:
 
1810
            version_id = self._keys_to_version_ids((entry[1],))[0]
 
1811
            if not self._parents:
 
1812
                parents = ()
 
1813
            else:
 
1814
                parents = self._keys_to_version_ids(entry[3][0])
 
1815
            if not self._deltas:
 
1816
                compression_parent = None
 
1817
            else:
 
1818
                compression_parent_key = self._compression_parent(entry)
 
1819
                if compression_parent_key:
 
1820
                    compression_parent = self._keys_to_version_ids(
 
1821
                    (compression_parent_key,))[0]
 
1822
                else:
 
1823
                    compression_parent = None
 
1824
            noeol = (entry[2][0] == 'N')
 
1825
            if compression_parent:
 
1826
                method = 'line-delta'
 
1827
            else:
 
1828
                method = 'fulltext'
 
1829
            result[version_id] = (self._node_to_position(entry),
 
1830
                                  compression_parent, parents,
 
1831
                                  (method, noeol))
 
1832
        return result
 
1833
 
 
1834
    def _compression_parent(self, an_entry):
 
1835
        # return the key that an_entry is compressed against, or None
 
1836
        # Grab the second parent list (as deltas implies parents currently)
 
1837
        compression_parents = an_entry[3][1]
 
1838
        if not compression_parents:
 
1839
            return None
 
1840
        assert len(compression_parents) == 1
 
1841
        return compression_parents[0]
 
1842
 
 
1843
    def _get_method(self, node):
 
1844
        if not self._deltas:
 
1845
            return 'fulltext'
 
1846
        if self._compression_parent(node):
 
1847
            return 'line-delta'
 
1848
        else:
 
1849
            return 'fulltext'
 
1850
 
1692
1851
    def get_graph(self):
1693
1852
        """Return a list of the node:parents lists from this knit index."""
1694
1853
        if not self._parents:
1750
1909
            logic to get the record.
1751
1910
        """
1752
1911
        node = self._get_node(version_id)
 
1912
        return self._node_to_position(node)
 
1913
 
 
1914
    def _node_to_position(self, node):
 
1915
        """Convert an index value to position details."""
1753
1916
        bits = node[2][1:].split(' ')
1754
1917
        return node[0], int(bits[0]), int(bits[1])
1755
1918
 
1756
1919
    def get_method(self, version_id):
1757
1920
        """Return compression method of specified version."""
1758
 
        if not self._deltas:
1759
 
            return 'fulltext'
1760
 
        return self._parent_compression(self._get_node(version_id)[3][1])
1761
 
 
1762
 
    def _parent_compression(self, reference_list):
1763
 
        # use the second reference list to decide if this is delta'd or not.
1764
 
        if len(reference_list):
1765
 
            return 'line-delta'
1766
 
        else:
1767
 
            return 'fulltext'
 
1921
        return self._get_method(self._get_node(version_id))
1768
1922
 
1769
1923
    def _get_node(self, version_id):
1770
1924
        try:
1778
1932
        e.g. ['foo', 'bar']
1779
1933
        """
1780
1934
        node = self._get_node(version_id)
1781
 
        if not self._deltas:
1782
 
            options = ['fulltext']
1783
 
        else:
1784
 
            options = [self._parent_compression(node[3][1])]
 
1935
        options = [self._get_method(node)]
1785
1936
        if node[2][0] == 'N':
1786
1937
            options.append('no-eol')
1787
1938
        return options
1788
1939
 
1789
 
    def get_parents(self, version_id):
1790
 
        """Return parents of specified version ignoring ghosts."""
1791
 
        parents = list(self.iter_parents([version_id]))
1792
 
        if not parents:
1793
 
            # missing key
1794
 
            raise errors.RevisionNotPresent(version_id, self)
1795
 
        return parents[0][1]
 
1940
    def get_parent_map(self, version_ids):
 
1941
        """Passed through to by KnitVersionedFile.get_parent_map."""
 
1942
        nodes = self._get_entries(self._version_ids_to_keys(version_ids))
 
1943
        result = {}
 
1944
        if self._parents:
 
1945
            for node in nodes:
 
1946
                result[node[1][0]] = self._keys_to_version_ids(node[3][0])
 
1947
        else:
 
1948
            for node in nodes:
 
1949
                result[node[1][0]] = ()
 
1950
        return result
1796
1951
 
1797
1952
    def get_parents_with_ghosts(self, version_id):
1798
1953
        """Return parents of specified version with ghosts."""
1799
 
        nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),
1800
 
            check_present=True))
1801
 
        if not self._parents:
1802
 
            return ()
1803
 
        return self._keys_to_version_ids(nodes[0][3][0])
 
1954
        try:
 
1955
            return self.get_parent_map([version_id])[version_id]
 
1956
        except KeyError:
 
1957
            raise RevisionNotPresent(version_id, self)
1804
1958
 
1805
1959
    def check_versions_present(self, version_ids):
1806
1960
        """Check that all specified versions are present."""
2124
2278
class _StreamIndex(object):
2125
2279
    """A Knit Index object that uses the data map from a datastream."""
2126
2280
 
2127
 
    def __init__(self, data_list):
 
2281
    def __init__(self, data_list, backing_index):
2128
2282
        """Create a _StreamIndex object.
2129
2283
 
2130
2284
        :param data_list: The data_list from the datastream.
 
2285
        :param backing_index: The index which will supply values for nodes
 
2286
            referenced outside of this stream.
2131
2287
        """
2132
2288
        self.data_list = data_list
 
2289
        self.backing_index = backing_index
2133
2290
        self._by_version = {}
2134
2291
        pos = 0
2135
2292
        for key, options, length, parents in data_list:
2159
2316
            ancestry.add(version)
2160
2317
        return list(ancestry)
2161
2318
 
 
2319
    def get_build_details(self, version_ids):
 
2320
        """Get the method, index_memo and compression parent for version_ids.
 
2321
 
 
2322
        Ghosts are omitted from the result.
 
2323
 
 
2324
        :param version_ids: An iterable of version_ids.
 
2325
        :return: A dict of version_id:(index_memo, compression_parent,
 
2326
                                       parents, record_details).
 
2327
            index_memo
 
2328
                opaque structure to pass to read_records to extract the raw
 
2329
                data
 
2330
            compression_parent
 
2331
                Content that this record is built upon, may be None
 
2332
            parents
 
2333
                Logical parents of this node
 
2334
            record_details
 
2335
                extra information about the content which needs to be passed to
 
2336
                Factory.parse_record
 
2337
        """
 
2338
        result = {}
 
2339
        for version_id in version_ids:
 
2340
            try:
 
2341
                method = self.get_method(version_id)
 
2342
            except errors.RevisionNotPresent:
 
2343
                # ghosts are omitted
 
2344
                continue
 
2345
            parent_ids = self.get_parents_with_ghosts(version_id)
 
2346
            noeol = ('no-eol' in self.get_options(version_id))
 
2347
            if method == 'fulltext':
 
2348
                compression_parent = None
 
2349
            else:
 
2350
                compression_parent = parent_ids[0]
 
2351
            index_memo = self.get_position(version_id)
 
2352
            result[version_id] = (index_memo, compression_parent,
 
2353
                                  parent_ids, (method, noeol))
 
2354
        return result
 
2355
 
2162
2356
    def get_method(self, version_id):
2163
2357
        """Return compression method of specified version."""
2164
2358
        try:
2166
2360
        except KeyError:
2167
2361
            # Strictly speaking this should check in the backing knit, but
2168
2362
            # until we have a test to discriminate, this will do.
2169
 
            return 'fulltext'
 
2363
            return self.backing_index.get_method(version_id)
2170
2364
        if 'fulltext' in options:
2171
2365
            return 'fulltext'
2172
2366
        elif 'line-delta' in options:
2179
2373
 
2180
2374
        e.g. ['foo', 'bar']
2181
2375
        """
2182
 
        return self._by_version[version_id][0]
 
2376
        try:
 
2377
            return self._by_version[version_id][0]
 
2378
        except KeyError:
 
2379
            return self.backing_index.get_options(version_id)
 
2380
 
 
2381
    def get_parent_map(self, version_ids):
 
2382
        """Passed through to by KnitVersionedFile.get_parent_map."""
 
2383
        result = {}
 
2384
        pending_ids = set()
 
2385
        for version_id in version_ids:
 
2386
            try:
 
2387
                result[version_id] = self._by_version[version_id][2]
 
2388
            except KeyError:
 
2389
                pending_ids.add(version_id)
 
2390
        result.update(self.backing_index.get_parent_map(pending_ids))
 
2391
        return result
2183
2392
 
2184
2393
    def get_parents_with_ghosts(self, version_id):
2185
2394
        """Return parents of specified version with ghosts."""
2186
 
        return self._by_version[version_id][2]
 
2395
        try:
 
2396
            return self.get_parent_map([version_id])[version_id]
 
2397
        except KeyError:
 
2398
            raise RevisionNotPresent(version_id, self)
2187
2399
 
2188
2400
    def get_position(self, version_id):
2189
2401
        """Return details needed to access the version.
2639
2851
            # do the join:
2640
2852
            count = 0
2641
2853
            total = len(version_list)
 
2854
            parent_map = self.source.get_parent_map(version_list)
2642
2855
            for version_id in version_list:
2643
2856
                pb.update("Converting to knit", count, total)
2644
 
                parents = self.source.get_parents(version_id)
 
2857
                parents = parent_map[version_id]
2645
2858
                # check that its will be a consistent copy:
2646
2859
                for parent in parents:
2647
2860
                    # if source has the parent, we must already have it
2668
2881
    It will work for knits with cached annotations, but this is not
2669
2882
    recommended.
2670
2883
    """
2671
 
    ancestry = knit.get_ancestry(revision_id)
2672
 
    fulltext = dict(zip(ancestry, knit.get_line_list(ancestry)))
2673
 
    annotations = {}
2674
 
    for candidate in ancestry:
2675
 
        if candidate in annotations:
2676
 
            continue
2677
 
        parents = knit.get_parents(candidate)
2678
 
        if len(parents) == 0:
2679
 
            blocks = None
2680
 
        elif knit._index.get_method(candidate) != 'line-delta':
2681
 
            blocks = None
 
2884
    annotator = _KnitAnnotator(knit)
 
2885
    return iter(annotator.annotate(revision_id))
 
2886
 
 
2887
 
 
2888
class _KnitAnnotator(object):
 
2889
    """Build up the annotations for a text."""
 
2890
 
 
2891
    def __init__(self, knit):
 
2892
        self._knit = knit
 
2893
 
 
2894
        # Content objects, differs from fulltexts because of how final newlines
 
2895
        # are treated by knits. the content objects here will always have a
 
2896
        # final newline
 
2897
        self._fulltext_contents = {}
 
2898
 
 
2899
        # Annotated lines of specific revisions
 
2900
        self._annotated_lines = {}
 
2901
 
 
2902
        # Track the raw data for nodes that we could not process yet.
 
2903
        # This maps the revision_id of the base to a list of children that will
 
2904
        # annotated from it.
 
2905
        self._pending_children = {}
 
2906
 
 
2907
        # Nodes which cannot be extracted
 
2908
        self._ghosts = set()
 
2909
 
 
2910
        # Track how many children this node has, so we know if we need to keep
 
2911
        # it
 
2912
        self._annotate_children = {}
 
2913
        self._compression_children = {}
 
2914
 
 
2915
        self._all_build_details = {}
 
2916
        # The children => parent revision_id graph
 
2917
        self._revision_id_graph = {}
 
2918
 
 
2919
        self._heads_provider = None
 
2920
 
 
2921
        self._nodes_to_keep_annotations = set()
 
2922
        self._generations_until_keep = 100
 
2923
 
 
2924
    def set_generations_until_keep(self, value):
 
2925
        """Set the number of generations before caching a node.
 
2926
 
 
2927
        Setting this to -1 will cache every merge node, setting this higher
 
2928
        will cache fewer nodes.
 
2929
        """
 
2930
        self._generations_until_keep = value
 
2931
 
 
2932
    def _add_fulltext_content(self, revision_id, content_obj):
 
2933
        self._fulltext_contents[revision_id] = content_obj
 
2934
        # TODO: jam 20080305 It might be good to check the sha1digest here
 
2935
        return content_obj.text()
 
2936
 
 
2937
    def _check_parents(self, child, nodes_to_annotate):
 
2938
        """Check if all parents have been processed.
 
2939
 
 
2940
        :param child: A tuple of (rev_id, parents, raw_content)
 
2941
        :param nodes_to_annotate: If child is ready, add it to
 
2942
            nodes_to_annotate, otherwise put it back in self._pending_children
 
2943
        """
 
2944
        for parent_id in child[1]:
 
2945
            if (parent_id not in self._annotated_lines):
 
2946
                # This parent is present, but another parent is missing
 
2947
                self._pending_children.setdefault(parent_id,
 
2948
                                                  []).append(child)
 
2949
                break
2682
2950
        else:
2683
 
            parent, sha1, noeol, delta = knit.get_delta(candidate)
2684
 
            blocks = KnitContent.get_line_delta_blocks(delta,
2685
 
                fulltext[parents[0]], fulltext[candidate])
2686
 
        annotations[candidate] = list(annotate.reannotate([annotations[p]
2687
 
            for p in parents], fulltext[candidate], candidate, blocks))
2688
 
    return iter(annotations[revision_id])
 
2951
            # This one is ready to be processed
 
2952
            nodes_to_annotate.append(child)
 
2953
 
 
2954
    def _add_annotation(self, revision_id, fulltext, parent_ids,
 
2955
                        left_matching_blocks=None):
 
2956
        """Add an annotation entry.
 
2957
 
 
2958
        All parents should already have been annotated.
 
2959
        :return: A list of children that now have their parents satisfied.
 
2960
        """
 
2961
        a = self._annotated_lines
 
2962
        annotated_parent_lines = [a[p] for p in parent_ids]
 
2963
        annotated_lines = list(annotate.reannotate(annotated_parent_lines,
 
2964
            fulltext, revision_id, left_matching_blocks,
 
2965
            heads_provider=self._get_heads_provider()))
 
2966
        self._annotated_lines[revision_id] = annotated_lines
 
2967
        for p in parent_ids:
 
2968
            ann_children = self._annotate_children[p]
 
2969
            ann_children.remove(revision_id)
 
2970
            if (not ann_children
 
2971
                and p not in self._nodes_to_keep_annotations):
 
2972
                del self._annotated_lines[p]
 
2973
                del self._all_build_details[p]
 
2974
                if p in self._fulltext_contents:
 
2975
                    del self._fulltext_contents[p]
 
2976
        # Now that we've added this one, see if there are any pending
 
2977
        # deltas to be done, certainly this parent is finished
 
2978
        nodes_to_annotate = []
 
2979
        for child in self._pending_children.pop(revision_id, []):
 
2980
            self._check_parents(child, nodes_to_annotate)
 
2981
        return nodes_to_annotate
 
2982
 
 
2983
    def _get_build_graph(self, revision_id):
 
2984
        """Get the graphs for building texts and annotations.
 
2985
 
 
2986
        The data you need for creating a full text may be different than the
 
2987
        data you need to annotate that text. (At a minimum, you need both
 
2988
        parents to create an annotation, but only need 1 parent to generate the
 
2989
        fulltext.)
 
2990
 
 
2991
        :return: A list of (revision_id, index_memo) records, suitable for
 
2992
            passing to read_records_iter to start reading in the raw data fro/
 
2993
            the pack file.
 
2994
        """
 
2995
        if revision_id in self._annotated_lines:
 
2996
            # Nothing to do
 
2997
            return []
 
2998
        pending = set([revision_id])
 
2999
        records = []
 
3000
        generation = 0
 
3001
        kept_generation = 0
 
3002
        while pending:
 
3003
            # get all pending nodes
 
3004
            generation += 1
 
3005
            this_iteration = pending
 
3006
            build_details = self._knit._index.get_build_details(this_iteration)
 
3007
            self._all_build_details.update(build_details)
 
3008
            # new_nodes = self._knit._index._get_entries(this_iteration)
 
3009
            pending = set()
 
3010
            for rev_id, details in build_details.iteritems():
 
3011
                (index_memo, compression_parent, parents,
 
3012
                 record_details) = details
 
3013
                self._revision_id_graph[rev_id] = parents
 
3014
                records.append((rev_id, index_memo))
 
3015
                # Do we actually need to check _annotated_lines?
 
3016
                pending.update(p for p in parents
 
3017
                                 if p not in self._all_build_details)
 
3018
                if compression_parent:
 
3019
                    self._compression_children.setdefault(compression_parent,
 
3020
                        []).append(rev_id)
 
3021
                if parents:
 
3022
                    for parent in parents:
 
3023
                        self._annotate_children.setdefault(parent,
 
3024
                            []).append(rev_id)
 
3025
                    num_gens = generation - kept_generation
 
3026
                    if ((num_gens >= self._generations_until_keep)
 
3027
                        and len(parents) > 1):
 
3028
                        kept_generation = generation
 
3029
                        self._nodes_to_keep_annotations.add(rev_id)
 
3030
 
 
3031
            missing_versions = this_iteration.difference(build_details.keys())
 
3032
            self._ghosts.update(missing_versions)
 
3033
            for missing_version in missing_versions:
 
3034
                # add a key, no parents
 
3035
                self._revision_id_graph[missing_version] = ()
 
3036
                pending.discard(missing_version) # don't look for it
 
3037
        # XXX: This should probably be a real exception, as it is a data
 
3038
        #      inconsistency
 
3039
        assert not self._ghosts.intersection(self._compression_children), \
 
3040
            "We cannot have nodes which have a compression parent of a ghost."
 
3041
        # Cleanout anything that depends on a ghost so that we don't wait for
 
3042
        # the ghost to show up
 
3043
        for node in self._ghosts:
 
3044
            if node in self._annotate_children:
 
3045
                # We won't be building this node
 
3046
                del self._annotate_children[node]
 
3047
        # Generally we will want to read the records in reverse order, because
 
3048
        # we find the parent nodes after the children
 
3049
        records.reverse()
 
3050
        return records
 
3051
 
 
3052
    def _annotate_records(self, records):
 
3053
        """Build the annotations for the listed records."""
 
3054
        # We iterate in the order read, rather than a strict order requested
 
3055
        # However, process what we can, and put off to the side things that
 
3056
        # still need parents, cleaning them up when those parents are
 
3057
        # processed.
 
3058
        for (rev_id, record,
 
3059
             digest) in self._knit._data.read_records_iter(records):
 
3060
            if rev_id in self._annotated_lines:
 
3061
                continue
 
3062
            parent_ids = self._revision_id_graph[rev_id]
 
3063
            parent_ids = [p for p in parent_ids if p not in self._ghosts]
 
3064
            details = self._all_build_details[rev_id]
 
3065
            (index_memo, compression_parent, parents,
 
3066
             record_details) = details
 
3067
            nodes_to_annotate = []
 
3068
            # TODO: Remove the punning between compression parents, and
 
3069
            #       parent_ids, we should be able to do this without assuming
 
3070
            #       the build order
 
3071
            if len(parent_ids) == 0:
 
3072
                # There are no parents for this node, so just add it
 
3073
                # TODO: This probably needs to be decoupled
 
3074
                assert compression_parent is None
 
3075
                fulltext_content, delta = self._knit.factory.parse_record(
 
3076
                    rev_id, record, record_details, None)
 
3077
                fulltext = self._add_fulltext_content(rev_id, fulltext_content)
 
3078
                nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
 
3079
                    parent_ids, left_matching_blocks=None))
 
3080
            else:
 
3081
                child = (rev_id, parent_ids, record)
 
3082
                # Check if all the parents are present
 
3083
                self._check_parents(child, nodes_to_annotate)
 
3084
            while nodes_to_annotate:
 
3085
                # Should we use a queue here instead of a stack?
 
3086
                (rev_id, parent_ids, record) = nodes_to_annotate.pop()
 
3087
                (index_memo, compression_parent, parents,
 
3088
                 record_details) = self._all_build_details[rev_id]
 
3089
                if compression_parent is not None:
 
3090
                    comp_children = self._compression_children[compression_parent]
 
3091
                    assert rev_id in comp_children
 
3092
                    # If there is only 1 child, it is safe to reuse this
 
3093
                    # content
 
3094
                    reuse_content = (len(comp_children) == 1
 
3095
                        and compression_parent not in
 
3096
                            self._nodes_to_keep_annotations)
 
3097
                    if reuse_content:
 
3098
                        # Remove it from the cache since it will be changing
 
3099
                        parent_fulltext_content = self._fulltext_contents.pop(compression_parent)
 
3100
                        # Make sure to copy the fulltext since it might be
 
3101
                        # modified
 
3102
                        parent_fulltext = list(parent_fulltext_content.text())
 
3103
                    else:
 
3104
                        parent_fulltext_content = self._fulltext_contents[compression_parent]
 
3105
                        parent_fulltext = parent_fulltext_content.text()
 
3106
                    comp_children.remove(rev_id)
 
3107
                    fulltext_content, delta = self._knit.factory.parse_record(
 
3108
                        rev_id, record, record_details,
 
3109
                        parent_fulltext_content,
 
3110
                        copy_base_content=(not reuse_content))
 
3111
                    fulltext = self._add_fulltext_content(rev_id,
 
3112
                                                          fulltext_content)
 
3113
                    blocks = KnitContent.get_line_delta_blocks(delta,
 
3114
                            parent_fulltext, fulltext)
 
3115
                else:
 
3116
                    fulltext_content = self._knit.factory.parse_fulltext(
 
3117
                        record, rev_id)
 
3118
                    fulltext = self._add_fulltext_content(rev_id,
 
3119
                        fulltext_content)
 
3120
                    blocks = None
 
3121
                nodes_to_annotate.extend(
 
3122
                    self._add_annotation(rev_id, fulltext, parent_ids,
 
3123
                                     left_matching_blocks=blocks))
 
3124
 
 
3125
    def _get_heads_provider(self):
 
3126
        """Create a heads provider for resolving ancestry issues."""
 
3127
        if self._heads_provider is not None:
 
3128
            return self._heads_provider
 
3129
        parent_provider = _mod_graph.DictParentsProvider(
 
3130
            self._revision_id_graph)
 
3131
        graph_obj = _mod_graph.Graph(parent_provider)
 
3132
        head_cache = _mod_graph.FrozenHeadsCache(graph_obj)
 
3133
        self._heads_provider = head_cache
 
3134
        return head_cache
 
3135
 
 
3136
    def annotate(self, revision_id):
 
3137
        """Return the annotated fulltext at the given revision.
 
3138
 
 
3139
        :param revision_id: The revision id for this file
 
3140
        """
 
3141
        records = self._get_build_graph(revision_id)
 
3142
        if revision_id in self._ghosts:
 
3143
            raise errors.RevisionNotPresent(revision_id, self._knit)
 
3144
        self._annotate_records(records)
 
3145
        return self._annotated_lines[revision_id]
2689
3146
 
2690
3147
 
2691
3148
try: