/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
# groupcompress, a bzr plugin providing new compression logic.
# Copyright (C) 2008 Canonical Limited.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as published
# by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
#

"""Core compression logic for compressing streams of related files."""

0.17.13 by Robert Collins
Do not output copy instructions which take more to encode than a fresh insert. (But do not refer to those insertions when finding ranges to copy: they are not interesting).
from itertools import izip
0.17.5 by Robert Collins
nograph tests completely passing.
from cStringIO import StringIO
import zlib

0.17.4 by Robert Collins
Annotate.
from bzrlib import (
    annotate,
0.17.5 by Robert Collins
nograph tests completely passing.
    debug,
0.17.4 by Robert Collins
Annotate.
    diff,
0.17.5 by Robert Collins
nograph tests completely passing.
    errors,
0.17.4 by Robert Collins
Annotate.
    graph as _mod_graph,
0.20.2 by John Arbash Meinel
Teach groupcompress about 'chunked' encoding
    osutils,
0.17.4 by Robert Collins
Annotate.
    pack,
    patiencediff,
    progress,
    trace,
    )
from bzrlib.graph import Graph
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
from bzrlib.knit import _DirectPackAccess
0.17.2 by Robert Collins
Core proof of concept working.
from bzrlib.osutils import (
    contains_whitespace,
    contains_linebreaks,
    sha_string,
    sha_strings,
    split_lines,
    )
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
from bzrlib.btree_index import BTreeBuilder
0.17.24 by Robert Collins
Add a group cache to decompression, 5 times faster than knit at decompression when accessing everything in a group.
from bzrlib.lru_cache import LRUSizeCache
0.18.6 by John Arbash Meinel
Use the new EquivalenceTable to track the lines.
from bzrlib.plugins.groupcompress import equivalence_table
0.17.9 by Robert Collins
Initial stab at repository format support.
from bzrlib.tsort import topo_sort
0.17.2 by Robert Collins
Core proof of concept working.
from bzrlib.versionedfile import (
0.17.5 by Robert Collins
nograph tests completely passing.
    adapter_registry,
    AbsentContentFactory,
0.20.5 by John Arbash Meinel
Finish the Fulltext => Chunked conversions so that we work in the more-efficient Chunks.
    ChunkedContentFactory,
0.17.2 by Robert Collins
Core proof of concept working.
    FulltextContentFactory,
    VersionedFiles,
    )

0.23.15 by John Arbash Meinel
Handle when self._index is NULL, mostly because the source text was the empty string.
_NO_LABELS = False
0.23.55 by John Arbash Meinel
Make sure the default is _FAST=False for now.
_FAST = False
0.17.2 by Robert Collins
Core proof of concept working.

0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
def parse(bytes):
0.23.13 by John Arbash Meinel
Factor out the ability to have/not have labels.
    if _NO_LABELS:
        action_byte = bytes[0]
        action = {'f':'fulltext', 'd':'delta'}[action_byte]
        return action, None, None, bytes[1:]
0.23.12 by John Arbash Meinel
Add a 'len:' field to the data.
    (action, label_line, sha1_line, len_line,
0.23.16 by John Arbash Meinel
Properly restore the label functionality.
     delta_bytes) = bytes.split('\n', 4)
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
    if (action not in ('fulltext', 'delta')
0.23.27 by John Arbash Meinel
Fix up some failing tests.
        or not label_line.startswith('label:')
        or not sha1_line.startswith('sha1:')
        or not len_line.startswith('len:')
0.23.13 by John Arbash Meinel
Factor out the ability to have/not have labels.
        ):
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
        raise AssertionError("bad text record %r" % (bytes,))
0.23.53 by John Arbash Meinel
Remove the temporary adjustment for handling multiple formats of labels.
    label = tuple(label_line[6:].split('\x00'))
    sha1 = sha1_line[5:]
0.23.27 by John Arbash Meinel
Fix up some failing tests.
    length = int(len_line[4:])
0.23.12 by John Arbash Meinel
Add a 'len:' field to the data.
    if len(delta_bytes) != length:
        raise AssertionError("bad length record %r" % (bytes,))
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
    return action, label, sha1, delta_bytes


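# A worked example (illustrative, not part of the original module) of the
# labelled record format that GroupCompressor.compress() below emits and
# parse() consumes, assuming _NO_LABELS is False:
#
#   record = ('fulltext\n'
#             'label:file-id\x00rev-id\n'
#             'sha1:2aae6c35c94fcfb415dbe95f408b9ce91ee846ed\n'
#             'len:11\n'
#             'hello world')
#   action, label, sha1, payload = parse(record)
#   # action == 'fulltext', label == ('file-id', 'rev-id'),
#   # payload == 'hello world' (11 bytes, matching the len: field)
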
0.20.11 by John Arbash Meinel
start experimenting with gc-optimal ordering.
def sort_gc_optimal(parent_map):
    """Sort and group the keys in parent_map into gc-optimal order.

    gc-optimal is defined (currently) as reverse-topological order, grouped by
    the key prefix.

    :return: A sorted list of keys
    """
    # gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
0.20.23 by John Arbash Meinel
Add a progress indicator for chk pages.
    per_prefix_map = {}
0.20.11 by John Arbash Meinel
start experimenting with gc-optimal ordering.
    for item in parent_map.iteritems():
        key = item[0]
        if isinstance(key, str) or len(key) == 1:
0.20.23 by John Arbash Meinel
Add a progress indicator for chk pages.
            prefix = ''
0.20.11 by John Arbash Meinel
start experimenting with gc-optimal ordering.
        else:
0.20.23 by John Arbash Meinel
Add a progress indicator for chk pages.
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]
0.20.11 by John Arbash Meinel
start experimenting with gc-optimal ordering.

0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
    present_keys = []
0.20.11 by John Arbash Meinel
start experimenting with gc-optimal ordering.
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(topo_sort(per_prefix_map[prefix])))
    return present_keys


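# A worked example (illustrative, not part of the original module): keys are
# grouped by prefix, and each prefix group is emitted in reverse topological
# order:
#
#   parent_map = {('f1', 'b'): (('f1', 'a'),),
#                 ('f1', 'a'): (),
#                 ('f2', 'x'): ()}
#   sort_gc_optimal(parent_map)
#   # -> [('f1', 'b'), ('f1', 'a'), ('f2', 'x')]
#   # ('f1', ...) sorts before ('f2', ...), and within 'f1' the child 'b'
#   # comes before its parent 'a' (reverse topological).
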
0.17.2 by Robert Collins
Core proof of concept working.
class GroupCompressor(object):
0.17.3 by Robert Collins
new encoder, allows non monotonically increasing sequence matches for moar compression.
    """Produce a serialised group of compressed texts.
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor

0.17.3 by Robert Collins
new encoder, allows non monotonically increasing sequence matches for moar compression.
    It contains code very similar to SequenceMatcher because it has a similar
    task. However, some key differences apply:
     - there is no junk, we want a minimal edit not a human readable diff.
     - we don't filter very common lines (because we don't know where a good
       range will start, and after the first text we want to be emitting
       minimal edits only).
     - we chain the left side, not the right side.
     - we incrementally update the adjacency matrix as new lines are provided.
     - we look for matches in all of the left side, so the routine which does
       the analogous task of find_longest_match does not need to filter on the
       left side.
    """
0.17.2 by Robert Collins
Core proof of concept working.

    def __init__(self, delta=True):
        """Create a GroupCompressor.

0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
        :param delta: If False, do not compress records.
0.17.2 by Robert Collins
Core proof of concept working.
        """
0.23.13 by John Arbash Meinel
Factor out the ability to have/not have labels.
        # Consider seeding the lines with some sort of GC Start flag, or
        # putting it as part of the output stream, rather than in the
        # compressed bytes.
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
        self.lines = []
0.17.2 by Robert Collins
Core proof of concept working.
        self.endpoint = 0
        self.input_bytes = 0
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
        self.labels_deltas = {}
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
        self._delta_index = _groupcompress_pyx.DeltaIndex()
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor

    def compress(self, key, chunks, expected_sha, soft=False):
0.17.2 by Robert Collins
Core proof of concept working.
        """Compress a text (given as chunks) under the label key.

        :param key: A key tuple. It is stored in the output
0.17.26 by Robert Collins
Working better --gc-plain-chk.
            for identification of the text during decompression. If the last
            element is 'None' it is replaced with the sha1 of the text -
            e.g. sha1:xxxxxxx.
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
        :param chunks: The chunks to be compressed
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
        :param expected_sha: If non-None, the sha the lines are believed to
0.17.2 by Robert Collins
Core proof of concept working.
            have. During compression the sha is calculated; a mismatch will
            cause an error.
0.20.15 by John Arbash Meinel
Change so that regions that have lots of copies get converted back
        :param soft: Do a 'soft' compression. This means that we require larger
            ranges to match to be considered for a copy command.
0.17.2 by Robert Collins
Core proof of concept working.
        :return: The sha1 of the text, and the number of bytes accumulated in
            the group output so far.
        """
0.23.52 by John Arbash Meinel
Use the max_delta flag.
        # TODO: Change this to a bytes interface, since the output is now a
        #       bytes interface anyway.
        bytes = ''.join(chunks)
0.23.54 by John Arbash Meinel
'bzr pack' _FAST during compress() now is 32s versus 25s.
        if not _FAST or expected_sha is None:
            sha1 = sha_string(bytes)
        else:
            sha1 = expected_sha
0.17.26 by Robert Collins
Working better --gc-plain-chk.
        if key[-1] is None:
            key = key[:-1] + ('sha1:' + sha1,)
0.17.2 by Robert Collins
Core proof of concept working.
        label = '\x00'.join(key)
0.23.52 by John Arbash Meinel
Use the max_delta flag.
        input_len = len(bytes)
0.23.12 by John Arbash Meinel
Add a 'len:' field to the data.
        # By having action/label/sha1/len, we can parse the group if the index
        # was ever destroyed: we have the key in 'label', we know the final
        # bytes are valid from sha1, and we know where to find the end of this
        # record because of 'len'. (The delta record itself will store the
        # total length for the expanded record.)
0.23.13 by John Arbash Meinel
Factor out the ability to have/not have labels.
        # 'len: %d\n' costs approximately 1% increase in total data
        # Having the labels at all costs us 9-10% increase, 38% increase for
        # inventory pages, and 5.8% increase for text pages
        if _NO_LABELS:
            new_chunks = []
        else:
0.23.27 by John Arbash Meinel
Fix up some failing tests.
            new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
0.23.33 by John Arbash Meinel
Fix a bug when handling multiple large-range copies.
        if self._delta_index._source_offset != self.endpoint:
            raise AssertionError('_source_offset != endpoint:'
                ' somehow the DeltaIndex got out of sync with'
                ' the output lines')
0.23.52 by John Arbash Meinel
Use the max_delta flag.
        max_delta_size = len(bytes) / 2
        delta = self._delta_index.make_delta(bytes, max_delta_size)
        if delta is None:
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
            # We can't delta (perhaps source_text is empty)
            # so mark this as an insert
0.23.13 by John Arbash Meinel
Factor out the ability to have/not have labels.
            if _NO_LABELS:
                new_chunks = ['f']
            else:
                new_chunks.insert(0, 'fulltext\n')
0.23.27 by John Arbash Meinel
Fix up some failing tests.
                new_chunks.append('len:%s\n' % (input_len,))
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
            unadded_bytes = sum(map(len, new_chunks))
0.23.52 by John Arbash Meinel
Use the max_delta flag.
            self._delta_index.add_source(bytes, unadded_bytes)
            new_chunks.append(bytes)
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
        else:
0.23.13 by John Arbash Meinel
Factor out the ability to have/not have labels.
            if _NO_LABELS:
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
                new_chunks = ['d']
0.23.13 by John Arbash Meinel
Factor out the ability to have/not have labels.
            else:
                new_chunks.insert(0, 'delta\n')
0.23.27 by John Arbash Meinel
Fix up some failing tests.
                new_chunks.append('len:%s\n' % (len(delta),))
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
            if _FAST:
                new_chunks.append(delta)
                unadded_bytes = sum(map(len, new_chunks))
                self._delta_index._source_offset += unadded_bytes
            else:
                unadded_bytes = sum(map(len, new_chunks))
0.23.47 by John Arbash Meinel
Use the new add_delta_source.
                self._delta_index.add_delta_source(delta, unadded_bytes)
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
                new_chunks.append(delta)
0.18.9 by John Arbash Meinel
If we are going to do it this way, we don't need to explicitly distinguish left and right
        delta_start = (self.endpoint, len(self.lines))
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
        self.output_chunks(new_chunks)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
        self.input_bytes += input_len
0.18.9 by John Arbash Meinel
If we are going to do it this way, we don't need to explicitly distinguish left and right
        delta_end = (self.endpoint, len(self.lines))
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
        self.labels_deltas[key] = (delta_start, delta_end)
0.23.29 by John Arbash Meinel
Forgot to add the delta bytes to the index objects.
        if self._delta_index._source_offset != self.endpoint:
            raise AssertionError('the delta index is out of sync '
                'with the output lines %s != %s'
                % (self._delta_index._source_offset, self.endpoint))
0.17.2 by Robert Collins
Core proof of concept working.
        return sha1, self.endpoint

0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
    def extract(self, key):
0.17.12 by Robert Collins
Encode copy ranges as bytes not lines, halves decode overhead.
        """Extract a key previously added to the compressor.
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor

0.17.12 by Robert Collins
Encode copy ranges as bytes not lines, halves decode overhead.
        :param key: The key to extract.
        :return: An iterable over bytes and the sha1.
        """
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
        delta_details = self.labels_deltas[key]
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
        delta_chunks = self.lines[delta_details[0][1]:delta_details[1][1]]
        action, label, sha1, delta = parse(''.join(delta_chunks))
0.23.13 by John Arbash Meinel
Factor out the ability to have/not have labels.
        if not _NO_LABELS and label != key:
0.20.21 by John Arbash Meinel
Merge the chk sorting code.
            raise AssertionError("wrong key: %r, wanted %r" % (label, key))
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
        if action == 'fulltext':
            bytes = delta
        else:
            source = ''.join(self.lines[delta_details[0][0]])
0.23.21 by John Arbash Meinel
Rename the extension to _pyx, since Robert prefers that form
            bytes = _groupcompress_pyx.apply_delta(source, delta)
0.23.13 by John Arbash Meinel
Factor out the ability to have/not have labels.
        if _NO_LABELS:
            sha1 = sha_string(bytes)
        else:
            if sha1 != sha_string(bytes):
                raise AssertionError('sha1 sum did not match')
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
        return [bytes], sha1

    def output_chunks(self, new_chunks):
        """Output some chunks.

        :param new_chunks: The chunks to output.
        """
0.17.12 by Robert Collins
Encode copy ranges as bytes not lines, halves decode overhead.
        endpoint = self.endpoint
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
        self.lines.extend(new_chunks)
        endpoint += sum(map(len, new_chunks))
0.17.12 by Robert Collins
Encode copy ranges as bytes not lines, halves decode overhead.
        self.endpoint = endpoint
0.17.3 by Robert Collins
new encoder, allows non monotonically increasing sequence matches for moar compression.

0.17.2 by Robert Collins
Core proof of concept working.
    def ratio(self):
        """Return the overall compression ratio."""
        return float(self.input_bytes) / float(self.endpoint)
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.

0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.

0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.

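# An illustrative usage sketch (not part of the original module); it assumes
# the _groupcompress_pyx extension imported at the bottom of this file is
# available:
#
#   gc = GroupCompressor()
#   sha_a, end_a = gc.compress(('rev-a',), ['common base text\n'], None)
#   sha_b, end_b = gc.compress(('rev-b',), ['common base text\nmore\n'], None)
#   # The second text is encoded as a delta against the bytes already in
#   # the group; gc.ratio() reports input bytes over output bytes, and
#   # gc.extract(('rev-b',)) round-trips to ([bytes], sha_b).
#   compressed = zlib.compress(''.join(gc.lines))
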
def make_pack_factory(graph, delta, keylength):
    """Create a factory for creating a pack based groupcompress.

    This is only functional enough to run interface tests; it doesn't try to
    provide a full pack environment.

    :param graph: Store a graph.
    :param delta: Delta compress contents.
    :param keylength: How long should keys be.
    """
    def factory(transport):
        parents = graph or delta
        ref_length = 0
        if graph:
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
            ref_length = 1
0.17.7 by Robert Collins
Update for current index2 changes.
        graph_index = BTreeBuilder(reference_lists=ref_length,
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
            key_elements=keylength)
        stream = transport.open_write_stream('newpack')
        writer = pack.ContainerWriter(stream.write)
        writer.begin()
        index = _GCGraphIndex(graph_index, lambda:True, parents=parents,
0.17.9 by Robert Collins
Initial stab at repository format support.
            add_callback=graph_index.add_nodes)
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
        access = _DirectPackAccess({})
        access.set_writer(writer, graph_index, (transport, 'newpack'))
0.17.2 by Robert Collins
Core proof of concept working.
        result = GroupCompressVersionedFiles(index, access, delta)
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
        result.stream = stream
        result.writer = writer
        return result
    return factory


def cleanup_pack_group(versioned_files):
0.17.23 by Robert Collins
Only decompress as much of the zlib data as is needed to read the text recipe.
    versioned_files.writer.end()
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
    versioned_files.stream.close()


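# An illustrative sketch (not part of the original module) of wiring the
# factory to a transport, e.g. from a test; 'transport' is assumed to be a
# writable bzrlib transport:
#
#   factory = make_pack_factory(graph=True, delta=True, keylength=1)
#   vf = factory(transport)   # a GroupCompressVersionedFiles
#   # ... add texts, then finish the container:
#   cleanup_pack_group(vf)
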
class GroupCompressVersionedFiles(VersionedFiles):
    """A group-compress based VersionedFiles implementation."""

0.17.2 by Robert Collins
Core proof of concept working.
    def __init__(self, index, access, delta=True):
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
        """Create a GroupCompressVersionedFiles object.

        :param index: The index object storing access and graph data.
        :param access: The access object storing raw data.
0.17.2 by Robert Collins
Core proof of concept working.
        :param delta: Whether to delta compress or just entropy compress.
        """
        self._index = index
        self._access = access
        self._delta = delta
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
        self._unadded_refs = {}
0.17.24 by Robert Collins
Add a group cache to decompression, 5 times faster than knit at decompression when accessing everything in a group.
        self._group_cache = LRUSizeCache(max_size=50*1024*1024)
0.17.2 by Robert Collins
Core proof of concept working.

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """Add a text to the store.

        :param key: The key tuple of the text to add.
        :param parents: The parents key tuples of the text to add.
        :param lines: A list of lines. Each line must be a bytestring, and all
            of them except the last must be terminated with \n and contain no
            other \n's. The last line may either contain no \n's or a single
            terminating \n. If the lines list does not meet this constraint the
            add routine may error or may succeed - but you will be unable to
            read the data back accurately. (Checking the lines have been split
            correctly is expensive and extremely unlikely to catch bugs so it
            is not done at runtime unless check_content is True.)
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations.  VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent.  The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
                 representation of the inserted version which can be provided
                 back to future add_lines calls in the parent_texts dictionary.
        """
        self._index._check_write_ok()
        self._check_add(key, lines, random_id, check_content)
        if parents is None:
            # The caller might pass None if there is no graph data, but kndx
            # indexes can't directly store that, so we give them
            # an empty tuple instead.
            parents = ()
        # double handling for now. Make it work until then.
0.20.5 by John Arbash Meinel
Finish the Fulltext => Chunked conversions so that we work in the more-efficient Chunks.
        length = sum(map(len, lines))
        record = ChunkedContentFactory(key, parents, None, lines)
0.17.5 by Robert Collins
nograph tests completely passing.
        sha1 = list(self._insert_record_stream([record], random_id=random_id))[0]
0.20.5 by John Arbash Meinel
Finish the Fulltext => Chunked conversions so that we work in the more-efficient Chunks.
        return sha1, length, None
0.17.2 by Robert Collins
Core proof of concept working.

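# An illustrative sketch (not part of the original module):
#
#   sha1, num_bytes, _ = vf.add_lines(('file-id', 'rev-1'), (),
#       ['hello\n', 'world\n'])
#   # num_bytes == 12, the total length of the supplied lines
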
0.17.4 by Robert Collins
Annotate.
    def annotate(self, key):
        """See VersionedFiles.annotate."""
        graph = Graph(self)
0.17.5 by Robert Collins
nograph tests completely passing.
        parent_map = self.get_parent_map([key])
        if not parent_map:
            raise errors.RevisionNotPresent(key, self)
        if parent_map[key] is not None:
            search = graph._make_breadth_first_searcher([key])
            keys = set()
            while True:
                try:
                    present, ghosts = search.next_with_ghosts()
                except StopIteration:
                    break
                keys.update(present)
            parent_map = self.get_parent_map(keys)
        else:
            keys = [key]
            parent_map = {key:()}
0.17.4 by Robert Collins
Annotate.
        head_cache = _mod_graph.FrozenHeadsCache(graph)
        parent_cache = {}
        reannotate = annotate.reannotate
        for record in self.get_record_stream(keys, 'topological', True):
            key = record.key
0.20.2 by John Arbash Meinel
Teach groupcompress about 'chunked' encoding
            chunks = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
0.17.4 by Robert Collins
Annotate.
            parent_lines = [parent_cache[parent] for parent in parent_map[key]]
            parent_cache[key] = list(
0.20.21 by John Arbash Meinel
Merge the chk sorting code.
                reannotate(parent_lines, chunks, key, None, head_cache))
0.17.4 by Robert Collins
Annotate.
        return parent_cache[key]

0.17.5 by Robert Collins
nograph tests completely passing.
    def check(self, progress_bar=None):
        """See VersionedFiles.check()."""
        keys = self.keys()
        for record in self.get_record_stream(keys, 'unordered', True):
            record.get_bytes_as('fulltext')

0.17.2 by Robert Collins
Core proof of concept working.
    def _check_add(self, key, lines, random_id, check_content):
        """Check that version_id and lines are safe to add."""
        version_id = key[-1]
0.17.26 by Robert Collins
Working better --gc-plain-chk.
        if version_id is not None:
            if contains_whitespace(version_id):
                raise errors.InvalidRevisionId(version_id, self)
0.17.2 by Robert Collins
Core proof of concept working.
        self.check_not_reserved_id(version_id)
        # TODO: If random_id==False and the key is already present, we should
        # probably check that the existing content is identical to what is
        # being inserted, and otherwise raise an exception.  This would make
        # the bundle code simpler.
        if check_content:
            self._check_lines_not_unicode(lines)
            self._check_lines_are_lines(lines)

0.17.5 by Robert Collins
nograph tests completely passing.
    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        result = {}
        sources = [self._index]
        source_results = []
        missing = set(keys)
        for source in sources:
            if not missing:
                break
            new_result = source.get_parent_map(missing)
            source_results.append(new_result)
            result.update(new_result)
            missing.difference_update(set(new_result))
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
        if self._unadded_refs:
            for key in missing:
                if key in self._unadded_refs:
                    result[key] = self._unadded_refs[key]
0.17.5 by Robert Collins
nograph tests completely passing.
        return result

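# An illustrative sketch (not part of the original module), using
# hypothetical keys:
#
#   vf.get_parent_map([('file-id', 'rev-2'), ('file-id', 'ghost')])
#   # -> {('file-id', 'rev-2'): (('file-id', 'rev-1'),)}
#   # absent keys are simply left out of the mapping
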
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
    def _get_group_and_delta_bytes(self, index_memo):
0.20.14 by John Arbash Meinel
Factor out _get_group_and_delta_lines.
        read_memo = index_memo[0:3]
        # get the group:
        try:
            plain = self._group_cache[read_memo]
        except KeyError:
            # read the group
            zdata = self._access.get_raw_records([read_memo]).next()
            # decompress - whole thing - this is not a bug, as it
            # permits caching. We might want to store the partially
            # decompressed group and decompress object, so that recent
            # texts are not penalised by big groups.
            plain = zlib.decompress(zdata) #, index_memo[4])
            self._group_cache[read_memo] = plain
        # cheapo debugging:
        # print len(zdata), len(plain)
        # parse - requires split_lines, better to have byte offsets
        # here (but not by much - we only split the region for the
        # recipe, and we often want to end up with lines anyway).
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
        return plain, plain[index_memo[3]:index_memo[4]]
0.20.14 by John Arbash Meinel
Factor out _get_group_and_delta_lines.

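# An illustrative note (not part of the original module): index_memo here is
# the 5-tuple built by _GCGraphIndex._node_to_position below,
# (index, group_start, group_length, record_start, record_end).  The first
# three locate and read the zlib-compressed group; the last two slice one
# labelled record out of the decompressed bytes:
#
#   plain, record = vf._get_group_and_delta_bytes((idx, 0, 1024, 17, 211))
#   # record == plain[17:211], ready to be fed to parse()
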
0.20.18 by John Arbash Meinel
Implement new handling of get_bytes_as(), and get_missing_compression_parent_keys()
    def get_missing_compression_parent_keys(self):
        """Return the keys of missing compression parents.

        Missing compression parents occur when a record stream was missing
        basis texts, or an index was scanned that had missing basis texts.
        """
        # GroupCompress cannot currently reference texts that are not in the
        # group, so this is valid for now
        return frozenset()

0.17.5 by Robert Collins
nograph tests completely passing.
    def get_record_stream(self, keys, ordering, include_delta_closure):
        """Get a stream of records for keys.

        :param keys: The keys to include.
        :param ordering: One of 'unordered', 'topological', 'gc-optimal' or
            'as-requested'. A topologically sorted stream has compression
            parents strictly before their children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the opaque data).
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        # keys might be a generator
0.22.6 by John Arbash Meinel
Clustering chk pages properly makes a big difference.
        orig_keys = list(keys)
        keys = set(orig_keys)
0.17.5 by Robert Collins
nograph tests completely passing.
        if not keys:
            return
0.20.23 by John Arbash Meinel
Add a progress indicator for chk pages.
        if (not self._index.has_graph
            and ordering in ('topological', 'gc-optimal')):
0.17.5 by Robert Collins
nograph tests completely passing.
            # Cannot topological order when no graph has been stored.
            ordering = 'unordered'
        # Cheap: iterate
        locations = self._index.get_build_details(keys)
0.20.10 by John Arbash Meinel
Change the extraction ordering for 'unordered'.
        local_keys = frozenset(keys).intersection(set(self._unadded_refs))
0.17.5 by Robert Collins
nograph tests completely passing.
        if ordering == 'topological':
            # would be better to not globally sort initially but instead
            # start with one key, recurse to its oldest parent, then grab
            # everything in the same group, etc.
            parent_map = dict((key, details[2]) for key, details in
                locations.iteritems())
0.20.10 by John Arbash Meinel
Change the extraction ordering for 'unordered'.
            for key in local_keys:
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
                parent_map[key] = self._unadded_refs[key]
0.17.5 by Robert Collins
nograph tests completely passing.
            present_keys = topo_sort(parent_map)
            # Now group by source:
0.20.11 by John Arbash Meinel
start experimenting with gc-optimal ordering.
        elif ordering == 'gc-optimal':
            parent_map = dict((key, details[2]) for key, details in
0.20.23 by John Arbash Meinel
Add a progress indicator for chk pages.
                              locations.iteritems())
0.20.11 by John Arbash Meinel
start experimenting with gc-optimal ordering.
            for key in local_keys:
                parent_map[key] = self._unadded_refs[key]
            # XXX: This only optimizes for the target ordering. We may need to
            #      balance that with the time it takes to extract ordering, by
            #      somehow grouping based on locations[key][0:3]
            present_keys = sort_gc_optimal(parent_map)
0.22.6 by John Arbash Meinel
Clustering chk pages properly makes a big difference.
        elif ordering == 'as-requested':
0.20.32 by John Arbash Meinel
Fix bug #336373 by adding local keys to locations after the fact, rather than before.
            present_keys = [key for key in orig_keys if key in locations
                            or key in local_keys]
0.17.5 by Robert Collins
nograph tests completely passing.
        else:
0.20.10 by John Arbash Meinel
Change the extraction ordering for 'unordered'.
            # We want to yield the keys in a semi-optimal (read-wise) ordering.
            # Otherwise we thrash the _group_cache and destroy performance
            def get_group(key):
0.20.11 by John Arbash Meinel
start experimenting with gc-optimal ordering.
                # This is the group the bytes are stored in, followed by the
                # location in the group
                return locations[key][0]
0.20.10 by John Arbash Meinel
Change the extraction ordering for 'unordered'.
            present_keys = sorted(locations.iterkeys(), key=get_group)
            # We don't have an ordering for keys in the in-memory object, but
            # lets process the in-memory ones first.
            present_keys = list(local_keys) + present_keys
0.20.32 by John Arbash Meinel
Fix bug #336373 by adding local keys to locations after the fact, rather than before.
        locations.update((key, None) for key in local_keys)
0.17.5 by Robert Collins
nograph tests completely passing.
        absent_keys = keys.difference(set(locations))
        for key in absent_keys:
            yield AbsentContentFactory(key)
        for key in present_keys:
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
            if key in self._unadded_refs:
0.20.28 by John Arbash Meinel
Fix typo with the recent lines => chunks rename.
                chunks, sha1 = self._compressor.extract(key)
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
                parents = self._unadded_refs[key]
            else:
                index_memo, _, parents, (method, _) = locations[key]
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
                plain, delta_bytes = self._get_group_and_delta_bytes(index_memo)
                action, label, sha1, delta = parse(delta_bytes)
0.23.13 by John Arbash Meinel
Factor out the ability to have/not have labels.
                if not _NO_LABELS and label != key:
0.20.21 by John Arbash Meinel
Merge the chk sorting code.
                    raise AssertionError("wrong key: %r, wanted %r" % (label, key))
0.23.9 by John Arbash Meinel
We now basically have full support for using diff-delta as the compressor.
                if action == 'fulltext':
                    chunks = [delta]
                else:
                    # TODO: relax apply_delta so that it can allow source to be
                    #       longer than expected
0.23.21 by John Arbash Meinel
Rename the extension to _pyx, since Robert prefers that form
                    bytes = _groupcompress_pyx.apply_delta(plain, delta)
0.23.10 by John Arbash Meinel
Allowing the source bytes to be longer than expected.
                    if bytes is None:
                        raise AssertionError('apply_delta returned None:'
                            ' the delta could not be applied to the source')
                    chunks = [bytes]
                    del bytes
0.23.13 by John Arbash Meinel
Factor out the ability to have/not have labels.
                if _NO_LABELS:
                    sha1 = sha_strings(chunks)
                else:
0.23.54 by John Arbash Meinel
'bzr pack' _FAST during compress() now is 32s versus 25s.
                    if not _FAST and sha_strings(chunks) != sha1:
0.23.13 by John Arbash Meinel
Factor out the ability to have/not have labels.
                        raise AssertionError('sha1 sum did not match')
0.20.22 by John Arbash Meinel
Make it clear that the bits you get from 'apply_delta' are chunks, not lines.
            yield ChunkedContentFactory(key, parents, sha1, chunks)
0.20.5 by John Arbash Meinel
Finish the Fulltext => Chunked conversions so that we work in the more-efficient Chunks.

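# An illustrative sketch (not part of the original module), assuming 'vf'
# holds ('f', 'r1') and its child ('f', 'r2'):
#
#   stream = vf.get_record_stream([('f', 'r2'), ('f', 'r1')],
#                                 'topological', True)
#   for record in stream:
#       text = record.get_bytes_as('fulltext')   # r1 arrives before r2
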
0.17.5 by Robert Collins
nograph tests completely passing.
    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s()."""
        result = {}
        for record in self.get_record_stream(keys, 'unordered', True):
            if record.sha1 is not None:
                result[record.key] = record.sha1
            else:
                if record.storage_kind != 'absent':
                    result[record.key] = sha_string(record.get_bytes_as(
                        'fulltext'))
        return result

0.17.2 by Robert Collins
Core proof of concept working.
    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFiles.get_record_stream:
        """
0.17.5 by Robert Collins
nograph tests completely passing.
        for _ in self._insert_record_stream(stream):
            pass
0.17.2 by Robert Collins
Core proof of concept working.

0.17.5 by Robert Collins
nograph tests completely passing.
    def _insert_record_stream(self, stream, random_id=False):
0.17.2 by Robert Collins
Core proof of concept working.
        """Internal core to insert a record stream into this container.

        This helper function has a different interface than insert_record_stream
        to allow add_lines to be minimal, but still return the needed data.

        :param stream: A stream of records to insert.
        :return: An iterator over the sha1 of the inserted records.
        :seealso insert_record_stream:
        :seealso add_lines:
        """
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
        adapters = {}
0.17.5 by Robert Collins
nograph tests completely passing.
        def get_adapter(adapter_key):
            try:
                return adapters[adapter_key]
            except KeyError:
                adapter_factory = adapter_registry.get(adapter_key)
                adapter = adapter_factory(self)
                adapters[adapter_key] = adapter
                return adapter
0.17.2 by Robert Collins
Core proof of concept working.
        # This will go up to fulltexts for gc to gc fetching, which isn't
        # ideal.
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
        self._compressor = GroupCompressor(self._delta)
        self._unadded_refs = {}
0.17.5 by Robert Collins
nograph tests completely passing.
        keys_to_add = []
        basis_end = 0
0.17.6 by Robert Collins
Cap group size at 20MB internal buffer. (Probably way too big).
        groups = 1
        def flush():
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
            compressed = zlib.compress(''.join(self._compressor.lines))
0.17.6 by Robert Collins
Cap group size at 20MB internal buffer. (Probably way too big).
            index, start, length = self._access.add_raw_records(
                [(None, len(compressed))], compressed)[0]
            nodes = []
            for key, reads, refs in keys_to_add:
                nodes.append((key, "%d %d %s" % (start, length, reads), refs))
            self._index.add_records(nodes, random_id=random_id)
0.20.15 by John Arbash Meinel
Change so that regions that have lots of copies get converted back
        last_prefix = None
0.17.2 by Robert Collins
Core proof of concept working.
        for record in stream:
0.17.5 by Robert Collins
nograph tests completely passing.
            # Raise an error when a record is missing.
            if record.storage_kind == 'absent':
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
                raise errors.RevisionNotPresent(record.key, self)
0.20.18 by John Arbash Meinel
Implement new handling of get_bytes_as(), and get_missing_compression_parent_keys()
            try:
0.23.52 by John Arbash Meinel
Use the max_delta flag.
                bytes = record.get_bytes_as('fulltext')
0.20.18 by John Arbash Meinel
Implement new handling of get_bytes_as(), and get_missing_compression_parent_keys()
            except errors.UnavailableRepresentation:
0.17.5 by Robert Collins
nograph tests completely passing.
                adapter_key = record.storage_kind, 'fulltext'
                adapter = get_adapter(adapter_key)
0.20.21 by John Arbash Meinel
Merge the chk sorting code.
                bytes = adapter.get_bytes(record)
0.20.15 by John Arbash Meinel
Change so that regions that have lots of copies get converted back
            soft = False
0.20.13 by John Arbash Meinel
Play around a bit.
            if len(record.key) > 1:
                prefix = record.key[0]
0.20.15 by John Arbash Meinel
Change so that regions that have lots of copies get converted back
                if (last_prefix is not None and prefix != last_prefix):
                    soft = True
0.23.11 by John Arbash Meinel
Insert a fulltext if the delta is more than half the total size.
                    if basis_end > 1024 * 1024 * 2:
0.20.15 by John Arbash Meinel
Change so that regions that have lots of copies get converted back
                        flush()
                        self._compressor = GroupCompressor(self._delta)
                        self._unadded_refs = {}
                        keys_to_add = []
                        basis_end = 0
                        groups += 1
                last_prefix = prefix
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
            found_sha1, end_point = self._compressor.compress(record.key,
0.23.52 by John Arbash Meinel
Use the max_delta flag.
                [bytes], record.sha1, soft=soft)
0.17.26 by Robert Collins
Working better --gc-plain-chk.
            if record.key[-1] is None:
                key = record.key[:-1] + ('sha1:' + found_sha1,)
            else:
                key = record.key
            self._unadded_refs[key] = record.parents
0.17.3 by Robert Collins
new encoder, allows non monotonically increasing sequence matches for moar compression.
            yield found_sha1
0.17.26 by Robert Collins
Working better --gc-plain-chk.
            keys_to_add.append((key, '%d %d' % (basis_end, end_point),
0.17.5 by Robert Collins
nograph tests completely passing.
                (record.parents,)))
            basis_end = end_point
0.23.11 by John Arbash Meinel
Insert a fulltext if the delta is more than half the total size.
            if basis_end > 1024 * 1024 * 4:
0.17.6 by Robert Collins
Cap group size at 20MB internal buffer. (Probably way too big).
                flush()
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
                self._compressor = GroupCompressor(self._delta)
                self._unadded_refs = {}
0.17.6 by Robert Collins
Cap group size at 20MB internal buffer. (Probably way too big).
                keys_to_add = []
                basis_end = 0
                groups += 1
0.17.8 by Robert Collins
Flush pending updates at the end of _insert_record_stream
        if len(keys_to_add):
            flush()
0.17.11 by Robert Collins
Add extraction of just-compressed texts to support converting from knits.
        self._compressor = None
        self._unadded_refs = {}
0.17.5 by Robert Collins
nograph tests completely passing.

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        if pb is None:
            pb = progress.DummyProgress()
        keys = set(keys)
        total = len(keys)
        # we don't care about inclusions, the caller cares.
        # but we need to setup a list of records to visit.
        # we need key, position, length
        for key_idx, record in enumerate(self.get_record_stream(keys,
            'unordered', True)):
            # XXX: todo - optimise to use less than full texts.
            key = record.key
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
            pb.update('Walking content.', key_idx + 1, total)
0.17.5 by Robert Collins
nograph tests completely passing.
            if record.storage_kind == 'absent':
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
                raise errors.RevisionNotPresent(key, self)
0.17.5 by Robert Collins
nograph tests completely passing.
            lines = split_lines(record.get_bytes_as('fulltext'))
            for line in lines:
                yield line, key
        pb.update('Walking content.', total, total)

    def keys(self):
        """See VersionedFiles.keys."""
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(2, "keys scales with size of history")
        sources = [self._index]
        result = set()
        for source in sources:
            result.update(source.keys())
        return result

0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.

class _GCGraphIndex(object):
    """Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

0.17.9 by Robert Collins
Initial stab at repository format support.
    def __init__(self, graph_index, is_locked, parents=True,
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
        add_callback=None):
        """Construct a _GCGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
        :param is_locked: A callback, returns True if the index is locked and
            thus usable.
0.17.1 by Robert Collins
Starting point. Interface tests hooked up and failing.
        :param parents: If True, record knits parents, if not do not record
            parents.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        """
        self._add_callback = add_callback
        self._graph_index = graph_index
        self._parents = parents
        self.has_graph = parents
        self._is_locked = is_locked

0.17.5 by Robert Collins
nograph tests completely passing.
    def add_records(self, records, random_id=False):
        """Add multiple records to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param records: a list of tuples:
                         (key, value, node_refs).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.

        changed = False
        keys = {}
        for (key, value, refs) in records:
            if not self._parents:
                if refs:
                    for ref in refs:
                        if ref:
                            raise errors.KnitCorrupt(self,
                                "attempt to add node with parents "
                                "in parentless index.")
                    refs = ()
                    changed = True
            keys[key] = (value, refs)
        # check for dups
        if not random_id:
            present_nodes = self._get_entries(keys)
            for (index, key, value, node_refs) in present_nodes:
                if node_refs != keys[key][1]:
                    raise errors.KnitCorrupt(self, "inconsistent details in add_records"
                        ": %s %s" % ((value, node_refs), keys[key]))
                del keys[key]
                changed = True
        if changed:
            result = []
            if self._parents:
                for key, (value, node_refs) in keys.iteritems():
                    result.append((key, value, node_refs))
            else:
                for key, (value, node_refs) in keys.iteritems():
                    result.append((key, value))
            records = result
        self._add_callback(records)

    def _check_read(self):
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
        """Raise an exception if reads are not permitted."""
0.17.5 by Robert Collins
nograph tests completely passing.
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

0.17.2 by Robert Collins
Core proof of concept working.
    def _check_write_ok(self):
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
        """Raise an exception if writes are not permitted."""
0.17.2 by Robert Collins
Core proof of concept working.
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

0.17.5 by Robert Collins
nograph tests completely passing.
    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups

        Note: Callers are responsible for checking that the index is locked
        before calling this method.

0.17.5 by Robert Collins
nograph tests completely passing.
        :param keys: An iterable of index key tuples.
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise errors.RevisionNotPresent(missing_keys.pop(), self)

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        self._check_read()
        nodes = self._get_entries(keys)
        result = {}
        if self._parents:
            for node in nodes:
                result[node[1]] = node[3][0]
        else:
            for node in nodes:
                result[node[1]] = None
        return result

    def get_build_details(self, keys):
        """Get the various build details for keys.

        Ghosts are omitted from the result.

        :param keys: An iterable of keys.
        :return: A dict of key:
            (index_memo, compression_parent, parents, record_details).
            index_memo
                opaque structure to pass to read_records to extract the raw
                data
            compression_parent
                Content that this record is built upon, may be None
            parents
                Logical parents of this node
            record_details
                extra information about the content which needs to be passed to
                Factory.parse_record
        """
        self._check_read()
        result = {}
0.20.29 by Ian Clatworthy
groupcompress.py code cleanups
        entries = self._get_entries(keys)
0.17.5 by Robert Collins
nograph tests completely passing.
        for entry in entries:
            key = entry[1]
            if not self._parents:
                parents = None
            else:
                parents = entry[3][0]
            method = 'group'
            result[key] = (self._node_to_position(entry),
                           None, parents, (method, None))
        return result

    def keys(self):
        """Get all the keys in the collection.

        The keys are not ordered.
        """
        self._check_read()
        return [node[1] for node in self._graph_index.iter_all_entries()]

    def _node_to_position(self, node):
        """Convert an index value to position details."""
        bits = node[2].split(' ')
        # It would be nice not to read the entire gzip.
        start = int(bits[0])
        stop = int(bits[1])
        basis_end = int(bits[2])
        delta_end = int(bits[3])
        return node[0], start, stop, basis_end, delta_end
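# An illustrative note (not part of the original module): the value parsed
# here is the "%d %d %s" string written by _insert_record_stream's flush(),
# i.e. "<group_start> <group_length> <record_start> <record_end>":
#
#   _node_to_position((idx, ('file-id', 'rev-1'), '0 1024 17 211'))
#   # -> (idx, 0, 1024, 17, 211); see _get_group_and_delta_bytes above for
#   # how the two halves of this memo are used.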
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.


0.18.26 by John Arbash Meinel
Start with a copy implementation of the _get_longest_match function.
def _get_longest_match(equivalence_table, pos, max_pos, locations):
0.18.25 by John Arbash Meinel
Factor the get_longest_match into a helper func
    """Get the longest possible match for the current position."""
    range_start = pos
    range_len = 0
    copy_ends = None
0.18.26 by John Arbash Meinel
Start with a copy implementation of the _get_longest_match function.
    while pos < max_pos:
0.18.25 by John Arbash Meinel
Factor the get_longest_match into a helper func
        if locations is None:
            locations = equivalence_table.get_idx_matches(pos)
        if locations is None:
            # No more matches, just return whatever we have, but we know that
            # this last position is not going to match anything
            pos += 1
            break
        else:
            if copy_ends is None:
                # We are starting a new range
                copy_ends = [loc + 1 for loc in locations]
                range_len = 1
                locations = None # Consumed
            else:
                # We are currently in the middle of a match
                next_locations = set(copy_ends).intersection(locations)
                if len(next_locations):
                    # range continues
                    copy_ends = [loc + 1 for loc in next_locations]
                    range_len += 1
                    locations = None # Consumed
                else:
                    # But we are done with this match, we should be
                    # starting a new one, though. We will pass back 'locations'
                    # so that we don't have to do another lookup.
                    break
        pos += 1
    if copy_ends is None:
        return None, pos, locations
    return (min(copy_ends) - range_len, range_start, range_len), pos, locations


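# An illustrative walk-through (not part of the original module), assuming an
# equivalence table where new positions 0 and 1 match old positions [5] and
# [6] respectively and position 2 matches nothing:
#
#   _get_longest_match(table, 0, 3, None)
#   # -> ((5, 0, 2), 3, None)
#   # i.e. copy old[5:7] to cover new[0:2]; the copy start is recovered as
#   # min(copy_ends) - range_len == 7 - 2 == 5.
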
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
try:
0.23.21 by John Arbash Meinel
Rename the extension to _pyx, since Robert prefers that form
    from bzrlib.plugins.groupcompress_rabin import _groupcompress_pyx
0.18.14 by John Arbash Meinel
A bit more work, not really usable yet.
except ImportError:
    pass
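# NB: if this import fails, _groupcompress_pyx is left undefined, and the
# delta paths above (DeltaIndex, make_delta, apply_delta) will raise
# NameError when first used; within this module the extension is effectively
# required.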