/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
5590.1.1 by John Arbash Meinel
Stop using tuned_gzip, it seems to give incorrect results on python 2.7
1
# Copyright (C) 2007-2011 Canonical Ltd
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
16
6379.6.3 by Jelmer Vernooij
Use absolute_import.
17
from __future__ import absolute_import
18
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
19
import errno
20
import os
21
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
22
from .lazy_import import lazy_import
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
23
24
lazy_import(globals(), """
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
25
import gzip
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
26
import itertools
0.9.19 by Aaron Bentley
More tweakage
27
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
28
from breezy import (
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
29
    bencode,
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
30
    errors,
0.9.25 by Aaron Bentley
More messy hacking
31
    patiencediff,
32
    ui,
33
    )
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
34
""")
7143.15.5 by Jelmer Vernooij
More PEP8 fixes.
35
from .i18n import gettext
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
36
from .sixish import (
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
37
    BytesIO,
6651.2.2 by Martin
Apply 2to3 xrange fix and fix up with sixish range
38
    range,
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
39
    )
0.9.3 by Aaron Bentley
Get three-parent comparisions under test
40
0.9.33 by Aaron Bentley
Enable caching commandline param
41
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
42
def topo_iter_keys(vf, keys=None):
    """Iterate over keys of ``vf`` so that parents precede their children.

    :param vf: An object providing ``keys()`` and ``get_parent_map(keys)``.
    :param keys: The keys to order.  Defaults to ``vf.keys()``.
    :return: The generator produced by ``_topo_iter``.
    """
    if keys is None:
        keys = vf.keys()
    # Materialise once: the collection is handed to get_parent_map() and then
    # walked again by _topo_iter(), so a one-shot iterator would already be
    # exhausted by the time the ordering pass looks at it.
    keys = list(keys)
    return _topo_iter(vf.get_parent_map(keys), keys)
47
7143.15.2 by Jelmer Vernooij
Run autopep8.
48
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
49
def topo_iter(vf, versions=None):
    """Iterate over versions of ``vf`` so that parents precede their children.

    :param vf: An object providing ``versions()`` and
        ``get_parent_map(versions)``.
    :param versions: The versions to order.  Defaults to ``vf.versions()``.
    :return: The generator produced by ``_topo_iter``.
    """
    if versions is None:
        versions = vf.versions()
    # Materialise once: versions() may hand back a one-shot iterator, and the
    # collection is consumed both by get_parent_map() and by _topo_iter().
    versions = list(versions)
    return _topo_iter(vf.get_parent_map(versions), versions)
54
7143.15.2 by Jelmer Vernooij
Run autopep8.
55
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
56
def _topo_iter(parents, versions):
57
    seen = set()
58
    descendants = {}
7143.15.2 by Jelmer Vernooij
Run autopep8.
59
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
60
    def pending_parents(version):
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
61
        if parents[version] is None:
62
            return []
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
63
        return [v for v in parents[version] if v in versions and
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
64
                v not in seen]
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
65
    for version_id in versions:
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
66
        if parents[version_id] is None:
67
            # parentless
68
            continue
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
69
        for parent_id in parents[version_id]:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
70
            descendants.setdefault(parent_id, []).append(version_id)
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
71
    cur = [v for v in versions if len(pending_parents(v)) == 0]
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
72
    while len(cur) > 0:
73
        next = []
74
        for version_id in cur:
75
            if version_id in seen:
76
                continue
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
77
            if len(pending_parents(version_id)) != 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
78
                continue
79
            next.extend(descendants.get(version_id, []))
80
            yield version_id
81
            seen.add(version_id)
82
        cur = next
83
84
0.9.1 by Aaron Bentley
Get trivial case passing
85
class MultiParent(object):
    """A multi-parent diff"""

    __slots__ = ['hunks']

    def __init__(self, hunks=None):
        """Create a diff from a list of hunks (NewText / ParentText).

        :param hunks: The hunks making up the diff; a fresh empty list is
            used when omitted.
        """
        if hunks is not None:
            self.hunks = hunks
        else:
            self.hunks = []

    def __repr__(self):
        # Wrap in a 1-tuple so a tuple of hunks is shown as a single value
        # rather than being unpacked as several format arguments.
        return "MultiParent(%r)" % (self.hunks,)

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (self.hunks == other.hunks)

    def __ne__(self, other):
        # Python 2 does not derive != from ==.
        return not self.__eq__(other)

    @staticmethod
    def from_lines(text, parents=(), left_blocks=None):
        """Produce a MultiParent from a list of lines and parents

        :param text: The lines of the new text.
        :param parents: A sequence of parent texts, each a list of lines.
        :param left_blocks: Optional precomputed matching blocks between the
            first parent and ``text``; computed with patiencediff if omitted.
        :return: A MultiParent describing ``text`` in terms of ``parents``.
        """
        def compare(parent):
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
                                                           text)
            return matcher.get_matching_blocks()
        if len(parents) > 0:
            if left_blocks is None:
                left_blocks = compare(parents[0])
            parent_comparisons = [left_blocks] + [compare(p) for p in
                                                  parents[1:]]
        else:
            parent_comparisons = []
        cur_line = 0
        new_text = NewText([])
        block_iter = [iter(i) for i in parent_comparisons]
        diff = MultiParent([])

        def next_block(p):
            """Return the next matching block of parent ``p``, or None."""
            return next(block_iter[p], None)

        cur_block = [next_block(p) for p in range(len(block_iter))]
        while cur_line < len(text):
            best_match = None
            for p, block in enumerate(cur_block):
                if block is None:
                    continue
                i, j, n = block
                # Skip blocks that end at or before the current line.
                while j + n <= cur_line:
                    block = cur_block[p] = next_block(p)
                    if block is None:
                        break
                    i, j, n = block
                if block is None:
                    continue
                if j > cur_line:
                    # This parent has nothing matching the current line.
                    continue
                # Trim the part of the block that lies before cur_line.
                offset = cur_line - j
                i += offset
                j = cur_line
                n -= offset
                if n == 0:
                    continue
                # Prefer the longest match; earlier parents win ties.
                if best_match is None or n > best_match.num_lines:
                    best_match = ParentText(p, i, j, n)
            if best_match is None:
                new_text.lines.append(text[cur_line])
                cur_line += 1
            else:
                if len(new_text.lines) > 0:
                    diff.hunks.append(new_text)
                    new_text = NewText([])
                diff.hunks.append(best_match)
                cur_line += best_match.num_lines
        if len(new_text.lines) > 0:
            diff.hunks.append(new_text)
        return diff

    def get_matching_blocks(self, parent, parent_len):
        """Yield matching blocks between this text and one parent.

        :param parent: Index of the parent of interest.
        :param parent_len: Number of lines in that parent; used only for the
            terminating block.
        :return: A generator of (parent_pos, child_pos, num_lines) tuples,
            ending with the zero-length block
            (parent_len, self.num_lines(), 0).
        """
        for hunk in self.hunks:
            if not isinstance(hunk, ParentText) or hunk.parent != parent:
                continue
            yield (hunk.parent_pos, hunk.child_pos, hunk.num_lines)
        yield parent_len, self.num_lines(), 0

    def to_lines(self, parents=()):
        """Construct a fulltext from this diff and its parents

        :param parents: The parent texts, each as a single bytestring.
        :return: The reconstructed text as a list of lines.
        """
        mpvf = MultiMemoryVersionedFile()
        for num, parent in enumerate(parents):
            mpvf.add_version(BytesIO(parent).readlines(), num, [])
        mpvf.add_diff(self, 'a', list(range(len(parents))))
        return mpvf.get_line_list(['a'])[0]

    @classmethod
    def from_texts(cls, text, parents=()):
        """Produce a MultiParent from a text and list of parent text"""
        return cls.from_lines(BytesIO(text).readlines(),
                              [BytesIO(p).readlines() for p in parents])

    def to_patch(self):
        """Yield text lines for a patch"""
        for hunk in self.hunks:
            for line in hunk.to_patch():
                yield line

    def patch_len(self):
        """Return the length in bytes of the serialised patch."""
        return len(b''.join(self.to_patch()))

    def zipped_patch_len(self):
        """Return the length of ``gzip_string`` applied to the patch."""
        return len(gzip_string(self.to_patch()))

    @classmethod
    def from_patch(cls, text):
        """Create a MultiParent from its string form"""
        return cls._from_patch(BytesIO(text))

    @staticmethod
    def _from_patch(lines):
        """Parse an iterable of patch lines into a MultiParent.

        This is private because it is essential to split lines on LF only.

        :param lines: An iterable of bytestrings, one per patch line.
        :raises AssertionError: if a line is neither an insert header, a
            copy header, nor the blank line that may follow an insert hunk.
        """
        line_iter = iter(lines)
        hunks = []
        cur_line = None
        while True:
            try:
                cur_line = next(line_iter)
            except StopIteration:
                break
            first_char = cur_line[0:1]
            if first_char == b'i':
                num_lines = int(cur_line.split(b' ')[1])
                hunk_lines = [next(line_iter) for _ in range(num_lines)]
                # The serialiser always appends one newline after the hunk's
                # lines; drop it here.  An empty insert ("i 0") has no last
                # line to trim.
                if hunk_lines:
                    hunk_lines[-1] = hunk_lines[-1][:-1]
                hunks.append(NewText(hunk_lines))
            elif first_char == b'\n':
                # A blank line is only meaningful directly after an insert
                # hunk: it means the last inserted line really ended with a
                # newline.
                if not hunks or not isinstance(hunks[-1], NewText):
                    raise AssertionError(first_char)
                # After an empty insert it is just the hunk terminator.
                if hunks[-1].lines:
                    hunks[-1].lines[-1] += b'\n'
            else:
                if not (first_char == b'c'):
                    raise AssertionError(first_char)
                parent, parent_pos, child_pos, num_lines =\
                    [int(v) for v in cur_line.split(b' ')[1:]]
                hunks.append(ParentText(parent, parent_pos, child_pos,
                                        num_lines))
        return MultiParent(hunks)

    def range_iterator(self):
        """Iterate through the hunks, with range indicated

        kind is "new" or "parent".
        for "new", data is a list of lines.
        for "parent", data is (parent, parent_start, parent_end)
        :return: a generator of (start, end, kind, data)
        """
        start = 0
        for hunk in self.hunks:
            if isinstance(hunk, NewText):
                kind = 'new'
                end = start + len(hunk.lines)
                data = hunk.lines
            else:
                kind = 'parent'
                start = hunk.child_pos
                end = start + hunk.num_lines
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
                        hunk.num_lines)
            yield start, end, kind, data
            start = end

    def num_lines(self):
        """The number of lines in the output text"""
        extra_n = 0
        # Walk backwards: the last ParentText records its absolute position
        # in the child, so only the new lines after it need to be counted.
        for hunk in reversed(self.hunks):
            if isinstance(hunk, ParentText):
                return hunk.child_pos + hunk.num_lines + extra_n
            extra_n += len(hunk.lines)
        return extra_n

    def is_snapshot(self):
        """Return true if this diff is effectively a fulltext"""
        if len(self.hunks) != 1:
            return False
        return (isinstance(self.hunks[0], NewText))
269
0.9.1 by Aaron Bentley
Get trivial case passing
270
271
class NewText(object):
    """The contents of text that is introduced by this text"""

    __slots__ = ['lines']

    def __init__(self, lines):
        """Wrap ``lines``, the sequence of bytestring lines being introduced."""
        self.lines = lines

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (other.lines == self.lines)

    def __ne__(self, other):
        # Python 2 does not derive != from ==.
        return not self.__eq__(other)

    def __repr__(self):
        # Wrap in a 1-tuple so that a tuple of lines is formatted as one
        # value instead of being unpacked as the format arguments.
        return 'NewText(%r)' % (self.lines,)

    def to_patch(self):
        """Yield the serialised form: a header, the lines, then a newline.

        The header is ``i <count>``.  The trailing newline is emitted
        unconditionally, whether or not the last line already ends with one.
        """
        yield b'i %d\n' % len(self.lines)
        for line in self.lines:
            yield line
        yield b'\n'
0.9.4 by Aaron Bentley
Start supporting serialization
292
0.9.2 by Aaron Bentley
Get single-parent comparison working
293
294
class ParentText(object):
    """A reference to text present in a parent text"""

    __slots__ = ['parent', 'parent_pos', 'child_pos', 'num_lines']

    def __init__(self, parent, parent_pos, child_pos, num_lines):
        """Describe a run of lines copied from a parent.

        :param parent: Index of the parent the lines come from.
        :param parent_pos: Line offset of the run within the parent.
        :param child_pos: Line offset of the run within the child.
        :param num_lines: Length of the run, in lines.
        """
        self.parent = parent
        self.parent_pos = parent_pos
        self.child_pos = child_pos
        self.num_lines = num_lines

    def _as_dict(self):
        # Keys are bytes because to_patch() feeds this mapping to a bytes
        # format string, which looks its keys up as bytes.
        return {b'parent': self.parent,
                b'parent_pos': self.parent_pos,
                b'child_pos': self.child_pos,
                b'num_lines': self.num_lines}

    def __repr__(self):
        # Format positionally: a str template cannot find the bytes keys of
        # _as_dict() on Python 3 and would raise KeyError.
        return 'ParentText(%r, %r, %r, %r)' % (
            self.parent, self.parent_pos, self.child_pos, self.num_lines)

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return self._as_dict() == other._as_dict()

    def __ne__(self, other):
        # Python 2 does not derive != from ==.
        return not self.__eq__(other)

    def to_patch(self):
        """Yield the single serialised line describing this copy."""
        yield (b'c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'
               % self._as_dict())
0.9.8 by Aaron Bentley
get add_version working
323
324
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
325
class BaseVersionedFile(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
326
    """Pseudo-VersionedFile skeleton for MultiParent"""
0.9.8 by Aaron Bentley
get add_version working
327
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
328
    def __init__(self, snapshot_interval=25, max_snapshots=None):
0.9.8 by Aaron Bentley
get add_version working
329
        self._lines = {}
330
        self._parents = {}
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
331
        self._snapshots = set()
0.9.12 by Aaron Bentley
Make benchmarks for mp
332
        self.snapshot_interval = snapshot_interval
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
333
        self.max_snapshots = max_snapshots
0.9.12 by Aaron Bentley
Make benchmarks for mp
334
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
335
    def versions(self):
336
        return iter(self._parents)
337
2520.4.61 by Aaron Bentley
Do bulk insertion of records
338
    def has_version(self, version):
339
        return version in self._parents
340
0.9.12 by Aaron Bentley
Make benchmarks for mp
341
    def do_snapshot(self, version_id, parent_ids):
4031.3.1 by Frank Aspell
Fixing various typos
342
        """Determine whether to perform a snapshot for this version"""
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
343
        if self.snapshot_interval is None:
344
            return False
345
        if self.max_snapshots is not None and\
7143.15.2 by Jelmer Vernooij
Run autopep8.
346
                len(self._snapshots) == self.max_snapshots:
0.9.14 by Aaron Bentley
Temporarily force snapshots to 44
347
            return False
0.9.12 by Aaron Bentley
Make benchmarks for mp
348
        if len(parent_ids) == 0:
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
349
            return True
6651.2.2 by Martin
Apply 2to3 xrange fix and fix up with sixish range
350
        for ignored in range(self.snapshot_interval):
0.9.12 by Aaron Bentley
Make benchmarks for mp
351
            if len(parent_ids) == 0:
352
                return False
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
353
            version_ids = parent_ids
354
            parent_ids = []
355
            for version_id in version_ids:
356
                if version_id not in self._snapshots:
357
                    parent_ids.extend(self._parents[version_id])
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
358
        else:
359
            return True
0.9.8 by Aaron Bentley
get add_version working
360
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
361
    def add_version(self, lines, version_id, parent_ids,
                    force_snapshot=None, single_parent=False):
        """Add a version to the versionedfile

        :param lines: The list of lines to add.  Must be split on newlines.
        :param version_id: The version_id of the version to add
        :param parent_ids: The ids of the parents of this version.
        :param force_snapshot: If true, force this version to be added as a
            snapshot version.  If false, force this version to be added as a
            diff.  If none, determine this automatically.
        :param single_parent: If true, use a single parent, rather than
            multiple parents.
        """
        as_snapshot = force_snapshot
        if as_snapshot is None:
            as_snapshot = self.do_snapshot(version_id, parent_ids)
        if as_snapshot:
            self._snapshots.add(version_id)
            diff = MultiParent([NewText(lines)])
        else:
            # Only the first parent's text is diffed against when
            # single_parent is set; the full parent list is still recorded.
            basis_ids = parent_ids[:1] if single_parent else parent_ids
            parent_lines = self.get_line_list(basis_ids)
            diff = MultiParent.from_lines(lines, parent_lines)
            # A diff sharing nothing with its parents is a fulltext anyway.
            if diff.is_snapshot():
                self._snapshots.add(version_id)
        self.add_diff(diff, version_id, parent_ids)
        self._lines[version_id] = lines
390
0.9.35 by Aaron Bentley
Add build ranking
391
    def get_parents(self, version_id):
392
        return self._parents[version_id]
393
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
394
    def make_snapshot(self, version_id):
        """Store a fulltext-only diff for ``version_id`` and mark it a snapshot.

        The text comes from ``self.cache_version(version_id)``; the parents
        already recorded for the version are passed on unchanged.
        """
        fulltext = self.cache_version(version_id)
        recorded_parents = self._parents[version_id]
        self.add_diff(MultiParent([NewText(fulltext)]), version_id,
                      recorded_parents)
        self._snapshots.add(version_id)
398
0.9.20 by Aaron Bentley
Convert to a plugin
399
    def import_versionedfile(self, vf, snapshots, no_cache=True,
0.9.22 by Aaron Bentley
Fix restoration bug
400
                             single_parent=False, verify=False):
0.9.20 by Aaron Bentley
Convert to a plugin
401
        """Import all revisions of a versionedfile
402
403
        :param vf: The versionedfile to import
404
        :param snapshots: If provided, the revisions to make snapshots of.
405
            Otherwise, this will be auto-determined
406
        :param no_cache: If true, clear the cache after every add.
407
        :param single_parent: If true, omit all but one parent text, (but
408
            retain parent metadata).
409
        """
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
410
        if not (no_cache or not verify):
411
            raise ValueError()
0.9.19 by Aaron Bentley
More tweakage
412
        revisions = set(vf.versions())
413
        total = len(revisions)
6861.4.1 by Jelmer Vernooij
Make progress bars context managers.
414
        with ui.ui_factory.nested_progress_bar() as pb:
0.9.20 by Aaron Bentley
Convert to a plugin
415
            while len(revisions) > 0:
416
                added = set()
417
                for revision in revisions:
418
                    parents = vf.get_parents(revision)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
419
                    if [p for p in parents if p not in self._parents] != []:
0.9.20 by Aaron Bentley
Convert to a plugin
420
                        continue
6973.7.9 by Jelmer Vernooij
Port multiparent.
421
                    lines = [a + b' ' + l for a, l in
3316.2.13 by Robert Collins
* ``VersionedFile.annotate_iter`` is deprecated. While in principal this
422
                             vf.annotate(revision)]
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
423
                    if snapshots is None:
0.9.20 by Aaron Bentley
Convert to a plugin
424
                        force_snapshot = None
425
                    else:
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
426
                        force_snapshot = (revision in snapshots)
0.9.20 by Aaron Bentley
Convert to a plugin
427
                    self.add_version(lines, revision, parents, force_snapshot,
428
                                     single_parent)
429
                    added.add(revision)
430
                    if no_cache:
431
                        self.clear_cache()
0.9.25 by Aaron Bentley
More messy hacking
432
                        vf.clear_cache()
0.9.22 by Aaron Bentley
Fix restoration bug
433
                        if verify:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
434
                            if not (lines == self.get_line_list([revision])[0]):
435
                                raise AssertionError()
0.9.22 by Aaron Bentley
Fix restoration bug
436
                            self.clear_cache()
6138.4.1 by Jonathan Riddell
add gettext to progress bar strings
437
                    pb.update(gettext('Importing revisions'),
0.9.20 by Aaron Bentley
Convert to a plugin
438
                              (total - len(revisions)) + len(added), total)
439
                revisions = [r for r in revisions if r not in added]
0.9.19 by Aaron Bentley
More tweakage
440
0.9.23 by Aaron Bentley
handle snapshots all at once
441
    def select_snapshots(self, vf):
        """Determine which versions to add as snapshots

        :param vf: The versionedfile to examine; it is walked with
            ``topo_iter`` and queried with ``get_parents``.
        :return: The set of version ids chosen as snapshots: every
            parentless version, plus every version whose accumulated build
            ancestors outnumber ``self.snapshot_interval``.
        """
        build_ancestors = {}
        snapshots = set()
        for version_id in topo_iter(vf):
            # Ask once per version; the answer is used several times below.
            parents = vf.get_parents(version_id)
            if len(parents) == 0:
                snapshots.add(version_id)
                build_ancestors[version_id] = set()
                continue
            potential_build_ancestors = set(parents)
            for parent in parents:
                potential_build_ancestors.update(build_ancestors[parent])
            if len(potential_build_ancestors) > self.snapshot_interval:
                # Too much would have to be built first: snapshot here, which
                # also resets the ancestry seen by descendants.
                snapshots.add(version_id)
                build_ancestors[version_id] = set()
            else:
                build_ancestors[version_id] = potential_build_ancestors
        return snapshots
460
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
461
    def select_by_size(self, num):
        """Select snapshots for minimum output size

        :param num: The total number of snapshots wanted, counting the ones
            already recorded in self._snapshots.
        :return: A list of version ids taken from the tail of
            get_size_ranking(); empty when the existing snapshots already
            meet or exceed num.
        """
        num -= len(self._snapshots)
        if num <= 0:
            # ranking[-0:] is the whole ranking and a negative num would
            # slice from the front, so "nothing left to pick" is handled
            # explicitly instead of being left to the slice.
            return []
        new_snapshots = self.get_size_ranking()[-num:]
        return [v for n, v in new_snapshots]
0.9.35 by Aaron Bentley
Add build ranking
466
467
    def get_size_ranking(self):
        """Rank the non-snapshot versions by snapshot-vs-diff patch length

        :return: A list of (snapshot_len - diff_len, version_id) tuples in
            ascending order. Versions already in self._snapshots are left
            out.
        """
        ranking = []
        for version_id in self.versions():
            if version_id not in self._snapshots:
                as_diff = self.get_diff(version_id).patch_len()
                # The full text, expressed as a single NewText hunk.
                fulltext = MultiParent(
                    [NewText(self.cache_version(version_id))])
                as_snapshot = fulltext.patch_len()
                ranking.append((as_snapshot - as_diff, version_id))
        return sorted(ranking)
479
480
    def import_diffs(self, vf):
        """Copy every diff held by another pseudo-versionedfile into this one

        :param vf: The source object. Its versions(), get_diff() and
            _parents mapping are read; each version is passed to
            self.add_diff().
        """
        for version_id in vf.versions():
            diff = vf.get_diff(version_id)
            parent_ids = vf._parents[version_id]
            self.add_diff(diff, version_id, parent_ids)
0.9.23 by Aaron Bentley
handle snapshots all at once
485
0.9.35 by Aaron Bentley
Add build ranking
486
    def get_build_ranking(self):
        """Return revisions sorted by how much they reduce build complexity

        could_avoid maps each version to a set of other versions: empty for
        a snapshot, otherwise its parents plus everything in the parents'
        own could_avoid sets. referenced_by is the inverse mapping. Versions
        are then picked one at a time, highest
        len(could_avoid) * len(referenced_by) first, and the picked
        version's contribution is subtracted from the remaining sets.

        :return: A list of every version id, in the order picked.
        """
        could_avoid = {}
        referenced_by = {}
        for version_id in topo_iter(self):
            avoidable = set()
            if version_id not in self._snapshots:
                parent_ids = self._parents[version_id]
                for parent_id in parent_ids:
                    avoidable.update(could_avoid[parent_id])
                # Only this version's own parents belong here; updating from
                # the whole self._parents mapping would add every known
                # version id and make the loop above meaningless.
                avoidable.update(parent_ids)
                avoidable.discard(version_id)
            could_avoid[version_id] = avoidable
            for avoid_id in avoidable:
                referenced_by.setdefault(avoid_id, set()).add(version_id)
        available_versions = list(self.versions())
        ranking = []
        while available_versions:
            available_versions.sort(
                key=lambda x: (len(could_avoid[x]) *
                               len(referenced_by.get(x, ()))))
            selected = available_versions.pop()
            ranking.append(selected)
            # A version that nothing refers to has no referenced_by entry,
            # so a plain lookup would raise KeyError.
            dependents = referenced_by.get(selected, ())
            for version_id in dependents:
                could_avoid[version_id].difference_update(
                    could_avoid[selected])
            for version_id in could_avoid[selected]:
                referenced_by[version_id].difference_update(dependents)
        return ranking
515
0.9.8 by Aaron Bentley
get add_version working
516
    def clear_cache(self):
        """Forget every cached line list by emptying self._lines in place"""
        cached_lines = self._lines
        cached_lines.clear()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
518
519
    def get_line_list(self, version_ids):
        """Return the line lists for version_ids, in the order given

        :param version_ids: An iterable of version ids; each one is resolved
            through cache_version().
        :return: A list holding one entry per requested version.
        """
        line_lists = []
        for version_id in version_ids:
            line_lists.append(self.cache_version(version_id))
        return line_lists
521
522
    def cache_version(self, version_id):
        """Return the lines of version_id, reconstructing them on a cache miss

        :param version_id: The version whose text is wanted.
        :return: The list of lines for version_id. The same list object is
            stored in self._lines and handed back on later calls.
        """
        try:
            return self._lines[version_id]
        except KeyError:
            pass
        # No get_diff() call here: reconstruct_version() fetches the diff for
        # version_id itself, so fetching it first only repeated the work.
        lines = []
        reconstructor = _Reconstructor(self, self._lines, self._parents)
        reconstructor.reconstruct_version(lines, version_id)
        self._lines[version_id] = lines
        return lines
533
0.9.33 by Aaron Bentley
Enable caching commandline param
534
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
535
class MultiMemoryVersionedFile(BaseVersionedFile):
    """Pseudo-versionedfile that keeps all of its diffs in a dict"""

    def __init__(self, snapshot_interval=25, max_snapshots=None):
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
        # version id -> diff, filled in by add_diff()
        self._diffs = dict()

    def add_diff(self, diff, version_id, parent_ids):
        """Record diff and parent_ids under version_id"""
        self._diffs[version_id] = diff
        self._parents[version_id] = parent_ids

    def get_diff(self, version_id):
        """Return the diff stored for version_id

        :raises errors.RevisionNotPresent: if no diff was added for
            version_id.
        """
        try:
            found = self._diffs[version_id]
        except KeyError:
            raise errors.RevisionNotPresent(version_id, self)
        return found

    def destroy(self):
        """Drop all stored diffs by rebinding self._diffs to an empty dict"""
        self._diffs = dict()
554
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
555
556
class MultiVersionedFile(BaseVersionedFile):
    """Disk-backed pseudo-versionedfile

    Each diff is appended to ``<filename>.mpknit`` as its own gzip stream.
    The parent map, snapshot list and per-version offsets live in memory
    and are written to / read from ``<filename>.mpidx`` by save() / load().
    """

    def __init__(self, filename, snapshot_interval=25, max_snapshots=None):
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
        self._filename = filename
        # version id -> (start offset, byte count) inside the .mpknit file
        self._diff_offset = {}

    def get_diff(self, version_id):
        """Read the diff for version_id back from the .mpknit file

        :raises KeyError: if no offset is recorded for version_id.
        """
        start, count = self._diff_offset[version_id]
        with open(self._filename + '.mpknit', 'rb') as infile:
            infile.seek(start)
            sio = BytesIO(infile.read(count))
        with gzip.GzipFile(None, mode='rb', fileobj=sio) as zip_file:
            # Skip the leading "version <id>" line written by add_diff().
            zip_file.readline()
            content = zip_file.read()
            return MultiParent.from_patch(content)

    def add_diff(self, diff, version_id, parent_ids):
        """Append diff to the .mpknit file and record its offset and parents"""
        with open(self._filename + '.mpknit', 'ab') as outfile:
            # workaround for windows bug: .tell() for files opened in 'ab'
            # mode before any write returns 0
            outfile.seek(0, 2)
            start = outfile.tell()
            with gzip.GzipFile(None, mode='ab', fileobj=outfile) as zipfile:
                zipfile.writelines(itertools.chain(
                    [b'version %s\n' % version_id], diff.to_patch()))
            end = outfile.tell()
        self._diff_offset[version_id] = (start, end - start)
        self._parents[version_id] = parent_ids

    def destroy(self):
        """Delete the .mpknit and .mpidx files; missing files are ignored"""
        for suffix in ('.mpknit', '.mpidx'):
            try:
                os.unlink(self._filename + suffix)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise

    def save(self):
        """Write parents, snapshots and diff offsets to the .mpidx file"""
        # Encode before opening: 'wb' truncates, so a failure while encoding
        # must not leave an empty index behind.
        index = bencode.bencode(
            (self._parents, list(self._snapshots), self._diff_offset))
        with open(self._filename + '.mpidx', 'wb') as outfile:
            outfile.write(index)

    def load(self):
        """Restore parents, snapshots and diff offsets from the .mpidx file"""
        with open(self._filename + '.mpidx', 'rb') as infile:
            index = infile.read()
        self._parents, snapshots, self._diff_offset = bencode.bdecode(index)
        self._snapshots = set(snapshots)
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
607
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
608
609
class _Reconstructor(object):
    """Rebuild texts from diffs, the ancestry graph and cached lines"""

    def __init__(self, diffs, lines, parents):
        self.diffs = diffs
        self.lines = lines
        self.parents = parents
        # version id -> (start, end, kind, data, iterator): the hunk most
        # recently used for that version and the iterator positioned after it
        self.cursor = {}

    def _first_hunk(self, version_id):
        """Open a fresh range iterator for version_id and read its first hunk

        :return: (start, end, kind, data, iterator)
        """
        hunks = self.diffs.get_diff(version_id).range_iterator()
        start, end, kind, data = next(hunks)
        return start, end, kind, data, hunks

    def reconstruct(self, lines, parent_text, version_id):
        """Append the lines referred to by a ParentText to lines"""
        parent_id = self.parents[version_id][parent_text.parent]
        first = parent_text.parent_pos
        return self._reconstruct(lines, parent_id, first,
                                 first + parent_text.num_lines)

    def _reconstruct(self, lines, req_version_id, req_start, req_end):
        """Append lines for the requested version_id range"""
        if req_start == req_end:
            return
        # stack of pending (version id, start, end) range requests
        todo = [(req_version_id, req_start, req_end)]
        while todo:
            wanted_id, lo, hi = todo.pop()
            # a fully cached version answers the request directly
            if wanted_id in self.lines:
                lines.extend(self.lines[wanted_id][lo:hi])
                continue
            # cursors are allocated lazily, one per version
            if wanted_id in self.cursor:
                start, end, kind, data, hunks = self.cursor[wanted_id]
            else:
                start, end, kind, data, hunks = self._first_hunk(wanted_id)
            # the cursor is already past the request: start over
            if start > lo:
                start, end, kind, data, hunks = self._first_hunk(wanted_id)
            # advance to the first hunk relevant to the request
            while end <= lo:
                start, end, kind, data = next(hunks)
            self.cursor[wanted_id] = (start, end, kind, data, hunks)
            # if the hunk can't satisfy the whole request, split it in two,
            # and leave the second half for later.
            if hi > end:
                todo.append((wanted_id, end, hi))
                hi = end
            if kind == 'new':
                lines.extend(data[lo - start:hi - start])
            else:
                # a parent hunk becomes a range request against that parent,
                # and is handled next
                parent_index, parent_start, parent_end = data
                todo.append((self.parents[wanted_id][parent_index],
                             parent_start + lo - start,
                             parent_end + hi - end))

    def reconstruct_version(self, lines, version_id):
        """Append the complete text of version_id to lines"""
        total = self.diffs.get_diff(version_id).num_lines()
        return self._reconstruct(lines, version_id, 0, total)
0.9.25 by Aaron Bentley
More messy hacking
669
2520.4.6 by Aaron Bentley
Get installation started
670
0.9.25 by Aaron Bentley
More messy hacking
671
def gzip_string(lines):
    """Return the gzip-compressed concatenation of lines

    :param lines: An iterable of bytestrings.
    :return: The compressed data as a single bytestring.
    """
    buf = BytesIO()
    compressor = gzip.GzipFile(None, mode='wb', fileobj=buf)
    try:
        compressor.writelines(lines)
    finally:
        # closing writes the gzip trailer into buf
        compressor.close()
    return buf.getvalue()