/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
5590.1.1 by John Arbash Meinel
Stop using tuned_gzip, it seems to give incorrect results on python 2.7
1
# Copyright (C) 2007-2011 Canonical Ltd
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
16
6379.6.3 by Jelmer Vernooij
Use absolute_import.
17
from __future__ import absolute_import
18
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
19
import errno
20
import os
21
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
22
from .lazy_import import lazy_import
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
23
24
lazy_import(globals(), """
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
25
import gzip
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
26
import itertools
7290.14.1 by Jelmer Vernooij
Use external patiencediff.
27
import patiencediff
0.9.19 by Aaron Bentley
More tweakage
28
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
29
from breezy import (
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
30
    bencode,
0.9.25 by Aaron Bentley
More messy hacking
31
    ui,
32
    )
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
33
""")
7413.8.11 by Jelmer Vernooij
Don't lazy-import errors.
34
from . import (
35
    errors,
36
    )
7143.15.5 by Jelmer Vernooij
More PEP8 fixes.
37
from .i18n import gettext
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
38
from .sixish import (
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
39
    BytesIO,
6651.2.2 by Martin
Apply 2to3 xrange fix and fix up with sixish range
40
    range,
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
41
    )
0.9.3 by Aaron Bentley
Get three-parent comparisions under test
42
0.9.33 by Aaron Bentley
Enable caching commandline param
43
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
44
def topo_iter_keys(vf, keys=None):
    """Iterate over keys of ``vf`` so that parents come before children.

    :param vf: An object providing ``keys()`` and ``get_parent_map()``.
    :param keys: The keys to order.  When None, ``vf.keys()`` is used.
    :return: The iterator produced by ``_topo_iter``.
    """
    wanted = vf.keys() if keys is None else keys
    parent_map = vf.get_parent_map(wanted)
    return _topo_iter(parent_map, wanted)
49
7143.15.2 by Jelmer Vernooij
Run autopep8.
50
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
51
def topo_iter(vf, versions=None):
    """Iterate over versions of ``vf`` so that parents come before children.

    :param vf: An object providing ``versions()`` and ``get_parent_map()``.
    :param versions: The versions to order.  When None, ``vf.versions()``
        is used.
    :return: The iterator produced by ``_topo_iter``.
    """
    wanted = vf.versions() if versions is None else versions
    parent_map = vf.get_parent_map(wanted)
    return _topo_iter(parent_map, wanted)
56
7143.15.2 by Jelmer Vernooij
Run autopep8.
57
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
58
def _topo_iter(parents, versions):
    """Yield ``versions`` in topological order (parents before children).

    :param parents: A mapping of version -> sequence of parent versions.
        A value of None is treated as "no parents".
    :param versions: An iterable of the versions to emit.  Parents that are
        not themselves in ``versions`` are ignored when deciding whether a
        version is ready to be emitted.
    :return: A generator of versions.
    """
    # ``versions`` is walked more than once and used for membership tests.
    # Materialise it so one-shot iterators are not exhausted after the first
    # pass, and keep a set so each membership test is O(1).
    versions = list(versions)
    version_set = set(versions)
    seen = set()
    descendants = {}

    def pending_parents(version):
        """Return the requested parents of version not yet emitted."""
        if parents[version] is None:
            return []
        return [v for v in parents[version]
                if v in version_set and v not in seen]

    for version_id in versions:
        if parents[version_id] is None:
            # parentless
            continue
        for parent_id in parents[version_id]:
            descendants.setdefault(parent_id, []).append(version_id)
    cur = [v for v in versions if len(pending_parents(v)) == 0]
    while len(cur) > 0:
        upcoming = []
        for version_id in cur:
            if version_id in seen:
                continue
            if len(pending_parents(version_id)) != 0:
                # Will be revisited once its last pending parent is emitted.
                continue
            upcoming.extend(descendants.get(version_id, []))
            yield version_id
            seen.add(version_id)
        cur = upcoming
85
86
0.9.1 by Aaron Bentley
Get trivial case passing
87
class MultiParent(object):
    """A multi-parent diff

    A diff is a list of hunks.  Each hunk is either a NewText (lines
    introduced by this text) or a ParentText (a reference to a run of lines
    in one of the parents).
    """

    __slots__ = ['hunks']

    def __init__(self, hunks=None):
        """Create a diff from a list of hunks (an empty list if None)."""
        if hunks is not None:
            self.hunks = hunks
        else:
            self.hunks = []

    def __repr__(self):
        # Wrap in a 1-tuple so a tuple of hunks is not unpacked by '%'.
        return "MultiParent(%r)" % (self.hunks,)

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (self.hunks == other.hunks)

    @staticmethod
    def from_lines(text, parents=(), left_blocks=None):
        """Produce a MultiParent from a list of lines and parents

        :param text: The list of lines of the new text.
        :param parents: A sequence of parent texts, each a list of lines.
        :param left_blocks: Optional precomputed matching blocks between the
            first parent and ``text``; computed with patiencediff when None.
        :return: A MultiParent describing ``text`` in terms of ``parents``.
        """
        def compare(parent):
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
                                                           text)
            return matcher.get_matching_blocks()
        if len(parents) > 0:
            if left_blocks is None:
                left_blocks = compare(parents[0])
            parent_comparisons = [left_blocks] + [compare(p) for p in
                                                  parents[1:]]
        else:
            parent_comparisons = []
        cur_line = 0
        new_text = NewText([])
        block_iter = [iter(i) for i in parent_comparisons]
        diff = MultiParent([])

        def next_block(p):
            """Return the next matching block for parent p, or None."""
            try:
                return next(block_iter[p])
            except StopIteration:
                return None
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
        while cur_line < len(text):
            best_match = None
            for p, block in enumerate(cur_block):
                if block is None:
                    continue
                i, j, n = block
                # Skip blocks that end at or before the current line.
                while j + n <= cur_line:
                    block = cur_block[p] = next_block(p)
                    if block is None:
                        break
                    i, j, n = block
                if block is None:
                    continue
                if j > cur_line:
                    # This parent's next match starts later in the text.
                    continue
                # Trim the part of the block that precedes cur_line.
                offset = cur_line - j
                i += offset
                j = cur_line
                n -= offset
                if n == 0:
                    continue
                # Prefer the longest match; ties keep the earliest parent.
                if best_match is None or n > best_match.num_lines:
                    best_match = ParentText(p, i, j, n)
            if best_match is None:
                new_text.lines.append(text[cur_line])
                cur_line += 1
            else:
                if len(new_text.lines) > 0:
                    diff.hunks.append(new_text)
                    new_text = NewText([])
                diff.hunks.append(best_match)
                cur_line += best_match.num_lines
        if len(new_text.lines) > 0:
            diff.hunks.append(new_text)
        return diff

    def get_matching_blocks(self, parent, parent_len):
        """Yield (parent_pos, child_pos, num_lines) for one parent.

        Only ParentText hunks that refer to ``parent`` are reported.  A
        final zero-length block ``(parent_len, self.num_lines(), 0)`` is
        always yielded.
        """
        for hunk in self.hunks:
            if not isinstance(hunk, ParentText) or hunk.parent != parent:
                continue
            yield (hunk.parent_pos, hunk.child_pos, hunk.num_lines)
        yield parent_len, self.num_lines(), 0

    def to_lines(self, parents=()):
        """Construct a fulltext from this diff and its parents"""
        mpvf = MultiMemoryVersionedFile()
        for num, parent in enumerate(parents):
            mpvf.add_version(BytesIO(parent).readlines(), num, [])
        mpvf.add_diff(self, 'a', list(range(len(parents))))
        return mpvf.get_line_list(['a'])[0]

    @classmethod
    def from_texts(cls, text, parents=()):
        """Produce a MultiParent from a text and list of parent text"""
        return cls.from_lines(BytesIO(text).readlines(),
                              [BytesIO(p).readlines() for p in parents])

    def to_patch(self):
        """Yield text lines for a patch"""
        for hunk in self.hunks:
            for line in hunk.to_patch():
                yield line

    def patch_len(self):
        """Return the length in bytes of the serialised patch."""
        return len(b''.join(self.to_patch()))

    def zipped_patch_len(self):
        """Return the length of the patch after gzip_string()."""
        return len(gzip_string(self.to_patch()))

    @classmethod
    def from_patch(cls, text):
        """Create a MultiParent from its string form"""
        return cls._from_patch(BytesIO(text))

    @staticmethod
    def _from_patch(lines):
        """Parse an iterable of patch lines into a MultiParent.

        This is private because it is essential to split lines on newline
        characters only.

        :raises AssertionError: if a line starts with an unknown marker.
        """
        line_iter = iter(lines)
        hunks = []
        for cur_line in line_iter:
            first_char = cur_line[0:1]
            if first_char == b'i':
                num_lines = int(cur_line.split(b' ')[1])
                hunk_lines = [next(line_iter) for _ in range(num_lines)]
                # The hunk terminator is a single newline appended after the
                # last line; strip it here.  An empty insert ("i 0") has no
                # last line to strip.
                if hunk_lines:
                    hunk_lines[-1] = hunk_lines[-1][:-1]
                hunks.append(NewText(hunk_lines))
            elif first_char == b'\n':
                # The last inserted line really ended with a newline, so the
                # terminator landed on a line of its own.  For an empty
                # insert there is nothing to restore.
                last_lines = hunks[-1].lines
                if last_lines:
                    last_lines[-1] += b'\n'
            else:
                if not (first_char == b'c'):
                    raise AssertionError(first_char)
                parent, parent_pos, child_pos, num_lines =\
                    [int(v) for v in cur_line.split(b' ')[1:]]
                hunks.append(ParentText(parent, parent_pos, child_pos,
                                        num_lines))
        return MultiParent(hunks)

    def range_iterator(self):
        """Iterate through the hunks, with range indicated

        kind is "new" or "parent".
        for "new", data is a list of lines.
        for "parent", data is (parent, parent_start, parent_end)
        :return: a generator of (start, end, kind, data)
        """
        start = 0
        for hunk in self.hunks:
            if isinstance(hunk, NewText):
                kind = 'new'
                end = start + len(hunk.lines)
                data = hunk.lines
            else:
                kind = 'parent'
                start = hunk.child_pos
                end = start + hunk.num_lines
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
                        hunk.num_lines)
            yield start, end, kind, data
            start = end

    def num_lines(self):
        """The number of lines in the output text"""
        extra_n = 0
        # Walk backwards: the last ParentText records its absolute position
        # in the child, so only the NewText lines after it must be counted.
        for hunk in reversed(self.hunks):
            if isinstance(hunk, ParentText):
                return hunk.child_pos + hunk.num_lines + extra_n
            extra_n += len(hunk.lines)
        return extra_n

    def is_snapshot(self):
        """Return true if this diff is effectively a fulltext"""
        if len(self.hunks) != 1:
            return False
        return (isinstance(self.hunks[0], NewText))
271
0.9.1 by Aaron Bentley
Get trivial case passing
272
273
class NewText(object):
    """Lines of text that are introduced by this text.

    ``lines`` is the list of lines making up the hunk.
    """

    __slots__ = ['lines']

    def __init__(self, lines):
        self.lines = lines

    def __eq__(self, other):
        # Only instances of exactly the same class can compare equal.
        return (self.__class__ is other.__class__ and
                other.lines == self.lines)

    def __repr__(self):
        return 'NewText(%r)' % self.lines

    def to_patch(self):
        """Yield the serialised form: a header, the lines, a terminator."""
        header = b'i %d\n' % len(self.lines)
        yield header
        for text_line in self.lines:
            yield text_line
        yield b'\n'
0.9.4 by Aaron Bentley
Start supporting serialization
294
0.9.2 by Aaron Bentley
Get single-parent comparison working
295
296
class ParentText(object):
    """A reference to text present in a parent text

    :ivar parent: Index of the parent being referenced.
    :ivar parent_pos: First referenced line in the parent.
    :ivar child_pos: Line in the child where the referenced run starts.
    :ivar num_lines: Number of lines referenced.
    """

    __slots__ = ['parent', 'parent_pos', 'child_pos', 'num_lines']

    def __init__(self, parent, parent_pos, child_pos, num_lines):
        self.parent = parent
        self.parent_pos = parent_pos
        self.child_pos = child_pos
        self.num_lines = num_lines

    def _as_dict(self):
        # Keys are bytes because to_patch() interpolates them into a bytes
        # template, which requires bytes mapping keys on Python 3.
        return {b'parent': self.parent,
                b'parent_pos': self.parent_pos,
                b'child_pos': self.child_pos,
                b'num_lines': self.num_lines}

    def __repr__(self):
        # Format positionally: a str template cannot look up the bytes keys
        # of _as_dict() on Python 3 (it raised KeyError).
        return 'ParentText(%r, %r, %r, %r)' % (
            self.parent, self.parent_pos, self.child_pos, self.num_lines)

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return self._as_dict() == other._as_dict()

    def to_patch(self):
        """Yield the single serialised line describing this reference."""
        yield (b'c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'
               % self._as_dict())
0.9.8 by Aaron Bentley
get add_version working
325
326
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
327
class BaseVersionedFile(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
328
    """Pseudo-VersionedFile skeleton for MultiParent"""
0.9.8 by Aaron Bentley
get add_version working
329
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
330
    def __init__(self, snapshot_interval=25, max_snapshots=None):
0.9.8 by Aaron Bentley
get add_version working
331
        self._lines = {}
332
        self._parents = {}
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
333
        self._snapshots = set()
0.9.12 by Aaron Bentley
Make benchmarks for mp
334
        self.snapshot_interval = snapshot_interval
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
335
        self.max_snapshots = max_snapshots
0.9.12 by Aaron Bentley
Make benchmarks for mp
336
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
337
    def versions(self):
338
        return iter(self._parents)
339
2520.4.61 by Aaron Bentley
Do bulk insertion of records
340
    def has_version(self, version):
341
        return version in self._parents
342
0.9.12 by Aaron Bentley
Make benchmarks for mp
343
    def do_snapshot(self, version_id, parent_ids):
4031.3.1 by Frank Aspell
Fixing various typos
344
        """Determine whether to perform a snapshot for this version"""
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
345
        if self.snapshot_interval is None:
346
            return False
347
        if self.max_snapshots is not None and\
7143.15.2 by Jelmer Vernooij
Run autopep8.
348
                len(self._snapshots) == self.max_snapshots:
0.9.14 by Aaron Bentley
Temporarily force snapshots to 44
349
            return False
0.9.12 by Aaron Bentley
Make benchmarks for mp
350
        if len(parent_ids) == 0:
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
351
            return True
6651.2.2 by Martin
Apply 2to3 xrange fix and fix up with sixish range
352
        for ignored in range(self.snapshot_interval):
0.9.12 by Aaron Bentley
Make benchmarks for mp
353
            if len(parent_ids) == 0:
354
                return False
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
355
            version_ids = parent_ids
356
            parent_ids = []
357
            for version_id in version_ids:
358
                if version_id not in self._snapshots:
359
                    parent_ids.extend(self._parents[version_id])
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
360
        else:
361
            return True
0.9.8 by Aaron Bentley
get add_version working
362
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
363
    def add_version(self, lines, version_id, parent_ids,
                    force_snapshot=None, single_parent=False):
        """Add a version to the versionedfile

        :param lines: The list of lines to add.  Must be split on newlines.
        :param version_id: The version_id of the version to add
        :param parent_ids: The version ids of the parents of this version.
        :param force_snapshot: If true, force this version to be added as a
            snapshot version.  If false, force this version to be added as a
            diff.  If none, determine this automatically.
        :param single_parent: If true, use a single parent, rather than
            multiple parents.
        """
        if force_snapshot is None:
            snapshot_wanted = self.do_snapshot(version_id, parent_ids)
        else:
            snapshot_wanted = force_snapshot
        if snapshot_wanted:
            self._snapshots.add(version_id)
            diff = MultiParent([NewText(lines)])
        else:
            # Only the texts used for diffing are restricted; the full
            # parent list is still recorded by add_diff below.
            basis_ids = parent_ids[:1] if single_parent else parent_ids
            parent_lines = self.get_line_list(basis_ids)
            diff = MultiParent.from_lines(lines, parent_lines)
            if diff.is_snapshot():
                # Nothing matched any parent, so this is a fulltext anyway.
                self._snapshots.add(version_id)
        self.add_diff(diff, version_id, parent_ids)
        self._lines[version_id] = lines
392
0.9.35 by Aaron Bentley
Add build ranking
393
    def get_parents(self, version_id):
394
        return self._parents[version_id]
395
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
396
    def make_snapshot(self, version_id):
        """Re-store ``version_id`` as a single NewText hunk.

        The text comes from ``self.cache_version(version_id)``; the parents
        already recorded for the version are passed to add_diff unchanged.
        """
        fulltext = self.cache_version(version_id)
        snapdiff = MultiParent([NewText(fulltext)])
        recorded_parents = self._parents[version_id]
        self.add_diff(snapdiff, version_id, recorded_parents)
        self._snapshots.add(version_id)
400
0.9.20 by Aaron Bentley
Convert to a plugin
401
    def import_versionedfile(self, vf, snapshots, no_cache=True,
                             single_parent=False, verify=False):
        """Import all revisions of a versionedfile

        :param vf: The versionedfile to import
        :param snapshots: If provided, the revisions to make snapshots of.
            Otherwise, this will be auto-determined
        :param no_cache: If true, clear the cache after every add.
        :param single_parent: If true, omit all but one parent text, (but
            retain parent metadata).
        :param verify: If true, check each revision can be reconstructed
            right after it is added.  Requires ``no_cache``.
        :raises ValueError: if ``verify`` is set without ``no_cache``, or if
            some revisions can never be imported because a parent is
            neither already present nor importable.
        :raises AssertionError: if verification of a revision fails.
        """
        if verify and not no_cache:
            raise ValueError('verify requires no_cache')
        revisions = set(vf.versions())
        total = len(revisions)
        with ui.ui_factory.nested_progress_bar() as pb:
            while len(revisions) > 0:
                added = set()
                for revision in revisions:
                    parents = vf.get_parents(revision)
                    # Defer revisions whose parents are not all present yet.
                    if any(p not in self._parents for p in parents):
                        continue
                    lines = [a + b' ' + l for a, l in
                             vf.annotate(revision)]
                    if snapshots is None:
                        force_snapshot = None
                    else:
                        force_snapshot = (revision in snapshots)
                    self.add_version(lines, revision, parents, force_snapshot,
                                     single_parent)
                    added.add(revision)
                    if no_cache:
                        self.clear_cache()
                        vf.clear_cache()
                        if verify:
                            if not (lines == self.get_line_list([revision])[0]):
                                raise AssertionError()
                            self.clear_cache()
                    pb.update(gettext('Importing revisions'),
                              (total - len(revisions)) + len(added), total)
                if not added:
                    # A full pass made no progress, so every remaining
                    # revision has a parent that will never appear.
                    # Without this check the loop would spin forever.
                    raise ValueError(
                        'cannot import revisions with missing parents: %r'
                        % (sorted(revisions),))
                revisions = [r for r in revisions if r not in added]
0.9.19 by Aaron Bentley
More tweakage
442
0.9.23 by Aaron Bentley
handle snapshots all at once
443
    def select_snapshots(self, vf):
        """Determine which versions to add as snapshots

        Versions are visited parents-first.  A version becomes a snapshot
        when it has no parents, or when the set of versions needed to build
        it would exceed ``self.snapshot_interval``.

        :param vf: An object accepted by topo_iter that also provides
            ``get_parents(version_id)``.
        :return: The set of version ids selected as snapshots.
        """
        build_ancestors = {}
        snapshots = set()
        for version_id in topo_iter(vf):
            # Look the parents up once; they are used several times below.
            parents = vf.get_parents(version_id)
            if len(parents) == 0:
                snapshots.add(version_id)
                build_ancestors[version_id] = set()
                continue
            potential_build_ancestors = set(parents)
            for parent in parents:
                potential_build_ancestors.update(build_ancestors[parent])
            if len(potential_build_ancestors) > self.snapshot_interval:
                snapshots.add(version_id)
                # A snapshot needs nothing else to be built.
                build_ancestors[version_id] = set()
            else:
                build_ancestors[version_id] = potential_build_ancestors
        return snapshots
462
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
463
    def select_by_size(self, num):
0.9.35 by Aaron Bentley
Add build ranking
464
        """Select snapshots for minimum output size"""
465
        num -= len(self._snapshots)
0.9.36 by Aaron Bentley
merge changes
466
        new_snapshots = self.get_size_ranking()[-num:]
467
        return [v for n, v in new_snapshots]
0.9.35 by Aaron Bentley
Add build ranking
468
469
    def get_size_ranking(self):
        """Get versions ranked by size

        Versions already in self._snapshots are left out.

        :return: A list of (snapshot_len - diff_len, version_id) tuples in
            ascending order, where snapshot_len is the patch length of the
            version stored as one NewText hunk and diff_len is the patch
            length of its stored diff.
        """
        ranked = []
        for version_id in self.versions():
            if version_id not in self._snapshots:
                diff_len = self.get_diff(version_id).patch_len()
                fulltext = MultiParent([NewText(self.cache_version(version_id))])
                ranked.append((fulltext.patch_len() - diff_len, version_id))
        return sorted(ranked)
481
482
    def import_diffs(self, vf):
        """Import the diffs from another pseudo-versionedfile

        :param vf: The source to copy from.  It is asked for versions() and
            get_diff(), and its _parents mapping is read directly.
        """
        for version_id in vf.versions():
            diff = vf.get_diff(version_id)
            parent_ids = vf._parents[version_id]
            self.add_diff(diff, version_id, parent_ids)
0.9.23 by Aaron Bentley
handle snapshots all at once
487
0.9.35 by Aaron Bentley
Add build ranking
488
    def get_build_ranking(self):
        """Return revisions sorted by how much they reduce build complexity

        Versions are picked greedily: on each round the remaining version
        with the largest len(could_avoid) * len(referenced_by) product is
        selected, and its contribution is then removed from the other
        versions' sets.

        :return: A list holding every id from self.versions(), in the order
            in which they were selected.
        """
        could_avoid = {}
        referenced_by = {}
        for version_id in topo_iter(self):
            could_avoid[version_id] = set()
            if version_id not in self._snapshots:
                for parent_id in self._parents[version_id]:
                    could_avoid[version_id].update(could_avoid[parent_id])
                # NOTE(review): this adds every key of self._parents, not
                # only this version's own parents -- possibly meant to be
                # self._parents[version_id]; confirm before changing.
                could_avoid[version_id].update(self._parents)
                could_avoid[version_id].discard(version_id)
            for avoid_id in could_avoid[version_id]:
                referenced_by.setdefault(avoid_id, set()).add(version_id)
        available_versions = list(self.versions())
        ranking = []
        while available_versions:
            available_versions.sort(key=lambda x:
                                    len(could_avoid[x]) *
                                    len(referenced_by.get(x, [])))
            selected = available_versions.pop()
            ranking.append(selected)
            # A version that nothing references has no referenced_by entry
            # (the sort key above already allows for that), so look it up
            # with a default instead of raising KeyError.
            selected_refs = referenced_by.get(selected, ())
            for version_id in selected_refs:
                could_avoid[version_id].difference_update(
                    could_avoid[selected])
            for version_id in could_avoid[selected]:
                referenced_by[version_id].difference_update(selected_refs)
        return ranking
517
0.9.8 by Aaron Bentley
get add_version working
518
    def clear_cache(self):
        """Empty self._lines, the per-version store of reconstructed lines"""
        self._lines.clear()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
520
521
    def get_line_list(self, version_ids):
        """Return the lines of each requested version, in request order

        :param version_ids: An iterable of version ids.
        :return: A list with one cache_version() result per id.
        """
        line_lists = []
        for version_id in version_ids:
            line_lists.append(self.cache_version(version_id))
        return line_lists
523
524
    def cache_version(self, version_id):
        """Return the lines of a version, reconstructing them if necessary

        A text already held in self._lines is returned as is.  Otherwise
        the text is rebuilt with a _Reconstructor, stored in self._lines and
        returned.

        :param version_id: The version whose text is wanted.
        :return: The list of lines for version_id (the cached list object
            itself, not a copy).
        """
        try:
            return self._lines[version_id]
        except KeyError:
            pass
        # No separate self.get_diff() call is needed here: the first thing
        # reconstruct_version() does is fetch that same diff through this
        # object, so an extra call would only repeat the work.
        lines = []
        reconstructor = _Reconstructor(self, self._lines, self._parents)
        reconstructor.reconstruct_version(lines, version_id)
        self._lines[version_id] = lines
        return lines
535
0.9.33 by Aaron Bentley
Enable caching commandline param
536
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
537
class MultiMemoryVersionedFile(BaseVersionedFile):
    """Memory-backed pseudo-versionedfile"""

    def __init__(self, snapshot_interval=25, max_snapshots=None):
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
        # version_id -> diff object, exactly as passed to add_diff()
        self._diffs = {}

    def add_diff(self, diff, version_id, parent_ids):
        """Remember diff and parent_ids under version_id"""
        self._diffs[version_id] = diff
        self._parents[version_id] = parent_ids

    def get_diff(self, version_id):
        """Return the diff stored for version_id

        :raises errors.RevisionNotPresent: if no diff was added for
            version_id.
        """
        try:
            stored = self._diffs[version_id]
        except KeyError:
            raise errors.RevisionNotPresent(version_id, self)
        return stored

    def destroy(self):
        """Forget all stored diffs by rebinding self._diffs to a new dict"""
        self._diffs = {}
556
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
557
558
class MultiVersionedFile(BaseVersionedFile):
    """Disk-backed pseudo-versionedfile

    Diffs are appended, gzip-compressed, to '<filename>.mpknit'.  save()
    and load() keep the parents, snapshots and diff offsets in
    '<filename>.mpidx'.
    """

    def __init__(self, filename, snapshot_interval=25, max_snapshots=None):
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
        self._filename = filename
        # version_id -> (start offset, byte count) inside the .mpknit file
        self._diff_offset = {}

    def get_diff(self, version_id):
        """Read the diff for version_id back from the .mpknit file

        :raises KeyError: if version_id has no recorded offset.
        """
        start, count = self._diff_offset[version_id]
        with open(self._filename + '.mpknit', 'rb') as infile:
            infile.seek(start)
            sio = BytesIO(infile.read(count))
        with gzip.GzipFile(None, mode='rb', fileobj=sio) as zip_file:
            # Skip the b'version <id>\n' header line written by add_diff().
            zip_file.readline()
            content = zip_file.read()
            return MultiParent.from_patch(content)

    def add_diff(self, diff, version_id, parent_ids):
        """Append diff to the .mpknit file and record where it was written"""
        with open(self._filename + '.mpknit', 'ab') as outfile:
            outfile.seek(0, 2)      # workaround for windows bug:
            # .tell() for files opened in 'ab' mode
            # before any write returns 0
            start = outfile.tell()
            with gzip.GzipFile(None, mode='ab', fileobj=outfile) as zipfile:
                zipfile.writelines(itertools.chain(
                    [b'version %s\n' % version_id], diff.to_patch()))
            # Measured after the GzipFile is closed, so that everything it
            # wrote on close is included in the recorded length.
            end = outfile.tell()
        self._diff_offset[version_id] = (start, end - start)
        self._parents[version_id] = parent_ids

    def destroy(self):
        """Delete the .mpknit and .mpidx files; missing files are ignored"""
        for suffix in ('.mpknit', '.mpidx'):
            try:
                os.unlink(self._filename + suffix)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise

    def save(self):
        """Write parents, snapshots and diff offsets to the .mpidx file"""
        # Encode before opening, so a failure to encode cannot leave a
        # truncated index behind.
        index = bencode.bencode(
            (self._parents, list(self._snapshots), self._diff_offset))
        with open(self._filename + '.mpidx', 'wb') as index_file:
            index_file.write(index)

    def load(self):
        """Restore parents, snapshots and diff offsets from the .mpidx file"""
        with open(self._filename + '.mpidx', 'rb') as index_file:
            raw_index = index_file.read()
        self._parents, snapshots, self._diff_offset = bencode.bdecode(
            raw_index)
        self._snapshots = set(snapshots)
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
609
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
610
611
class _Reconstructor(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
612
    """Build a text from the diffs, ancestry graph and cached lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
613
614
    def __init__(self, diffs, lines, parents):
615
        self.diffs = diffs
616
        self.lines = lines
617
        self.parents = parents
618
        self.cursor = {}
619
620
    def reconstruct(self, lines, parent_text, version_id):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
621
        """Append the lines referred to by a ParentText to lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
622
        parent_id = self.parents[version_id][parent_text.parent]
623
        end = parent_text.parent_pos + parent_text.num_lines
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
624
        return self._reconstruct(lines, parent_id, parent_text.parent_pos,
625
                                 end)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
626
627
    def _reconstruct(self, lines, req_version_id, req_start, req_end):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
628
        """Append lines for the requested version_id range"""
629
        # stack of pending range requests
2520.4.16 by Aaron Bentley
Handle empty versions correctly
630
        if req_start == req_end:
631
            return
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
632
        pending_reqs = [(req_version_id, req_start, req_end)]
633
        while len(pending_reqs) > 0:
634
            req_version_id, req_start, req_end = pending_reqs.pop()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
635
            # lazily allocate cursors for versions
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
636
            if req_version_id in self.lines:
637
                lines.extend(self.lines[req_version_id][req_start:req_end])
638
                continue
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
639
            try:
640
                start, end, kind, data, iterator = self.cursor[req_version_id]
641
            except KeyError:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
642
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
643
                start, end, kind, data = next(iterator)
0.9.22 by Aaron Bentley
Fix restoration bug
644
            if start > req_start:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
645
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
646
                start, end, kind, data = next(iterator)
0.9.22 by Aaron Bentley
Fix restoration bug
647
0.9.10 by Aaron Bentley
Text reconstruction seems to work
648
            # find the first hunk relevant to the request
649
            while end <= req_start:
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
650
                start, end, kind, data = next(iterator)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
651
            self.cursor[req_version_id] = start, end, kind, data, iterator
0.9.10 by Aaron Bentley
Text reconstruction seems to work
652
            # if the hunk can't satisfy the whole request, split it in two,
653
            # and leave the second half for later.
654
            if req_end > end:
655
                pending_reqs.append((req_version_id, end, req_end))
656
                req_end = end
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
657
            if kind == 'new':
658
                lines.extend(data[req_start - start: (req_end - start)])
659
            else:
0.9.10 by Aaron Bentley
Text reconstruction seems to work
660
                # If the hunk is a ParentText, rewrite it as a range request
661
                # for the parent, and make it the next pending request.
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
662
                parent, parent_start, parent_end = data
0.9.10 by Aaron Bentley
Text reconstruction seems to work
663
                new_version_id = self.parents[req_version_id][parent]
664
                new_start = parent_start + req_start - start
665
                new_end = parent_end + req_end - end
666
                pending_reqs.append((new_version_id, new_start, new_end))
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
667
668
    def reconstruct_version(self, lines, version_id):
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
669
        length = self.diffs.get_diff(version_id).num_lines()
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
670
        return self._reconstruct(lines, version_id, 0, length)
0.9.25 by Aaron Bentley
More messy hacking
671
2520.4.6 by Aaron Bentley
Get installation started
672
0.9.25 by Aaron Bentley
More messy hacking
673
def gzip_string(lines):
    """Return the gzip-compressed concatenation of lines

    :param lines: An iterable of byte strings.
    :return: The compressed data as a single byte string.
    """
    buf = BytesIO()
    with gzip.GzipFile(fileobj=buf, mode='wb') as compressor:
        compressor.writelines(lines)
    return buf.getvalue()