/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
5590.1.1 by John Arbash Meinel
Stop using tuned_gzip, it seems to give incorrect results on python 2.7
1
# Copyright (C) 2007-2011 Canonical Ltd
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
16
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
17
import errno
7479.2.1 by Jelmer Vernooij
Drop python2 support.
18
from io import (
19
    BytesIO,
20
    )
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
21
import os
22
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
23
from .lazy_import import lazy_import
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
24
25
lazy_import(globals(), """
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
26
import gzip
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
27
import itertools
7290.14.1 by Jelmer Vernooij
Use external patiencediff.
28
import patiencediff
0.9.19 by Aaron Bentley
More tweakage
29
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
30
from breezy import (
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
31
    bencode,
0.9.25 by Aaron Bentley
More messy hacking
32
    ui,
33
    )
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
34
""")
7413.8.11 by Jelmer Vernooij
Don't lazy-import errors.
35
from . import (
36
    errors,
37
    )
7143.15.5 by Jelmer Vernooij
More PEP8 fixes.
38
from .i18n import gettext
0.9.3 by Aaron Bentley
Get three-parent comparisions under test
39
0.9.33 by Aaron Bentley
Enable caching commandline param
40
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
41
def topo_iter_keys(vf, keys=None):
42
    if keys is None:
43
        keys = vf.keys()
44
    parents = vf.get_parent_map(keys)
45
    return _topo_iter(parents, keys)
46
7143.15.2 by Jelmer Vernooij
Run autopep8.
47
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
48
def topo_iter(vf, versions=None):
49
    if versions is None:
50
        versions = vf.versions()
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
51
    parents = vf.get_parent_map(versions)
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
52
    return _topo_iter(parents, versions)
53
7143.15.2 by Jelmer Vernooij
Run autopep8.
54
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
55
def _topo_iter(parents, versions):
56
    seen = set()
57
    descendants = {}
7143.15.2 by Jelmer Vernooij
Run autopep8.
58
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
59
    def pending_parents(version):
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
60
        if parents[version] is None:
61
            return []
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
62
        return [v for v in parents[version] if v in versions and
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
63
                v not in seen]
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
64
    for version_id in versions:
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
65
        if parents[version_id] is None:
66
            # parentless
67
            continue
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
68
        for parent_id in parents[version_id]:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
69
            descendants.setdefault(parent_id, []).append(version_id)
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
70
    cur = [v for v in versions if len(pending_parents(v)) == 0]
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
71
    while len(cur) > 0:
72
        next = []
73
        for version_id in cur:
74
            if version_id in seen:
75
                continue
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
76
            if len(pending_parents(version_id)) != 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
77
                continue
78
            next.extend(descendants.get(version_id, []))
79
            yield version_id
80
            seen.add(version_id)
81
        cur = next
82
83
0.9.1 by Aaron Bentley
Get trivial case passing
84
class MultiParent(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
85
    """A multi-parent diff"""
0.9.1 by Aaron Bentley
Get trivial case passing
86
5374.2.9 by John Arbash Meinel
change slots back to a list.
87
    __slots__ = ['hunks']
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
88
0.9.2 by Aaron Bentley
Get single-parent comparison working
89
    def __init__(self, hunks=None):
90
        if hunks is not None:
91
            self.hunks = hunks
92
        else:
93
            self.hunks = []
94
95
    def __repr__(self):
96
        return "MultiParent(%r)" % self.hunks
97
98
    def __eq__(self, other):
99
        if self.__class__ is not other.__class__:
100
            return False
101
        return (self.hunks == other.hunks)
0.9.1 by Aaron Bentley
Get trivial case passing
102
103
    @staticmethod
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
104
    def from_lines(text, parents=(), left_blocks=None):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
105
        """Produce a MultiParent from a list of lines and parents"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
106
        def compare(parent):
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
107
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
108
                                                           text)
109
            return matcher.get_matching_blocks()
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
110
        if len(parents) > 0:
111
            if left_blocks is None:
112
                left_blocks = compare(parents[0])
113
            parent_comparisons = [left_blocks] + [compare(p) for p in
114
                                                  parents[1:]]
115
        else:
116
            parent_comparisons = []
0.9.2 by Aaron Bentley
Get single-parent comparison working
117
        cur_line = 0
118
        new_text = NewText([])
119
        parent_text = []
120
        block_iter = [iter(i) for i in parent_comparisons]
121
        diff = MultiParent([])
7143.15.2 by Jelmer Vernooij
Run autopep8.
122
0.9.2 by Aaron Bentley
Get single-parent comparison working
123
        def next_block(p):
124
            try:
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
125
                return next(block_iter[p])
0.9.2 by Aaron Bentley
Get single-parent comparison working
126
            except StopIteration:
127
                return None
128
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
129
        while cur_line < len(text):
130
            best_match = None
131
            for p, block in enumerate(cur_block):
132
                if block is None:
133
                    continue
134
                i, j, n = block
2520.4.138 by Aaron Bentley
Fix benign off-by-one error generating mpdiffs
135
                while j + n <= cur_line:
0.9.2 by Aaron Bentley
Get single-parent comparison working
136
                    block = cur_block[p] = next_block(p)
137
                    if block is None:
138
                        break
139
                    i, j, n = block
140
                if block is None:
141
                    continue
142
                if j > cur_line:
143
                    continue
144
                offset = cur_line - j
145
                i += offset
146
                j = cur_line
147
                n -= offset
148
                if n == 0:
149
                    continue
150
                if best_match is None or n > best_match.num_lines:
151
                    best_match = ParentText(p, i, j, n)
152
            if best_match is None:
153
                new_text.lines.append(text[cur_line])
154
                cur_line += 1
155
            else:
156
                if len(new_text.lines) > 0:
157
                    diff.hunks.append(new_text)
158
                    new_text = NewText([])
159
                diff.hunks.append(best_match)
160
                cur_line += best_match.num_lines
161
        if len(new_text.lines) > 0:
162
            diff.hunks.append(new_text)
0.9.1 by Aaron Bentley
Get trivial case passing
163
        return diff
164
2520.4.139 by Aaron Bentley
Support Multiparent.get_matching_blocks
165
    def get_matching_blocks(self, parent, parent_len):
166
        for hunk in self.hunks:
167
            if not isinstance(hunk, ParentText) or hunk.parent != parent:
168
                continue
169
            yield (hunk.parent_pos, hunk.child_pos, hunk.num_lines)
170
        yield parent_len, self.num_lines(), 0
171
2520.4.103 by Aaron Bentley
Add MultiParent.to_lines
172
    def to_lines(self, parents=()):
173
        """Contruct a fulltext from this diff and its parents"""
174
        mpvf = MultiMemoryVersionedFile()
175
        for num, parent in enumerate(parents):
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
176
            mpvf.add_version(BytesIO(parent).readlines(), num, [])
6651.2.2 by Martin
Apply 2to3 xrange fix and fix up with sixish range
177
        mpvf.add_diff(self, 'a', list(range(len(parents))))
2520.4.103 by Aaron Bentley
Add MultiParent.to_lines
178
        return mpvf.get_line_list(['a'])[0]
179
0.9.1 by Aaron Bentley
Get trivial case passing
180
    @classmethod
181
    def from_texts(cls, text, parents=()):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
182
        """Produce a MultiParent from a text and list of parent text"""
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
183
        return cls.from_lines(BytesIO(text).readlines(),
184
                              [BytesIO(p).readlines() for p in parents])
0.9.1 by Aaron Bentley
Get trivial case passing
185
0.9.4 by Aaron Bentley
Start supporting serialization
186
    def to_patch(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
187
        """Yield text lines for a patch"""
0.9.4 by Aaron Bentley
Start supporting serialization
188
        for hunk in self.hunks:
189
            for line in hunk.to_patch():
190
                yield line
191
0.9.25 by Aaron Bentley
More messy hacking
192
    def patch_len(self):
6973.7.9 by Jelmer Vernooij
Port multiparent.
193
        return len(b''.join(self.to_patch()))
0.9.25 by Aaron Bentley
More messy hacking
194
195
    def zipped_patch_len(self):
196
        return len(gzip_string(self.to_patch()))
197
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
198
    @classmethod
199
    def from_patch(cls, text):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
200
        """Create a MultiParent from its string form"""
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
201
        return cls._from_patch(BytesIO(text))
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
202
0.9.18 by Aaron Bentley
Implement from_patch
203
    @staticmethod
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
204
    def _from_patch(lines):
205
        """This is private because it is essential to split lines on \n only"""
0.9.18 by Aaron Bentley
Implement from_patch
206
        line_iter = iter(lines)
207
        hunks = []
208
        cur_line = None
6973.8.1 by Jelmer Vernooij
Appease picky reviewer.
209
        while True:
0.9.18 by Aaron Bentley
Implement from_patch
210
            try:
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
211
                cur_line = next(line_iter)
0.9.18 by Aaron Bentley
Implement from_patch
212
            except StopIteration:
213
                break
6973.8.1 by Jelmer Vernooij
Appease picky reviewer.
214
            first_char = cur_line[0:1]
215
            if first_char == b'i':
6973.7.9 by Jelmer Vernooij
Port multiparent.
216
                num_lines = int(cur_line.split(b' ')[1])
6651.2.2 by Martin
Apply 2to3 xrange fix and fix up with sixish range
217
                hunk_lines = [next(line_iter) for _ in range(num_lines)]
0.9.18 by Aaron Bentley
Implement from_patch
218
                hunk_lines[-1] = hunk_lines[-1][:-1]
219
                hunks.append(NewText(hunk_lines))
6973.8.1 by Jelmer Vernooij
Appease picky reviewer.
220
            elif first_char == b'\n':
6973.7.9 by Jelmer Vernooij
Port multiparent.
221
                hunks[-1].lines[-1] += b'\n'
0.9.18 by Aaron Bentley
Implement from_patch
222
            else:
6973.8.1 by Jelmer Vernooij
Appease picky reviewer.
223
                if not (first_char == b'c'):
224
                    raise AssertionError(first_char)
0.9.18 by Aaron Bentley
Implement from_patch
225
                parent, parent_pos, child_pos, num_lines =\
6973.7.9 by Jelmer Vernooij
Port multiparent.
226
                    [int(v) for v in cur_line.split(b' ')[1:]]
0.9.18 by Aaron Bentley
Implement from_patch
227
                hunks.append(ParentText(parent, parent_pos, child_pos,
228
                                        num_lines))
229
        return MultiParent(hunks)
230
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
231
    def range_iterator(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
232
        """Iterate through the hunks, with range indicated
233
234
        kind is "new" or "parent".
235
        for "new", data is a list of lines.
236
        for "parent", data is (parent, parent_start, parent_end)
237
        :return: a generator of (start, end, kind, data)
238
        """
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
239
        start = 0
240
        for hunk in self.hunks:
241
            if isinstance(hunk, NewText):
242
                kind = 'new'
243
                end = start + len(hunk.lines)
244
                data = hunk.lines
245
            else:
246
                kind = 'parent'
247
                start = hunk.child_pos
248
                end = start + hunk.num_lines
249
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
250
                        hunk.num_lines)
251
            yield start, end, kind, data
252
            start = end
253
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
254
    def num_lines(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
255
        """The number of lines in the output text"""
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
256
        extra_n = 0
257
        for hunk in reversed(self.hunks):
258
            if isinstance(hunk, ParentText):
7143.15.2 by Jelmer Vernooij
Run autopep8.
259
                return hunk.child_pos + hunk.num_lines + extra_n
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
260
            extra_n += len(hunk.lines)
261
        return extra_n
262
0.9.25 by Aaron Bentley
More messy hacking
263
    def is_snapshot(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
264
        """Return true of this hunk is effectively a fulltext"""
0.9.25 by Aaron Bentley
More messy hacking
265
        if len(self.hunks) != 1:
266
            return False
267
        return (isinstance(self.hunks[0], NewText))
268
0.9.1 by Aaron Bentley
Get trivial case passing
269
270
class NewText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
271
    """The contents of text that is introduced by this text"""
0.9.1 by Aaron Bentley
Get trivial case passing
272
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
273
    __slots__ = ['lines']
274
0.9.1 by Aaron Bentley
Get trivial case passing
275
    def __init__(self, lines):
276
        self.lines = lines
277
278
    def __eq__(self, other):
279
        if self.__class__ is not other.__class__:
280
            return False
281
        return (other.lines == self.lines)
0.9.2 by Aaron Bentley
Get single-parent comparison working
282
283
    def __repr__(self):
284
        return 'NewText(%r)' % self.lines
285
0.9.4 by Aaron Bentley
Start supporting serialization
286
    def to_patch(self):
6973.7.9 by Jelmer Vernooij
Port multiparent.
287
        yield b'i %d\n' % len(self.lines)
0.9.4 by Aaron Bentley
Start supporting serialization
288
        for line in self.lines:
289
            yield line
6973.7.9 by Jelmer Vernooij
Port multiparent.
290
        yield b'\n'
0.9.4 by Aaron Bentley
Start supporting serialization
291
0.9.2 by Aaron Bentley
Get single-parent comparison working
292
293
class ParentText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
294
    """A reference to text present in a parent text"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
295
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
296
    __slots__ = ['parent', 'parent_pos', 'child_pos', 'num_lines']
297
0.9.2 by Aaron Bentley
Get single-parent comparison working
298
    def __init__(self, parent, parent_pos, child_pos, num_lines):
299
        self.parent = parent
300
        self.parent_pos = parent_pos
301
        self.child_pos = child_pos
302
        self.num_lines = num_lines
303
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
304
    def _as_dict(self):
6973.7.9 by Jelmer Vernooij
Port multiparent.
305
        return {b'parent': self.parent,
306
                b'parent_pos': self.parent_pos,
307
                b'child_pos': self.child_pos,
308
                b'num_lines': self.num_lines}
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
309
0.9.2 by Aaron Bentley
Get single-parent comparison working
310
    def __repr__(self):
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
311
        return ('ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'
312
                ' %(num_lines)r)' % self._as_dict())
0.9.2 by Aaron Bentley
Get single-parent comparison working
313
314
    def __eq__(self, other):
4088.3.1 by Benjamin Peterson
compare types with 'is' not ==
315
        if self.__class__ is not other.__class__:
0.9.2 by Aaron Bentley
Get single-parent comparison working
316
            return False
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
317
        return self._as_dict() == other._as_dict()
0.9.4 by Aaron Bentley
Start supporting serialization
318
319
    def to_patch(self):
6973.7.9 by Jelmer Vernooij
Port multiparent.
320
        yield (b'c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
321
               % self._as_dict())
0.9.8 by Aaron Bentley
get add_version working
322
323
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
324
class BaseVersionedFile(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
325
    """Pseudo-VersionedFile skeleton for MultiParent"""
0.9.8 by Aaron Bentley
get add_version working
326
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
327
    def __init__(self, snapshot_interval=25, max_snapshots=None):
0.9.8 by Aaron Bentley
get add_version working
328
        self._lines = {}
329
        self._parents = {}
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
330
        self._snapshots = set()
0.9.12 by Aaron Bentley
Make benchmarks for mp
331
        self.snapshot_interval = snapshot_interval
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
332
        self.max_snapshots = max_snapshots
0.9.12 by Aaron Bentley
Make benchmarks for mp
333
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
334
    def versions(self):
335
        return iter(self._parents)
336
2520.4.61 by Aaron Bentley
Do bulk insertion of records
337
    def has_version(self, version):
338
        return version in self._parents
339
0.9.12 by Aaron Bentley
Make benchmarks for mp
340
    def do_snapshot(self, version_id, parent_ids):
4031.3.1 by Frank Aspell
Fixing various typos
341
        """Determine whether to perform a snapshot for this version"""
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
342
        if self.snapshot_interval is None:
343
            return False
344
        if self.max_snapshots is not None and\
7143.15.2 by Jelmer Vernooij
Run autopep8.
345
                len(self._snapshots) == self.max_snapshots:
0.9.14 by Aaron Bentley
Temporarily force snapshots to 44
346
            return False
0.9.12 by Aaron Bentley
Make benchmarks for mp
347
        if len(parent_ids) == 0:
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
348
            return True
6651.2.2 by Martin
Apply 2to3 xrange fix and fix up with sixish range
349
        for ignored in range(self.snapshot_interval):
0.9.12 by Aaron Bentley
Make benchmarks for mp
350
            if len(parent_ids) == 0:
351
                return False
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
352
            version_ids = parent_ids
353
            parent_ids = []
354
            for version_id in version_ids:
355
                if version_id not in self._snapshots:
356
                    parent_ids.extend(self._parents[version_id])
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
357
        else:
358
            return True
0.9.8 by Aaron Bentley
get add_version working
359
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
360
    def add_version(self, lines, version_id, parent_ids,
0.9.20 by Aaron Bentley
Convert to a plugin
361
                    force_snapshot=None, single_parent=False):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
362
        """Add a version to the versionedfile
363
364
        :param lines: The list of lines to add.  Must be split on '\n'.
365
        :param version_id: The version_id of the version to add
366
        :param force_snapshot: If true, force this version to be added as a
367
            snapshot version.  If false, force this version to be added as a
368
            diff.  If none, determine this automatically.
369
        :param single_parent: If true, use a single parent, rather than
370
            multiple parents.
371
        """
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
372
        if force_snapshot is None:
373
            do_snapshot = self.do_snapshot(version_id, parent_ids)
374
        else:
375
            do_snapshot = force_snapshot
376
        if do_snapshot:
377
            self._snapshots.add(version_id)
0.9.12 by Aaron Bentley
Make benchmarks for mp
378
            diff = MultiParent([NewText(lines)])
379
        else:
0.9.20 by Aaron Bentley
Convert to a plugin
380
            if single_parent:
381
                parent_lines = self.get_line_list(parent_ids[:1])
382
            else:
383
                parent_lines = self.get_line_list(parent_ids)
0.9.12 by Aaron Bentley
Make benchmarks for mp
384
            diff = MultiParent.from_lines(lines, parent_lines)
0.9.25 by Aaron Bentley
More messy hacking
385
            if diff.is_snapshot():
386
                self._snapshots.add(version_id)
0.9.8 by Aaron Bentley
get add_version working
387
        self.add_diff(diff, version_id, parent_ids)
388
        self._lines[version_id] = lines
389
0.9.35 by Aaron Bentley
Add build ranking
390
    def get_parents(self, version_id):
391
        return self._parents[version_id]
392
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
393
    def make_snapshot(self, version_id):
394
        snapdiff = MultiParent([NewText(self.cache_version(version_id))])
0.9.36 by Aaron Bentley
merge changes
395
        self.add_diff(snapdiff, version_id, self._parents[version_id])
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
396
        self._snapshots.add(version_id)
397
0.9.20 by Aaron Bentley
Convert to a plugin
398
    def import_versionedfile(self, vf, snapshots, no_cache=True,
0.9.22 by Aaron Bentley
Fix restoration bug
399
                             single_parent=False, verify=False):
0.9.20 by Aaron Bentley
Convert to a plugin
400
        """Import all revisions of a versionedfile
401
402
        :param vf: The versionedfile to import
403
        :param snapshots: If provided, the revisions to make snapshots of.
404
            Otherwise, this will be auto-determined
405
        :param no_cache: If true, clear the cache after every add.
406
        :param single_parent: If true, omit all but one parent text, (but
407
            retain parent metadata).
408
        """
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
409
        if not (no_cache or not verify):
410
            raise ValueError()
0.9.19 by Aaron Bentley
More tweakage
411
        revisions = set(vf.versions())
412
        total = len(revisions)
6861.4.1 by Jelmer Vernooij
Make progress bars context managers.
413
        with ui.ui_factory.nested_progress_bar() as pb:
0.9.20 by Aaron Bentley
Convert to a plugin
414
            while len(revisions) > 0:
415
                added = set()
416
                for revision in revisions:
417
                    parents = vf.get_parents(revision)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
418
                    if [p for p in parents if p not in self._parents] != []:
0.9.20 by Aaron Bentley
Convert to a plugin
419
                        continue
6973.7.9 by Jelmer Vernooij
Port multiparent.
420
                    lines = [a + b' ' + l for a, l in
3316.2.13 by Robert Collins
* ``VersionedFile.annotate_iter`` is deprecated. While in principal this
421
                             vf.annotate(revision)]
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
422
                    if snapshots is None:
0.9.20 by Aaron Bentley
Convert to a plugin
423
                        force_snapshot = None
424
                    else:
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
425
                        force_snapshot = (revision in snapshots)
0.9.20 by Aaron Bentley
Convert to a plugin
426
                    self.add_version(lines, revision, parents, force_snapshot,
427
                                     single_parent)
428
                    added.add(revision)
429
                    if no_cache:
430
                        self.clear_cache()
0.9.25 by Aaron Bentley
More messy hacking
431
                        vf.clear_cache()
0.9.22 by Aaron Bentley
Fix restoration bug
432
                        if verify:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
433
                            if not (lines == self.get_line_list([revision])[0]):
434
                                raise AssertionError()
0.9.22 by Aaron Bentley
Fix restoration bug
435
                            self.clear_cache()
6138.4.1 by Jonathan Riddell
add gettext to progress bar strings
436
                    pb.update(gettext('Importing revisions'),
0.9.20 by Aaron Bentley
Convert to a plugin
437
                              (total - len(revisions)) + len(added), total)
438
                revisions = [r for r in revisions if r not in added]
0.9.19 by Aaron Bentley
More tweakage
439
0.9.23 by Aaron Bentley
handle snapshots all at once
440
    def select_snapshots(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
441
        """Determine which versions to add as snapshots"""
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
442
        build_ancestors = {}
0.9.23 by Aaron Bentley
handle snapshots all at once
443
        snapshots = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
444
        for version_id in topo_iter(vf):
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
445
            potential_build_ancestors = set(vf.get_parents(version_id))
446
            parents = vf.get_parents(version_id)
447
            if len(parents) == 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
448
                snapshots.add(version_id)
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
449
                build_ancestors[version_id] = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
450
            else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
451
                for parent in vf.get_parents(version_id):
452
                    potential_build_ancestors.update(build_ancestors[parent])
453
                if len(potential_build_ancestors) > self.snapshot_interval:
454
                    snapshots.add(version_id)
455
                    build_ancestors[version_id] = set()
0.9.23 by Aaron Bentley
handle snapshots all at once
456
                else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
457
                    build_ancestors[version_id] = potential_build_ancestors
0.9.23 by Aaron Bentley
handle snapshots all at once
458
        return snapshots
459
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
460
    def select_by_size(self, num):
0.9.35 by Aaron Bentley
Add build ranking
461
        """Select snapshots for minimum output size"""
462
        num -= len(self._snapshots)
0.9.36 by Aaron Bentley
merge changes
463
        new_snapshots = self.get_size_ranking()[-num:]
464
        return [v for n, v in new_snapshots]
0.9.35 by Aaron Bentley
Add build ranking
465
466
    def get_size_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
467
        """Get versions ranked by size"""
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
468
        versions = []
469
        for version_id in self.versions():
470
            if version_id in self._snapshots:
471
                continue
472
            diff_len = self.get_diff(version_id).patch_len()
473
            snapshot_len = MultiParent([NewText(
474
                self.cache_version(version_id))]).patch_len()
0.9.36 by Aaron Bentley
merge changes
475
            versions.append((snapshot_len - diff_len, version_id))
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
476
        versions.sort()
0.9.36 by Aaron Bentley
merge changes
477
        return versions
478
479
    def import_diffs(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
480
        """Import the diffs from another pseudo-versionedfile"""
0.9.36 by Aaron Bentley
merge changes
481
        for version_id in vf.versions():
482
            self.add_diff(vf.get_diff(version_id), version_id,
483
                          vf._parents[version_id])
0.9.23 by Aaron Bentley
handle snapshots all at once
484
0.9.35 by Aaron Bentley
Add build ranking
485
    def get_build_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
486
        """Return revisions sorted by how much they reduce build complexity"""
0.9.35 by Aaron Bentley
Add build ranking
487
        could_avoid = {}
488
        referenced_by = {}
489
        for version_id in topo_iter(self):
490
            could_avoid[version_id] = set()
491
            if version_id not in self._snapshots:
492
                for parent_id in self._parents[version_id]:
493
                    could_avoid[version_id].update(could_avoid[parent_id])
494
                could_avoid[version_id].update(self._parents)
495
                could_avoid[version_id].discard(version_id)
496
            for avoid_id in could_avoid[version_id]:
497
                referenced_by.setdefault(avoid_id, set()).add(version_id)
498
        available_versions = list(self.versions())
499
        ranking = []
500
        while len(available_versions) > 0:
501
            available_versions.sort(key=lambda x:
7143.15.2 by Jelmer Vernooij
Run autopep8.
502
                                    len(could_avoid[x]) *
503
                                    len(referenced_by.get(x, [])))
0.9.35 by Aaron Bentley
Add build ranking
504
            selected = available_versions.pop()
505
            ranking.append(selected)
506
            for version_id in referenced_by[selected]:
507
                could_avoid[version_id].difference_update(
508
                    could_avoid[selected])
509
            for version_id in could_avoid[selected]:
510
                referenced_by[version_id].difference_update(
511
                    referenced_by[selected]
512
                )
513
        return ranking
514
0.9.8 by Aaron Bentley
get add_version working
515
    def clear_cache(self):
516
        self._lines.clear()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
517
518
    def get_line_list(self, version_ids):
519
        return [self.cache_version(v) for v in version_ids]
520
521
    def cache_version(self, version_id):
522
        try:
523
            return self._lines[version_id]
524
        except KeyError:
525
            pass
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
526
        diff = self.get_diff(version_id)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
527
        lines = []
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
528
        reconstructor = _Reconstructor(self, self._lines, self._parents)
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
529
        reconstructor.reconstruct_version(lines, version_id)
0.9.33 by Aaron Bentley
Enable caching commandline param
530
        self._lines[version_id] = lines
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
531
        return lines
532
0.9.33 by Aaron Bentley
Enable caching commandline param
533
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
534
class MultiMemoryVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
535
    """Memory-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
536
537
    def __init__(self, snapshot_interval=25, max_snapshots=None):
538
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
539
        self._diffs = {}
540
541
    def add_diff(self, diff, version_id, parent_ids):
542
        self._diffs[version_id] = diff
543
        self._parents[version_id] = parent_ids
544
545
    def get_diff(self, version_id):
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
546
        try:
547
            return self._diffs[version_id]
548
        except KeyError:
549
            raise errors.RevisionNotPresent(version_id, self)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
550
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
551
    def destroy(self):
552
        self._diffs = {}
553
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
554
555
class MultiVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
556
    """Disk-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
557
558
    def __init__(self, filename, snapshot_interval=25, max_snapshots=None):
559
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
560
        self._filename = filename
561
        self._diff_offset = {}
562
563
    def get_diff(self, version_id):
564
        start, count = self._diff_offset[version_id]
6977.2.3 by Jelmer Vernooij
Use context managers.
565
        with open(self._filename + '.mpknit', 'rb') as infile:
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
566
            infile.seek(start)
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
567
            sio = BytesIO(infile.read(count))
6977.2.3 by Jelmer Vernooij
Use context managers.
568
        with gzip.GzipFile(None, mode='rb', fileobj=sio) as zip_file:
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
569
            file_version_id = zip_file.readline()
5590.1.1 by John Arbash Meinel
Stop using tuned_gzip, it seems to give incorrect results on python 2.7
570
            content = zip_file.read()
571
            return MultiParent.from_patch(content)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
572
573
    def add_diff(self, diff, version_id, parent_ids):
6977.2.3 by Jelmer Vernooij
Use context managers.
574
        with open(self._filename + '.mpknit', 'ab') as outfile:
2839.4.1 by Alexander Belchenko
multiparent.py: workaround for windows bug: .tell() for files opened in 'ab' mode before any write returns 0
575
            outfile.seek(0, 2)      # workaround for windows bug:
7143.15.2 by Jelmer Vernooij
Run autopep8.
576
            # .tell() for files opened in 'ab' mode
577
            # before any write returns 0
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
578
            start = outfile.tell()
6977.2.3 by Jelmer Vernooij
Use context managers.
579
            with gzip.GzipFile(None, mode='ab', fileobj=outfile) as zipfile:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
580
                zipfile.writelines(itertools.chain(
6973.7.9 by Jelmer Vernooij
Port multiparent.
581
                    [b'version %s\n' % version_id], diff.to_patch()))
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
582
            end = outfile.tell()
7143.15.2 by Jelmer Vernooij
Run autopep8.
583
        self._diff_offset[version_id] = (start, end - start)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
584
        self._parents[version_id] = parent_ids
585
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
586
    def destroy(self):
587
        try:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
588
            os.unlink(self._filename + '.mpknit')
6619.3.2 by Jelmer Vernooij
Apply 2to3 except fix.
589
        except OSError as e:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
590
            if e.errno != errno.ENOENT:
591
                raise
592
        try:
593
            os.unlink(self._filename + '.mpidx')
6619.3.2 by Jelmer Vernooij
Apply 2to3 except fix.
594
        except OSError as e:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
595
            if e.errno != errno.ENOENT:
596
                raise
597
598
    def save(self):
599
        open(self._filename + '.mpidx', 'wb').write(bencode.bencode(
600
            (self._parents, list(self._snapshots), self._diff_offset)))
601
602
    def load(self):
603
        self._parents, snapshots, self._diff_offset = bencode.bdecode(
604
            open(self._filename + '.mpidx', 'rb').read())
605
        self._snapshots = set(snapshots)
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
606
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
607
608
class _Reconstructor(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
609
    """Build a text from the diffs, ancestry graph and cached lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
610
611
    def __init__(self, diffs, lines, parents):
612
        self.diffs = diffs
613
        self.lines = lines
614
        self.parents = parents
615
        self.cursor = {}
616
617
    def reconstruct(self, lines, parent_text, version_id):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
618
        """Append the lines referred to by a ParentText to lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
619
        parent_id = self.parents[version_id][parent_text.parent]
620
        end = parent_text.parent_pos + parent_text.num_lines
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
621
        return self._reconstruct(lines, parent_id, parent_text.parent_pos,
622
                                 end)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
623
624
    def _reconstruct(self, lines, req_version_id, req_start, req_end):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
625
        """Append lines for the requested version_id range"""
626
        # stack of pending range requests
2520.4.16 by Aaron Bentley
Handle empty versions correctly
627
        if req_start == req_end:
628
            return
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
629
        pending_reqs = [(req_version_id, req_start, req_end)]
630
        while len(pending_reqs) > 0:
631
            req_version_id, req_start, req_end = pending_reqs.pop()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
632
            # lazily allocate cursors for versions
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
633
            if req_version_id in self.lines:
634
                lines.extend(self.lines[req_version_id][req_start:req_end])
635
                continue
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
636
            try:
637
                start, end, kind, data, iterator = self.cursor[req_version_id]
638
            except KeyError:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
639
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
640
                start, end, kind, data = next(iterator)
0.9.22 by Aaron Bentley
Fix restoration bug
641
            if start > req_start:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
642
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
643
                start, end, kind, data = next(iterator)
0.9.22 by Aaron Bentley
Fix restoration bug
644
0.9.10 by Aaron Bentley
Text reconstruction seems to work
645
            # find the first hunk relevant to the request
646
            while end <= req_start:
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
647
                start, end, kind, data = next(iterator)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
648
            self.cursor[req_version_id] = start, end, kind, data, iterator
0.9.10 by Aaron Bentley
Text reconstruction seems to work
649
            # if the hunk can't satisfy the whole request, split it in two,
650
            # and leave the second half for later.
651
            if req_end > end:
652
                pending_reqs.append((req_version_id, end, req_end))
653
                req_end = end
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
654
            if kind == 'new':
655
                lines.extend(data[req_start - start: (req_end - start)])
656
            else:
0.9.10 by Aaron Bentley
Text reconstruction seems to work
657
                # If the hunk is a ParentText, rewrite it as a range request
658
                # for the parent, and make it the next pending request.
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
659
                parent, parent_start, parent_end = data
0.9.10 by Aaron Bentley
Text reconstruction seems to work
660
                new_version_id = self.parents[req_version_id][parent]
661
                new_start = parent_start + req_start - start
662
                new_end = parent_end + req_end - end
663
                pending_reqs.append((new_version_id, new_start, new_end))
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
664
665
    def reconstruct_version(self, lines, version_id):
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
666
        length = self.diffs.get_diff(version_id).num_lines()
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
667
        return self._reconstruct(lines, version_id, 0, length)
0.9.25 by Aaron Bentley
More messy hacking
668
2520.4.6 by Aaron Bentley
Get installation started
669
0.9.25 by Aaron Bentley
More messy hacking
670
def gzip_string(lines):
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
671
    sio = BytesIO()
6977.2.3 by Jelmer Vernooij
Use context managers.
672
    with gzip.GzipFile(None, mode='wb', fileobj=sio) as data_file:
673
        data_file.writelines(lines)
0.9.25 by Aaron Bentley
More messy hacking
674
    return sio.getvalue()