/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
5590.1.1 by John Arbash Meinel
Stop using tuned_gzip, it seems to give incorrect results on python 2.7
1
# Copyright (C) 2007-2011 Canonical Ltd
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
16
6379.6.3 by Jelmer Vernooij
Use absolute_import.
17
from __future__ import absolute_import
18
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
19
import errno
20
import os
21
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
22
from .lazy_import import lazy_import
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
23
24
lazy_import(globals(), """
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
25
import gzip
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
26
import itertools
0.9.19 by Aaron Bentley
More tweakage
27
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
28
from breezy import (
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
29
    bencode,
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
30
    errors,
0.9.25 by Aaron Bentley
More messy hacking
31
    patiencediff,
32
    ui,
33
    )
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
34
""")
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
35
from .sixish import (
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
36
    BytesIO,
6651.2.2 by Martin
Apply 2to3 xrange fix and fix up with sixish range
37
    range,
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
38
    )
0.9.3 by Aaron Bentley
Get three-parent comparisions under test
39
0.9.33 by Aaron Bentley
Enable caching commandline param
40
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
41
def topo_iter_keys(vf, keys=None):
42
    if keys is None:
43
        keys = vf.keys()
44
    parents = vf.get_parent_map(keys)
45
    return _topo_iter(parents, keys)
46
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
47
def topo_iter(vf, versions=None):
48
    if versions is None:
49
        versions = vf.versions()
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
50
    parents = vf.get_parent_map(versions)
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
51
    return _topo_iter(parents, versions)
52
53
def _topo_iter(parents, versions):
54
    seen = set()
55
    descendants = {}
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
56
    def pending_parents(version):
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
57
        if parents[version] is None:
58
            return []
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
59
        return [v for v in parents[version] if v in versions and
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
60
                v not in seen]
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
61
    for version_id in versions:
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
62
        if parents[version_id] is None:
63
            # parentless
64
            continue
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
65
        for parent_id in parents[version_id]:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
66
            descendants.setdefault(parent_id, []).append(version_id)
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
67
    cur = [v for v in versions if len(pending_parents(v)) == 0]
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
68
    while len(cur) > 0:
69
        next = []
70
        for version_id in cur:
71
            if version_id in seen:
72
                continue
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
73
            if len(pending_parents(version_id)) != 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
74
                continue
75
            next.extend(descendants.get(version_id, []))
76
            yield version_id
77
            seen.add(version_id)
78
        cur = next
79
80
0.9.1 by Aaron Bentley
Get trivial case passing
81
class MultiParent(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
82
    """A multi-parent diff"""
0.9.1 by Aaron Bentley
Get trivial case passing
83
5374.2.9 by John Arbash Meinel
change slots back to a list.
84
    __slots__ = ['hunks']
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
85
0.9.2 by Aaron Bentley
Get single-parent comparison working
86
    def __init__(self, hunks=None):
87
        if hunks is not None:
88
            self.hunks = hunks
89
        else:
90
            self.hunks = []
91
92
    def __repr__(self):
93
        return "MultiParent(%r)" % self.hunks
94
95
    def __eq__(self, other):
96
        if self.__class__ is not other.__class__:
97
            return False
98
        return (self.hunks == other.hunks)
0.9.1 by Aaron Bentley
Get trivial case passing
99
100
    @staticmethod
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
101
    def from_lines(text, parents=(), left_blocks=None):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
102
        """Produce a MultiParent from a list of lines and parents"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
103
        def compare(parent):
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
104
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
105
                                                           text)
106
            return matcher.get_matching_blocks()
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
107
        if len(parents) > 0:
108
            if left_blocks is None:
109
                left_blocks = compare(parents[0])
110
            parent_comparisons = [left_blocks] + [compare(p) for p in
111
                                                  parents[1:]]
112
        else:
113
            parent_comparisons = []
0.9.2 by Aaron Bentley
Get single-parent comparison working
114
        cur_line = 0
115
        new_text = NewText([])
116
        parent_text = []
117
        block_iter = [iter(i) for i in parent_comparisons]
118
        diff = MultiParent([])
119
        def next_block(p):
120
            try:
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
121
                return next(block_iter[p])
0.9.2 by Aaron Bentley
Get single-parent comparison working
122
            except StopIteration:
123
                return None
124
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
125
        while cur_line < len(text):
126
            best_match = None
127
            for p, block in enumerate(cur_block):
128
                if block is None:
129
                    continue
130
                i, j, n = block
2520.4.138 by Aaron Bentley
Fix benign off-by-one error generating mpdiffs
131
                while j + n <= cur_line:
0.9.2 by Aaron Bentley
Get single-parent comparison working
132
                    block = cur_block[p] = next_block(p)
133
                    if block is None:
134
                        break
135
                    i, j, n = block
136
                if block is None:
137
                    continue
138
                if j > cur_line:
139
                    continue
140
                offset = cur_line - j
141
                i += offset
142
                j = cur_line
143
                n -= offset
144
                if n == 0:
145
                    continue
146
                if best_match is None or n > best_match.num_lines:
147
                    best_match = ParentText(p, i, j, n)
148
            if best_match is None:
149
                new_text.lines.append(text[cur_line])
150
                cur_line += 1
151
            else:
152
                if len(new_text.lines) > 0:
153
                    diff.hunks.append(new_text)
154
                    new_text = NewText([])
155
                diff.hunks.append(best_match)
156
                cur_line += best_match.num_lines
157
        if len(new_text.lines) > 0:
158
            diff.hunks.append(new_text)
0.9.1 by Aaron Bentley
Get trivial case passing
159
        return diff
160
2520.4.139 by Aaron Bentley
Support Multiparent.get_matching_blocks
161
    def get_matching_blocks(self, parent, parent_len):
162
        for hunk in self.hunks:
163
            if not isinstance(hunk, ParentText) or hunk.parent != parent:
164
                continue
165
            yield (hunk.parent_pos, hunk.child_pos, hunk.num_lines)
166
        yield parent_len, self.num_lines(), 0
167
2520.4.103 by Aaron Bentley
Add MultiParent.to_lines
168
    def to_lines(self, parents=()):
169
        """Contruct a fulltext from this diff and its parents"""
170
        mpvf = MultiMemoryVersionedFile()
171
        for num, parent in enumerate(parents):
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
172
            mpvf.add_version(BytesIO(parent).readlines(), num, [])
6651.2.2 by Martin
Apply 2to3 xrange fix and fix up with sixish range
173
        mpvf.add_diff(self, 'a', list(range(len(parents))))
2520.4.103 by Aaron Bentley
Add MultiParent.to_lines
174
        return mpvf.get_line_list(['a'])[0]
175
0.9.1 by Aaron Bentley
Get trivial case passing
176
    @classmethod
177
    def from_texts(cls, text, parents=()):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
178
        """Produce a MultiParent from a text and list of parent text"""
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
179
        return cls.from_lines(BytesIO(text).readlines(),
180
                              [BytesIO(p).readlines() for p in parents])
0.9.1 by Aaron Bentley
Get trivial case passing
181
0.9.4 by Aaron Bentley
Start supporting serialization
182
    def to_patch(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
183
        """Yield text lines for a patch"""
0.9.4 by Aaron Bentley
Start supporting serialization
184
        for hunk in self.hunks:
185
            for line in hunk.to_patch():
186
                yield line
187
0.9.25 by Aaron Bentley
More messy hacking
188
    def patch_len(self):
6973.7.9 by Jelmer Vernooij
Port multiparent.
189
        return len(b''.join(self.to_patch()))
0.9.25 by Aaron Bentley
More messy hacking
190
191
    def zipped_patch_len(self):
192
        return len(gzip_string(self.to_patch()))
193
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
194
    @classmethod
195
    def from_patch(cls, text):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
196
        """Create a MultiParent from its string form"""
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
197
        return cls._from_patch(BytesIO(text))
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
198
0.9.18 by Aaron Bentley
Implement from_patch
199
    @staticmethod
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
200
    def _from_patch(lines):
201
        """This is private because it is essential to split lines on \n only"""
0.9.18 by Aaron Bentley
Implement from_patch
202
        line_iter = iter(lines)
203
        hunks = []
204
        cur_line = None
6973.8.1 by Jelmer Vernooij
Appease picky reviewer.
205
        while True:
0.9.18 by Aaron Bentley
Implement from_patch
206
            try:
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
207
                cur_line = next(line_iter)
0.9.18 by Aaron Bentley
Implement from_patch
208
            except StopIteration:
209
                break
6973.8.1 by Jelmer Vernooij
Appease picky reviewer.
210
            first_char = cur_line[0:1]
211
            if first_char == b'i':
6973.7.9 by Jelmer Vernooij
Port multiparent.
212
                num_lines = int(cur_line.split(b' ')[1])
6651.2.2 by Martin
Apply 2to3 xrange fix and fix up with sixish range
213
                hunk_lines = [next(line_iter) for _ in range(num_lines)]
0.9.18 by Aaron Bentley
Implement from_patch
214
                hunk_lines[-1] = hunk_lines[-1][:-1]
215
                hunks.append(NewText(hunk_lines))
6973.8.1 by Jelmer Vernooij
Appease picky reviewer.
216
            elif first_char == b'\n':
6973.7.9 by Jelmer Vernooij
Port multiparent.
217
                hunks[-1].lines[-1] += b'\n'
0.9.18 by Aaron Bentley
Implement from_patch
218
            else:
6973.8.1 by Jelmer Vernooij
Appease picky reviewer.
219
                if not (first_char == b'c'):
220
                    raise AssertionError(first_char)
0.9.18 by Aaron Bentley
Implement from_patch
221
                parent, parent_pos, child_pos, num_lines =\
6973.7.9 by Jelmer Vernooij
Port multiparent.
222
                    [int(v) for v in cur_line.split(b' ')[1:]]
0.9.18 by Aaron Bentley
Implement from_patch
223
                hunks.append(ParentText(parent, parent_pos, child_pos,
224
                                        num_lines))
225
        return MultiParent(hunks)
226
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
227
    def range_iterator(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
228
        """Iterate through the hunks, with range indicated
229
230
        kind is "new" or "parent".
231
        for "new", data is a list of lines.
232
        for "parent", data is (parent, parent_start, parent_end)
233
        :return: a generator of (start, end, kind, data)
234
        """
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
235
        start = 0
236
        for hunk in self.hunks:
237
            if isinstance(hunk, NewText):
238
                kind = 'new'
239
                end = start + len(hunk.lines)
240
                data = hunk.lines
241
            else:
242
                kind = 'parent'
243
                start = hunk.child_pos
244
                end = start + hunk.num_lines
245
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
246
                        hunk.num_lines)
247
            yield start, end, kind, data
248
            start = end
249
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
250
    def num_lines(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
251
        """The number of lines in the output text"""
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
252
        extra_n = 0
253
        for hunk in reversed(self.hunks):
254
            if isinstance(hunk, ParentText):
255
               return hunk.child_pos + hunk.num_lines + extra_n
256
            extra_n += len(hunk.lines)
257
        return extra_n
258
0.9.25 by Aaron Bentley
More messy hacking
259
    def is_snapshot(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
260
        """Return true of this hunk is effectively a fulltext"""
0.9.25 by Aaron Bentley
More messy hacking
261
        if len(self.hunks) != 1:
262
            return False
263
        return (isinstance(self.hunks[0], NewText))
264
0.9.1 by Aaron Bentley
Get trivial case passing
265
266
class NewText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
267
    """The contents of text that is introduced by this text"""
0.9.1 by Aaron Bentley
Get trivial case passing
268
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
269
    __slots__ = ['lines']
270
0.9.1 by Aaron Bentley
Get trivial case passing
271
    def __init__(self, lines):
272
        self.lines = lines
273
274
    def __eq__(self, other):
275
        if self.__class__ is not other.__class__:
276
            return False
277
        return (other.lines == self.lines)
0.9.2 by Aaron Bentley
Get single-parent comparison working
278
279
    def __repr__(self):
280
        return 'NewText(%r)' % self.lines
281
0.9.4 by Aaron Bentley
Start supporting serialization
282
    def to_patch(self):
6973.7.9 by Jelmer Vernooij
Port multiparent.
283
        yield b'i %d\n' % len(self.lines)
0.9.4 by Aaron Bentley
Start supporting serialization
284
        for line in self.lines:
285
            yield line
6973.7.9 by Jelmer Vernooij
Port multiparent.
286
        yield b'\n'
0.9.4 by Aaron Bentley
Start supporting serialization
287
0.9.2 by Aaron Bentley
Get single-parent comparison working
288
289
class ParentText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
290
    """A reference to text present in a parent text"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
291
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
292
    __slots__ = ['parent', 'parent_pos', 'child_pos', 'num_lines']
293
0.9.2 by Aaron Bentley
Get single-parent comparison working
294
    def __init__(self, parent, parent_pos, child_pos, num_lines):
295
        self.parent = parent
296
        self.parent_pos = parent_pos
297
        self.child_pos = child_pos
298
        self.num_lines = num_lines
299
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
300
    def _as_dict(self):
6973.7.9 by Jelmer Vernooij
Port multiparent.
301
        return {b'parent': self.parent,
302
                b'parent_pos': self.parent_pos,
303
                b'child_pos': self.child_pos,
304
                b'num_lines': self.num_lines}
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
305
0.9.2 by Aaron Bentley
Get single-parent comparison working
306
    def __repr__(self):
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
307
        return ('ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'
308
                ' %(num_lines)r)' % self._as_dict())
0.9.2 by Aaron Bentley
Get single-parent comparison working
309
310
    def __eq__(self, other):
4088.3.1 by Benjamin Peterson
compare types with 'is' not ==
311
        if self.__class__ is not other.__class__:
0.9.2 by Aaron Bentley
Get single-parent comparison working
312
            return False
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
313
        return self._as_dict() == other._as_dict()
0.9.4 by Aaron Bentley
Start supporting serialization
314
315
    def to_patch(self):
6973.7.9 by Jelmer Vernooij
Port multiparent.
316
        yield (b'c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
317
               % self._as_dict())
0.9.8 by Aaron Bentley
get add_version working
318
319
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
320
class BaseVersionedFile(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
321
    """Pseudo-VersionedFile skeleton for MultiParent"""
0.9.8 by Aaron Bentley
get add_version working
322
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
323
    def __init__(self, snapshot_interval=25, max_snapshots=None):
0.9.8 by Aaron Bentley
get add_version working
324
        self._lines = {}
325
        self._parents = {}
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
326
        self._snapshots = set()
0.9.12 by Aaron Bentley
Make benchmarks for mp
327
        self.snapshot_interval = snapshot_interval
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
328
        self.max_snapshots = max_snapshots
0.9.12 by Aaron Bentley
Make benchmarks for mp
329
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
330
    def versions(self):
331
        return iter(self._parents)
332
2520.4.61 by Aaron Bentley
Do bulk insertion of records
333
    def has_version(self, version):
334
        return version in self._parents
335
0.9.12 by Aaron Bentley
Make benchmarks for mp
336
    def do_snapshot(self, version_id, parent_ids):
4031.3.1 by Frank Aspell
Fixing various typos
337
        """Determine whether to perform a snapshot for this version"""
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
338
        if self.snapshot_interval is None:
339
            return False
340
        if self.max_snapshots is not None and\
341
            len(self._snapshots) == self.max_snapshots:
0.9.14 by Aaron Bentley
Temporarily force snapshots to 44
342
            return False
0.9.12 by Aaron Bentley
Make benchmarks for mp
343
        if len(parent_ids) == 0:
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
344
            return True
6651.2.2 by Martin
Apply 2to3 xrange fix and fix up with sixish range
345
        for ignored in range(self.snapshot_interval):
0.9.12 by Aaron Bentley
Make benchmarks for mp
346
            if len(parent_ids) == 0:
347
                return False
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
348
            version_ids = parent_ids
349
            parent_ids = []
350
            for version_id in version_ids:
351
                if version_id not in self._snapshots:
352
                    parent_ids.extend(self._parents[version_id])
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
353
        else:
354
            return True
0.9.8 by Aaron Bentley
get add_version working
355
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
356
    def add_version(self, lines, version_id, parent_ids,
0.9.20 by Aaron Bentley
Convert to a plugin
357
                    force_snapshot=None, single_parent=False):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
358
        """Add a version to the versionedfile
359
360
        :param lines: The list of lines to add.  Must be split on '\n'.
361
        :param version_id: The version_id of the version to add
362
        :param force_snapshot: If true, force this version to be added as a
363
            snapshot version.  If false, force this version to be added as a
364
            diff.  If none, determine this automatically.
365
        :param single_parent: If true, use a single parent, rather than
366
            multiple parents.
367
        """
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
368
        if force_snapshot is None:
369
            do_snapshot = self.do_snapshot(version_id, parent_ids)
370
        else:
371
            do_snapshot = force_snapshot
372
        if do_snapshot:
373
            self._snapshots.add(version_id)
0.9.12 by Aaron Bentley
Make benchmarks for mp
374
            diff = MultiParent([NewText(lines)])
375
        else:
0.9.20 by Aaron Bentley
Convert to a plugin
376
            if single_parent:
377
                parent_lines = self.get_line_list(parent_ids[:1])
378
            else:
379
                parent_lines = self.get_line_list(parent_ids)
0.9.12 by Aaron Bentley
Make benchmarks for mp
380
            diff = MultiParent.from_lines(lines, parent_lines)
0.9.25 by Aaron Bentley
More messy hacking
381
            if diff.is_snapshot():
382
                self._snapshots.add(version_id)
0.9.8 by Aaron Bentley
get add_version working
383
        self.add_diff(diff, version_id, parent_ids)
384
        self._lines[version_id] = lines
385
0.9.35 by Aaron Bentley
Add build ranking
386
    def get_parents(self, version_id):
387
        return self._parents[version_id]
388
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
389
    def make_snapshot(self, version_id):
390
        snapdiff = MultiParent([NewText(self.cache_version(version_id))])
0.9.36 by Aaron Bentley
merge changes
391
        self.add_diff(snapdiff, version_id, self._parents[version_id])
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
392
        self._snapshots.add(version_id)
393
0.9.20 by Aaron Bentley
Convert to a plugin
394
    def import_versionedfile(self, vf, snapshots, no_cache=True,
0.9.22 by Aaron Bentley
Fix restoration bug
395
                             single_parent=False, verify=False):
0.9.20 by Aaron Bentley
Convert to a plugin
396
        """Import all revisions of a versionedfile
397
398
        :param vf: The versionedfile to import
399
        :param snapshots: If provided, the revisions to make snapshots of.
400
            Otherwise, this will be auto-determined
401
        :param no_cache: If true, clear the cache after every add.
402
        :param single_parent: If true, omit all but one parent text, (but
403
            retain parent metadata).
404
        """
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
405
        if not (no_cache or not verify):
406
            raise ValueError()
0.9.19 by Aaron Bentley
More tweakage
407
        revisions = set(vf.versions())
408
        total = len(revisions)
6861.4.1 by Jelmer Vernooij
Make progress bars context managers.
409
        with ui.ui_factory.nested_progress_bar() as pb:
0.9.20 by Aaron Bentley
Convert to a plugin
410
            while len(revisions) > 0:
411
                added = set()
412
                for revision in revisions:
413
                    parents = vf.get_parents(revision)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
414
                    if [p for p in parents if p not in self._parents] != []:
0.9.20 by Aaron Bentley
Convert to a plugin
415
                        continue
6973.7.9 by Jelmer Vernooij
Port multiparent.
416
                    lines = [a + b' ' + l for a, l in
3316.2.13 by Robert Collins
* ``VersionedFile.annotate_iter`` is deprecated. While in principal this
417
                             vf.annotate(revision)]
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
418
                    if snapshots is None:
0.9.20 by Aaron Bentley
Convert to a plugin
419
                        force_snapshot = None
420
                    else:
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
421
                        force_snapshot = (revision in snapshots)
0.9.20 by Aaron Bentley
Convert to a plugin
422
                    self.add_version(lines, revision, parents, force_snapshot,
423
                                     single_parent)
424
                    added.add(revision)
425
                    if no_cache:
426
                        self.clear_cache()
0.9.25 by Aaron Bentley
More messy hacking
427
                        vf.clear_cache()
0.9.22 by Aaron Bentley
Fix restoration bug
428
                        if verify:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
429
                            if not (lines == self.get_line_list([revision])[0]):
430
                                raise AssertionError()
0.9.22 by Aaron Bentley
Fix restoration bug
431
                            self.clear_cache()
6138.4.1 by Jonathan Riddell
add gettext to progress bar strings
432
                    pb.update(gettext('Importing revisions'),
0.9.20 by Aaron Bentley
Convert to a plugin
433
                              (total - len(revisions)) + len(added), total)
434
                revisions = [r for r in revisions if r not in added]
0.9.19 by Aaron Bentley
More tweakage
435
0.9.23 by Aaron Bentley
handle snapshots all at once
436
    def select_snapshots(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
437
        """Determine which versions to add as snapshots"""
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
438
        build_ancestors = {}
0.9.23 by Aaron Bentley
handle snapshots all at once
439
        descendants = {}
440
        snapshots = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
441
        for version_id in topo_iter(vf):
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
442
            potential_build_ancestors = set(vf.get_parents(version_id))
443
            parents = vf.get_parents(version_id)
444
            if len(parents) == 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
445
                snapshots.add(version_id)
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
446
                build_ancestors[version_id] = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
447
            else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
448
                for parent in vf.get_parents(version_id):
449
                    potential_build_ancestors.update(build_ancestors[parent])
450
                if len(potential_build_ancestors) > self.snapshot_interval:
451
                    snapshots.add(version_id)
452
                    build_ancestors[version_id] = set()
0.9.23 by Aaron Bentley
handle snapshots all at once
453
                else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
454
                    build_ancestors[version_id] = potential_build_ancestors
0.9.23 by Aaron Bentley
handle snapshots all at once
455
        return snapshots
456
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
457
    def select_by_size(self, num):
0.9.35 by Aaron Bentley
Add build ranking
458
        """Select snapshots for minimum output size"""
459
        num -= len(self._snapshots)
0.9.36 by Aaron Bentley
merge changes
460
        new_snapshots = self.get_size_ranking()[-num:]
461
        return [v for n, v in new_snapshots]
0.9.35 by Aaron Bentley
Add build ranking
462
463
    def get_size_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
464
        """Get versions ranked by size"""
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
465
        versions = []
466
        new_snapshots = set()
467
        for version_id in self.versions():
468
            if version_id in self._snapshots:
469
                continue
470
            diff_len = self.get_diff(version_id).patch_len()
471
            snapshot_len = MultiParent([NewText(
472
                self.cache_version(version_id))]).patch_len()
0.9.36 by Aaron Bentley
merge changes
473
            versions.append((snapshot_len - diff_len, version_id))
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
474
        versions.sort()
0.9.36 by Aaron Bentley
merge changes
475
        return versions
476
477
    def import_diffs(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
478
        """Import the diffs from another pseudo-versionedfile"""
0.9.36 by Aaron Bentley
merge changes
479
        for version_id in vf.versions():
480
            self.add_diff(vf.get_diff(version_id), version_id,
481
                          vf._parents[version_id])
0.9.23 by Aaron Bentley
handle snapshots all at once
482
0.9.35 by Aaron Bentley
Add build ranking
483
    def get_build_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
484
        """Return revisions sorted by how much they reduce build complexity"""
0.9.35 by Aaron Bentley
Add build ranking
485
        could_avoid = {}
486
        referenced_by = {}
487
        for version_id in topo_iter(self):
488
            could_avoid[version_id] = set()
489
            if version_id not in self._snapshots:
490
                for parent_id in self._parents[version_id]:
491
                    could_avoid[version_id].update(could_avoid[parent_id])
492
                could_avoid[version_id].update(self._parents)
493
                could_avoid[version_id].discard(version_id)
494
            for avoid_id in could_avoid[version_id]:
495
                referenced_by.setdefault(avoid_id, set()).add(version_id)
496
        available_versions = list(self.versions())
497
        ranking = []
498
        while len(available_versions) > 0:
499
            available_versions.sort(key=lambda x:
500
                len(could_avoid[x]) *
501
                len(referenced_by.get(x, [])))
502
            selected = available_versions.pop()
503
            ranking.append(selected)
504
            for version_id in referenced_by[selected]:
505
                could_avoid[version_id].difference_update(
506
                    could_avoid[selected])
507
            for version_id in could_avoid[selected]:
508
                referenced_by[version_id].difference_update(
509
                    referenced_by[selected]
510
                )
511
        return ranking
512
0.9.8 by Aaron Bentley
get add_version working
513
    def clear_cache(self):
514
        self._lines.clear()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
515
516
    def get_line_list(self, version_ids):
517
        return [self.cache_version(v) for v in version_ids]
518
519
    def cache_version(self, version_id):
520
        try:
521
            return self._lines[version_id]
522
        except KeyError:
523
            pass
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
524
        diff = self.get_diff(version_id)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
525
        lines = []
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
526
        reconstructor = _Reconstructor(self, self._lines, self._parents)
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
527
        reconstructor.reconstruct_version(lines, version_id)
0.9.33 by Aaron Bentley
Enable caching commandline param
528
        self._lines[version_id] = lines
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
529
        return lines
530
0.9.33 by Aaron Bentley
Enable caching commandline param
531
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
532
class MultiMemoryVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
533
    """Memory-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
534
535
    def __init__(self, snapshot_interval=25, max_snapshots=None):
536
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
537
        self._diffs = {}
538
539
    def add_diff(self, diff, version_id, parent_ids):
540
        self._diffs[version_id] = diff
541
        self._parents[version_id] = parent_ids
542
543
    def get_diff(self, version_id):
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
544
        try:
545
            return self._diffs[version_id]
546
        except KeyError:
547
            raise errors.RevisionNotPresent(version_id, self)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
548
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
549
    def destroy(self):
550
        self._diffs = {}
551
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
552
553
class MultiVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
554
    """Disk-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
555
556
    def __init__(self, filename, snapshot_interval=25, max_snapshots=None):
557
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
558
        self._filename = filename
559
        self._diff_offset = {}
560
561
    def get_diff(self, version_id):
562
        start, count = self._diff_offset[version_id]
6977.2.3 by Jelmer Vernooij
Use context managers.
563
        with open(self._filename + '.mpknit', 'rb') as infile:
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
564
            infile.seek(start)
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
565
            sio = BytesIO(infile.read(count))
6977.2.3 by Jelmer Vernooij
Use context managers.
566
        with gzip.GzipFile(None, mode='rb', fileobj=sio) as zip_file:
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
567
            file_version_id = zip_file.readline()
5590.1.1 by John Arbash Meinel
Stop using tuned_gzip, it seems to give incorrect results on python 2.7
568
            content = zip_file.read()
569
            return MultiParent.from_patch(content)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
570
571
    def add_diff(self, diff, version_id, parent_ids):
6977.2.3 by Jelmer Vernooij
Use context managers.
572
        with open(self._filename + '.mpknit', 'ab') as outfile:
2839.4.1 by Alexander Belchenko
multiparent.py: workaround for windows bug: .tell() for files opened in 'ab' mode before any write returns 0
573
            outfile.seek(0, 2)      # workaround for windows bug:
574
                                    # .tell() for files opened in 'ab' mode
575
                                    # before any write returns 0
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
576
            start = outfile.tell()
6977.2.3 by Jelmer Vernooij
Use context managers.
577
            with gzip.GzipFile(None, mode='ab', fileobj=outfile) as zipfile:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
578
                zipfile.writelines(itertools.chain(
6973.7.9 by Jelmer Vernooij
Port multiparent.
579
                    [b'version %s\n' % version_id], diff.to_patch()))
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
580
            end = outfile.tell()
581
        self._diff_offset[version_id] = (start, end-start)
582
        self._parents[version_id] = parent_ids
583
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
584
    def destroy(self):
585
        try:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
586
            os.unlink(self._filename + '.mpknit')
6619.3.2 by Jelmer Vernooij
Apply 2to3 except fix.
587
        except OSError as e:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
588
            if e.errno != errno.ENOENT:
589
                raise
590
        try:
591
            os.unlink(self._filename + '.mpidx')
6619.3.2 by Jelmer Vernooij
Apply 2to3 except fix.
592
        except OSError as e:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
593
            if e.errno != errno.ENOENT:
594
                raise
595
596
    def save(self):
597
        open(self._filename + '.mpidx', 'wb').write(bencode.bencode(
598
            (self._parents, list(self._snapshots), self._diff_offset)))
599
600
    def load(self):
601
        self._parents, snapshots, self._diff_offset = bencode.bdecode(
602
            open(self._filename + '.mpidx', 'rb').read())
603
        self._snapshots = set(snapshots)
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
604
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
605
606
class _Reconstructor(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
607
    """Build a text from the diffs, ancestry graph and cached lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
608
609
    def __init__(self, diffs, lines, parents):
610
        self.diffs = diffs
611
        self.lines = lines
612
        self.parents = parents
613
        self.cursor = {}
614
615
    def reconstruct(self, lines, parent_text, version_id):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
616
        """Append the lines referred to by a ParentText to lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
617
        parent_id = self.parents[version_id][parent_text.parent]
618
        end = parent_text.parent_pos + parent_text.num_lines
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
619
        return self._reconstruct(lines, parent_id, parent_text.parent_pos,
620
                                 end)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
621
622
    def _reconstruct(self, lines, req_version_id, req_start, req_end):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
623
        """Append lines for the requested version_id range"""
624
        # stack of pending range requests
2520.4.16 by Aaron Bentley
Handle empty versions correctly
625
        if req_start == req_end:
626
            return
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
627
        pending_reqs = [(req_version_id, req_start, req_end)]
628
        while len(pending_reqs) > 0:
629
            req_version_id, req_start, req_end = pending_reqs.pop()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
630
            # lazily allocate cursors for versions
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
631
            if req_version_id in self.lines:
632
                lines.extend(self.lines[req_version_id][req_start:req_end])
633
                continue
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
634
            try:
635
                start, end, kind, data, iterator = self.cursor[req_version_id]
636
            except KeyError:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
637
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
638
                start, end, kind, data = next(iterator)
0.9.22 by Aaron Bentley
Fix restoration bug
639
            if start > req_start:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
640
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
641
                start, end, kind, data = next(iterator)
0.9.22 by Aaron Bentley
Fix restoration bug
642
0.9.10 by Aaron Bentley
Text reconstruction seems to work
643
            # find the first hunk relevant to the request
644
            while end <= req_start:
6634.2.1 by Martin
Apply 2to3 next fixer and make compatible
645
                start, end, kind, data = next(iterator)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
646
            self.cursor[req_version_id] = start, end, kind, data, iterator
0.9.10 by Aaron Bentley
Text reconstruction seems to work
647
            # if the hunk can't satisfy the whole request, split it in two,
648
            # and leave the second half for later.
649
            if req_end > end:
650
                pending_reqs.append((req_version_id, end, req_end))
651
                req_end = end
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
652
            if kind == 'new':
653
                lines.extend(data[req_start - start: (req_end - start)])
654
            else:
0.9.10 by Aaron Bentley
Text reconstruction seems to work
655
                # If the hunk is a ParentText, rewrite it as a range request
656
                # for the parent, and make it the next pending request.
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
657
                parent, parent_start, parent_end = data
0.9.10 by Aaron Bentley
Text reconstruction seems to work
658
                new_version_id = self.parents[req_version_id][parent]
659
                new_start = parent_start + req_start - start
660
                new_end = parent_end + req_end - end
661
                pending_reqs.append((new_version_id, new_start, new_end))
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
662
663
    def reconstruct_version(self, lines, version_id):
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
664
        length = self.diffs.get_diff(version_id).num_lines()
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
665
        return self._reconstruct(lines, version_id, 0, length)
0.9.25 by Aaron Bentley
More messy hacking
666
2520.4.6 by Aaron Bentley
Get installation started
667
0.9.25 by Aaron Bentley
More messy hacking
668
def gzip_string(lines):
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
669
    sio = BytesIO()
6977.2.3 by Jelmer Vernooij
Use context managers.
670
    with gzip.GzipFile(None, mode='wb', fileobj=sio) as data_file:
671
        data_file.writelines(lines)
0.9.25 by Aaron Bentley
More messy hacking
672
    return sio.getvalue()