/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
5590.1.1 by John Arbash Meinel
Stop using tuned_gzip, it seems to give incorrect results on python 2.7
1
# Copyright (C) 2007-2011 Canonical Ltd
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2520.4.85 by Aaron Bentley
Get all test passing (which just proves there aren't enough tests!)
16
6379.6.3 by Jelmer Vernooij
Use absolute_import.
17
from __future__ import absolute_import
18
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
19
import errno
20
import os
21
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
22
from .lazy_import import lazy_import
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
23
24
lazy_import(globals(), """
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
25
import gzip
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
26
import itertools
0.9.19 by Aaron Bentley
More tweakage
27
6622.1.34 by Jelmer Vernooij
Rename brzlib => breezy.
28
from breezy import (
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
29
    bencode,
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
30
    errors,
0.9.25 by Aaron Bentley
More messy hacking
31
    patiencediff,
32
    ui,
33
    )
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
34
""")
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
35
from .sixish import (
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
36
    BytesIO,
37
    )
0.9.3 by Aaron Bentley
Get three-parent comparisions under test
38
0.9.33 by Aaron Bentley
Enable caching commandline param
39
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
40
def topo_iter_keys(vf, keys=None):
41
    if keys is None:
42
        keys = vf.keys()
43
    parents = vf.get_parent_map(keys)
44
    return _topo_iter(parents, keys)
45
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
46
def topo_iter(vf, versions=None):
47
    if versions is None:
48
        versions = vf.versions()
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
49
    parents = vf.get_parent_map(versions)
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
50
    return _topo_iter(parents, versions)
51
52
def _topo_iter(parents, versions):
53
    seen = set()
54
    descendants = {}
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
55
    def pending_parents(version):
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
56
        if parents[version] is None:
57
            return []
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
58
        return [v for v in parents[version] if v in versions and
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
59
                v not in seen]
2520.4.28 by Aaron Bentley
Force revisions to be topologically sorted
60
    for version_id in versions:
3350.6.4 by Robert Collins
First cut at pluralised VersionedFiles. Some rather massive API incompatabilities, primarily because of the difficulty of coherence among competing stores.
61
        if parents[version_id] is None:
62
            # parentless
63
            continue
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
64
        for parent_id in parents[version_id]:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
65
            descendants.setdefault(parent_id, []).append(version_id)
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
66
    cur = [v for v in versions if len(pending_parents(v)) == 0]
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
67
    while len(cur) > 0:
68
        next = []
69
        for version_id in cur:
70
            if version_id in seen:
71
                continue
2520.4.29 by Aaron Bentley
Reactivate some testing, fix topo_iter
72
            if len(pending_parents(version_id)) != 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
73
                continue
74
            next.extend(descendants.get(version_id, []))
75
            yield version_id
76
            seen.add(version_id)
77
        cur = next
78
79
0.9.1 by Aaron Bentley
Get trivial case passing
80
class MultiParent(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
81
    """A multi-parent diff"""
0.9.1 by Aaron Bentley
Get trivial case passing
82
5374.2.9 by John Arbash Meinel
change slots back to a list.
83
    __slots__ = ['hunks']
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
84
0.9.2 by Aaron Bentley
Get single-parent comparison working
85
    def __init__(self, hunks=None):
86
        if hunks is not None:
87
            self.hunks = hunks
88
        else:
89
            self.hunks = []
90
91
    def __repr__(self):
92
        return "MultiParent(%r)" % self.hunks
93
94
    def __eq__(self, other):
95
        if self.__class__ is not other.__class__:
96
            return False
97
        return (self.hunks == other.hunks)
0.9.1 by Aaron Bentley
Get trivial case passing
98
99
    @staticmethod
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
100
    def from_lines(text, parents=(), left_blocks=None):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
101
        """Produce a MultiParent from a list of lines and parents"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
102
        def compare(parent):
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
103
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
104
                                                           text)
105
            return matcher.get_matching_blocks()
2520.4.41 by Aaron Bentley
Accelerate mpdiff generation
106
        if len(parents) > 0:
107
            if left_blocks is None:
108
                left_blocks = compare(parents[0])
109
            parent_comparisons = [left_blocks] + [compare(p) for p in
110
                                                  parents[1:]]
111
        else:
112
            parent_comparisons = []
0.9.2 by Aaron Bentley
Get single-parent comparison working
113
        cur_line = 0
114
        new_text = NewText([])
115
        parent_text = []
116
        block_iter = [iter(i) for i in parent_comparisons]
117
        diff = MultiParent([])
118
        def next_block(p):
119
            try:
120
                return block_iter[p].next()
121
            except StopIteration:
122
                return None
123
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
124
        while cur_line < len(text):
125
            best_match = None
126
            for p, block in enumerate(cur_block):
127
                if block is None:
128
                    continue
129
                i, j, n = block
2520.4.138 by Aaron Bentley
Fix benign off-by-one error generating mpdiffs
130
                while j + n <= cur_line:
0.9.2 by Aaron Bentley
Get single-parent comparison working
131
                    block = cur_block[p] = next_block(p)
132
                    if block is None:
133
                        break
134
                    i, j, n = block
135
                if block is None:
136
                    continue
137
                if j > cur_line:
138
                    continue
139
                offset = cur_line - j
140
                i += offset
141
                j = cur_line
142
                n -= offset
143
                if n == 0:
144
                    continue
145
                if best_match is None or n > best_match.num_lines:
146
                    best_match = ParentText(p, i, j, n)
147
            if best_match is None:
148
                new_text.lines.append(text[cur_line])
149
                cur_line += 1
150
            else:
151
                if len(new_text.lines) > 0:
152
                    diff.hunks.append(new_text)
153
                    new_text = NewText([])
154
                diff.hunks.append(best_match)
155
                cur_line += best_match.num_lines
156
        if len(new_text.lines) > 0:
157
            diff.hunks.append(new_text)
0.9.1 by Aaron Bentley
Get trivial case passing
158
        return diff
159
2520.4.139 by Aaron Bentley
Support Multiparent.get_matching_blocks
160
    def get_matching_blocks(self, parent, parent_len):
161
        for hunk in self.hunks:
162
            if not isinstance(hunk, ParentText) or hunk.parent != parent:
163
                continue
164
            yield (hunk.parent_pos, hunk.child_pos, hunk.num_lines)
165
        yield parent_len, self.num_lines(), 0
166
2520.4.103 by Aaron Bentley
Add MultiParent.to_lines
167
    def to_lines(self, parents=()):
168
        """Contruct a fulltext from this diff and its parents"""
169
        mpvf = MultiMemoryVersionedFile()
170
        for num, parent in enumerate(parents):
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
171
            mpvf.add_version(BytesIO(parent).readlines(), num, [])
2520.4.103 by Aaron Bentley
Add MultiParent.to_lines
172
        mpvf.add_diff(self, 'a', range(len(parents)))
173
        return mpvf.get_line_list(['a'])[0]
174
0.9.1 by Aaron Bentley
Get trivial case passing
175
    @classmethod
176
    def from_texts(cls, text, parents=()):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
177
        """Produce a MultiParent from a text and list of parent text"""
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
178
        return cls.from_lines(BytesIO(text).readlines(),
179
                              [BytesIO(p).readlines() for p in parents])
0.9.1 by Aaron Bentley
Get trivial case passing
180
0.9.4 by Aaron Bentley
Start supporting serialization
181
    def to_patch(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
182
        """Yield text lines for a patch"""
0.9.4 by Aaron Bentley
Start supporting serialization
183
        for hunk in self.hunks:
184
            for line in hunk.to_patch():
185
                yield line
186
0.9.25 by Aaron Bentley
More messy hacking
187
    def patch_len(self):
188
        return len(''.join(self.to_patch()))
189
190
    def zipped_patch_len(self):
191
        return len(gzip_string(self.to_patch()))
192
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
193
    @classmethod
194
    def from_patch(cls, text):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
195
        """Create a MultiParent from its string form"""
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
196
        return cls._from_patch(BytesIO(text))
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
197
0.9.18 by Aaron Bentley
Implement from_patch
198
    @staticmethod
2520.4.30 by Aaron Bentley
Do our own line splitting for mp-diffs
199
    def _from_patch(lines):
200
        """This is private because it is essential to split lines on \n only"""
0.9.18 by Aaron Bentley
Implement from_patch
201
        line_iter = iter(lines)
202
        hunks = []
203
        cur_line = None
204
        while(True):
205
            try:
206
                cur_line = line_iter.next()
207
            except StopIteration:
208
                break
209
            if cur_line[0] == 'i':
210
                num_lines = int(cur_line.split(' ')[1])
211
                hunk_lines = [line_iter.next() for x in xrange(num_lines)]
212
                hunk_lines[-1] = hunk_lines[-1][:-1]
213
                hunks.append(NewText(hunk_lines))
214
            elif cur_line[0] == '\n':
215
                hunks[-1].lines[-1] += '\n'
216
            else:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
217
                if not (cur_line[0] == 'c'):
218
                    raise AssertionError(cur_line[0])
0.9.18 by Aaron Bentley
Implement from_patch
219
                parent, parent_pos, child_pos, num_lines =\
220
                    [int(v) for v in cur_line.split(' ')[1:]]
221
                hunks.append(ParentText(parent, parent_pos, child_pos,
222
                                        num_lines))
223
        return MultiParent(hunks)
224
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
225
    def range_iterator(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
226
        """Iterate through the hunks, with range indicated
227
228
        kind is "new" or "parent".
229
        for "new", data is a list of lines.
230
        for "parent", data is (parent, parent_start, parent_end)
231
        :return: a generator of (start, end, kind, data)
232
        """
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
233
        start = 0
234
        for hunk in self.hunks:
235
            if isinstance(hunk, NewText):
236
                kind = 'new'
237
                end = start + len(hunk.lines)
238
                data = hunk.lines
239
            else:
240
                kind = 'parent'
241
                start = hunk.child_pos
242
                end = start + hunk.num_lines
243
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
244
                        hunk.num_lines)
245
            yield start, end, kind, data
246
            start = end
247
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
248
    def num_lines(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
249
        """The number of lines in the output text"""
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
250
        extra_n = 0
251
        for hunk in reversed(self.hunks):
252
            if isinstance(hunk, ParentText):
253
               return hunk.child_pos + hunk.num_lines + extra_n
254
            extra_n += len(hunk.lines)
255
        return extra_n
256
0.9.25 by Aaron Bentley
More messy hacking
257
    def is_snapshot(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
258
        """Return true of this hunk is effectively a fulltext"""
0.9.25 by Aaron Bentley
More messy hacking
259
        if len(self.hunks) != 1:
260
            return False
261
        return (isinstance(self.hunks[0], NewText))
262
0.9.1 by Aaron Bentley
Get trivial case passing
263
264
class NewText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
265
    """The contents of text that is introduced by this text"""
0.9.1 by Aaron Bentley
Get trivial case passing
266
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
267
    __slots__ = ['lines']
268
0.9.1 by Aaron Bentley
Get trivial case passing
269
    def __init__(self, lines):
270
        self.lines = lines
271
272
    def __eq__(self, other):
273
        if self.__class__ is not other.__class__:
274
            return False
275
        return (other.lines == self.lines)
0.9.2 by Aaron Bentley
Get single-parent comparison working
276
277
    def __repr__(self):
278
        return 'NewText(%r)' % self.lines
279
0.9.4 by Aaron Bentley
Start supporting serialization
280
    def to_patch(self):
281
        yield 'i %d\n' % len(self.lines)
282
        for line in self.lines:
283
            yield line
284
        yield '\n'
285
0.9.2 by Aaron Bentley
Get single-parent comparison working
286
287
class ParentText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
288
    """A reference to text present in a parent text"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
289
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
290
    __slots__ = ['parent', 'parent_pos', 'child_pos', 'num_lines']
291
0.9.2 by Aaron Bentley
Get single-parent comparison working
292
    def __init__(self, parent, parent_pos, child_pos, num_lines):
293
        self.parent = parent
294
        self.parent_pos = parent_pos
295
        self.child_pos = child_pos
296
        self.num_lines = num_lines
297
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
298
    def _as_dict(self):
299
        return dict(parent=self.parent, parent_pos=self.parent_pos,
300
                    child_pos=self.child_pos, num_lines=self.num_lines)
301
0.9.2 by Aaron Bentley
Get single-parent comparison working
302
    def __repr__(self):
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
303
        return ('ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'
304
                ' %(num_lines)r)' % self._as_dict())
0.9.2 by Aaron Bentley
Get single-parent comparison working
305
306
    def __eq__(self, other):
4088.3.1 by Benjamin Peterson
compare types with 'is' not ==
307
        if self.__class__ is not other.__class__:
0.9.2 by Aaron Bentley
Get single-parent comparison working
308
            return False
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
309
        return self._as_dict() == other._as_dict()
0.9.4 by Aaron Bentley
Start supporting serialization
310
311
    def to_patch(self):
5374.2.1 by John Arbash Meinel
Do some memory shrinking for multiparent.
312
        yield ('c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'
313
               % self._as_dict())
0.9.8 by Aaron Bentley
get add_version working
314
315
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
316
class BaseVersionedFile(object):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
317
    """Pseudo-VersionedFile skeleton for MultiParent"""
0.9.8 by Aaron Bentley
get add_version working
318
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
319
    def __init__(self, snapshot_interval=25, max_snapshots=None):
0.9.8 by Aaron Bentley
get add_version working
320
        self._lines = {}
321
        self._parents = {}
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
322
        self._snapshots = set()
0.9.12 by Aaron Bentley
Make benchmarks for mp
323
        self.snapshot_interval = snapshot_interval
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
324
        self.max_snapshots = max_snapshots
0.9.12 by Aaron Bentley
Make benchmarks for mp
325
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
326
    def versions(self):
327
        return iter(self._parents)
328
2520.4.61 by Aaron Bentley
Do bulk insertion of records
329
    def has_version(self, version):
330
        return version in self._parents
331
0.9.12 by Aaron Bentley
Make benchmarks for mp
332
    def do_snapshot(self, version_id, parent_ids):
4031.3.1 by Frank Aspell
Fixing various typos
333
        """Determine whether to perform a snapshot for this version"""
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
334
        if self.snapshot_interval is None:
335
            return False
336
        if self.max_snapshots is not None and\
337
            len(self._snapshots) == self.max_snapshots:
0.9.14 by Aaron Bentley
Temporarily force snapshots to 44
338
            return False
0.9.12 by Aaron Bentley
Make benchmarks for mp
339
        if len(parent_ids) == 0:
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
340
            return True
341
        for ignored in xrange(self.snapshot_interval):
0.9.12 by Aaron Bentley
Make benchmarks for mp
342
            if len(parent_ids) == 0:
343
                return False
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
344
            version_ids = parent_ids
345
            parent_ids = []
346
            for version_id in version_ids:
347
                if version_id not in self._snapshots:
348
                    parent_ids.extend(self._parents[version_id])
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
349
        else:
350
            return True
0.9.8 by Aaron Bentley
get add_version working
351
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
352
    def add_version(self, lines, version_id, parent_ids,
0.9.20 by Aaron Bentley
Convert to a plugin
353
                    force_snapshot=None, single_parent=False):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
354
        """Add a version to the versionedfile
355
356
        :param lines: The list of lines to add.  Must be split on '\n'.
357
        :param version_id: The version_id of the version to add
358
        :param force_snapshot: If true, force this version to be added as a
359
            snapshot version.  If false, force this version to be added as a
360
            diff.  If none, determine this automatically.
361
        :param single_parent: If true, use a single parent, rather than
362
            multiple parents.
363
        """
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
364
        if force_snapshot is None:
365
            do_snapshot = self.do_snapshot(version_id, parent_ids)
366
        else:
367
            do_snapshot = force_snapshot
368
        if do_snapshot:
369
            self._snapshots.add(version_id)
0.9.12 by Aaron Bentley
Make benchmarks for mp
370
            diff = MultiParent([NewText(lines)])
371
        else:
0.9.20 by Aaron Bentley
Convert to a plugin
372
            if single_parent:
373
                parent_lines = self.get_line_list(parent_ids[:1])
374
            else:
375
                parent_lines = self.get_line_list(parent_ids)
0.9.12 by Aaron Bentley
Make benchmarks for mp
376
            diff = MultiParent.from_lines(lines, parent_lines)
0.9.25 by Aaron Bentley
More messy hacking
377
            if diff.is_snapshot():
378
                self._snapshots.add(version_id)
0.9.8 by Aaron Bentley
get add_version working
379
        self.add_diff(diff, version_id, parent_ids)
380
        self._lines[version_id] = lines
381
0.9.35 by Aaron Bentley
Add build ranking
382
    def get_parents(self, version_id):
383
        return self._parents[version_id]
384
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
385
    def make_snapshot(self, version_id):
386
        snapdiff = MultiParent([NewText(self.cache_version(version_id))])
0.9.36 by Aaron Bentley
merge changes
387
        self.add_diff(snapdiff, version_id, self._parents[version_id])
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
388
        self._snapshots.add(version_id)
389
0.9.20 by Aaron Bentley
Convert to a plugin
390
    def import_versionedfile(self, vf, snapshots, no_cache=True,
0.9.22 by Aaron Bentley
Fix restoration bug
391
                             single_parent=False, verify=False):
0.9.20 by Aaron Bentley
Convert to a plugin
392
        """Import all revisions of a versionedfile
393
394
        :param vf: The versionedfile to import
395
        :param snapshots: If provided, the revisions to make snapshots of.
396
            Otherwise, this will be auto-determined
397
        :param no_cache: If true, clear the cache after every add.
398
        :param single_parent: If true, omit all but one parent text, (but
399
            retain parent metadata).
400
        """
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
401
        if not (no_cache or not verify):
402
            raise ValueError()
0.9.19 by Aaron Bentley
More tweakage
403
        revisions = set(vf.versions())
404
        total = len(revisions)
0.9.20 by Aaron Bentley
Convert to a plugin
405
        pb = ui.ui_factory.nested_progress_bar()
406
        try:
407
            while len(revisions) > 0:
408
                added = set()
409
                for revision in revisions:
410
                    parents = vf.get_parents(revision)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
411
                    if [p for p in parents if p not in self._parents] != []:
0.9.20 by Aaron Bentley
Convert to a plugin
412
                        continue
413
                    lines = [a + ' ' + l for a, l in
3316.2.13 by Robert Collins
* ``VersionedFile.annotate_iter`` is deprecated. While in principal this
414
                             vf.annotate(revision)]
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
415
                    if snapshots is None:
0.9.20 by Aaron Bentley
Convert to a plugin
416
                        force_snapshot = None
417
                    else:
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
418
                        force_snapshot = (revision in snapshots)
0.9.20 by Aaron Bentley
Convert to a plugin
419
                    self.add_version(lines, revision, parents, force_snapshot,
420
                                     single_parent)
421
                    added.add(revision)
422
                    if no_cache:
423
                        self.clear_cache()
0.9.25 by Aaron Bentley
More messy hacking
424
                        vf.clear_cache()
0.9.22 by Aaron Bentley
Fix restoration bug
425
                        if verify:
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
426
                            if not (lines == self.get_line_list([revision])[0]):
427
                                raise AssertionError()
0.9.22 by Aaron Bentley
Fix restoration bug
428
                            self.clear_cache()
6138.4.1 by Jonathan Riddell
add gettext to progress bar strings
429
                    pb.update(gettext('Importing revisions'),
0.9.20 by Aaron Bentley
Convert to a plugin
430
                              (total - len(revisions)) + len(added), total)
431
                revisions = [r for r in revisions if r not in added]
432
        finally:
433
            pb.finished()
0.9.19 by Aaron Bentley
More tweakage
434
0.9.23 by Aaron Bentley
handle snapshots all at once
435
    def select_snapshots(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
436
        """Determine which versions to add as snapshots"""
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
437
        build_ancestors = {}
0.9.23 by Aaron Bentley
handle snapshots all at once
438
        descendants = {}
439
        snapshots = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
440
        for version_id in topo_iter(vf):
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
441
            potential_build_ancestors = set(vf.get_parents(version_id))
442
            parents = vf.get_parents(version_id)
443
            if len(parents) == 0:
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
444
                snapshots.add(version_id)
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
445
                build_ancestors[version_id] = set()
0.9.26 by Aaron Bentley
Move topological iteration into an iterator
446
            else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
447
                for parent in vf.get_parents(version_id):
448
                    potential_build_ancestors.update(build_ancestors[parent])
449
                if len(potential_build_ancestors) > self.snapshot_interval:
450
                    snapshots.add(version_id)
451
                    build_ancestors[version_id] = set()
0.9.23 by Aaron Bentley
handle snapshots all at once
452
                else:
0.9.28 by Aaron Bentley
Update snapshot-picking to use sets of ancestors
453
                    build_ancestors[version_id] = potential_build_ancestors
0.9.23 by Aaron Bentley
handle snapshots all at once
454
        return snapshots
455
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
456
    def select_by_size(self, num):
0.9.35 by Aaron Bentley
Add build ranking
457
        """Select snapshots for minimum output size"""
458
        num -= len(self._snapshots)
0.9.36 by Aaron Bentley
merge changes
459
        new_snapshots = self.get_size_ranking()[-num:]
460
        return [v for n, v in new_snapshots]
0.9.35 by Aaron Bentley
Add build ranking
461
462
    def get_size_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
463
        """Get versions ranked by size"""
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
464
        versions = []
465
        new_snapshots = set()
466
        for version_id in self.versions():
467
            if version_id in self._snapshots:
468
                continue
469
            diff_len = self.get_diff(version_id).patch_len()
470
            snapshot_len = MultiParent([NewText(
471
                self.cache_version(version_id))]).patch_len()
0.9.36 by Aaron Bentley
merge changes
472
            versions.append((snapshot_len - diff_len, version_id))
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
473
        versions.sort()
0.9.36 by Aaron Bentley
merge changes
474
        return versions
475
476
    def import_diffs(self, vf):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
477
        """Import the diffs from another pseudo-versionedfile"""
0.9.36 by Aaron Bentley
merge changes
478
        for version_id in vf.versions():
479
            self.add_diff(vf.get_diff(version_id), version_id,
480
                          vf._parents[version_id])
0.9.23 by Aaron Bentley
handle snapshots all at once
481
0.9.35 by Aaron Bentley
Add build ranking
482
    def get_build_ranking(self):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
483
        """Return revisions sorted by how much they reduce build complexity"""
0.9.35 by Aaron Bentley
Add build ranking
484
        could_avoid = {}
485
        referenced_by = {}
486
        for version_id in topo_iter(self):
487
            could_avoid[version_id] = set()
488
            if version_id not in self._snapshots:
489
                for parent_id in self._parents[version_id]:
490
                    could_avoid[version_id].update(could_avoid[parent_id])
491
                could_avoid[version_id].update(self._parents)
492
                could_avoid[version_id].discard(version_id)
493
            for avoid_id in could_avoid[version_id]:
494
                referenced_by.setdefault(avoid_id, set()).add(version_id)
495
        available_versions = list(self.versions())
496
        ranking = []
497
        while len(available_versions) > 0:
498
            available_versions.sort(key=lambda x:
499
                len(could_avoid[x]) *
500
                len(referenced_by.get(x, [])))
501
            selected = available_versions.pop()
502
            ranking.append(selected)
503
            for version_id in referenced_by[selected]:
504
                could_avoid[version_id].difference_update(
505
                    could_avoid[selected])
506
            for version_id in could_avoid[selected]:
507
                referenced_by[version_id].difference_update(
508
                    referenced_by[selected]
509
                )
510
        return ranking
511
0.9.8 by Aaron Bentley
get add_version working
512
    def clear_cache(self):
513
        self._lines.clear()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
514
515
    def get_line_list(self, version_ids):
516
        return [self.cache_version(v) for v in version_ids]
517
518
    def cache_version(self, version_id):
519
        try:
520
            return self._lines[version_id]
521
        except KeyError:
522
            pass
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
523
        diff = self.get_diff(version_id)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
524
        lines = []
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
525
        reconstructor = _Reconstructor(self, self._lines, self._parents)
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
526
        reconstructor.reconstruct_version(lines, version_id)
0.9.33 by Aaron Bentley
Enable caching commandline param
527
        self._lines[version_id] = lines
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
528
        return lines
529
0.9.33 by Aaron Bentley
Enable caching commandline param
530
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
531
class MultiMemoryVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
532
    """Memory-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
533
534
    def __init__(self, snapshot_interval=25, max_snapshots=None):
535
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
536
        self._diffs = {}
537
538
    def add_diff(self, diff, version_id, parent_ids):
539
        self._diffs[version_id] = diff
540
        self._parents[version_id] = parent_ids
541
542
    def get_diff(self, version_id):
3287.5.2 by Robert Collins
Deprecate VersionedFile.get_parents, breaking pulling from a ghost containing knit or pack repository to weaves, which improves correctness and allows simplification of core code.
543
        try:
544
            return self._diffs[version_id]
545
        except KeyError:
546
            raise errors.RevisionNotPresent(version_id, self)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
547
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
548
    def destroy(self):
549
        self._diffs = {}
550
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
551
552
class MultiVersionedFile(BaseVersionedFile):
2520.4.124 by Aaron Bentley
Add docs to multiparent.py
553
    """Disk-backed pseudo-versionedfile"""
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
554
555
    def __init__(self, filename, snapshot_interval=25, max_snapshots=None):
556
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
557
        self._filename = filename
558
        self._diff_offset = {}
559
560
    def get_diff(self, version_id):
561
        start, count = self._diff_offset[version_id]
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
562
        infile = open(self._filename + '.mpknit', 'rb')
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
563
        try:
564
            infile.seek(start)
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
565
            sio = BytesIO(infile.read(count))
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
566
        finally:
567
            infile.close()
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
568
        zip_file = gzip.GzipFile(None, mode='rb', fileobj=sio)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
569
        try:
570
            file_version_id = zip_file.readline()
5590.1.1 by John Arbash Meinel
Stop using tuned_gzip, it seems to give incorrect results on python 2.7
571
            content = zip_file.read()
572
            return MultiParent.from_patch(content)
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
573
        finally:
574
            zip_file.close()
575
576
    def add_diff(self, diff, version_id, parent_ids):
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
577
        outfile = open(self._filename + '.mpknit', 'ab')
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
578
        try:
2839.4.1 by Alexander Belchenko
multiparent.py: workaround for windows bug: .tell() for files opened in 'ab' mode before any write returns 0
579
            outfile.seek(0, 2)      # workaround for windows bug:
580
                                    # .tell() for files opened in 'ab' mode
581
                                    # before any write returns 0
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
582
            start = outfile.tell()
583
            try:
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
584
                zipfile = gzip.GzipFile(None, mode='ab', fileobj=outfile)
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
585
                zipfile.writelines(itertools.chain(
586
                    ['version %s\n' % version_id], diff.to_patch()))
0.9.30 by Aaron Bentley
Split into MultiVersionedFile and MultiMemoryVersionedFile
587
            finally:
588
                zipfile.close()
589
            end = outfile.tell()
590
        finally:
591
            outfile.close()
592
        self._diff_offset[version_id] = (start, end-start)
593
        self._parents[version_id] = parent_ids
594
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
595
    def destroy(self):
596
        try:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
597
            os.unlink(self._filename + '.mpknit')
6619.3.2 by Jelmer Vernooij
Apply 2to3 except fix.
598
        except OSError as e:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
599
            if e.errno != errno.ENOENT:
600
                raise
601
        try:
602
            os.unlink(self._filename + '.mpidx')
6619.3.2 by Jelmer Vernooij
Apply 2to3 except fix.
603
        except OSError as e:
0.9.34 by Aaron Bentley
Implement save, load, snapshot-by-size
604
            if e.errno != errno.ENOENT:
605
                raise
606
607
    def save(self):
608
        open(self._filename + '.mpidx', 'wb').write(bencode.bencode(
609
            (self._parents, list(self._snapshots), self._diff_offset)))
610
611
    def load(self):
612
        self._parents, snapshots, self._diff_offset = bencode.bdecode(
613
            open(self._filename + '.mpidx', 'rb').read())
614
        self._snapshots = set(snapshots)
0.9.31 by Aaron Bentley
Allow selecting MemoryVersionedFile from commandline
615
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
616
617
class _Reconstructor(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
618
    """Build a text from the diffs, ancestry graph and cached lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
619
620
    def __init__(self, diffs, lines, parents):
621
        self.diffs = diffs
622
        self.lines = lines
623
        self.parents = parents
624
        self.cursor = {}
625
626
    def reconstruct(self, lines, parent_text, version_id):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
627
        """Append the lines referred to by a ParentText to lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
628
        parent_id = self.parents[version_id][parent_text.parent]
629
        end = parent_text.parent_pos + parent_text.num_lines
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
630
        return self._reconstruct(lines, parent_id, parent_text.parent_pos,
631
                                 end)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
632
633
    def _reconstruct(self, lines, req_version_id, req_start, req_end):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
634
        """Append lines for the requested version_id range"""
635
        # stack of pending range requests
2520.4.16 by Aaron Bentley
Handle empty versions correctly
636
        if req_start == req_end:
637
            return
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
638
        pending_reqs = [(req_version_id, req_start, req_end)]
639
        while len(pending_reqs) > 0:
640
            req_version_id, req_start, req_end = pending_reqs.pop()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
641
            # lazily allocate cursors for versions
2520.4.144 by Aaron Bentley
Make Reconstructor use cached versions
642
            if req_version_id in self.lines:
643
                lines.extend(self.lines[req_version_id][req_start:req_end])
644
                continue
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
645
            try:
646
                start, end, kind, data, iterator = self.cursor[req_version_id]
647
            except KeyError:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
648
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
649
                start, end, kind, data = iterator.next()
0.9.22 by Aaron Bentley
Fix restoration bug
650
            if start > req_start:
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
651
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
0.9.22 by Aaron Bentley
Fix restoration bug
652
                start, end, kind, data = iterator.next()
653
0.9.10 by Aaron Bentley
Text reconstruction seems to work
654
            # find the first hunk relevant to the request
655
            while end <= req_start:
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
656
                start, end, kind, data = iterator.next()
657
            self.cursor[req_version_id] = start, end, kind, data, iterator
0.9.10 by Aaron Bentley
Text reconstruction seems to work
658
            # if the hunk can't satisfy the whole request, split it in two,
659
            # and leave the second half for later.
660
            if req_end > end:
661
                pending_reqs.append((req_version_id, end, req_end))
662
                req_end = end
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
663
            if kind == 'new':
664
                lines.extend(data[req_start - start: (req_end - start)])
665
            else:
0.9.10 by Aaron Bentley
Text reconstruction seems to work
666
                # If the hunk is a ParentText, rewrite it as a range request
667
                # for the parent, and make it the next pending request.
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
668
                parent, parent_start, parent_end = data
0.9.10 by Aaron Bentley
Text reconstruction seems to work
669
                new_version_id = self.parents[req_version_id][parent]
670
                new_start = parent_start + req_start - start
671
                new_end = parent_end + req_end - end
672
                pending_reqs.append((new_version_id, new_start, new_end))
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
673
674
    def reconstruct_version(self, lines, version_id):
0.9.29 by Aaron Bentley
Support using disk for knit reconstruction
675
        length = self.diffs.get_diff(version_id).num_lines()
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
676
        return self._reconstruct(lines, version_id, 0, length)
0.9.25 by Aaron Bentley
More messy hacking
677
2520.4.6 by Aaron Bentley
Get installation started
678
0.9.25 by Aaron Bentley
More messy hacking
679
def gzip_string(lines):
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
680
    sio = BytesIO()
5753.2.2 by Jelmer Vernooij
Remove some unnecessary imports, clean up lazy imports.
681
    data_file = gzip.GzipFile(None, mode='wb', fileobj=sio)
0.9.25 by Aaron Bentley
More messy hacking
682
    data_file.writelines(lines)
683
    data_file.close()
684
    return sio.getvalue()