# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

from __future__ import absolute_import

import errno
import os

from .lazy_import import lazy_import

lazy_import(globals(), """
import gzip
import itertools

from breezy import (
    bencode,
    errors,
    patiencediff,
    ui,
    )
""")
# gettext is used by import_versionedfile()'s progress message; assumed to
# come from breezy.i18n (the annotated revision shown here lacks the import).
from .i18n import gettext
from .sixish import (
    BytesIO,
    range,
    )

def topo_iter_keys(vf, keys=None):
    if keys is None:
        keys = vf.keys()
    parents = vf.get_parent_map(keys)
    return _topo_iter(parents, keys)

def topo_iter(vf, versions=None):
    if versions is None:
        versions = vf.versions()
    parents = vf.get_parent_map(versions)
    return _topo_iter(parents, versions)

def _topo_iter(parents, versions):
    seen = set()
    descendants = {}
    def pending_parents(version):
        if parents[version] is None:
            return []
        return [v for v in parents[version] if v in versions and
                v not in seen]
    for version_id in versions:
        if parents[version_id] is None:
            # parentless
            continue
        for parent_id in parents[version_id]:
            descendants.setdefault(parent_id, []).append(version_id)
    cur = [v for v in versions if len(pending_parents(v)) == 0]
    while len(cur) > 0:
        next = []
        for version_id in cur:
            if version_id in seen:
                continue
            if len(pending_parents(version_id)) != 0:
                continue
            next.extend(descendants.get(version_id, []))
            yield version_id
            seen.add(version_id)
        cur = next

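# Illustrative sketch (not part of the original module): _topo_iter yields
# every version only after all of its parents within the requested set.
# The version ids below are made up.
#
#   list(_topo_iter({'a': (), 'b': ('a',), 'c': ('a',)}, ['b', 'c', 'a']))
#   # => ['a', 'b', 'c']
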
class MultiParent(object):
    """A multi-parent diff"""

    __slots__ = ['hunks']

    def __init__(self, hunks=None):
        if hunks is not None:
            self.hunks = hunks
        else:
            self.hunks = []

    def __repr__(self):
        return "MultiParent(%r)" % self.hunks

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (self.hunks == other.hunks)

    @staticmethod
    def from_lines(text, parents=(), left_blocks=None):
        """Produce a MultiParent from a list of lines and parents"""
        def compare(parent):
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
                                                           text)
            return matcher.get_matching_blocks()
        if len(parents) > 0:
            if left_blocks is None:
                left_blocks = compare(parents[0])
            parent_comparisons = [left_blocks] + [compare(p) for p in
                                                  parents[1:]]
        else:
            parent_comparisons = []
        cur_line = 0
        new_text = NewText([])
        parent_text = []
        block_iter = [iter(i) for i in parent_comparisons]
        diff = MultiParent([])
        def next_block(p):
            try:
                return next(block_iter[p])
            except StopIteration:
                return None
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
        while cur_line < len(text):
            best_match = None
            for p, block in enumerate(cur_block):
                if block is None:
                    continue
                i, j, n = block
                while j + n <= cur_line:
                    block = cur_block[p] = next_block(p)
                    if block is None:
                        break
                    i, j, n = block
                if block is None:
                    continue
                if j > cur_line:
                    continue
                offset = cur_line - j
                i += offset
                j = cur_line
                n -= offset
                if n == 0:
                    continue
                if best_match is None or n > best_match.num_lines:
                    best_match = ParentText(p, i, j, n)
            if best_match is None:
                new_text.lines.append(text[cur_line])
                cur_line += 1
            else:
                if len(new_text.lines) > 0:
                    diff.hunks.append(new_text)
                    new_text = NewText([])
                diff.hunks.append(best_match)
                cur_line += best_match.num_lines
        if len(new_text.lines) > 0:
            diff.hunks.append(new_text)
        return diff

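    # Illustrative sketch (not part of the original module): with a single
    # parent, from_lines() expresses the new text as ParentText references
    # into that parent plus NewText hunks for inserted lines.  The sample
    # byte strings are made up.
    #
    #   MultiParent.from_lines([b'a\n', b'b\n', b'x\n'], [[b'a\n', b'b\n']])
    #   # => MultiParent([ParentText(0, 0, 0, 2), NewText([b'x\n'])])
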
    def get_matching_blocks(self, parent, parent_len):
        for hunk in self.hunks:
            if not isinstance(hunk, ParentText) or hunk.parent != parent:
                continue
            yield (hunk.parent_pos, hunk.child_pos, hunk.num_lines)
        yield parent_len, self.num_lines(), 0

    def to_lines(self, parents=()):
        """Construct a fulltext from this diff and its parents"""
        mpvf = MultiMemoryVersionedFile()
        for num, parent in enumerate(parents):
            mpvf.add_version(BytesIO(parent).readlines(), num, [])
        mpvf.add_diff(self, 'a', list(range(len(parents))))
        return mpvf.get_line_list(['a'])[0]

    @classmethod
    def from_texts(cls, text, parents=()):
        """Produce a MultiParent from a text and list of parent texts"""
        return cls.from_lines(BytesIO(text).readlines(),
                              [BytesIO(p).readlines() for p in parents])

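    # Illustrative sketch (not part of the original module): a diff built
    # from a full text and its parent can be expanded back to the full text
    # with to_lines().  The byte strings are made-up sample data.
    #
    #   base = b'one\ntwo\nthree\n'
    #   new = b'one\ntwo\ntwo-and-a-half\nthree\n'
    #   diff = MultiParent.from_texts(new, [base])
    #   b''.join(diff.to_lines([base])) == new  # => True
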
    def to_patch(self):
        """Yield text lines for a patch"""
        for hunk in self.hunks:
            for line in hunk.to_patch():
                yield line

    def patch_len(self):
        return len(''.join(self.to_patch()))

    def zipped_patch_len(self):
        return len(gzip_string(self.to_patch()))

    @classmethod
    def from_patch(cls, text):
        """Create a MultiParent from its string form"""
        return cls._from_patch(BytesIO(text))

    @staticmethod
    def _from_patch(lines):
        """This is private because it is essential to split lines on \n only"""
        line_iter = iter(lines)
        hunks = []
        cur_line = None
        while True:
            try:
                cur_line = next(line_iter)
            except StopIteration:
                break
            if cur_line[0] == 'i':
                num_lines = int(cur_line.split(' ')[1])
                hunk_lines = [next(line_iter) for _ in range(num_lines)]
                hunk_lines[-1] = hunk_lines[-1][:-1]
                hunks.append(NewText(hunk_lines))
            elif cur_line[0] == '\n':
                hunks[-1].lines[-1] += '\n'
            else:
                if not (cur_line[0] == 'c'):
                    raise AssertionError(cur_line[0])
                parent, parent_pos, child_pos, num_lines =\
                    [int(v) for v in cur_line.split(' ')[1:]]
                hunks.append(ParentText(parent, parent_pos, child_pos,
                                        num_lines))
        return MultiParent(hunks)

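    # Sketch of the serialized form handled by to_patch()/_from_patch(),
    # inferred from the hunk classes below; the content line is made up:
    #
    #   c 0 0 0 2        ParentText: parent 0, parent_pos 0, child_pos 0, 2 lines
    #   i 1              NewText: one literal line follows
    #   inserted line
    #                    (a blank line terminates each NewText hunk)
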
    def range_iterator(self):
        """Iterate through the hunks, with range indicated

        kind is "new" or "parent".
        for "new", data is a list of lines.
        for "parent", data is (parent, parent_start, parent_end)
        :return: a generator of (start, end, kind, data)
        """
        start = 0
        for hunk in self.hunks:
            if isinstance(hunk, NewText):
                kind = 'new'
                end = start + len(hunk.lines)
                data = hunk.lines
            else:
                kind = 'parent'
                start = hunk.child_pos
                end = start + hunk.num_lines
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
                        hunk.num_lines)
            yield start, end, kind, data
            start = end

    def num_lines(self):
        """The number of lines in the output text"""
        extra_n = 0
        for hunk in reversed(self.hunks):
            if isinstance(hunk, ParentText):
                return hunk.child_pos + hunk.num_lines + extra_n
            extra_n += len(hunk.lines)
        return extra_n

    def is_snapshot(self):
        """Return true if this hunk is effectively a fulltext"""
        if len(self.hunks) != 1:
            return False
        return (isinstance(self.hunks[0], NewText))


class NewText(object):
    """The contents of text that is introduced by this text"""

    __slots__ = ['lines']

    def __init__(self, lines):
        self.lines = lines

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (other.lines == self.lines)

    def __repr__(self):
        return 'NewText(%r)' % self.lines

    def to_patch(self):
        yield 'i %d\n' % len(self.lines)
        for line in self.lines:
            yield line
        yield '\n'


class ParentText(object):
    """A reference to text present in a parent text"""

    __slots__ = ['parent', 'parent_pos', 'child_pos', 'num_lines']

    def __init__(self, parent, parent_pos, child_pos, num_lines):
        self.parent = parent
        self.parent_pos = parent_pos
        self.child_pos = child_pos
        self.num_lines = num_lines

    def _as_dict(self):
        return dict(parent=self.parent, parent_pos=self.parent_pos,
                    child_pos=self.child_pos, num_lines=self.num_lines)

    def __repr__(self):
        return ('ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'
                ' %(num_lines)r)' % self._as_dict())

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return self._as_dict() == other._as_dict()

    def to_patch(self):
        yield ('c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'
               % self._as_dict())


class BaseVersionedFile(object):
    """Pseudo-VersionedFile skeleton for MultiParent"""

    def __init__(self, snapshot_interval=25, max_snapshots=None):
        self._lines = {}
        self._parents = {}
        self._snapshots = set()
        self.snapshot_interval = snapshot_interval
        self.max_snapshots = max_snapshots

    def versions(self):
        return iter(self._parents)

    def has_version(self, version):
        return version in self._parents

    def do_snapshot(self, version_id, parent_ids):
        """Determine whether to perform a snapshot for this version"""
        if self.snapshot_interval is None:
            return False
        if self.max_snapshots is not None and\
            len(self._snapshots) == self.max_snapshots:
            return False
        if len(parent_ids) == 0:
            return True
        for ignored in range(self.snapshot_interval):
            if len(parent_ids) == 0:
                return False
            version_ids = parent_ids
            parent_ids = []
            for version_id in version_ids:
                if version_id not in self._snapshots:
                    parent_ids.extend(self._parents[version_id])
        else:
            return True

    def add_version(self, lines, version_id, parent_ids,
                    force_snapshot=None, single_parent=False):
        """Add a version to the versionedfile

        :param lines: The list of lines to add.  Must be split on '\n'.
        :param version_id: The version_id of the version to add
        :param force_snapshot: If true, force this version to be added as a
            snapshot version.  If false, force this version to be added as a
            diff.  If none, determine this automatically.
        :param single_parent: If true, use a single parent, rather than
            multiple parents.
        """
        if force_snapshot is None:
            do_snapshot = self.do_snapshot(version_id, parent_ids)
        else:
            do_snapshot = force_snapshot
        if do_snapshot:
            self._snapshots.add(version_id)
            diff = MultiParent([NewText(lines)])
        else:
            if single_parent:
                parent_lines = self.get_line_list(parent_ids[:1])
            else:
                parent_lines = self.get_line_list(parent_ids)
            diff = MultiParent.from_lines(lines, parent_lines)
            if diff.is_snapshot():
                self._snapshots.add(version_id)
        self.add_diff(diff, version_id, parent_ids)
        self._lines[version_id] = lines

    def get_parents(self, version_id):
        return self._parents[version_id]

    def make_snapshot(self, version_id):
        snapdiff = MultiParent([NewText(self.cache_version(version_id))])
        self.add_diff(snapdiff, version_id, self._parents[version_id])
        self._snapshots.add(version_id)

    def import_versionedfile(self, vf, snapshots, no_cache=True,
                             single_parent=False, verify=False):
        """Import all revisions of a versionedfile

        :param vf: The versionedfile to import
        :param snapshots: If provided, the revisions to make snapshots of.
            Otherwise, this will be auto-determined
        :param no_cache: If true, clear the cache after every add.
        :param single_parent: If true, omit all but one parent text, (but
            retain parent metadata).
        """
        if not (no_cache or not verify):
            raise ValueError()
        revisions = set(vf.versions())
        total = len(revisions)
        pb = ui.ui_factory.nested_progress_bar()
        try:
            while len(revisions) > 0:
                added = set()
                for revision in revisions:
                    parents = vf.get_parents(revision)
                    if [p for p in parents if p not in self._parents] != []:
                        continue
                    lines = [a + ' ' + l for a, l in
                             vf.annotate(revision)]
                    if snapshots is None:
                        force_snapshot = None
                    else:
                        force_snapshot = (revision in snapshots)
                    self.add_version(lines, revision, parents, force_snapshot,
                                     single_parent)
                    added.add(revision)
                    if no_cache:
                        self.clear_cache()
                        vf.clear_cache()
                        if verify:
                            if not (lines == self.get_line_list([revision])[0]):
                                raise AssertionError()
                            self.clear_cache()
                    pb.update(gettext('Importing revisions'),
                              (total - len(revisions)) + len(added), total)
                revisions = [r for r in revisions if r not in added]
        finally:
            pb.finished()

    def select_snapshots(self, vf):
        """Determine which versions to add as snapshots"""
        build_ancestors = {}
        descendants = {}
        snapshots = set()
        for version_id in topo_iter(vf):
            potential_build_ancestors = set(vf.get_parents(version_id))
            parents = vf.get_parents(version_id)
            if len(parents) == 0:
                snapshots.add(version_id)
                build_ancestors[version_id] = set()
            else:
                for parent in vf.get_parents(version_id):
                    potential_build_ancestors.update(build_ancestors[parent])
                if len(potential_build_ancestors) > self.snapshot_interval:
                    snapshots.add(version_id)
                    build_ancestors[version_id] = set()
                else:
                    build_ancestors[version_id] = potential_build_ancestors
        return snapshots

    def select_by_size(self, num):
        """Select snapshots for minimum output size"""
        num -= len(self._snapshots)
        new_snapshots = self.get_size_ranking()[-num:]
        return [v for n, v in new_snapshots]

    def get_size_ranking(self):
        """Get versions ranked by size"""
        versions = []
        new_snapshots = set()
        for version_id in self.versions():
            if version_id in self._snapshots:
                continue
            diff_len = self.get_diff(version_id).patch_len()
            snapshot_len = MultiParent([NewText(
                self.cache_version(version_id))]).patch_len()
            versions.append((snapshot_len - diff_len, version_id))
        versions.sort()
        return versions

    def import_diffs(self, vf):
        """Import the diffs from another pseudo-versionedfile"""
        for version_id in vf.versions():
            self.add_diff(vf.get_diff(version_id), version_id,
                          vf._parents[version_id])

    def get_build_ranking(self):
        """Return revisions sorted by how much they reduce build complexity"""
        could_avoid = {}
        referenced_by = {}
        for version_id in topo_iter(self):
            could_avoid[version_id] = set()
            if version_id not in self._snapshots:
                for parent_id in self._parents[version_id]:
                    could_avoid[version_id].update(could_avoid[parent_id])
                could_avoid[version_id].update(self._parents)
                could_avoid[version_id].discard(version_id)
            for avoid_id in could_avoid[version_id]:
                referenced_by.setdefault(avoid_id, set()).add(version_id)
        available_versions = list(self.versions())
        ranking = []
        while len(available_versions) > 0:
            available_versions.sort(key=lambda x:
                len(could_avoid[x]) *
                len(referenced_by.get(x, [])))
            selected = available_versions.pop()
            ranking.append(selected)
            for version_id in referenced_by[selected]:
                could_avoid[version_id].difference_update(
                    could_avoid[selected])
            for version_id in could_avoid[selected]:
                referenced_by[version_id].difference_update(
                    referenced_by[selected]
                )
        return ranking

    def clear_cache(self):
        self._lines.clear()

    def get_line_list(self, version_ids):
        return [self.cache_version(v) for v in version_ids]

    def cache_version(self, version_id):
        try:
            return self._lines[version_id]
        except KeyError:
            pass
        diff = self.get_diff(version_id)
        lines = []
        reconstructor = _Reconstructor(self, self._lines, self._parents)
        reconstructor.reconstruct_version(lines, version_id)
        self._lines[version_id] = lines
        return lines


class MultiMemoryVersionedFile(BaseVersionedFile):
    """Memory-backed pseudo-versionedfile"""

    def __init__(self, snapshot_interval=25, max_snapshots=None):
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
        self._diffs = {}

    def add_diff(self, diff, version_id, parent_ids):
        self._diffs[version_id] = diff
        self._parents[version_id] = parent_ids

    def get_diff(self, version_id):
        try:
            return self._diffs[version_id]
        except KeyError:
            raise errors.RevisionNotPresent(version_id, self)

    def destroy(self):
        self._diffs = {}


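# Illustrative sketch (not part of the original module): storing a small
# history in memory.  Parentless versions are stored as snapshots, later
# versions as multi-parent diffs; get_line_list() rebuilds full texts.
# The version ids and lines below are made up.
#
#   vf = MultiMemoryVersionedFile(snapshot_interval=25)
#   vf.add_version([b'a\n', b'b\n'], 'rev-1', [])
#   vf.add_version([b'a\n', b'b\n', b'c\n'], 'rev-2', ['rev-1'])
#   vf.get_line_list(['rev-2'])[0]  # => [b'a\n', b'b\n', b'c\n']

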
class MultiVersionedFile(BaseVersionedFile):
    """Disk-backed pseudo-versionedfile"""

    def __init__(self, filename, snapshot_interval=25, max_snapshots=None):
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
        self._filename = filename
        self._diff_offset = {}

    def get_diff(self, version_id):
        start, count = self._diff_offset[version_id]
        infile = open(self._filename + '.mpknit', 'rb')
        try:
            infile.seek(start)
            sio = BytesIO(infile.read(count))
        finally:
            infile.close()
        zip_file = gzip.GzipFile(None, mode='rb', fileobj=sio)
        try:
            file_version_id = zip_file.readline()
            content = zip_file.read()
            return MultiParent.from_patch(content)
        finally:
            zip_file.close()

    def add_diff(self, diff, version_id, parent_ids):
        outfile = open(self._filename + '.mpknit', 'ab')
        try:
            outfile.seek(0, 2)      # workaround for windows bug:
                                    # .tell() for files opened in 'ab' mode
                                    # before any write returns 0
            start = outfile.tell()
            try:
                zipfile = gzip.GzipFile(None, mode='ab', fileobj=outfile)
                zipfile.writelines(itertools.chain(
                    ['version %s\n' % version_id], diff.to_patch()))
            finally:
                zipfile.close()
            end = outfile.tell()
        finally:
            outfile.close()
        self._diff_offset[version_id] = (start, end-start)
        self._parents[version_id] = parent_ids

    def destroy(self):
        try:
            os.unlink(self._filename + '.mpknit')
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
        try:
            os.unlink(self._filename + '.mpidx')
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise

    def save(self):
        open(self._filename + '.mpidx', 'wb').write(bencode.bencode(
            (self._parents, list(self._snapshots), self._diff_offset)))

    def load(self):
        self._parents, snapshots, self._diff_offset = bencode.bdecode(
            open(self._filename + '.mpidx', 'rb').read())
        self._snapshots = set(snapshots)


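# On-disk layout used by MultiVersionedFile (as implemented above): each
# add_diff() appends an individually gzipped record to '<filename>.mpknit'
# (a 'version <id>' header line followed by the diff in patch form), while
# save()/load() keep the parent map, snapshot set and per-version
# (offset, length) pairs bencoded in '<filename>.mpidx'.

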
class _Reconstructor(object):
    """Build a text from the diffs, ancestry graph and cached lines"""

    def __init__(self, diffs, lines, parents):
        self.diffs = diffs
        self.lines = lines
        self.parents = parents
        self.cursor = {}

    def reconstruct(self, lines, parent_text, version_id):
        """Append the lines referred to by a ParentText to lines"""
        parent_id = self.parents[version_id][parent_text.parent]
        end = parent_text.parent_pos + parent_text.num_lines
        return self._reconstruct(lines, parent_id, parent_text.parent_pos,
                                 end)

    def _reconstruct(self, lines, req_version_id, req_start, req_end):
        """Append lines for the requested version_id range"""
        # stack of pending range requests
        if req_start == req_end:
            return
        pending_reqs = [(req_version_id, req_start, req_end)]
        while len(pending_reqs) > 0:
            req_version_id, req_start, req_end = pending_reqs.pop()
            # lazily allocate cursors for versions
            if req_version_id in self.lines:
                lines.extend(self.lines[req_version_id][req_start:req_end])
                continue
            try:
                start, end, kind, data, iterator = self.cursor[req_version_id]
            except KeyError:
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
                start, end, kind, data = next(iterator)
            if start > req_start:
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
                start, end, kind, data = next(iterator)

            # find the first hunk relevant to the request
            while end <= req_start:
                start, end, kind, data = next(iterator)
            self.cursor[req_version_id] = start, end, kind, data, iterator
            # if the hunk can't satisfy the whole request, split it in two,
            # and leave the second half for later.
            if req_end > end:
                pending_reqs.append((req_version_id, end, req_end))
                req_end = end
            if kind == 'new':
                lines.extend(data[req_start - start: (req_end - start)])
            else:
                # If the hunk is a ParentText, rewrite it as a range request
                # for the parent, and make it the next pending request.
                parent, parent_start, parent_end = data
                new_version_id = self.parents[req_version_id][parent]
                new_start = parent_start + req_start - start
                new_end = parent_end + req_end - end
                pending_reqs.append((new_version_id, new_start, new_end))

    def reconstruct_version(self, lines, version_id):
        length = self.diffs.get_diff(version_id).num_lines()
        return self._reconstruct(lines, version_id, 0, length)


def gzip_string(lines):
    sio = BytesIO()
    data_file = gzip.GzipFile(None, mode='wb', fileobj=sio)
    data_file.writelines(lines)
    data_file.close()
    return sio.getvalue()