# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
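
"""Diffs against multiple parent texts, and simple stores built on them.

A MultiParent diff describes a text as a sequence of hunks: ParentText hunks
that copy a range of lines from one of the parent texts, and NewText hunks
that introduce new lines.  MultiMemoryVersionedFile and MultiVersionedFile
are small pseudo-versionedfiles that store texts as such diffs, in memory or
on disk respectively.
"""
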
from __future__ import absolute_import

import errno
import os

from .lazy_import import lazy_import

lazy_import(globals(), """
import gzip
import itertools

from breezy import (
    bencode,
    errors,
    patiencediff,
    ui,
    )
""")
from .sixish import (
    BytesIO,
    range,
    )
from .i18n import gettext  # used by the progress message in import_versionedfile


def topo_iter_keys(vf, keys=None):
    if keys is None:
        keys = vf.keys()
    parents = vf.get_parent_map(keys)
    return _topo_iter(parents, keys)


def topo_iter(vf, versions=None):
    if versions is None:
        versions = vf.versions()
    parents = vf.get_parent_map(versions)
    return _topo_iter(parents, versions)


def _topo_iter(parents, versions):
    seen = set()
    descendants = {}
    def pending_parents(version):
        if parents[version] is None:
            return []
        return [v for v in parents[version] if v in versions and
                v not in seen]
    for version_id in versions:
        if parents[version_id] is None:
            # parentless
            continue
        for parent_id in parents[version_id]:
            descendants.setdefault(parent_id, []).append(version_id)
    cur = [v for v in versions if len(pending_parents(v)) == 0]
    while len(cur) > 0:
        next = []
        for version_id in cur:
            if version_id in seen:
                continue
            if len(pending_parents(version_id)) != 0:
                continue
            next.extend(descendants.get(version_id, []))
            yield version_id
            seen.add(version_id)
        cur = next
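
# Illustrative sketch (example values are made up): _topo_iter yields each
# version only after all of its parents that are in the requested set, e.g.
#   list(_topo_iter({'a': (), 'b': ('a',), 'c': ('b',)}, ['c', 'b', 'a']))
#   -> ['a', 'b', 'c']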


class MultiParent(object):
    """A multi-parent diff"""

    __slots__ = ['hunks']

    def __init__(self, hunks=None):
        if hunks is not None:
            self.hunks = hunks
        else:
            self.hunks = []

    def __repr__(self):
        return "MultiParent(%r)" % self.hunks

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (self.hunks == other.hunks)

    @staticmethod
    def from_lines(text, parents=(), left_blocks=None):
        """Produce a MultiParent from a list of lines and parents"""
        def compare(parent):
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
                                                           text)
            return matcher.get_matching_blocks()
        if len(parents) > 0:
            if left_blocks is None:
                left_blocks = compare(parents[0])
            parent_comparisons = [left_blocks] + [compare(p) for p in
                                                  parents[1:]]
        else:
            parent_comparisons = []
        cur_line = 0
        new_text = NewText([])
        parent_text = []
        block_iter = [iter(i) for i in parent_comparisons]
        diff = MultiParent([])
        def next_block(p):
            try:
                return next(block_iter[p])
            except StopIteration:
                return None
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
        while cur_line < len(text):
            best_match = None
            for p, block in enumerate(cur_block):
                if block is None:
                    continue
                i, j, n = block
                while j + n <= cur_line:
                    block = cur_block[p] = next_block(p)
                    if block is None:
                        break
                    i, j, n = block
                if block is None:
                    continue
                if j > cur_line:
                    continue
                offset = cur_line - j
                i += offset
                j = cur_line
                n -= offset
                if n == 0:
                    continue
                if best_match is None or n > best_match.num_lines:
                    best_match = ParentText(p, i, j, n)
            if best_match is None:
                new_text.lines.append(text[cur_line])
                cur_line += 1
            else:
                if len(new_text.lines) > 0:
                    diff.hunks.append(new_text)
                    new_text = NewText([])
                diff.hunks.append(best_match)
                cur_line += best_match.num_lines
        if len(new_text.lines) > 0:
            diff.hunks.append(new_text)
        return diff

    def get_matching_blocks(self, parent, parent_len):
        for hunk in self.hunks:
            if not isinstance(hunk, ParentText) or hunk.parent != parent:
                continue
            yield (hunk.parent_pos, hunk.child_pos, hunk.num_lines)
        yield parent_len, self.num_lines(), 0

    def to_lines(self, parents=()):
        """Construct a fulltext from this diff and its parents"""
        mpvf = MultiMemoryVersionedFile()
        for num, parent in enumerate(parents):
            mpvf.add_version(BytesIO(parent).readlines(), num, [])
        mpvf.add_diff(self, 'a', list(range(len(parents))))
        return mpvf.get_line_list(['a'])[0]

    @classmethod
    def from_texts(cls, text, parents=()):
        """Produce a MultiParent from a text and a list of parent texts"""
        return cls.from_lines(BytesIO(text).readlines(),
                              [BytesIO(p).readlines() for p in parents])
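    # Illustrative example: diffing one new trailing line against a single
    # parent yields a ParentText hunk for the shared prefix and a NewText
    # hunk for the addition:
    #   MultiParent.from_texts(b'a\nb\nnew\n', (b'a\nb\n',)).hunks
    #   -> [ParentText(0, 0, 0, 2), NewText([b'new\n'])]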

    def to_patch(self):
        """Yield text lines for a patch"""
        for hunk in self.hunks:
            for line in hunk.to_patch():
                yield line

    def patch_len(self):
        return len(''.join(self.to_patch()))

    def zipped_patch_len(self):
        return len(gzip_string(self.to_patch()))

    @classmethod
    def from_patch(cls, text):
        """Create a MultiParent from its string form"""
        return cls._from_patch(BytesIO(text))

    @staticmethod
    def _from_patch(lines):
        """This is private because it is essential to split lines on \n only"""
        line_iter = iter(lines)
        hunks = []
        cur_line = None
        while True:
            try:
                cur_line = next(line_iter)
            except StopIteration:
                break
            if cur_line[0] == 'i':
                num_lines = int(cur_line.split(' ')[1])
                hunk_lines = [next(line_iter) for _ in range(num_lines)]
                hunk_lines[-1] = hunk_lines[-1][:-1]
                hunks.append(NewText(hunk_lines))
            elif cur_line[0] == '\n':
                hunks[-1].lines[-1] += '\n'
            else:
                if not (cur_line[0] == 'c'):
                    raise AssertionError(cur_line[0])
                parent, parent_pos, child_pos, num_lines =\
                    [int(v) for v in cur_line.split(' ')[1:]]
                hunks.append(ParentText(parent, parent_pos, child_pos,
                                        num_lines))
        return MultiParent(hunks)
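    # The serialised form produced by to_patch() and parsed above is a simple
    # line-based format: 'c <parent> <parent_pos> <child_pos> <num_lines>'
    # names a ParentText hunk, while 'i <num_lines>' introduces that many
    # literal lines followed by a blank terminator line.  Illustrative example:
    #   c 0 0 0 2
    #   i 1
    #   new
    #   <blank line>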

    def range_iterator(self):
        """Iterate through the hunks, with range indicated

        kind is "new" or "parent".
        for "new", data is a list of lines.
        for "parent", data is (parent, parent_start, parent_end)
        :return: a generator of (start, end, kind, data)
        """
        start = 0
        for hunk in self.hunks:
            if isinstance(hunk, NewText):
                kind = 'new'
                end = start + len(hunk.lines)
                data = hunk.lines
            else:
                kind = 'parent'
                start = hunk.child_pos
                end = start + hunk.num_lines
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
                        hunk.num_lines)
            yield start, end, kind, data
            start = end

    def num_lines(self):
        """The number of lines in the output text"""
        extra_n = 0
        for hunk in reversed(self.hunks):
            if isinstance(hunk, ParentText):
                return hunk.child_pos + hunk.num_lines + extra_n
            extra_n += len(hunk.lines)
        return extra_n

    def is_snapshot(self):
        """Return true if this diff is effectively a fulltext"""
        if len(self.hunks) != 1:
            return False
        return (isinstance(self.hunks[0], NewText))


class NewText(object):
    """The lines introduced by this version of the text"""

    __slots__ = ['lines']

    def __init__(self, lines):
        self.lines = lines

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (other.lines == self.lines)

    def __repr__(self):
        return 'NewText(%r)' % self.lines

    def to_patch(self):
        yield 'i %d\n' % len(self.lines)
        for line in self.lines:
            yield line
        yield '\n'


class ParentText(object):
    """A reference to text present in a parent text"""

    __slots__ = ['parent', 'parent_pos', 'child_pos', 'num_lines']

    def __init__(self, parent, parent_pos, child_pos, num_lines):
        self.parent = parent
        self.parent_pos = parent_pos
        self.child_pos = child_pos
        self.num_lines = num_lines

    def _as_dict(self):
        return dict(parent=self.parent, parent_pos=self.parent_pos,
                    child_pos=self.child_pos, num_lines=self.num_lines)

    def __repr__(self):
        return ('ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'
                ' %(num_lines)r)' % self._as_dict())

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return self._as_dict() == other._as_dict()

    def to_patch(self):
        yield ('c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'
               % self._as_dict())


class BaseVersionedFile(object):
    """Pseudo-VersionedFile skeleton for MultiParent"""

    def __init__(self, snapshot_interval=25, max_snapshots=None):
        self._lines = {}
        self._parents = {}
        self._snapshots = set()
        self.snapshot_interval = snapshot_interval
        self.max_snapshots = max_snapshots

    def versions(self):
        return iter(self._parents)

    def has_version(self, version):
        return version in self._parents

    def do_snapshot(self, version_id, parent_ids):
        """Determine whether to perform a snapshot for this version"""
        if self.snapshot_interval is None:
            return False
        if (self.max_snapshots is not None and
                len(self._snapshots) == self.max_snapshots):
            return False
        if len(parent_ids) == 0:
            return True
        for ignored in range(self.snapshot_interval):
            if len(parent_ids) == 0:
                return False
            version_ids = parent_ids
            parent_ids = []
            for version_id in version_ids:
                if version_id not in self._snapshots:
                    parent_ids.extend(self._parents[version_id])
        else:
            return True

    def add_version(self, lines, version_id, parent_ids,
                    force_snapshot=None, single_parent=False):
        """Add a version to the versionedfile

        :param lines: The list of lines to add.  Must be split on '\n'.
        :param version_id: The version_id of the version to add
        :param parent_ids: The parent ids of the version to add
        :param force_snapshot: If true, force this version to be added as a
            snapshot version.  If false, force this version to be added as a
            diff.  If none, determine this automatically.
        :param single_parent: If true, use a single parent, rather than
            multiple parents.
        """
        if force_snapshot is None:
            do_snapshot = self.do_snapshot(version_id, parent_ids)
        else:
            do_snapshot = force_snapshot
        if do_snapshot:
            self._snapshots.add(version_id)
            diff = MultiParent([NewText(lines)])
        else:
            if single_parent:
                parent_lines = self.get_line_list(parent_ids[:1])
            else:
                parent_lines = self.get_line_list(parent_ids)
            diff = MultiParent.from_lines(lines, parent_lines)
            if diff.is_snapshot():
                self._snapshots.add(version_id)
        self.add_diff(diff, version_id, parent_ids)
        self._lines[version_id] = lines

    def get_parents(self, version_id):
        return self._parents[version_id]

    def make_snapshot(self, version_id):
        snapdiff = MultiParent([NewText(self.cache_version(version_id))])
        self.add_diff(snapdiff, version_id, self._parents[version_id])
        self._snapshots.add(version_id)

    def import_versionedfile(self, vf, snapshots, no_cache=True,
                             single_parent=False, verify=False):
        """Import all revisions of a versionedfile

        :param vf: The versionedfile to import
        :param snapshots: If provided, the revisions to make snapshots of.
            Otherwise, this will be auto-determined
        :param no_cache: If true, clear the cache after every add.
        :param single_parent: If true, omit all but one parent text (but
            retain parent metadata).
        :param verify: If true, check each reconstructed text against the
            imported text (only valid when no_cache is true).
        """
        if not (no_cache or not verify):
            raise ValueError()
        revisions = set(vf.versions())
        total = len(revisions)
        with ui.ui_factory.nested_progress_bar() as pb:
            while len(revisions) > 0:
                added = set()
                for revision in revisions:
                    parents = vf.get_parents(revision)
                    if [p for p in parents if p not in self._parents] != []:
                        continue
                    lines = [a + ' ' + l for a, l in
                             vf.annotate(revision)]
                    if snapshots is None:
                        force_snapshot = None
                    else:
                        force_snapshot = (revision in snapshots)
                    self.add_version(lines, revision, parents, force_snapshot,
                                     single_parent)
                    added.add(revision)
                    if no_cache:
                        self.clear_cache()
                        vf.clear_cache()
                        if verify:
                            if not (lines == self.get_line_list([revision])[0]):
                                raise AssertionError()
                            self.clear_cache()
                    pb.update(gettext('Importing revisions'),
                              (total - len(revisions)) + len(added), total)
                revisions = [r for r in revisions if r not in added]

    def select_snapshots(self, vf):
        """Determine which versions to add as snapshots"""
        build_ancestors = {}
        descendants = {}
        snapshots = set()
        for version_id in topo_iter(vf):
            potential_build_ancestors = set(vf.get_parents(version_id))
            parents = vf.get_parents(version_id)
            if len(parents) == 0:
                snapshots.add(version_id)
                build_ancestors[version_id] = set()
            else:
                for parent in vf.get_parents(version_id):
                    potential_build_ancestors.update(build_ancestors[parent])
                if len(potential_build_ancestors) > self.snapshot_interval:
                    snapshots.add(version_id)
                    build_ancestors[version_id] = set()
                else:
                    build_ancestors[version_id] = potential_build_ancestors
        return snapshots

    def select_by_size(self, num):
        """Select snapshots for minimum output size"""
        num -= len(self._snapshots)
        new_snapshots = self.get_size_ranking()[-num:]
        return [v for n, v in new_snapshots]

    def get_size_ranking(self):
        """Get versions ranked by size"""
        versions = []
        new_snapshots = set()
        for version_id in self.versions():
            if version_id in self._snapshots:
                continue
            diff_len = self.get_diff(version_id).patch_len()
            snapshot_len = MultiParent([NewText(
                self.cache_version(version_id))]).patch_len()
            versions.append((snapshot_len - diff_len, version_id))
        versions.sort()
        return versions

    def import_diffs(self, vf):
        """Import the diffs from another pseudo-versionedfile"""
        for version_id in vf.versions():
            self.add_diff(vf.get_diff(version_id), version_id,
                          vf._parents[version_id])

    def get_build_ranking(self):
        """Return revisions sorted by how much they reduce build complexity"""
        could_avoid = {}
        referenced_by = {}
        for version_id in topo_iter(self):
            could_avoid[version_id] = set()
            if version_id not in self._snapshots:
                for parent_id in self._parents[version_id]:
                    could_avoid[version_id].update(could_avoid[parent_id])
                could_avoid[version_id].update(self._parents)
                could_avoid[version_id].discard(version_id)
            for avoid_id in could_avoid[version_id]:
                referenced_by.setdefault(avoid_id, set()).add(version_id)
        available_versions = list(self.versions())
        ranking = []
        while len(available_versions) > 0:
            available_versions.sort(key=lambda x:
                len(could_avoid[x]) *
                len(referenced_by.get(x, [])))
            selected = available_versions.pop()
            ranking.append(selected)
            for version_id in referenced_by[selected]:
                could_avoid[version_id].difference_update(
                    could_avoid[selected])
            for version_id in could_avoid[selected]:
                referenced_by[version_id].difference_update(
                    referenced_by[selected]
                )
        return ranking

    def clear_cache(self):
        self._lines.clear()

    def get_line_list(self, version_ids):
        return [self.cache_version(v) for v in version_ids]

    def cache_version(self, version_id):
        try:
            return self._lines[version_id]
        except KeyError:
            pass
        diff = self.get_diff(version_id)
        lines = []
        reconstructor = _Reconstructor(self, self._lines, self._parents)
        reconstructor.reconstruct_version(lines, version_id)
        self._lines[version_id] = lines
        return lines


class MultiMemoryVersionedFile(BaseVersionedFile):
    """Memory-backed pseudo-versionedfile"""

    def __init__(self, snapshot_interval=25, max_snapshots=None):
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
        self._diffs = {}

    def add_diff(self, diff, version_id, parent_ids):
        self._diffs[version_id] = diff
        self._parents[version_id] = parent_ids

    def get_diff(self, version_id):
        try:
            return self._diffs[version_id]
        except KeyError:
            raise errors.RevisionNotPresent(version_id, self)

    def destroy(self):
        self._diffs = {}


class MultiVersionedFile(BaseVersionedFile):
    """Disk-backed pseudo-versionedfile"""

    def __init__(self, filename, snapshot_interval=25, max_snapshots=None):
        BaseVersionedFile.__init__(self, snapshot_interval, max_snapshots)
        self._filename = filename
        self._diff_offset = {}

    def get_diff(self, version_id):
        start, count = self._diff_offset[version_id]
        infile = open(self._filename + '.mpknit', 'rb')
        try:
            infile.seek(start)
            sio = BytesIO(infile.read(count))
        finally:
            infile.close()
        zip_file = gzip.GzipFile(None, mode='rb', fileobj=sio)
        try:
            file_version_id = zip_file.readline()
            content = zip_file.read()
            return MultiParent.from_patch(content)
        finally:
            zip_file.close()

    def add_diff(self, diff, version_id, parent_ids):
        outfile = open(self._filename + '.mpknit', 'ab')
        try:
            outfile.seek(0, 2)      # workaround for windows bug:
                                    # .tell() for files opened in 'ab' mode
                                    # before any write returns 0
            start = outfile.tell()
            try:
                zipfile = gzip.GzipFile(None, mode='ab', fileobj=outfile)
                zipfile.writelines(itertools.chain(
                    ['version %s\n' % version_id], diff.to_patch()))
            finally:
                zipfile.close()
            end = outfile.tell()
        finally:
            outfile.close()
        self._diff_offset[version_id] = (start, end - start)
        self._parents[version_id] = parent_ids

    def destroy(self):
        try:
            os.unlink(self._filename + '.mpknit')
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
        try:
            os.unlink(self._filename + '.mpidx')
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise

    def save(self):
        open(self._filename + '.mpidx', 'wb').write(bencode.bencode(
            (self._parents, list(self._snapshots), self._diff_offset)))

    def load(self):
        self._parents, snapshots, self._diff_offset = bencode.bdecode(
            open(self._filename + '.mpidx', 'rb').read())
        self._snapshots = set(snapshots)


class _Reconstructor(object):
    """Build a text from the diffs, ancestry graph and cached lines"""

    def __init__(self, diffs, lines, parents):
        self.diffs = diffs
        self.lines = lines
        self.parents = parents
        self.cursor = {}

    def reconstruct(self, lines, parent_text, version_id):
        """Append the lines referred to by a ParentText to lines"""
        parent_id = self.parents[version_id][parent_text.parent]
        end = parent_text.parent_pos + parent_text.num_lines
        return self._reconstruct(lines, parent_id, parent_text.parent_pos,
                                 end)

    def _reconstruct(self, lines, req_version_id, req_start, req_end):
        """Append lines for the requested version_id range"""
        # stack of pending range requests
        if req_start == req_end:
            return
        pending_reqs = [(req_version_id, req_start, req_end)]
        while len(pending_reqs) > 0:
            req_version_id, req_start, req_end = pending_reqs.pop()
            # lazily allocate cursors for versions
            if req_version_id in self.lines:
                lines.extend(self.lines[req_version_id][req_start:req_end])
                continue
            try:
                start, end, kind, data, iterator = self.cursor[req_version_id]
            except KeyError:
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
                start, end, kind, data = next(iterator)
            if start > req_start:
                iterator = self.diffs.get_diff(req_version_id).range_iterator()
                start, end, kind, data = next(iterator)

            # find the first hunk relevant to the request
            while end <= req_start:
                start, end, kind, data = next(iterator)
            self.cursor[req_version_id] = start, end, kind, data, iterator
            # if the hunk can't satisfy the whole request, split it in two,
            # and leave the second half for later.
            if req_end > end:
                pending_reqs.append((req_version_id, end, req_end))
                req_end = end
            if kind == 'new':
                lines.extend(data[req_start - start: (req_end - start)])
            else:
                # If the hunk is a ParentText, rewrite it as a range request
                # for the parent, and make it the next pending request.
                parent, parent_start, parent_end = data
                new_version_id = self.parents[req_version_id][parent]
                new_start = parent_start + req_start - start
                new_end = parent_end + req_end - end
                pending_reqs.append((new_version_id, new_start, new_end))

    def reconstruct_version(self, lines, version_id):
        length = self.diffs.get_diff(version_id).num_lines()
        return self._reconstruct(lines, version_id, 0, length)


def gzip_string(lines):
    sio = BytesIO()
    data_file = gzip.GzipFile(None, mode='wb', fileobj=sio)
    data_file.writelines(lines)
    data_file.close()
    return sio.getvalue()
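
# Minimal usage sketch (illustrative; assumes line lists are newline-terminated
# as required by add_version):
#   vf = MultiMemoryVersionedFile()
#   vf.add_version(['a\n', 'b\n'], 'rev-1', [])
#   vf.add_version(['a\n', 'b\n', 'c\n'], 'rev-2', ['rev-1'])
#   vf.get_line_list(['rev-2'])[0]   # -> ['a\n', 'b\n', 'c\n']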