/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.9.2 by Aaron Bentley
Get single-parent comparison working
1
from difflib import SequenceMatcher
0.9.19 by Aaron Bentley
More tweakage
2
import sys
3
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
4
from bzrlib import patiencediff
0.9.20 by Aaron Bentley
Convert to a plugin
5
from bzrlib import ui
0.9.3 by Aaron Bentley
Get three-parent comparisions under test
6
0.9.1 by Aaron Bentley
Get trivial case passing
7
class MultiParent(object):
8
0.9.2 by Aaron Bentley
Get single-parent comparison working
9
    def __init__(self, hunks=None):
10
        if hunks is not None:
11
            self.hunks = hunks
12
        else:
13
            self.hunks = []
14
15
    def __repr__(self):
16
        return "MultiParent(%r)" % self.hunks
17
18
    def __eq__(self, other):
19
        if self.__class__ is not other.__class__:
20
            return False
21
        return (self.hunks == other.hunks)
0.9.1 by Aaron Bentley
Get trivial case passing
22
23
    @staticmethod
24
    def from_lines(text, parents=()):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
25
        """Produce a MultiParent from a list of lines and parents"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
26
        def compare(parent):
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
27
            matcher = patiencediff.PatienceSequenceMatcher(None, parent,
28
                                                           text)
29
            return matcher.get_matching_blocks()
0.9.2 by Aaron Bentley
Get single-parent comparison working
30
        parent_comparisons = [compare(p) for p in parents]
31
        cur_line = 0
32
        new_text = NewText([])
33
        parent_text = []
34
        block_iter = [iter(i) for i in parent_comparisons]
35
        diff = MultiParent([])
36
        def next_block(p):
37
            try:
38
                return block_iter[p].next()
39
            except StopIteration:
40
                return None
41
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
42
        while cur_line < len(text):
43
            best_match = None
44
            for p, block in enumerate(cur_block):
45
                if block is None:
46
                    continue
47
                i, j, n = block
48
                while j + n < cur_line:
49
                    block = cur_block[p] = next_block(p)
50
                    if block is None:
51
                        break
52
                    i, j, n = block
53
                if block is None:
54
                    continue
55
                if j > cur_line:
56
                    continue
57
                offset = cur_line - j
58
                i += offset
59
                j = cur_line
60
                n -= offset
61
                if n == 0:
62
                    continue
63
                if best_match is None or n > best_match.num_lines:
64
                    best_match = ParentText(p, i, j, n)
65
            if best_match is None:
66
                new_text.lines.append(text[cur_line])
67
                cur_line += 1
68
            else:
69
                if len(new_text.lines) > 0:
70
                    diff.hunks.append(new_text)
71
                    new_text = NewText([])
72
                diff.hunks.append(best_match)
73
                cur_line += best_match.num_lines
74
        if len(new_text.lines) > 0:
75
            diff.hunks.append(new_text)
0.9.1 by Aaron Bentley
Get trivial case passing
76
        return diff
77
78
    @classmethod
79
    def from_texts(cls, text, parents=()):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
80
        """Produce a MultiParent from a text and list of parent text"""
0.9.1 by Aaron Bentley
Get trivial case passing
81
        return cls.from_lines(text.splitlines(True),
82
                              [p.splitlines(True) for p in parents])
83
0.9.4 by Aaron Bentley
Start supporting serialization
84
    def to_patch(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
85
        """Yield text lines for a patch"""
0.9.4 by Aaron Bentley
Start supporting serialization
86
        for hunk in self.hunks:
87
            for line in hunk.to_patch():
88
                yield line
89
0.9.18 by Aaron Bentley
Implement from_patch
90
    @staticmethod
91
    def from_patch(lines):
0.9.19 by Aaron Bentley
More tweakage
92
        """Produce a MultiParent from a sequence of lines"""
0.9.18 by Aaron Bentley
Implement from_patch
93
        line_iter = iter(lines)
94
        hunks = []
95
        cur_line = None
96
        while(True):
97
            try:
98
                cur_line = line_iter.next()
99
            except StopIteration:
100
                break
101
            if cur_line[0] == 'i':
102
                num_lines = int(cur_line.split(' ')[1])
103
                hunk_lines = [line_iter.next() for x in xrange(num_lines)]
104
                hunk_lines[-1] = hunk_lines[-1][:-1]
105
                hunks.append(NewText(hunk_lines))
106
            elif cur_line[0] == '\n':
107
                hunks[-1].lines[-1] += '\n'
108
            else:
109
                assert cur_line[0] == 'c', cur_line[0]
110
                parent, parent_pos, child_pos, num_lines =\
111
                    [int(v) for v in cur_line.split(' ')[1:]]
112
                hunks.append(ParentText(parent, parent_pos, child_pos,
113
                                        num_lines))
114
        return MultiParent(hunks)
115
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
116
    def range_iterator(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
117
        """Iterate through the hunks, with range indicated
118
119
        kind is "new" or "parent".
120
        for "new", data is a list of lines.
121
        for "parent", data is (parent, parent_start, parent_end)
122
        :return: a generator of (start, end, kind, data)
123
        """
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
124
        start = 0
125
        for hunk in self.hunks:
126
            if isinstance(hunk, NewText):
127
                kind = 'new'
128
                end = start + len(hunk.lines)
129
                data = hunk.lines
130
            else:
131
                kind = 'parent'
132
                start = hunk.child_pos
133
                end = start + hunk.num_lines
134
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
135
                        hunk.num_lines)
136
            yield start, end, kind, data
137
            start = end
138
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
139
    def num_lines(self):
140
        extra_n = 0
141
        for hunk in reversed(self.hunks):
142
            if isinstance(hunk, ParentText):
143
               return hunk.child_pos + hunk.num_lines + extra_n
144
            extra_n += len(hunk.lines)
145
        return extra_n
146
0.9.1 by Aaron Bentley
Get trivial case passing
147
148
class NewText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
149
    """The contents of text that is introduced by this text"""
0.9.1 by Aaron Bentley
Get trivial case passing
150
151
    def __init__(self, lines):
152
        self.lines = lines
153
154
    def __eq__(self, other):
155
        if self.__class__ is not other.__class__:
156
            return False
157
        return (other.lines == self.lines)
0.9.2 by Aaron Bentley
Get single-parent comparison working
158
159
    def __repr__(self):
160
        return 'NewText(%r)' % self.lines
161
0.9.4 by Aaron Bentley
Start supporting serialization
162
    def to_patch(self):
163
        yield 'i %d\n' % len(self.lines)
164
        for line in self.lines:
165
            yield line
166
        yield '\n'
167
0.9.2 by Aaron Bentley
Get single-parent comparison working
168
169
class ParentText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
170
    """A reference to text present in a parent text"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
171
172
    def __init__(self, parent, parent_pos, child_pos, num_lines):
173
        self.parent = parent
174
        self.parent_pos = parent_pos
175
        self.child_pos = child_pos
176
        self.num_lines = num_lines
177
178
    def __repr__(self):
179
        return 'ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'\
180
            ' %(num_lines)r)' % self.__dict__
181
182
    def __eq__(self, other):
183
        if self.__class__ != other.__class__:
184
            return False
185
        return (self.__dict__ == other.__dict__)
0.9.4 by Aaron Bentley
Start supporting serialization
186
187
    def to_patch(self):
188
        yield 'c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'\
189
            % self.__dict__
0.9.8 by Aaron Bentley
get add_version working
190
191
192
class MultiVersionedFile(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
193
    """VersionedFile skeleton for MultiParent"""
0.9.8 by Aaron Bentley
get add_version working
194
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
195
    def __init__(self, snapshot_interval=25, max_snapshots=None):
0.9.8 by Aaron Bentley
get add_version working
196
        self._diffs = {}
197
        self._lines = {}
198
        self._parents = {}
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
199
        self._snapshots = set()
0.9.12 by Aaron Bentley
Make benchmarks for mp
200
        self.snapshot_interval = snapshot_interval
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
201
        self.max_snapshots = max_snapshots
0.9.12 by Aaron Bentley
Make benchmarks for mp
202
203
    def do_snapshot(self, version_id, parent_ids):
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
204
        if self.snapshot_interval is None:
205
            return False
206
        if self.max_snapshots is not None and\
207
            len(self._snapshots) == self.max_snapshots:
0.9.14 by Aaron Bentley
Temporarily force snapshots to 44
208
            return False
0.9.12 by Aaron Bentley
Make benchmarks for mp
209
        if len(parent_ids) == 0:
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
210
            return True
211
        for ignored in xrange(self.snapshot_interval):
0.9.12 by Aaron Bentley
Make benchmarks for mp
212
            if len(parent_ids) == 0:
213
                return False
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
214
            version_ids = parent_ids
215
            parent_ids = []
216
            for version_id in version_ids:
217
                if version_id not in self._snapshots:
218
                    parent_ids.extend(self._parents[version_id])
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
219
        else:
220
            return True
0.9.8 by Aaron Bentley
get add_version working
221
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
222
    def add_version(self, lines, version_id, parent_ids,
0.9.20 by Aaron Bentley
Convert to a plugin
223
                    force_snapshot=None, single_parent=False):
0.9.16 by Aaron Bentley
More control over snapshotting, disable caching for inventory
224
        if force_snapshot is None:
225
            do_snapshot = self.do_snapshot(version_id, parent_ids)
226
        else:
227
            do_snapshot = force_snapshot
228
        if do_snapshot:
229
            self._snapshots.add(version_id)
0.9.12 by Aaron Bentley
Make benchmarks for mp
230
            diff = MultiParent([NewText(lines)])
231
        else:
0.9.20 by Aaron Bentley
Convert to a plugin
232
            if single_parent:
233
                parent_lines = self.get_line_list(parent_ids[:1])
234
            else:
235
                parent_lines = self.get_line_list(parent_ids)
0.9.12 by Aaron Bentley
Make benchmarks for mp
236
            diff = MultiParent.from_lines(lines, parent_lines)
0.9.8 by Aaron Bentley
get add_version working
237
        self.add_diff(diff, version_id, parent_ids)
238
        self._lines[version_id] = lines
239
240
    def add_diff(self, diff, version_id, parent_ids):
241
        self._diffs[version_id] = diff
242
        self._parents[version_id] = parent_ids
243
0.9.20 by Aaron Bentley
Convert to a plugin
244
    def import_versionedfile(self, vf, snapshots, no_cache=True,
0.9.22 by Aaron Bentley
Fix restoration bug
245
                             single_parent=False, verify=False):
0.9.20 by Aaron Bentley
Convert to a plugin
246
        """Import all revisions of a versionedfile
247
248
        :param vf: The versionedfile to import
249
        :param snapshots: If provided, the revisions to make snapshots of.
250
            Otherwise, this will be auto-determined
251
        :param no_cache: If true, clear the cache after every add.
252
        :param single_parent: If true, omit all but one parent text, (but
253
            retain parent metadata).
254
        """
0.9.22 by Aaron Bentley
Fix restoration bug
255
        assert no_cache or not verify
0.9.19 by Aaron Bentley
More tweakage
256
        revisions = set(vf.versions())
257
        total = len(revisions)
0.9.20 by Aaron Bentley
Convert to a plugin
258
        pb = ui.ui_factory.nested_progress_bar()
259
        try:
260
            while len(revisions) > 0:
261
                added = set()
262
                for revision in revisions:
263
                    parents = vf.get_parents(revision)
264
                    if [p for p in parents if p not in self._diffs] != []:
265
                        continue
266
                    lines = [a + ' ' + l for a, l in
267
                             vf.annotate_iter(revision)]
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
268
                    if snapshots is None:
0.9.20 by Aaron Bentley
Convert to a plugin
269
                        force_snapshot = None
270
                    else:
0.9.21 by Aaron Bentley
finish converting ft_ to snapshots
271
                        force_snapshot = (revision in snapshots)
0.9.20 by Aaron Bentley
Convert to a plugin
272
                    self.add_version(lines, revision, parents, force_snapshot,
273
                                     single_parent)
274
                    added.add(revision)
275
                    if no_cache:
276
                        self.clear_cache()
0.9.22 by Aaron Bentley
Fix restoration bug
277
                        if verify:
278
                            assert lines == self.get_line_list([revision])[0]
279
                            self.clear_cache()
0.9.20 by Aaron Bentley
Convert to a plugin
280
                    pb.update('Importing revisions',
281
                              (total - len(revisions)) + len(added), total)
282
                revisions = [r for r in revisions if r not in added]
283
        finally:
284
            pb.finished()
0.9.19 by Aaron Bentley
More tweakage
285
0.9.23 by Aaron Bentley
handle snapshots all at once
286
    def select_snapshots(self, vf):
287
        distances = {}
288
        descendants = {}
289
        snapshots = set()
290
        for version_id in vf.versions():
291
            for parent_id in vf.get_parents(version_id):
292
                descendants.setdefault(parent_id, []).append(version_id)
293
        cur = [v for v in vf.versions() if len(vf.get_parents(v)) == 0]
294
        while len(cur) > 0:
295
            next = []
296
            for version_id in cur:
297
                if version_id in distances:
298
                    continue
299
                parents = vf.get_parents(version_id)
300
                p_distances = [distances.get(p) for p in parents]
301
                if None in p_distances:
302
                    continue
303
                next.extend(descendants.get(version_id, []))
304
                if len(p_distances) == 0:
305
                    snapshots.add(version_id)
306
                    distances[version_id] = 0
307
                else:
308
                    max_distance = max(p_distances)
309
                    if max_distance + 1 > self.snapshot_interval:
310
                        snapshots.add(version_id)
311
                        distances[version_id] = 0
312
                    else:
313
                        distances[version_id] = max_distance + 1
314
            cur = next
315
        return snapshots
316
317
0.9.8 by Aaron Bentley
get add_version working
318
    def clear_cache(self):
319
        self._lines.clear()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
320
321
    def get_line_list(self, version_ids):
322
        return [self.cache_version(v) for v in version_ids]
323
324
    def cache_version(self, version_id):
325
        try:
326
            return self._lines[version_id]
327
        except KeyError:
328
            pass
329
        diff = self._diffs[version_id]
330
        lines = []
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
331
        reconstructor = _Reconstructor(self._diffs, self._lines,
332
                                       self._parents)
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
333
        reconstructor.reconstruct_version(lines, version_id)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
334
        self._lines[version_id] = lines
335
        return lines
336
337
338
class _Reconstructor(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
339
    """Build a text from the diffs, ancestry graph and cached lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
340
341
    def __init__(self, diffs, lines, parents):
342
        self.diffs = diffs
343
        self.lines = lines
344
        self.parents = parents
345
        self.cursor = {}
346
347
    def reconstruct(self, lines, parent_text, version_id):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
348
        """Append the lines referred to by a ParentText to lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
349
        parent_id = self.parents[version_id][parent_text.parent]
350
        end = parent_text.parent_pos + parent_text.num_lines
0.9.17 by Aaron Bentley
Dynamically select snapshots based on all parents
351
        return self._reconstruct(lines, parent_id, parent_text.parent_pos,
352
                                 end)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
353
354
    def _reconstruct(self, lines, req_version_id, req_start, req_end):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
355
        """Append lines for the requested version_id range"""
356
        # stack of pending range requests
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
357
        pending_reqs = [(req_version_id, req_start, req_end)]
358
        while len(pending_reqs) > 0:
359
            req_version_id, req_start, req_end = pending_reqs.pop()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
360
            # lazily allocate cursors for versions
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
361
            try:
362
                start, end, kind, data, iterator = self.cursor[req_version_id]
363
            except KeyError:
364
                iterator = self.diffs[req_version_id].range_iterator()
365
                start, end, kind, data = iterator.next()
0.9.22 by Aaron Bentley
Fix restoration bug
366
            if start > req_start:
367
                iterator = self.diffs[req_version_id].range_iterator()
368
                start, end, kind, data = iterator.next()
369
0.9.10 by Aaron Bentley
Text reconstruction seems to work
370
            # find the first hunk relevant to the request
371
            while end <= req_start:
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
372
                start, end, kind, data = iterator.next()
373
            self.cursor[req_version_id] = start, end, kind, data, iterator
0.9.10 by Aaron Bentley
Text reconstruction seems to work
374
            # if the hunk can't satisfy the whole request, split it in two,
375
            # and leave the second half for later.
376
            if req_end > end:
377
                pending_reqs.append((req_version_id, end, req_end))
378
                req_end = end
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
379
            if kind == 'new':
380
                lines.extend(data[req_start - start: (req_end - start)])
381
            else:
0.9.10 by Aaron Bentley
Text reconstruction seems to work
382
                # If the hunk is a ParentText, rewrite it as a range request
383
                # for the parent, and make it the next pending request.
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
384
                parent, parent_start, parent_end = data
0.9.10 by Aaron Bentley
Text reconstruction seems to work
385
                new_version_id = self.parents[req_version_id][parent]
386
                new_start = parent_start + req_start - start
387
                new_end = parent_end + req_end - end
388
                pending_reqs.append((new_version_id, new_start, new_end))
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
389
390
    def reconstruct_version(self, lines, version_id):
391
        length = self.diffs[version_id].num_lines()
392
        return self._reconstruct(lines, version_id, 0, length)