/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.9.2 by Aaron Bentley
Get single-parent comparison working
1
from difflib import SequenceMatcher
2
0.9.3 by Aaron Bentley
Get three-parent comparisions under test
3
0.9.1 by Aaron Bentley
Get trivial case passing
4
class MultiParent(object):
5
0.9.2 by Aaron Bentley
Get single-parent comparison working
6
    def __init__(self, hunks=None):
7
        if hunks is not None:
8
            self.hunks = hunks
9
        else:
10
            self.hunks = []
11
12
    def __repr__(self):
13
        return "MultiParent(%r)" % self.hunks
14
15
    def __eq__(self, other):
16
        if self.__class__ is not other.__class__:
17
            return False
18
        return (self.hunks == other.hunks)
0.9.1 by Aaron Bentley
Get trivial case passing
19
20
    @staticmethod
21
    def from_lines(text, parents=()):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
22
        """Produce a MultiParent from a list of lines and parents"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
23
        def compare(parent):
24
            return SequenceMatcher(None, parent, text).get_matching_blocks()
25
        parent_comparisons = [compare(p) for p in parents]
26
        cur_line = 0
27
        new_text = NewText([])
28
        parent_text = []
29
        block_iter = [iter(i) for i in parent_comparisons]
30
        diff = MultiParent([])
31
        def next_block(p):
32
            try:
33
                return block_iter[p].next()
34
            except StopIteration:
35
                return None
36
        cur_block = [next_block(p) for p, i in enumerate(block_iter)]
37
        while cur_line < len(text):
38
            best_match = None
39
            for p, block in enumerate(cur_block):
40
                if block is None:
41
                    continue
42
                i, j, n = block
43
                while j + n < cur_line:
44
                    block = cur_block[p] = next_block(p)
45
                    if block is None:
46
                        break
47
                    i, j, n = block
48
                if block is None:
49
                    continue
50
                if j > cur_line:
51
                    continue
52
                offset = cur_line - j
53
                i += offset
54
                j = cur_line
55
                n -= offset
56
                if n == 0:
57
                    continue
58
                if best_match is None or n > best_match.num_lines:
59
                    best_match = ParentText(p, i, j, n)
60
            if best_match is None:
61
                new_text.lines.append(text[cur_line])
62
                cur_line += 1
63
            else:
64
                if len(new_text.lines) > 0:
65
                    diff.hunks.append(new_text)
66
                    new_text = NewText([])
67
                diff.hunks.append(best_match)
68
                cur_line += best_match.num_lines
69
        if len(new_text.lines) > 0:
70
            diff.hunks.append(new_text)
0.9.1 by Aaron Bentley
Get trivial case passing
71
        return diff
72
73
    @classmethod
74
    def from_texts(cls, text, parents=()):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
75
        """Produce a MultiParent from a text and list of parent text"""
0.9.1 by Aaron Bentley
Get trivial case passing
76
        return cls.from_lines(text.splitlines(True),
77
                              [p.splitlines(True) for p in parents])
78
0.9.4 by Aaron Bentley
Start supporting serialization
79
    def to_patch(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
80
        """Yield text lines for a patch"""
0.9.4 by Aaron Bentley
Start supporting serialization
81
        for hunk in self.hunks:
82
            for line in hunk.to_patch():
83
                yield line
84
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
85
    def range_iterator(self):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
86
        """Iterate through the hunks, with range indicated
87
88
        kind is "new" or "parent".
89
        for "new", data is a list of lines.
90
        for "parent", data is (parent, parent_start, parent_end)
91
        :return: a generator of (start, end, kind, data)
92
        """
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
93
        start = 0
94
        for hunk in self.hunks:
95
            if isinstance(hunk, NewText):
96
                kind = 'new'
97
                end = start + len(hunk.lines)
98
                data = hunk.lines
99
            else:
100
                kind = 'parent'
101
                start = hunk.child_pos
102
                end = start + hunk.num_lines
103
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
104
                        hunk.num_lines)
105
            yield start, end, kind, data
106
            start = end
107
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
108
    def num_lines(self):
109
        extra_n = 0
110
        for hunk in reversed(self.hunks):
111
            if isinstance(hunk, ParentText):
112
               return hunk.child_pos + hunk.num_lines + extra_n
113
            extra_n += len(hunk.lines)
114
        return extra_n
115
0.9.1 by Aaron Bentley
Get trivial case passing
116
117
class NewText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
118
    """The contents of text that is introduced by this text"""
0.9.1 by Aaron Bentley
Get trivial case passing
119
120
    def __init__(self, lines):
121
        self.lines = lines
122
123
    def __eq__(self, other):
124
        if self.__class__ is not other.__class__:
125
            return False
126
        return (other.lines == self.lines)
0.9.2 by Aaron Bentley
Get single-parent comparison working
127
128
    def __repr__(self):
129
        return 'NewText(%r)' % self.lines
130
0.9.4 by Aaron Bentley
Start supporting serialization
131
    def to_patch(self):
132
        yield 'i %d\n' % len(self.lines)
133
        for line in self.lines:
134
            yield line
135
        yield '\n'
136
0.9.2 by Aaron Bentley
Get single-parent comparison working
137
138
class ParentText(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
139
    """A reference to text present in a parent text"""
0.9.2 by Aaron Bentley
Get single-parent comparison working
140
141
    def __init__(self, parent, parent_pos, child_pos, num_lines):
142
        self.parent = parent
143
        self.parent_pos = parent_pos
144
        self.child_pos = child_pos
145
        self.num_lines = num_lines
146
147
    def __repr__(self):
148
        return 'ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'\
149
            ' %(num_lines)r)' % self.__dict__
150
151
    def __eq__(self, other):
152
        if self.__class__ != other.__class__:
153
            return False
154
        return (self.__dict__ == other.__dict__)
0.9.4 by Aaron Bentley
Start supporting serialization
155
156
    def to_patch(self):
157
        yield 'c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'\
158
            % self.__dict__
0.9.8 by Aaron Bentley
get add_version working
159
160
161
class MultiVersionedFile(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
162
    """VersionedFile skeleton for MultiParent"""
0.9.8 by Aaron Bentley
get add_version working
163
0.9.12 by Aaron Bentley
Make benchmarks for mp
164
    def __init__(self, snapshot_interval=25):
0.9.8 by Aaron Bentley
get add_version working
165
        self._diffs = {}
166
        self._lines = {}
167
        self._parents = {}
0.9.12 by Aaron Bentley
Make benchmarks for mp
168
        self._snapshots = {}
169
        self.snapshot_interval = snapshot_interval
170
171
    def do_snapshot(self, version_id, parent_ids):
172
        if len(parent_ids) == 0:
173
            return False
174
        counter = 0
175
        while counter < self.snapshot_interval:
176
            if len(parent_ids) == 0:
177
                return False
178
            version_id = parent_ids[0]
179
            if self._snapshots[version_id]:
180
                return False
181
            counter += 1
182
            parent_ids = self._parents[version_id]
183
        return True
0.9.8 by Aaron Bentley
get add_version working
184
185
    def add_version(self, lines, version_id, parent_ids):
0.9.12 by Aaron Bentley
Make benchmarks for mp
186
        if self.do_snapshot(version_id, parent_ids):
187
            self._snapshots[version_id] = True
188
            diff = MultiParent([NewText(lines)])
189
        else:
190
            self._snapshots[version_id] = False
191
            parent_lines = [self._lines[p] for p in parent_ids]
192
            diff = MultiParent.from_lines(lines, parent_lines)
0.9.8 by Aaron Bentley
get add_version working
193
        self.add_diff(diff, version_id, parent_ids)
194
        self._lines[version_id] = lines
195
196
    def add_diff(self, diff, version_id, parent_ids):
197
        self._diffs[version_id] = diff
198
        self._parents[version_id] = parent_ids
199
200
    def clear_cache(self):
201
        self._lines.clear()
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
202
203
    def get_line_list(self, version_ids):
204
        return [self.cache_version(v) for v in version_ids]
205
206
    def cache_version(self, version_id):
207
        try:
208
            return self._lines[version_id]
209
        except KeyError:
210
            pass
211
        diff = self._diffs[version_id]
212
        lines = []
213
        reconstructor = _Reconstructor(self._diffs, self._lines, self._parents)
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
214
        reconstructor.reconstruct_version(lines, version_id)
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
215
        self._lines[version_id] = lines
216
        return lines
217
218
219
class _Reconstructor(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
220
    """Build a text from the diffs, ancestry graph and cached lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
221
222
    def __init__(self, diffs, lines, parents):
223
        self.diffs = diffs
224
        self.lines = lines
225
        self.parents = parents
226
        self.cursor = {}
227
228
    def reconstruct(self, lines, parent_text, version_id):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
229
        """Append the lines referred to by a ParentText to lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
230
        parent_id = self.parents[version_id][parent_text.parent]
231
        end = parent_text.parent_pos + parent_text.num_lines
232
        return self._reconstruct(lines, parent_id, parent_text.parent_pos, end)
233
234
    def _reconstruct(self, lines, req_version_id, req_start, req_end):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
235
        """Append lines for the requested version_id range"""
236
        # stack of pending range requests
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
237
        pending_reqs = [(req_version_id, req_start, req_end)]
238
        while len(pending_reqs) > 0:
239
            req_version_id, req_start, req_end = pending_reqs.pop()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
240
            # lazily allocate cursors for versions
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
241
            try:
242
                start, end, kind, data, iterator = self.cursor[req_version_id]
243
            except KeyError:
244
                iterator = self.diffs[req_version_id].range_iterator()
245
                start, end, kind, data = iterator.next()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
246
            # find the first hunk relevant to the request
247
            while end <= req_start:
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
248
                start, end, kind, data = iterator.next()
249
            self.cursor[req_version_id] = start, end, kind, data, iterator
0.9.10 by Aaron Bentley
Text reconstruction seems to work
250
            # if the hunk can't satisfy the whole request, split it in two,
251
            # and leave the second half for later.
252
            if req_end > end:
253
                pending_reqs.append((req_version_id, end, req_end))
254
                req_end = end
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
255
            if kind == 'new':
256
                lines.extend(data[req_start - start: (req_end - start)])
257
            else:
0.9.10 by Aaron Bentley
Text reconstruction seems to work
258
                # If the hunk is a ParentText, rewrite it as a range request
259
                # for the parent, and make it the next pending request.
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
260
                parent, parent_start, parent_end = data
0.9.10 by Aaron Bentley
Text reconstruction seems to work
261
                new_version_id = self.parents[req_version_id][parent]
262
                new_start = parent_start + req_start - start
263
                new_end = parent_end + req_end - end
264
                pending_reqs.append((new_version_id, new_start, new_end))
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
265
266
    def reconstruct_version(self, lines, version_id):
267
        length = self.diffs[version_id].num_lines()
268
        return self._reconstruct(lines, version_id, 0, length)