/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.9.2 by Aaron Bentley
Get single-parent comparison working
1
from difflib import SequenceMatcher
2
0.9.3 by Aaron Bentley
Get three-parent comparisions under test
3
0.9.1 by Aaron Bentley
Get trivial case passing
4
class MultiParent(object):
    """A text described as a sequence of hunks relative to parent texts.

    Each hunk is either a NewText (lines introduced by this text) or a
    ParentText (a reference to a run of lines in one of the parents).
    """

    def __init__(self, hunks=None):
        """Create a MultiParent.

        :param hunks: the list of hunks (stored as-is, not copied).  A fresh
            empty list is used when this is None.
        """
        if hunks is not None:
            self.hunks = hunks
        else:
            self.hunks = []

    def __repr__(self):
        # Wrap in a 1-tuple so that a tuple of hunks is formatted as a single
        # value instead of being unpacked as the format arguments.
        return "MultiParent(%r)" % (self.hunks,)

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (self.hunks == other.hunks)

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__, so spell it out.
        return not self.__eq__(other)

    @staticmethod
    def from_lines(text, parents=()):
        """Produce a MultiParent from a list of lines and parents

        :param text: the list of lines of the text being described
        :param parents: a sequence of line lists, one per parent
        :return: a MultiParent whose hunks cover every line of text
        """
        def compare(parent):
            return SequenceMatcher(None, parent, text).get_matching_blocks()

        # One iterator of (parent_pos, child_pos, length) matches per parent.
        block_iter = [iter(compare(p)) for p in parents]

        def next_block(p):
            # The next() builtin works on Python 2.6+ and Python 3, unlike
            # the Python-2-only iterator.next() method.
            return next(block_iter[p], None)

        # The matching block currently under consideration for each parent,
        # or None once that parent has no more blocks.
        cur_block = [next_block(p) for p in range(len(block_iter))]
        cur_line = 0
        new_text = NewText([])
        diff = MultiParent([])
        while cur_line < len(text):
            best_match = None
            for p, block in enumerate(cur_block):
                if block is None:
                    continue
                i, j, n = block
                # Skip blocks that end at or before the current line.  Using
                # <= (rather than <) means a block that ends exactly at
                # cur_line is discarded, so a following block that starts at
                # cur_line is not overlooked.
                while j + n <= cur_line:
                    block = cur_block[p] = next_block(p)
                    if block is None:
                        break
                    i, j, n = block
                if block is None:
                    continue
                if j > cur_line:
                    # This parent's next match starts later in the text.
                    continue
                # Trim the part of the block that lies before cur_line.
                # Here j <= cur_line < j + n, so at least one line remains.
                offset = cur_line - j
                i += offset
                j = cur_line
                n -= offset
                # Prefer the parent offering the longest run from cur_line;
                # on ties the earliest parent wins.
                if best_match is None or n > best_match.num_lines:
                    best_match = ParentText(p, i, j, n)
            if best_match is None:
                # No parent has this line: accumulate it as new text.
                new_text.lines.append(text[cur_line])
                cur_line += 1
            else:
                # Flush pending new text before the parent reference.
                if len(new_text.lines) > 0:
                    diff.hunks.append(new_text)
                    new_text = NewText([])
                diff.hunks.append(best_match)
                cur_line += best_match.num_lines
        if len(new_text.lines) > 0:
            diff.hunks.append(new_text)
        return diff

    @classmethod
    def from_texts(cls, text, parents=()):
        """Produce a MultiParent from a text and list of parent text"""
        # splitlines(True) keeps the line endings on each line.
        return cls.from_lines(text.splitlines(True),
                              [p.splitlines(True) for p in parents])

    def to_patch(self):
        """Yield text lines for a patch"""
        for hunk in self.hunks:
            for line in hunk.to_patch():
                yield line

    def range_iterator(self):
        """Iterate through the hunks, with range indicated

        kind is "new" or "parent".
        for "new", data is a list of lines.
        for "parent", data is (parent, parent_start, parent_end)
        :return: a generator of (start, end, kind, data)
        """
        start = 0
        for hunk in self.hunks:
            if isinstance(hunk, NewText):
                kind = 'new'
                end = start + len(hunk.lines)
                data = hunk.lines
            else:
                kind = 'parent'
                # ParentText hunks record their own position in the child.
                start = hunk.child_pos
                end = start + hunk.num_lines
                data = (hunk.parent, hunk.parent_pos, hunk.parent_pos +
                        hunk.num_lines)
            yield start, end, kind, data
            start = end

    def num_lines(self):
        """Return the number of lines in the text these hunks describe.

        Walks the hunks backwards: the last ParentText gives an absolute
        end position, to which the lengths of any trailing NewText hunks
        are added.
        """
        extra_n = 0
        for hunk in reversed(self.hunks):
            if isinstance(hunk, ParentText):
                return hunk.child_pos + hunk.num_lines + extra_n
            extra_n += len(hunk.lines)
        return extra_n
115
0.9.1 by Aaron Bentley
Get trivial case passing
116
117
class NewText(object):
    """The contents of text that is introduced by this text"""

    def __init__(self, lines):
        """Create a NewText.

        :param lines: the list of lines (stored as-is, not copied)
        """
        self.lines = lines

    def __eq__(self, other):
        if self.__class__ is not other.__class__:
            return False
        return (other.lines == self.lines)

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__, so spell it out.
        return not self.__eq__(other)

    def __repr__(self):
        # Wrap in a 1-tuple so that a tuple of lines is formatted as a single
        # value instead of being unpacked as the format arguments.
        return 'NewText(%r)' % (self.lines,)

    def to_patch(self):
        """Yield the patch lines for this hunk.

        Emits an 'i <count>' header, then the lines themselves, then a
        single newline.
        """
        yield 'i %d\n' % len(self.lines)
        for line in self.lines:
            yield line
        yield '\n'
136
0.9.2 by Aaron Bentley
Get single-parent comparison working
137
138
class ParentText(object):
    """A reference to text present in a parent text"""

    def __init__(self, parent, parent_pos, child_pos, num_lines):
        """Create a ParentText.

        :param parent: index of the parent in the list of parents
        :param parent_pos: first referenced line in the parent
        :param child_pos: line in the child where the run starts
        :param num_lines: number of lines in the run
        """
        self.parent = parent
        self.parent_pos = parent_pos
        self.child_pos = child_pos
        self.num_lines = num_lines

    def __repr__(self):
        return 'ParentText(%(parent)r, %(parent_pos)r, %(child_pos)r,'\
            ' %(num_lines)r)' % self.__dict__

    def __eq__(self, other):
        # Identity test on the class, as the other hunk classes do.
        if self.__class__ is not other.__class__:
            return False
        return (self.__dict__ == other.__dict__)

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__, so spell it out.
        return not self.__eq__(other)

    def to_patch(self):
        """Yield the single 'c ...' patch line describing this reference."""
        yield 'c %(parent)d %(parent_pos)d %(child_pos)d %(num_lines)d\n'\
            % self.__dict__
0.9.8 by Aaron Bentley
get add_version working
159
160
161
class MultiVersionedFile(object):
    """VersionedFile skeleton for MultiParent"""

    def __init__(self):
        # version_id -> MultiParent diff against that version's parents
        self._diffs = {}
        # version_id -> cached list of lines (the full text)
        self._lines = {}
        # version_id -> sequence of parent version ids
        self._parents = {}

    def add_version(self, lines, version_id, parent_ids):
        """Store a new version given its full text.

        :param lines: the list of lines of the new version
        :param version_id: the key to store the version under
        :param parent_ids: ids of the parent versions; each must currently
            be in the line cache, otherwise KeyError is raised
        """
        parent_lines = [self._lines[p] for p in parent_ids]
        diff = MultiParent.from_lines(lines, parent_lines)
        self.add_diff(diff, version_id, parent_ids)
        self._lines[version_id] = lines

    def add_diff(self, diff, version_id, parent_ids):
        """Record a diff and the parents it is relative to."""
        self._diffs[version_id] = diff
        self._parents[version_id] = parent_ids

    def clear_cache(self):
        """Drop all cached full texts; the diffs are kept."""
        self._lines.clear()

    def get_line_list(self, version_ids):
        """Return the list of lines for each of the requested versions."""
        return [self.cache_version(v) for v in version_ids]

    def cache_version(self, version_id):
        """Return the lines of a version, rebuilding and caching if needed.

        :raises KeyError: if the version is not cached and has no diff
        """
        try:
            return self._lines[version_id]
        except KeyError:
            pass
        lines = []
        reconstructor = _Reconstructor(self._diffs, self._lines, self._parents)
        # Looks up the diff for version_id itself, so an unknown version
        # still raises KeyError here.
        reconstructor.reconstruct_version(lines, version_id)
        self._lines[version_id] = lines
        return lines
196
197
198
class _Reconstructor(object):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
199
    """Build a text from the diffs, ancestry graph and cached lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
200
201
    def __init__(self, diffs, lines, parents):
202
        self.diffs = diffs
203
        self.lines = lines
204
        self.parents = parents
205
        self.cursor = {}
206
207
    def reconstruct(self, lines, parent_text, version_id):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
208
        """Append the lines referred to by a ParentText to lines"""
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
209
        parent_id = self.parents[version_id][parent_text.parent]
210
        end = parent_text.parent_pos + parent_text.num_lines
211
        return self._reconstruct(lines, parent_id, parent_text.parent_pos, end)
212
213
    def _reconstruct(self, lines, req_version_id, req_start, req_end):
0.9.10 by Aaron Bentley
Text reconstruction seems to work
214
        """Append lines for the requested version_id range"""
215
        # stack of pending range requests
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
216
        pending_reqs = [(req_version_id, req_start, req_end)]
217
        while len(pending_reqs) > 0:
218
            req_version_id, req_start, req_end = pending_reqs.pop()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
219
            # lazily allocate cursors for versions
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
220
            try:
221
                start, end, kind, data, iterator = self.cursor[req_version_id]
222
            except KeyError:
223
                iterator = self.diffs[req_version_id].range_iterator()
224
                start, end, kind, data = iterator.next()
0.9.10 by Aaron Bentley
Text reconstruction seems to work
225
            # find the first hunk relevant to the request
226
            while end <= req_start:
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
227
                start, end, kind, data = iterator.next()
228
            self.cursor[req_version_id] = start, end, kind, data, iterator
0.9.10 by Aaron Bentley
Text reconstruction seems to work
229
            # if the hunk can't satisfy the whole request, split it in two,
230
            # and leave the second half for later.
231
            if req_end > end:
232
                pending_reqs.append((req_version_id, end, req_end))
233
                req_end = end
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
234
            if kind == 'new':
235
                lines.extend(data[req_start - start: (req_end - start)])
236
            else:
0.9.10 by Aaron Bentley
Text reconstruction seems to work
237
                # If the hunk is a ParentText, rewrite it as a range request
238
                # for the parent, and make it the next pending request.
0.9.9 by Aaron Bentley
Much progress on non-naive text reconstruction
239
                parent, parent_start, parent_end = data
0.9.10 by Aaron Bentley
Text reconstruction seems to work
240
                new_version_id = self.parents[req_version_id][parent]
241
                new_start = parent_start + req_start - start
242
                new_end = parent_end + req_end - end
243
                pending_reqs.append((new_version_id, new_start, new_end))
0.9.11 by Aaron Bentley
Implement reconstruct_version, handle all hunks through that
244
245
    def reconstruct_version(self, lines, version_id):
246
        length = self.diffs[version_id].num_lines()
247
        return self._reconstruct(lines, version_id, 0, length)