/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
1
#!/usr/bin/env python
2
"""\
3
Read in a changeset output, and process it into a Changeset object.
4
"""
5
6
import bzrlib, bzrlib.changeset
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
7
import pprint
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
8
import common
9
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
10
class BadChangeset(Exception):
    """Base class for the errors raised while processing a changeset."""


class MalformedHeader(BadChangeset):
    """Raised when the header section of a changeset cannot be parsed."""


class MalformedPatches(BadChangeset):
    """Raised when the patch section of a changeset cannot be parsed."""


class MalformedFooter(BadChangeset):
    """Raised when the footer section of a changeset cannot be parsed."""
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
14
0.5.11 by John Arbash Meinel
Working on properly representing renames.
15
def _unescape(name):
16
    """Now we want to find the filename effected.
17
    Unfortunately the filename is written out as
18
    repr(filename), which means that it surrounds
19
    the name with quotes which may be single or double
20
    (single is preferred unless there is a single quote in
21
    the filename). And some characters will be escaped.
22
23
    TODO:   There has to be some pythonic way of undo-ing the
24
            representation of a string rather than using eval.
25
    """
26
    delimiter = name[0]
27
    if name[-1] != delimiter:
28
        raise BadChangeset('Could not properly parse the'
29
                ' filename: %r' % name)
30
    # We need to handle escaped hexadecimals too.
31
    return name[1:-1].replace('\"', '"').replace("\'", "'")
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
32
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
33
class RevisionInfo(object):
    """Holds the values read from the changeset for one revision.

    Every field except ``rev_id`` starts out as None and is filled in
    from outside as the matching entry is read.
    """

    def __init__(self, rev_id):
        """Create an otherwise empty record for the revision ``rev_id``.

        :param rev_id: The identifier of the revision being described.
        """
        self.rev_id = rev_id
        # Created in this order; all of them are unset until read.
        for field in ('sha1', 'committer', 'timestamp', 'timezone',
                      'inventory_id', 'inventory_sha1',
                      'parents', 'message'):
            setattr(self, field, None)

    def __str__(self):
        """Return a pretty-printed dump of all the attributes."""
        attributes = vars(self)
        return pprint.pformat(attributes)
50
51
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
52
class ChangesetInfo(object):
    """This is the intermediate class that gets filled out as
    the file is read.

    Once everything has been read, create_maps() builds the lookup
    tables between file ids, paths and parent ids, and get_changeset()
    turns the recorded actions into a bzrlib.changeset.Changeset.
    """
    def __init__(self):
        self.committer = None
        self.date = None
        self.message = None
        self.base = None
        self.base_sha1 = None

        self.revisions = []

        self.timestamp = None
        self.timezone = None

        self.tree_root_id = None
        # Lists of 'path<TAB>file id<TAB>parent id' strings (see
        # create_maps), for the new and for the old tree.
        self.file_ids = None
        self.old_file_ids = None

        self.actions = [] #this is the list of things that happened
        self.id2path = {} # A mapping from file id to path name
        self.path2id = {} # The reverse mapping
        self.id2parent = {} # A mapping from a given id to its parent id

        # The same three mappings, built from old_file_ids
        self.old_id2path = {}
        self.old_path2id = {}
        self.old_id2parent = {}

    def __str__(self):
        """Return a pretty-printed dump of all the attributes."""
        return pprint.pformat(self.__dict__)

    def create_maps(self):
        """Go through the individual id sections, and generate the
        id2path, path2id and id2parent maps (and their old_* twins).

        All six maps are rebuilt from scratch on every call, so calling
        this more than once cannot leak entries of the new tree into the
        maps of the old tree.

        :raises BadChangeset: If an entry of file_ids or old_file_ids is
                              not made of exactly three tab separated
                              fields.
        """
        root_id = self.tree_root_id

        def build(entries):
            # Rather than use an empty path, the changeset code seems
            # to like to use "./." for the tree root.
            id2path = {root_id: './.'}
            path2id = {'./.': root_id}
            id2parent = {root_id: bzrlib.changeset.NULL_ID}
            for info in entries or ():
                try:
                    path, f_id, parent_id = info.split('\t')
                except ValueError:
                    raise BadChangeset('Could not parse the file id'
                            ' entry: %r' % (info,))
                id2path[f_id] = path
                path2id[path] = f_id
                id2parent[f_id] = parent_id
            return id2path, path2id, id2parent

        (self.id2path, self.path2id,
                self.id2parent) = build(self.file_ids)
        (self.old_id2path, self.old_path2id,
                self.old_id2parent) = build(self.old_file_ids)

    def get_changeset(self):
        """Create a changeset from the data contained within.

        create_maps() must have been called first, because the paths of
        the actions are resolved through the maps it builds.

        :return: A bzrlib.changeset.Changeset holding one entry for the
                 tree root plus one entry per recorded action.
        :raises BadChangeset: If an action line is malformed, names an
                              unknown action, refers to a path which has
                              no file id, or describes a rename between
                              two different file ids.
        """
        from bzrlib.changeset import Changeset, ChangesetEntry, \
            PatchApply

        def lookup(path2id, path):
            # Report unknown paths as a changeset error, not a KeyError.
            try:
                return path2id[path]
            except KeyError:
                raise BadChangeset('No file id was given for the'
                        ' path: %r' % (path,))

        cset = Changeset()

        entry = ChangesetEntry(self.tree_root_id,
                bzrlib.changeset.NULL_ID, './.')
        cset.add_entry(entry)
        for info, lines in self.actions:
            # An action line looks like '<action> <kind> <path info>'.
            # The second word is not needed to build the entry.
            parts = info.split(' ')
            if len(parts) < 3:
                raise BadChangeset('Malformed action line: %r' % (info,))
            action = parts[0]
            extra = ' '.join(parts[2:])
            if action == 'renamed':
                try:
                    old_path, new_path = extra.split(' => ')
                except ValueError:
                    raise BadChangeset('Malformed rename: %r' % (extra,))
                old_path = _unescape(old_path)
                new_path = _unescape(new_path)

                new_id = lookup(self.path2id, new_path)
                old_id = lookup(self.old_path2id, old_path)
                # Not an assert: asserts are stripped when running
                # with -O, and this validates external input.
                if old_id != new_id:
                    raise BadChangeset('Rename %r => %r does not keep'
                            ' the file id' % (old_path, new_path))

                new_parent = self.id2parent[new_id]
                old_parent = self.old_id2parent[old_id]

                entry = ChangesetEntry(old_id, old_parent, old_path)
                entry.new_path = new_path
                entry.new_parent = new_parent
                if lines:
                    entry.contents_change = PatchApply(''.join(lines))
            elif action == 'removed':
                old_path = _unescape(extra)
                old_id = lookup(self.old_path2id, old_path)
                old_parent = self.old_id2parent[old_id]
                entry = ChangesetEntry(old_id, old_parent, old_path)
                entry.new_path = None
                entry.new_parent = None
                # Technically a removed should be a ReplaceContents()
                # where you need to have the old contents, but at most
                # we have a remove style patch, so no contents change
                # is recorded.
            elif action == 'added':
                new_path = _unescape(extra)
                new_id = lookup(self.path2id, new_path)
                new_parent = self.id2parent[new_id]
                entry = ChangesetEntry(new_id, new_parent, new_path)
                # No old path / parent: the entry did not exist before.
                entry.path = None
                entry.parent = None
                if lines:
                    # Technically an added should be a ReplaceContents(),
                    # but at most we have an add style patch.
                    entry.contents_change = PatchApply(''.join(lines))
            elif action == 'modified':
                new_path = _unescape(extra)
                new_id = lookup(self.path2id, new_path)
                new_parent = self.id2parent[new_id]
                # A modified entry keeps its path and parent, so the old
                # values set by the constructor are left alone. Clearing
                # them, as is done for 'added', would describe the entry
                # as newly created.
                # NOTE(review): relies on ChangesetEntry initialising the
                # old and new path/parent to the same values -- confirm.
                entry = ChangesetEntry(new_id, new_parent, new_path)
                if lines:
                    entry.contents_change = PatchApply(''.join(lines))
            else:
                raise BadChangeset('Unrecognized action: %r' % action)
            cset.add_entry(entry)
        return cset
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
184
185
class ChangesetReader(object):
    """This class reads in a changeset from a file, and collects what
    it finds in a ChangesetInfo object (see get_info).

    The whole file is read by the constructor: first the header, then
    the patches and finally the footer.
    """
    def __init__(self, from_file):
        """Read in the changeset from the file.

        :param from_file: A file-like object (must have iterator support).
        :raises MalformedHeader: If the header cannot be parsed.
        :raises MalformedPatches: If a patch does not start with a
                                  '***' line.
        :raises MalformedFooter: If the footer is missing.
        """
        object.__init__(self)
        self.from_file = from_file
        # The line which has been read from the file, but which has not
        # been handed out yet. None when nothing is buffered.
        self._next_line = None

        self.info = ChangesetInfo()
        # We put the actual inventory ids in the footer, so that the patch
        # is easier to read for humans.
        # Unfortunately, that means we need to read everything before we
        # can create a proper changeset.
        self._read_header()
        self._read_patches()
        self._read_footer()

    def _next(self):
        """yield the next line, but secretly
        keep 1 extra line for peeking.

        Every call creates a new generator, but they all share the file
        and the buffered line, so a new one continues where the previous
        one stopped.
        """
        for line in self.from_file:
            last = self._next_line
            self._next_line = line
            if last is not None:
                yield last
        # The input is exhausted: hand out the line that was held back
        # for peeking, otherwise the last line of the file is lost.
        last = self._next_line
        self._next_line = None
        if last is not None:
            yield last

    def _peek(self):
        """Return the line that _next() will yield next, without
        consuming it, or '' when no line is buffered (end of input).
        """
        if self._next_line is None:
            return ''
        return self._next_line

    def get_info(self):
        """Build the lookup maps and return the collected information.

        :return: The ChangesetInfo object filled in by the constructor.
        """
        self.info.create_maps()
        return self.info

    def _read_header(self):
        """Read the bzr header

        Lines are skipped until the first expected header line is found,
        then the remaining header lines must follow exactly. After that,
        entries are read up to the blank line which ends the header.

        :raises MalformedHeader: If the header is absent or incomplete,
                                 or does not match the expected one.
        """
        # Work on a copy: lines are popped off as they are matched, and
        # the list may belong to the common module.
        header = list(common.get_header())
        found = False
        for line in self._next():
            if found:
                if (line[:2] != '# ' or line[-1:] != '\n'
                        or line[2:-1] != header[0]):
                    raise MalformedHeader('Found a header, but it'
                        ' was improperly formatted')
                header.pop(0) # We read this line.
                if not header:
                    break # We found everything.
            elif (line[:1] == '#' and line[-1:] == '\n'):
                line = line[1:-1].strip()
                if line[:len(common.header_str)] == common.header_str:
                    if line == header[0]:
                        found = True
                    else:
                        raise MalformedHeader('Found what looks like'
                                ' a header, but did not match')
                    header.pop(0)
                    if not header:
                        break # The header was a single line.
        else:
            raise MalformedHeader('Did not find an opening header')

        for line in self._next():
            # The bzr header is terminated with a blank line
            # which does not start with '#'
            if line == '\n':
                break
            self._handle_next(line)

    def _read_next_entry(self, line, indent=1):
        """Read in a key-value pair

        :param line: The complete line, starting with '#' and ending
                     with a newline.
        :param indent: The number of spaces expected after the '#'.
        :return: (key, value). The spaces of the key are replaced by
                 underscores. The value is the text after ': ', or the
                 list returned by _read_many when the line has no value
                 of its own. (None, None) is returned for a blank line.
        :raises MalformedHeader: If the line does not start with '#' or
                                 does not contain a key.
        """
        if line[:1] != '#':
            raise MalformedHeader('Bzr header did not start with #')
        line = line[1:-1] # Remove the '#' and '\n'
        if line[:indent] == ' '*indent:
            line = line[indent:]
        if not line:
            return None, None# Ignore blank lines

        loc = line.find(': ')
        if loc != -1:
            key = line[:loc]
            value = line[loc+2:]
            if not value:
                value = self._read_many(indent=indent+3)
        elif line[-1:] == ':':
            key = line[:-1]
            value = self._read_many(indent=indent+3)
        else:
            raise MalformedHeader('While looking for key: value pairs,'
                    ' did not find the colon %r' % (line))

        key = key.replace(' ', '_')
        return key, value

    def _handle_next(self, line):
        """Parse one entry line, and store its value on self.info.

        A 'revision' entry starts a block which is read by
        _read_revision. Every other key must be the name of an attribute
        of self.info which is still None.

        :raises MalformedHeader: If the key is unknown, or has already
                                 been given a value.
        """
        key, value = self._read_next_entry(line, indent=1)
        if key is None:
            return

        if key == 'revision':
            self._read_revision(value)
        elif hasattr(self.info, key):
            if getattr(self.info, key) is None:
                setattr(self.info, key, value)
            else:
                raise MalformedHeader('Duplicated Key: %s' % key)
        else:
            # What do we do with a key we don't recognize
            raise MalformedHeader('Unknown Key: %s' % key)

    def _read_many(self, indent):
        """If a line ends with no entry, that means that it should be
        followed with multiple lines of values.

        This detects the end of the list, because it will be a line that
        does not start properly indented.

        :param indent: The number of spaces which follow the '#' on
                       every line of the list.
        :return: The list of values, without the prefix and without the
                 trailing newline. It is empty if the next line is not
                 indented as expected.
        """
        values = []
        start = '#' + (' '*indent)

        if self._peek()[:len(start)] != start:
            return values

        for line in self._next():
            values.append(line[len(start):-1])
            if self._peek()[:len(start)] != start:
                break
        return values

    def _read_one_patch(self):
        """Read in one patch, return the complete patch, along with
        the next line.

        :return: action, lines, do_continue. action is the text after
                 '*** ' on the first line of the patch (None if there is
                 no patch to read), lines are the remaining lines of the
                 patch, and do_continue tells whether another patch
                 follows.
        :raises MalformedPatches: If the patch does not start with '***'.
        """
        # Peek and see if there are no patches
        if self._peek()[:1] == '#':
            return None, [], False

        line = next(self._next(), None)
        if line is None:
            # The input ended before the footer. There is no patch to
            # return; the missing footer is reported by _read_footer.
            return None, [], False
        if line[:3] != '***':
            raise MalformedPatches('The first line of all patches'
                ' should be a bzr meta line "***"')
        action = line[4:-1]

        lines = []
        for line in self._next():
            lines.append(line)

            upcoming = self._peek()
            if upcoming[:3] == '***':
                return action, lines, True
            elif upcoming[:1] == '#':
                return action, lines, False
        return action, lines, False

    def _read_patches(self):
        """Read all the patches, and record them in self.info.actions
        as (action, lines) pairs.
        """
        do_continue = True
        while do_continue:
            action, lines, do_continue = self._read_one_patch()
            if action is not None:
                self.info.actions.append((action, lines))

    def _read_revision(self, rev_id):
        """Revision entries have extra information associated.

        The entries which follow, indented by four spaces, are stored on
        a new RevisionInfo which is appended to self.info.revisions.

        :param rev_id: The value of the 'revision' entry.
        :raises MalformedHeader: If a key is unknown, or has already
                                 been given a value.
        """
        rev_info = RevisionInfo(rev_id)
        start = '#    '
        for line in self._next():
            key,value = self._read_next_entry(line, indent=4)
            # The key is None for a blank line: there is nothing to
            # store, but the end of the block must still be checked.
            if key is not None:
                if hasattr(rev_info, key):
                    if getattr(rev_info, key) is None:
                        setattr(rev_info, key, value)
                    else:
                        raise MalformedHeader('Duplicated Key: %s' % key)
                else:
                    # What do we do with a key we don't recognize
                    raise MalformedHeader('Unknown Key: %s' % key)

            if self._peek()[:len(start)] != start:
                break

        self.info.revisions.append(rev_info)

    def _read_footer(self):
        """Read the rest of the meta information.

        The footer starts with a 'BEGIN BZR FOOTER' line, and its entries
        are read until the 'END BZR FOOTER' line, or until the end of the
        input if that line is missing.

        :raises MalformedFooter: If the footer is missing.
        """
        line = next(self._next(), None)
        # The newline is optional, the marker may end the input.
        if line is None or line.rstrip('\n') != '# BEGIN BZR FOOTER':
            raise MalformedFooter('Footer did not begin with BEGIN BZR FOOTER')

        for line in self._next():
            if line.rstrip('\n') == '# END BZR FOOTER':
                return
            self._handle_next(line)
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
387
388
def read_changeset(from_file):
    """Read in a changeset from a file-like object, and parse it.

    :param from_file: A file-like object which can be iterated over
                      line by line.
    :return: Whatever ChangesetReader.get_info() returns for the
             changeset found in from_file.
    """
    return ChangesetReader(from_file).get_info()
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
395
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
396
if __name__ == '__main__':
    import sys
    # Parse the changeset given on standard input, and print what was
    # read. The call form of print gives the same output as the print
    # statement for a single argument, and is also valid on Python 3.
    print(read_changeset(sys.stdin))