/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
1
#!/usr/bin/env python
2
"""\
3
Read in a changeset output, and process it into a Changeset object.
4
"""
5
6
import bzrlib, bzrlib.changeset
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
7
import pprint
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
8
import common
9
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
10
class BadChangeset(Exception):
    """Base class for the errors raised while reading a changeset."""


class MalformedHeader(BadChangeset):
    """Raised when the header or a 'key: value' entry cannot be parsed."""


class MalformedPatches(BadChangeset):
    """Raised when the patch section cannot be parsed."""


class MalformedFooter(BadChangeset):
    """Raised when the footer cannot be parsed."""
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
14
0.5.11 by John Arbash Meinel
Working on properly representing renames.
15
def _unescape(name):
16
    """Now we want to find the filename effected.
17
    Unfortunately the filename is written out as
18
    repr(filename), which means that it surrounds
19
    the name with quotes which may be single or double
20
    (single is preferred unless there is a single quote in
21
    the filename). And some characters will be escaped.
22
23
    TODO:   There has to be some pythonic way of undo-ing the
24
            representation of a string rather than using eval.
25
    """
26
    delimiter = name[0]
27
    if name[-1] != delimiter:
28
        raise BadChangeset('Could not properly parse the'
29
                ' filename: %r' % name)
30
    # We need to handle escaped hexadecimals too.
31
    return name[1:-1].replace('\"', '"').replace("\'", "'")
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
32
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
33
class RevisionInfo(object):
    """Gets filled out for each revision object that is read.

    Every attribute except rev_id starts out as None; the reader fills an
    attribute in when it meets the matching key in the changeset.
    """
    def __init__(self, rev_id):
        """Create an empty record for the revision rev_id."""
        self.rev_id = rev_id
        self.sha1 = None
        self.committer = None
        self.timestamp = None
        self.timezone = None
        self.inventory_id = None
        self.inventory_sha1 = None

        self.parents = None
        self.message = None

    def __str__(self):
        """Return a pretty-printed dump of all the attributes."""
        return pprint.pformat(self.__dict__)

    def as_revision(self):
        """Build a bzrlib Revision out of the collected values.

        timestamp and timezone are converted with float() and int(), so
        both must have been read. parents may be missing (None), in which
        case the revision gets no parent references.

        :return: The new Revision, with one RevisionReference appended
            to its parents for each entry of self.parents.
        """
        from bzrlib.revision import Revision, RevisionReference

        # A multi-line entry is stored as a list of lines, but a one-line
        # 'message: text' entry is stored as a plain string. Joining a
        # string would put a newline between every character.
        message = self.message
        if isinstance(message, (list, tuple)):
            message = '\n'.join(message)

        rev = Revision(revision_id=self.rev_id,
            committer=self.committer,
            timestamp=float(self.timestamp),
            timezone=int(self.timezone),
            inventory_id=self.inventory_id,
            inventory_sha1=self.inventory_sha1,
            message=message)

        # Same list-or-string duality as for the message; None means that
        # no parents entry was read at all.
        parents = self.parents
        if parents is None:
            parents = []
        elif not isinstance(parents, (list, tuple)):
            parents = [parents]

        for parent in parents:
            # Each parent entry is '<revision id>\t<sha1>'.
            rev_id, sha1 = parent.split('\t')
            rev.parents.append(RevisionReference(rev_id, sha1))

        return rev
66
67
68
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
69
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
70
class ChangesetInfo(object):
    """This is the intermediate class that gets filled out as
    the file is read.

    The attributes initialised to None are the ones the reader is allowed
    to fill in from 'key: value' entries; the lists and dictionaries are
    filled in by the reader (actions, revisions) or by create_maps().
    """
    def __init__(self):
        self.committer = None
        self.date = None
        self.message = None
        self.base = None
        self.base_sha1 = None

        self.revisions = []

        self.timestamp = None
        self.timezone = None

        self.tree_root_id = None
        self.file_ids = None
        self.old_file_ids = None

        self.actions = [] #this is the list of things that happened
        self.id2path = {} # A mapping from file id to path name
        self.path2id = {} # The reverse mapping
        self.id2parent = {} # A mapping from a given id to its parent id

        # The same three mappings, for the tree before the change.
        self.old_id2path = {}
        self.old_path2id = {}
        self.old_id2parent = {}

    def __str__(self):
        """Return a pretty-printed dump of all the attributes."""
        return pprint.pformat(self.__dict__)

    @staticmethod
    def _fill_maps(entries, id2path, path2id, id2parent):
        """Add every 'path<TAB>id<TAB>parent id' entry to the three maps."""
        for info in entries or ():
            path, f_id, parent_id = info.split('\t')
            id2path[f_id] = path
            path2id[path] = f_id
            id2parent[f_id] = parent_id

    def create_maps(self):
        """Go through the individual id sections, and generate the
        id2path and path2id maps.

        The old_* maps are rebuilt from scratch on every call, so calling
        this more than once does not leak entries of the new tree into
        the maps of the old tree.
        """
        # Rather than use an empty path, the changeset code seems
        # to like to use "./." for the tree root.
        root_path = './.'
        root_id = self.tree_root_id
        null_id = bzrlib.changeset.NULL_ID

        self.id2path[root_id] = root_path
        self.path2id[root_path] = root_id
        self.id2parent[root_id] = null_id

        # Both trees share the root entry, and nothing else.
        self.old_id2path = {root_id: root_path}
        self.old_path2id = {root_path: root_id}
        self.old_id2parent = {root_id: null_id}

        self._fill_maps(self.file_ids,
                self.id2path, self.path2id, self.id2parent)
        self._fill_maps(self.old_file_ids,
                self.old_id2path, self.old_path2id, self.old_id2parent)

    def get_changeset(self):
        """Create a changeset from the data contained within.

        The maps must have been generated with create_maps() first.

        :return: A bzrlib Changeset holding one entry for the tree root
            plus one entry for each recorded action.
        :raises BadChangeset: If an action line cannot be parsed, names an
            unknown action, or describes a rename whose old and new file
            ids differ.
        :raises KeyError: If an action names a path that is in neither
            file id section.
        """
        from bzrlib.changeset import Changeset, ChangesetEntry, PatchApply
        cset = Changeset()

        entry = ChangesetEntry(self.tree_root_id,
                bzrlib.changeset.NULL_ID, './.')
        cset.add_entry(entry)
        for info, lines in self.actions:
            # An action line looks like: <action> <kind> <quoted path(s)>
            parts = info.split(' ')
            if len(parts) < 2:
                raise BadChangeset('Could not parse the action: %r' % info)
            action = parts[0]
            # parts[1] is the kind of the entry; it is not needed here.
            extra = ' '.join(parts[2:])
            if action == 'renamed':
                paths = extra.split(' => ')
                if len(paths) != 2:
                    raise BadChangeset('Could not parse the rename: %r'
                            % info)
                old_path = _unescape(paths[0])
                new_path = _unescape(paths[1])

                new_id = self.path2id[new_path]
                old_id = self.old_path2id[old_path]
                # This validates input data, so it must not be an assert
                # (asserts are stripped when python runs with -O).
                if old_id != new_id:
                    raise BadChangeset('Renamed file changed its id:'
                            ' %r != %r' % (old_id, new_id))

                new_parent = self.id2parent[new_id]
                old_parent = self.old_id2parent[old_id]

                entry = ChangesetEntry(old_id, old_parent, old_path)
                entry.new_path = new_path
                entry.new_parent = new_parent
                if lines:
                    entry.contents_change = PatchApply(''.join(lines))
            elif action == 'removed':
                old_path = _unescape(extra)
                old_id = self.old_path2id[old_path]
                old_parent = self.old_id2parent[old_id]
                entry = ChangesetEntry(old_id, old_parent, old_path)
                entry.new_path = None
                entry.new_parent = None
                # Technically a removed should be a ReplaceContents()
                # Where you need to have the old contents
                # But at most we have a remove style patch, so no
                # contents change is recorded for a removal.
            elif action in ('added', 'modified'):
                new_path = _unescape(extra)
                new_id = self.path2id[new_path]
                new_parent = self.id2parent[new_id]
                entry = ChangesetEntry(new_id, new_parent, new_path)
                # NOTE(review): clearing path/parent makes sense for an
                # added file, but 'modified' has always been handled the
                # same way -- confirm that this is intended.
                entry.path = None
                entry.parent = None
                if lines:
                    # Technically an added should be a ReplaceContents()
                    # Where you need to have the old contents
                    # But at most we have an add style patch.
                    entry.contents_change = PatchApply(''.join(lines))
            else:
                raise BadChangeset('Unrecognized action: %r' % action)
            cset.add_entry(entry)
        return cset
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
202
203
class ChangesetReader(object):
    """This class reads in a changeset from a file, and collects
    everything it finds into a ChangesetInfo (see get_info()).

    The file is read one line at a time, while always keeping one extra
    line in self._next_line so that the parsing code can look at the
    upcoming line without consuming it.
    """
    def __init__(self, from_file):
        """Read in the changeset from the file.

        :param from_file: A file-like object (must have iterator support).
        :raises MalformedHeader: If the header or a 'key: value' entry
            cannot be parsed.
        :raises MalformedPatches: If a patch does not start with '***'.
        :raises MalformedFooter: If the footer does not begin properly.
        """
        object.__init__(self)
        self.from_file = from_file
        self._next_line = None

        self.info = ChangesetInfo()
        # We put the actual inventory ids in the footer, so that the patch
        # is easier to read for humans.
        # Unfortunately, that means we need to read everything before we
        # can create a proper changeset.
        self._read_header()
        self._read_patches()
        self._read_footer()

    @staticmethod
    def _chomp(line):
        """Return line without its trailing newline, if it has one."""
        if line[-1:] == '\n':
            return line[:-1]
        return line

    def _next(self):
        """yield the next line, but secretly
        keep 1 extra line for peeking.

        Once the file is exhausted the held back line is handed out as
        well, and self._next_line becomes None.
        """
        for line in self.from_file:
            last = self._next_line
            self._next_line = line
            if last is not None:
                yield last
        # Nothing is left to peek at, so release the last line instead of
        # silently dropping it.
        last = self._next_line
        if last is not None:
            self._next_line = None
            yield last

    def _read_line(self):
        """Consume and return a single line, or None at the end of file."""
        for line in self._next():
            return line
        return None

    def _peek_startswith(self, prefix):
        """Tell whether there is an upcoming line starting with prefix."""
        line = self._next_line
        return line is not None and line.startswith(prefix)

    def get_info(self):
        """Generate the id/path maps and return the ChangesetInfo.
        """
        self.info.create_maps()
        return self.info

    def _read_header(self):
        """Read the bzr header"""
        # Work on a copy: the lines are popped as they are matched, and
        # the list may belong to the common module.
        header = list(common.get_header())
        found = False
        for line in self._next():
            if found:
                if (line[:2] != '# ' or line[-1:] != '\n'
                        or line[2:-1] != header[0]):
                    raise MalformedHeader('Found a header, but it'
                        ' was improperly formatted')
                header.pop(0) # We read this line.
                if not header:
                    break # We found everything.
            elif (line[:1] == '#' and line[-1:] == '\n'):
                line = line[1:-1].strip()
                if line[:len(common.header_str)] == common.header_str:
                    if line == header[0]:
                        found = True
                    else:
                        raise MalformedHeader('Found what looks like'
                                ' a header, but did not match')
                    header.pop(0)
                    if not header:
                        break # The header was a single line.
        else:
            raise MalformedHeader('Did not find an opening header')

        for line in self._next():
            # The bzr header is terminated with a blank line
            # which does not start with '#'
            if line == '\n':
                break
            self._handle_next(line)

    def _read_next_entry(self, line, indent=1):
        """Read in a key-value pair

        :param line: The line holding the entry, starting with '#'.
        :param indent: How many spaces are expected after the '#'.
        :return: (key, value). The spaces in key are replaced with
            underscores. value is a string, or a list of strings when the
            value is spread over the following lines. A blank entry gives
            (None, None).
        :raises MalformedHeader: If the line does not start with '#', or
            does not contain a key followed by a colon.
        """
        if line[:1] != '#':
            raise MalformedHeader('Bzr header did not start with #')
        line = self._chomp(line[1:]) # Remove the '#' and '\n'
        if line[:indent] == ' '*indent:
            line = line[indent:]
        if not line:
            return None, None# Ignore blank lines

        loc = line.find(': ')
        if loc != -1:
            key = line[:loc]
            value = line[loc+2:]
            if not value:
                value = self._read_many(indent=indent+3)
        elif line[-1:] == ':':
            key = line[:-1]
            value = self._read_many(indent=indent+3)
        else:
            raise MalformedHeader('While looking for key: value pairs,'
                    ' did not find the colon %r' % (line))

        key = key.replace(' ', '_')
        return key, value

    def _set_once(self, target, key, value):
        """Store value in the attribute key of target.

        Only the instance attributes of target which are still None may
        be set, so that an entry can neither be given twice nor replace
        a method.

        :raises MalformedHeader: If target has no such instance attribute,
            or if the attribute is not None anymore.
        """
        fields = vars(target)
        if key not in fields:
            # What do we do with a key we don't recognize
            raise MalformedHeader('Unknown Key: %s' % key)
        if fields[key] is not None:
            raise MalformedHeader('Duplicated Key: %s' % key)
        setattr(target, key, value)

    def _handle_next(self, line):
        """Parse one entry line and record it in self.info.

        A 'revision' entry starts a nested revision section; anything
        else must be a still unset attribute of self.info.
        """
        key, value = self._read_next_entry(line, indent=1)
        if key is None:
            return

        if key == 'revision':
            self._read_revision(value)
        else:
            self._set_once(self.info, key, value)

    def _read_many(self, indent):
        """If a line ends with no entry, that means that it should be
        followed with multiple lines of values.

        This detects the end of the list, because it will be a line that
        does not start properly indented.

        :param indent: How many spaces follow the '#' on a value line.
        :return: The list of values, without prefix and newline.
        """
        values = []
        start = '#' + (' '*indent)

        while self._peek_startswith(start):
            line = self._read_line()
            values.append(self._chomp(line[len(start):]))
        return values

    def _read_one_patch(self):
        """Read in one patch, return the complete patch, along with
        whether another patch follows.

        :return: action, lines, do_continue. action is None when there
            is no patch to read. do_continue is True when the upcoming
            line starts another patch.
        :raises MalformedPatches: If the patch does not start with '***'.
        """
        # Peek and see if there are no patches
        if self._next_line is None or self._peek_startswith('#'):
            return None, [], False

        line = self._read_line()
        if line[:3] != '***':
            raise MalformedPatches('The first line of all patches'
                ' should be a bzr meta line "***"')
        action = self._chomp(line[4:])

        lines = []
        # The upcoming line is checked before it is consumed, so that a
        # patch without any content does not swallow the start of the
        # next patch or of the footer.
        while self._next_line is not None:
            if self._peek_startswith('***'):
                return action, lines, True
            if self._peek_startswith('#'):
                return action, lines, False
            lines.append(self._read_line())
        return action, lines, False

    def _read_patches(self):
        """Read every patch, and record them in self.info.actions."""
        do_continue = True
        while do_continue:
            action, lines, do_continue = self._read_one_patch()
            if action is not None:
                self.info.actions.append((action, lines))

    def _read_revision(self, rev_id):
        """Revision entries have extra information associated.

        The entries which belong to the revision are the following lines
        that start with '#' and four spaces.

        :raises MalformedHeader: If an entry is unknown or duplicated.
        """
        rev_info = RevisionInfo(rev_id)
        start = '#    '
        while self._peek_startswith(start):
            key, value = self._read_next_entry(self._read_line(), indent=4)
            if key is None:
                continue # A blank entry carries no information.
            self._set_once(rev_info, key, value)

        self.info.revisions.append(rev_info)

    def _read_footer(self):
        """Read the rest of the meta information.

        Reading stops at the '# END BZR FOOTER' line, or at the end of
        the file when there is no such line.

        :raises MalformedFooter: If the next line is not
            '# BEGIN BZR FOOTER'.
        """
        line = self._read_line()
        if line != '# BEGIN BZR FOOTER\n':
            raise MalformedFooter('Footer did not begin with BEGIN BZR FOOTER')

        for line in self._next():
            # The end marker may be the very last line of the file, in
            # which case its newline can be missing.
            if line in ('# END BZR FOOTER\n', '# END BZR FOOTER'):
                return
            self._handle_next(line)
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
405
406
def read_changeset(from_file):
    """Read in a changeset from a filelike object, and parse it.

    :param from_file: The file-like object to read; it is handed to
        ChangesetReader, which iterates over its lines.
    :return: The object returned by ChangesetReader.get_info().
    """
    reader = ChangesetReader(from_file)
    return reader.get_info()
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
413
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
414
if __name__ == '__main__':
    # When run as a script: parse the changeset given on stdin and dump
    # everything that was read.
    import sys
    # Called with a single argument, print(...) behaves like the print
    # statement on Python 2, and it is also valid syntax on Python 3.
    print(read_changeset(sys.stdin))