/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
1
#!/usr/bin/env python
2
"""\
3
Read in a changeset output, and process it into a Changeset object.
4
"""
5
6
import bzrlib, bzrlib.changeset
7
import common
8
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
9
class BadChangeset(Exception):
    """Base class for the errors raised while parsing a changeset."""


class MalformedHeader(BadChangeset):
    """A header line, or a header-style '# key: value' line, is invalid."""


class MalformedPatches(BadChangeset):
    """The patch section of the changeset is invalid."""


class MalformedFooter(BadChangeset):
    """Error type reserved for problems in the changeset footer."""
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
13
0.5.11 by John Arbash Meinel
Working on properly representing renames.
14
def _unescape(name):
15
    """Now we want to find the filename effected.
16
    Unfortunately the filename is written out as
17
    repr(filename), which means that it surrounds
18
    the name with quotes which may be single or double
19
    (single is preferred unless there is a single quote in
20
    the filename). And some characters will be escaped.
21
22
    TODO:   There has to be some pythonic way of undo-ing the
23
            representation of a string rather than using eval.
24
    """
25
    delimiter = name[0]
26
    if name[-1] != delimiter:
27
        raise BadChangeset('Could not properly parse the'
28
                ' filename: %r' % name)
29
    # We need to handle escaped hexadecimals too.
30
    return name[1:-1].replace('\"', '"').replace("\'", "'")
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
31
32
class ChangesetInfo(object):
    """This is the intermediate class that gets filled out as
    the file is read.

    The metadata attributes all start as None and are assigned when the
    matching key is read from the changeset; create_maps() then turns
    the id sections into lookup dictionaries.
    """
    def __init__(self):
        # Revision metadata; each stays None until its key is read.
        self.committer = None
        self.date = None
        self.revno = None
        self.revision = None
        self.revision_sha1 = None
        self.precursor = None
        self.precursor_sha1 = None
        self.precursor_revno = None

        # Id sections. create_maps() expects each to be None or an
        # iterable of 'path<TAB>file_id<TAB>parent_id' strings.
        self.tree_root_id = None
        self.file_ids = None
        self.directory_ids = None
        self.parent_ids = None

        self.actions = [] # this is the list of things that happened
        self.id2path = {} # A mapping from file id to path name
        self.path2id = {} # The reverse mapping
        self.id2parent = {} # A mapping from a given id to its parent id

    def __str__(self):
        """Return a pretty-printed dump of every attribute."""
        import pprint
        return pprint.pformat(self.__dict__)

    def create_maps(self):
        """Go through the individual id sections, and generate the
        id2path, path2id and id2parent maps.

        :raises BadChangeset: If an id entry does not consist of exactly
            three tab-separated fields (path, file id, parent id).
        """
        # Rather than use an empty path, the changeset code seems
        # to like to use "./." for the tree root.
        self.id2path[self.tree_root_id] = './.'
        self.path2id['./.'] = self.tree_root_id
        self.id2parent[self.tree_root_id] = bzrlib.changeset.NULL_ID

        for section in (self.file_ids, self.directory_ids, self.parent_ids):
            if section is None:
                # This section never appeared in the changeset.
                continue
            for entry in section:
                fields = entry.split('\t')
                if len(fields) != 3:
                    # Report through the module's own error hierarchy
                    # rather than a bare tuple-unpacking ValueError.
                    raise BadChangeset('Could not parse the id entry'
                            ' (expected path, id, parent id): %r' % entry)
                path, f_id, parent_id = fields
                self.id2path[f_id] = path
                self.path2id[path] = f_id
                self.id2parent[f_id] = parent_id
77
78
class ChangesetReader(object):
    """This class reads in a changeset from a file and collects what it
    finds into a ChangesetInfo (available as self.info).

    The file is consumed in three stages: the header, the patches, and
    the footer. All reading happens in the constructor.
    """
    def __init__(self, from_file):
        """Read in the changeset from the file.

        :param from_file: A file-like object (must have iterator support).
            It has to behave like an open file: iterating it again must
            continue where the previous iteration stopped.
        :raises MalformedHeader: If the header or an info line is invalid.
        :raises MalformedPatches: If a patch does not start with '***'.
        """
        object.__init__(self)
        self.from_file = from_file

        self.info = ChangesetInfo()
        # We put the actual inventory ids in the footer, so that the patch
        # is easier to read for humans.
        # Unfortunately, that means we need to read everything before we
        # can create a proper changeset.
        self._read_header()
        next_line = self._read_patches()
        if next_line is not None:
            self._read_footer(next_line)

    def get_changeset(self):
        """Create the actual changeset object.

        NOTE: this is still incomplete. Every recorded action is split
        into its action, kind and (unescaped) file names, but the parsed
        values are not stored anywhere yet, and the ChangesetInfo is
        returned instead of the Changeset that gets constructed.

        :return: self.info, after its id maps have been generated.
        """
        self.info.create_maps()
        # TODO: populate cset from the parsed actions and return it.
        cset = bzrlib.changeset.Changeset()

        for info, lines in self.info.actions:
            parts = info.split(' ')
            action = parts[0]
            kind = parts[1]
            extra = ' '.join(parts[2:])
            if action == 'renamed':
                old_name, new_name = extra.split(' => ')
                old_name = _unescape(old_name)
                new_name = _unescape(new_name)
            else:
                new_name = _unescape(extra)
        return self.info

    def _read_header(self):
        """Read the bzr header.

        The file must start with the lines returned by
        common.get_header(), each written as '# <text>\\n'. After that
        come info lines, up to and including the blank terminator line.

        :raises MalformedHeader: If an opening line is missing or does
            not match, or if one of the info lines is invalid.
        """
        header = common.get_header()
        lines = iter(self.from_file)
        for head_line in header:
            # Pull the file one line at a time instead of using zip(), so
            # that a file shorter than the expected header is reported
            # instead of being silently accepted.
            line = next(lines, None)
            if (line is None
                    or line[:2] != '# '
                    or line[-1] != '\n'
                    or line[2:-1] != head_line):
                raise MalformedHeader('Did not read the opening'
                    ' header information.')

        for line in self.from_file:
            if self._handle_info_line(line) is not None:
                break

    def _handle_info_line(self, line, in_footer=False):
        """Handle reading a single line.

        This may call itself, in the case that we read_multi,
        and then had a dangling line on the end.

        :param line: The raw line, including its trailing newline.
        :param in_footer: When True, the footer begin/end marker lines
            are skipped instead of being parsed as keys.
        :return: 'break' when the blank terminator line was reached
            (directly, or as the dangling line after a multi-line
            value); None otherwise.
        :raises MalformedHeader: If the line does not start with '# ',
            has no colon, or names an unknown or duplicated key.
        """
        # The bzr header is terminated with a blank line
        # which does not start with #
        next_line = None
        if line[:1] == '\n':
            return 'break'
        if line[:2] != '# ':
            raise MalformedHeader('Opening bzr header did not start with #')

        line = line[2:-1] # Remove the '# ' and the trailing newline
        if not line:
            return None # Ignore blank lines

        if in_footer and line in ('BEGIN BZR FOOTER', 'END BZR FOOTER'):
            return None

        loc = line.find(': ')
        if loc != -1:
            key = line[:loc]
            value = line[loc+2:]
            if not value:
                value, next_line = self._read_many()
        else:
            if line[-1:] == ':':
                # 'key:' with nothing after it starts a multi-line value.
                key = line[:-1]
                value, next_line = self._read_many()
            else:
                raise MalformedHeader('While looking for key: value pairs,'
                        ' did not find the colon %r' % (line))

        # Keys are written with spaces; the attributes use underscores.
        key = key.replace(' ', '_')
        if hasattr(self.info, key):
            if getattr(self.info, key) is None:
                setattr(self.info, key, value)
            else:
                raise MalformedHeader('Duplicated Key: %s' % key)
        else:
            # What do we do with a key we don't recognize
            raise MalformedHeader('Unknown Key: %s' % key)

        if next_line:
            # _read_many() had to read one line too far. Propagate the
            # result of handling it: if that line was the blank
            # terminator, the caller must stop reading as well.
            return self._handle_info_line(next_line, in_footer=in_footer)
        return None

    def _read_many(self):
        """If a line ends with no entry, that means that it should be
        followed with multiple lines of values.

        This detects the end of the list, because it will be a line that
        does not start with '#    '. Because it has to read that extra
        line, it returns the tuple: (values, next_line)

        :return: (values, next_line); next_line is None when the file
            ended while the list was being read.
        """
        values = []
        for line in self.from_file:
            if line[:5] != '#    ':
                return values, line
            # Strip the '#    ' prefix and the trailing newline.
            values.append(line[5:-1])
        return values, None

    def _read_one_patch(self, first_line=None):
        """Read in one patch, return the complete patch, along with
        the next line.

        :param first_line: A line that an earlier call already read from
            the file, to be used as the first line of this patch.
        :return: action, lines, next_line, do_continue
            action is the text of the '***' line (None when the first
            line starts with '#', i.e. there is no patch here), lines
            are the patch body lines, next_line is the line that ended
            the patch (None at end of file), and do_continue is True
            when next_line starts another patch.
        :raises MalformedPatches: If the first line starts with neither
            '***' nor '#'.
        """
        first = True
        action = None

        def parse_firstline(line):
            # A '#' line means the patches are over; report "no action".
            if line[:1] == '#':
                return None
            if line[:3] != '***':
                raise MalformedPatches('The first line of all patches'
                    ' should be a bzr meta line "***"')
            # Drop the '*** ' prefix and the trailing newline.
            return line[4:-1]

        if first_line is not None:
            action = parse_firstline(first_line)
            first = False
            if action is None:
                return None, [], first_line, False

        lines = []
        for line in self.from_file:
            if first:
                action = parse_firstline(line)
                first = False
                if action is None:
                    return None, [], line, False
            else:
                if line[:3] == '***':
                    # The next patch starts here.
                    return action, lines, line, True
                elif line[:1] == '#':
                    # A comment line ends the patch section.
                    return action, lines, line, False
                lines.append(line)
        return action, lines, None, False

    def _read_patches(self):
        """Read every patch and append (action, lines) to info.actions.

        :return: The first line following the patches, or None if the
            file ended.
        """
        next_line = None
        do_continue = True
        while do_continue:
            action, lines, next_line, do_continue = \
                    self._read_one_patch(next_line)
            if action is not None:
                self.info.actions.append((action, lines))
        return next_line

    def _read_footer(self, first_line=None):
        """Read the rest of the meta information.

        :param first_line:  The previous step iterates past what it
                            can handle. That extra line is given here.
        """
        if first_line is not None:
            if self._handle_info_line(first_line, in_footer=True) is not None:
                return
        for line in self.from_file:
            if self._handle_info_line(line, in_footer=True) is not None:
                break
256
257
258
def read_changeset(from_file):
    """Read in a changeset from a filelike object and parse it.

    :param from_file: A file-like object; it is read by iterating over
        its lines, so it must have iterator support.
    :return: Whatever ChangesetReader.get_changeset() produces for the
        file. The result is returned to the caller rather than printed.
    """
    cr = ChangesetReader(from_file)
    return cr.get_changeset()
264