/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
1
#!/usr/bin/env python
2
"""\
3
Read in a changeset output, and process it into a Changeset object.
4
"""
5
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
6
import os
7
import pprint
8
from cStringIO import StringIO
9
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
10
from bzrlib.tree import Tree
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
11
from bzrlib.trace import mutter, warning
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
12
from bzrlib.errors import BzrError
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
13
from bzrlib.xml5 import serializer_v5
14
from bzrlib.osutils import sha_file, sha_string
15
from bzrlib.revision import Revision
16
from bzrlib.inventory import Inventory, InventoryEntry
17
18
from common import decode, get_header, header_str
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
19
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
20
class BadChangeset(Exception):
    """Base class for the errors raised while parsing a changeset."""


class MalformedHeader(BadChangeset):
    """The changeset header is missing or is not formatted as expected."""


class MalformedPatches(BadChangeset):
    """Error for the patches section of a changeset.

    NOTE(review): not raised anywhere in the visible part of this file;
    presumably used by the patch reader.
    """


class MalformedFooter(BadChangeset):
    """Error for the footer section of a changeset.

    NOTE(review): not raised anywhere in the visible part of this file;
    presumably used by the footer reader.
    """
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
24
0.5.11 by John Arbash Meinel
Working on properly representing renames.
25
def _unescape(name):
26
    """Now we want to find the filename effected.
27
    Unfortunately the filename is written out as
28
    repr(filename), which means that it surrounds
29
    the name with quotes which may be single or double
30
    (single is preferred unless there is a single quote in
31
    the filename). And some characters will be escaped.
32
33
    TODO:   There has to be some pythonic way of undo-ing the
34
            representation of a string rather than using eval.
35
    """
36
    delimiter = name[0]
37
    if name[-1] != delimiter:
38
        raise BadChangeset('Could not properly parse the'
39
                ' filename: %r' % name)
40
    # We need to handle escaped hexadecimals too.
41
    return name[1:-1].replace('\"', '"').replace("\'", "'")
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
42
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
43
class RevisionInfo(object):
    """Raw header data for one revision, filled in while reading.

    All fields except revision_id start out as None and are assigned by
    the reader; parent_sha1s is populated by as_revision().
    """

    def __init__(self, revision_id):
        """Create an empty record for revision_id."""
        self.revision_id = revision_id
        self.sha1 = self.committer = self.date = None
        self.timestamp = self.timezone = self.inventory_sha1 = None

        # Entries of the form '<revision_id> <sha1>' (see as_revision).
        self.parents = None
        # Maps parent revision id -> sha1, filled in by as_revision().
        self.parent_sha1s = {}
        self.message = None

    def __str__(self):
        return pprint.pformat(vars(self))

    def as_revision(self):
        """Build a Revision object from the collected fields.

        The message is expected to be a sequence of lines; they are
        joined with newlines. As a side effect every parent entry is
        split into its id and sha1, and the sha1 is recorded in
        self.parent_sha1s.

        :return: The new Revision, with parent_ids filled in.
        """
        fields = dict(
            revision_id=self.revision_id,
            committer=self.committer,
            timestamp=float(self.timestamp),
            timezone=int(self.timezone),
            inventory_sha1=self.inventory_sha1,
            message='\n'.join(self.message),
        )
        revision = Revision(**fields)

        # A missing or empty parent list simply means no parents.
        for entry in self.parents or ():
            parent_id, parent_sha1 = entry.split()
            revision.parent_ids.append(parent_id)
            self.parent_sha1s[parent_id] = parent_sha1

        return revision
77
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
78
class ChangesetInfo(object):
    """The meta information of a changeset.

    This is the data that allows the revision or inventory XML to be
    recreated.
    """

    def __init__(self):
        self.committer = self.date = self.message = None
        self.base = self.base_sha1 = None

        # A list of RevisionInfo objects
        self.revisions = []

        self.actions = []

        # The next entries are created during complete_info() and
        # other post-read functions.

        # A list of real Revision objects
        self.real_revisions = []

        self.timestamp = self.timezone = None

    def __str__(self):
        return pprint.pformat(vars(self))

    def complete_info(self):
        """Fill in everything that can be derived from what was read.

        Revisions without their own timestamp, message or committer
        inherit the changeset-wide values, and self.real_revisions is
        rebuilt from self.revisions via as_revision().

        When no base was given, the base is taken to be the first parent
        of the first revision. Note that this raises IndexError when
        there are no revisions at all, and NotImplementedError when that
        first revision does have a parent, because base_sha1 cannot be
        determined here.
        """
        from common import unpack_highres_date

        # Put in all of the guessable information.
        if self.date and not self.timestamp:
            self.timestamp, self.timezone = unpack_highres_date(self.date)

        self.real_revisions = []
        for rev_info in self.revisions:
            if rev_info.timestamp is None:
                if rev_info.date is None:
                    # Nothing of its own: inherit the changeset values.
                    rev_info.timestamp = self.timestamp
                    rev_info.timezone = self.timezone
                else:
                    rev_info.timestamp, rev_info.timezone = \
                            unpack_highres_date(rev_info.date)
            if rev_info.message is None and self.message:
                rev_info.message = self.message
            if rev_info.committer is None and self.committer:
                rev_info.committer = self.committer
            self.real_revisions.append(rev_info.as_revision())

        if self.base is not None:
            return

        # When we don't have a base, then the real base
        # is the first parent of the first revision listed
        first = self.real_revisions[0]
        if len(first.parent_ids) != 0:
            self.base = first.parent_ids[0]
            # In general, if self.base is None, self.base_sha1 should
            # also be None
            raise NotImplementedError('Cannot determine self.base_sha1, need some sort of branch')

        # There is no base listed, and the lowest revision doesn't
        # have a parent, so this is probably against the empty tree
        # and thus base truly is None
        self.base = None
        self.base_sha1 = None

    def _get_target(self):
        """Return the target revision."""
        # Prefer the completed revisions; fall back to the raw entries.
        for candidates in (self.real_revisions, self.revisions):
            if len(candidates) > 0:
                return candidates[-1].revision_id
        return None

    target = property(_get_target, doc='The target revision id')
157
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
158
class ChangesetReader(object):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
159
    """This class reads in a changeset from a file, and returns
160
    a Changeset object, which can then be applied against a tree.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
161
    """
162
    def __init__(self, from_file):
        """Read in and validate the changeset from the file.

        The whole file is consumed here: the actual inventory ids live
        in the footer (so the patch is easier to read for humans), which
        means everything has to be read before a proper changeset can
        be created.

        :param from_file: A file-like object (must have iterator support).
        """
        object.__init__(self)

        # One line of look-ahead kept by _next().
        self._next_line = None
        self.from_file = from_file
        self.info = ChangesetInfo()

        self._read()
        self._validate()
178
179
    def _read(self):
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
180
        self._read_header()
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
181
        self._read_patches()
182
        self._read_footer()
183
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
184
    def _validate(self):
185
        """Make sure that the information read in makes sense
186
        and passes appropriate checksums.
187
        """
188
        # Fill in all the missing blanks for the revisions
189
        # and generate the real_revisions list.
190
        self.info.complete_info()
191
        self._validate_revisions()
192
193
    def _validate_revisions(self):
194
        """Make sure all revision entries match their checksum."""
195
196
        # This is a mapping from each revision id to it's sha hash
197
        rev_to_sha1 = {}
198
199
        for rev, rev_info in zip(self.info.real_revisions, self.info.revisions):
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
200
            assert rev.revision_id == rev_info.revision_id
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
201
            sio = StringIO()
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
202
            serializer_v5.write_revision(rev, sio)
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
203
            sio.seek(0)
204
            sha1 = sha_file(sio)
205
            if sha1 != rev_info.sha1:
206
                raise BzrError('Revision checksum mismatch.'
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
207
                    ' For revision_id {%s} supplied sha1 (%s) != measured (%s)'
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
208
                    % (rev.revision_id, rev_info.sha1, sha1))
209
            if rev_to_sha1.has_key(rev.revision_id):
210
                raise BzrError('Revision {%s} given twice in the list'
211
                        % (rev.revision_id))
212
            rev_to_sha1[rev.revision_id] = sha1
213
214
        # Now that we've checked all the sha1 sums, we can make sure that
215
        # at least for the small list we have, all of the references are
216
        # valid.
217
        for rev in self.info.real_revisions:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
218
            for p_id in rev.parent_ids:
219
                if p_id in rev_to_sha1:
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
220
                    if parent.revision_sha1 != rev_to_sha1[parent.revision_id]:
221
                        raise BzrError('Parent revision checksum mismatch.'
0.5.63 by John Arbash Meinel
Moving the validation into part of the reading.
222
                                ' A parent was referenced with an'
223
                                ' incorrect checksum'
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
224
                                ': {%r} %s != %s' % (parent.revision_id,
0.5.63 by John Arbash Meinel
Moving the validation into part of the reading.
225
                                            parent.revision_sha1,
226
                                            rev_to_sha1[parent.revision_id]))
227
228
    def _validate_references_from_branch(self, branch):
        """Validate every sha1 we can against the given branch.

        Collects the revision and inventory sha1 hashes referenced by
        the changeset (the base, each contained revision, and each
        recorded parent) and compares them with what the branch reports
        for the revisions it has.

        :param branch: Object providing has_revision(),
            get_revision_sha1() and get_inventory_sha1().
        :raises BzrError: If a null revision carries a sha1, if one id is
            referenced with two different hashes, or if a hash differs
            from the one stored in the branch.
        """
        rev_to_sha = {}
        inv_to_sha = {}

        def add_sha(d, revision_id, sha1):
            """Record revision_id -> sha1 in d, rejecting conflicts."""
            if revision_id is None:
                if sha1 is not None:
                    raise BzrError('A Null revision should always'
                        ' have a null sha1 hash')
                return
            if revision_id in d:
                # This really should have been validated as part
                # of _validate_revisions but lets do it again
                if sha1 != d[revision_id]:
                    raise BzrError('** Revision %r referenced with 2 different'
                            ' sha hashes %s != %s' % (revision_id,
                                sha1, d[revision_id]))
            else:
                d[revision_id] = sha1

        add_sha(rev_to_sha, self.info.base, self.info.base_sha1)
        # All of the contained revisions were checked
        # in _validate_revisions
        checked = {}
        for rev_info in self.info.revisions:
            checked[rev_info.revision_id] = True
            add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)

        # Only the RevisionInfo half is needed, but pairing keeps the walk
        # limited to revisions that were actually completed.
        for _rev, rev_info in zip(self.info.real_revisions, self.info.revisions):
            add_sha(inv_to_sha, rev_info.revision_id, rev_info.inventory_sha1)
            for p_id, sha1 in rev_info.parent_sha1s.items():
                add_sha(rev_to_sha, p_id, sha1)

        count = 0
        missing = {}
        for revision_id, sha1 in rev_to_sha.items():
            if branch.has_revision(revision_id):
                local_sha1 = branch.get_revision_sha1(revision_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For revision id {%s}'
                            ' local: %s, cset: %s' % (revision_id, local_sha1, sha1))
                else:
                    count += 1
            elif revision_id not in checked:
                missing[revision_id] = sha1

        for inv_id, sha1 in inv_to_sha.items():
            if branch.has_revision(inv_id):
                # TODO: Currently branch.get_inventory_sha1() just returns the value
                # that is stored in the revision text. Which is *really* bogus, because
                # that means we aren't validating the actual text, just that we wrote
                # and read the string. But for now, what the hell.
                local_sha1 = branch.get_inventory_sha1(inv_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For inventory id {%s}'
                            ' local: %s, cset: %s' % (inv_id, local_sha1, sha1))
                else:
                    count += 1

        if len(missing) > 0:
            # I don't know if this is an error yet
            warning('Not all revision hashes could be validated.'
                    ' Unable to validate %d hashes' % len(missing))
        mutter('Verified %d sha hashes for the changeset.' % count)
295
296
    def _validate_inventory(self, inv):
        """Check that the inventory hashes to the recorded sha1.

        At this point we should have generated the ChangesetTree, so the
        inventory is serialized with serializer_v5 and its sha1 compared
        with the inventory_sha1 of the target revision.

        :param inv: The inventory built from the changeset tree.
        :raises BzrError: If the hashes differ. In that case the
            serialized inventory is also written to ',,bogus-inv' in the
            current directory for inspection.
        """
        assert inv is not None

        # Now we should have a complete inventory entry.
        sio = StringIO()
        serializer_v5.write_inventory(inv, sio)
        sio.seek(0)
        sha1 = sha_file(sio)
        # Target revision is the last entry in the real_revisions list
        rev = self.info.real_revisions[-1]
        if sha1 != rev.inventory_sha1:
            # Keep the debugging dump, but make sure the handle is closed
            # (and the data flushed) before the error propagates.
            dump = open(',,bogus-inv', 'wb')
            try:
                dump.write(sio.getvalue())
            finally:
                dump.close()
            raise BzrError('Inventory sha hash mismatch.')
313
0.5.63 by John Arbash Meinel
Moving the validation into part of the reading.
314
        
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
315
    def get_changeset(self, branch):
        """Return the meta information and a ChangesetTree.

        The two can be used to populate the local stores and the working
        tree, respectively. Before returning, the sha1 references are
        validated against the branch and the inventory of the resulting
        tree is validated against the recorded hash.

        :param branch: The branch the changeset is read against; its
            revision_tree() for the changeset base is what the
            ChangesetTree is built on.
        :return: A tuple (ChangesetInfo, ChangesetTree).
        """
        self._validate_references_from_branch(branch)

        base_tree = branch.revision_tree(self.info.base)
        cset_tree = ChangesetTree(base_tree)
        self._update_tree(cset_tree)

        self._validate_inventory(cset_tree.inventory)

        return self.info, cset_tree
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
327
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
328
    def _next(self):
329
        """yield the next line, but secretly
330
        keep 1 extra line for peeking.
331
        """
332
        for line in self.from_file:
333
            last = self._next_line
334
            self._next_line = line
335
            if last is not None:
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
336
                #mutter('yielding line: %r' % last)
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
337
                yield last
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
338
        last = self._next_line
339
        self._next_line = None
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
340
        #mutter('yielding line: %r' % last)
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
341
        yield last
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
342
343
    def _read_header(self):
        """Read the bzr header.

        Lines are skipped until one starting with '#' matches the first
        expected header line; every following line must then match the
        remaining expected lines exactly. After that, entries are handed
        to _handle_next() until the blank line that ends the header.

        :raises MalformedHeader: If no opening header is found, or a
            header line does not match what get_header() expects.
        """
        expected = get_header()
        in_header = False
        for line in self._next():
            if in_header:
                # not all mailers will keep trailing whitespace
                if line == '#\n':
                    line = '# \n'
                well_formed = line.startswith('# ') and line.endswith('\n')
                if not well_formed or decode(line[2:-1]) != expected[0]:
                    raise MalformedHeader('Found a header, but it'
                        ' was improperly formatted')
                expected.pop(0) # We read this line.
                if not expected:
                    break # We found everything.
            elif line.startswith('#') and line.endswith('\n'):
                text = decode(line[1:-1].strip())
                if text[:len(header_str)] != header_str:
                    # Some other comment line before the header; skip it.
                    continue
                if text != expected[0]:
                    raise MalformedHeader('Found what looks like'
                            ' a header, but did not match')
                in_header = True
                expected.pop(0)
        else:
            raise MalformedHeader('Did not find an opening header')

        for line in self._next():
            # The bzr header is terminated with a blank line
            # which does not start with '#'
            if line == '\n':
                break
            self._handle_next(line)
377
378
    def _read_next_entry(self, line, indent=1):
        """Read in a key-value pair.

        The line must start with '#'. After removing that, the trailing
        character and up to `indent` leading spaces, it is split on the
        first ': '. When nothing follows the colon, the value is read
        from the following lines with _read_many(), at indent + 3.

        :param line: The raw header line.
        :param indent: Number of spaces expected after the '#'.
        :return: (key, value) with spaces in the key replaced by
            underscores, or (None, None) for a blank entry.
        :raises MalformedHeader: If the line does not start with '#' or
            contains no colon.
        """
        if not line.startswith('#'):
            raise MalformedHeader('Bzr header did not start with #')
        text = decode(line[1:-1]) # Remove the '#' and '\n'
        if text[:indent] == ' ' * indent:
            text = text[indent:]
        if not text:
            return None, None# Ignore blank lines

        sep = text.find(': ')
        if sep == -1:
            if text[-1:] != ':':
                raise MalformedHeader('While looking for key: value pairs,'
                        ' did not find the colon %r' % (text))
            # 'key:' alone on the line: the value follows on later lines.
            key = text[:-1]
            value = self._read_many(indent=indent+3)
        else:
            key = text[:sep]
            # An empty inline value also means a multi-line value follows.
            value = text[sep+2:] or self._read_many(indent=indent+3)

        key = key.replace(' ', '_')
        #mutter('found %s: %s' % (key, value))
        return key, value
405
406
    def _handle_next(self, line):
407
        key, value = self._read_next_entry(line, indent=1)
408
        if key is None:
409
            return
410
411
        if key == 'revision':
412
            self._read_revision(value)
413
        elif hasattr(self.info, key):
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
414
            if getattr(self.info, key) is None:
415
                setattr(self.info, key, value)
416
            else:
417
                raise MalformedHeader('Duplicated Key: %s' % key)
418
        else:
419
            # What do we do with a key we don't recognize
420
            raise MalformedHeader('Unknown Key: %s' % key)
421
        
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
422
    def _read_many(self, indent):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
423
        """If a line ends with no entry, that means that it should be
424
        followed with multiple lines of values.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
425
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
426
        This detects the end of the list, because it will be a line that
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
427
        does not start properly indented.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
428
        """
429
        values = []
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
430
        start = '#' + (' '*indent)
431
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
432
        if self._next_line is None or self._next_line[:len(start)] != start:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
433
            return values
434
435
        for line in self._next():
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
436
            values.append(decode(line[len(start):-1]))
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
437
            if self._next_line is None or self._next_line[:len(start)] != start:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
438
                break
439
        return values
440
441
    def _read_one_patch(self):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
442
        """Read in one patch, return the complete patch, along with
443
        the next line.
444
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
445
        :return: action, lines, do_continue
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
446
        """
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
447
        #mutter('_read_one_patch: %r' % self._next_line)
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
448
        # Peek and see if there are no patches
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
449
        if self._next_line is None or self._next_line.startswith('#'):
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
450
            return None, [], False
451
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
452
        first = True
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
453
        lines = []
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
454
        for line in self._next():
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
455
            if first:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
456
                if not line.startswith('==='):
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
457
                    raise MalformedPatches('The first line of all patches'
0.5.100 by John Arbash Meinel
Switching from *** to ===
458
                        ' should be a bzr meta line "==="'
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
459
                        ': %r' % line)
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
460
                action = decode(line[4:-1])
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
461
            if self._next_line is not None and self._next_line.startswith('==='):
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
462
                return action, lines, True
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
463
            elif self._next_line is None or self._next_line.startswith('#'):
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
464
                return action, lines, False
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
465
466
            if first:
467
                first = False
468
            else:
469
                lines.append(line)
470
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
471
        return action, lines, False
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
472
            
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
473
    def _read_patches(self):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
474
        do_continue = True
475
        while do_continue:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
476
            action, lines, do_continue = self._read_one_patch()
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
477
            if action is not None:
478
                self.info.actions.append((action, lines))
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
479
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
480
    def _read_revision(self, revision_id):
        """Revision entries have extra information associated.

        The lines following a 'revision' entry that start with '#' and
        four spaces are read as key/value pairs and stored as attributes
        of a new RevisionInfo, which is then appended to
        self.info.revisions.

        :param revision_id: The id given on the 'revision' line.
        :raises MalformedHeader: for a key that RevisionInfo does not
            have, or for a key whose attribute has already been set.
        """
        rev_info = RevisionInfo(revision_id)
        start = '#    '
        for line in self._next():
            key, value = self._read_next_entry(line, indent=4)
            # _read_next_entry() reports blank lines as (None, None).
            # hasattr() refuses a non-string name, so skip those here, but
            # still fall through to the end-of-block test below so that no
            # line belonging to the next entry gets consumed.
            if key is not None:
                if not hasattr(rev_info, key):
                    # What do we do with a key we don't recognize
                    raise MalformedHeader('Unknown Key: %s' % key)
                if getattr(rev_info, key) is not None:
                    raise MalformedHeader('Duplicated Key: %s' % key)
                setattr(rev_info, key, value)

            if self._next_line is None or self._next_line[:len(start)] != start:
                break

        self.info.revisions.append(rev_info)
502
503
    def _read_footer(self):
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
504
        """Read the rest of the meta information.
505
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
506
        :param first_line:  The previous step iterates past what it
507
                            can handle. That extra line is given here.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
508
        """
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
509
        for line in self._next():
510
            self._handle_next(line)
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
511
            if self._next_line is None or self._next_line.startswith('#'):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
512
                break
513
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
514
    def _update_tree(self, cset_tree):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
515
        """This fills out a ChangesetTree based on the information
516
        that was read in.
517
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
518
        :param cset_tree: A ChangesetTree to update with the new information.
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
519
        """
520
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
521
        def get_rev_id(info, file_id, kind):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
522
            if info is not None:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
523
                if not info.starts_with('last-changed:'):
524
                    raise BzrError("Last changed revision should start with 'last-changed:'"
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
525
                        ': %r' % info)
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
526
                revision_id = decode(info[13:])
527
            elif cset_tree._last_changed_revision_ids.has_key(file_id):
528
                return cset_tree._last_changed_revision_ids[file_id]
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
529
            else:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
530
                revision_id = self.info.revisions[-1].revision_id
531
            cset_tree.note_last_changed(file_id, revision_id)
532
            return revision_id
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
533
534
        def renamed(kind, extra, lines):
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
535
            info = extra.split(' // ')
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
536
            if len(info) < 2:
537
                raise BzrError('renamed action lines need both a from and to'
538
                        ': %r' % extra)
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
539
            old_path = info[0]
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
540
            if info[1].startswith('=> '):
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
541
                new_path = info[1][3:]
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
542
            else:
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
543
                new_path = info[1]
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
544
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
545
            file_id = cset_tree.path2id(old_path)
0.5.103 by John Arbash Meinel
Updated to having a changeset specific message.
546
            # print '%r %r %r' % (old_path, new_path, file_id)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
547
            if len(info) > 2:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
548
                revision = get_rev_id(info[2], file_id, kind)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
549
            else:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
550
                revision = get_rev_id(None, file_id, kind)
551
            cset_tree.note_rename(old_path, new_path)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
552
            if lines:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
553
                cset_tree.note_patch(new_path, ''.join(lines))
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
554
555
        def removed(kind, extra, lines):
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
556
            info = extra.split(' // ')
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
557
            if len(info) > 1:
558
                # TODO: in the future we might allow file ids to be
559
                # given for removed entries
560
                raise BzrError('removed action lines should only have the path'
561
                        ': %r' % extra)
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
562
            path = info[0]
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
563
            cset_tree.note_deletion(path)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
564
565
        def added(kind, extra, lines):
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
566
            info = extra.split(' // ')
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
567
            if len(info) <= 1:
568
                raise BzrError('add action lines require the path and file id'
569
                        ': %r' % extra)
570
            elif len(info) > 3:
571
                raise BzrError('add action lines have fewer than 3 entries.'
572
                        ': %r' % extra)
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
573
            path = info[0]
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
574
            if not info[1].startswith('file-id:'):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
575
                raise BzrError('The file-id should follow the path for an add'
576
                        ': %r' % extra)
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
577
            file_id = info[1][8:]
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
578
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
579
            cset_tree.note_id(file_id, path, kind)
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
580
            if kind == 'directory':
581
                return
582
            if len(info) > 2:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
583
                revision = get_rev_id(info[2], file_id, kind)
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
584
            else:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
585
                revision = get_rev_id(None, file_id, kind)
586
            cset_tree.note_patch(path, ''.join(lines))
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
587
588
        def modified(kind, extra, lines):
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
589
            info = extra.split(' // ')
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
590
            if len(info) < 1:
591
                raise BzrError('modified action lines have at least'
592
                        'the path in them: %r' % extra)
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
593
            path = info[0]
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
594
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
595
            file_id = cset_tree.path2id(path)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
596
            if len(info) > 1:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
597
                revision = get_rev_id(info[1], file_id, kind)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
598
            else:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
599
                revision = get_rev_id(None, file_id, kind)
600
            cset_tree.note_patch(path, ''.join(lines))
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
601
            
602
603
        valid_actions = {
604
            'renamed':renamed,
605
            'removed':removed,
606
            'added':added,
607
            'modified':modified
608
        }
609
        for action_line, lines in self.info.actions:
610
            first = action_line.find(' ')
611
            if first == -1:
612
                raise BzrError('Bogus action line'
613
                        ' (no opening space): %r' % action_line)
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
614
            second = action_line.find(' ', first+1)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
615
            if second == -1:
616
                raise BzrError('Bogus action line'
617
                        ' (missing second space): %r' % action_line)
618
            action = action_line[:first]
619
            kind = action_line[first+1:second]
620
            if kind not in ('file', 'directory'):
621
                raise BzrError('Bogus action line'
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
622
                        ' (invalid object kind %r): %r' % (kind, action_line))
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
623
            extra = action_line[second+1:]
624
625
            if action not in valid_actions:
626
                raise BzrError('Bogus action line'
627
                        ' (unrecognized action): %r' % action_line)
628
            valid_actions[action](kind, extra, lines)
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
629
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
630
def read_changeset(from_file, branch):
    """Parse a changeset out of an iterable object (such as a file object).

    This wraps from_file in a ChangesetReader and returns the result of
    its get_changeset(branch) call.

    :param from_file: A file-like object to read the changeset information.
    :param branch: This will be used to build the changeset tree, it needs
                   to contain the base of the changeset. (Which you probably
                   won't know about until after the changeset is parsed.)
    """
    return ChangesetReader(from_file).get_changeset(branch)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
640
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
641
class ChangesetTree(Tree):
642
    def __init__(self, base_tree):
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
643
        self.base_tree = base_tree
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
644
        self._renamed = {} # Mapping from old_path => new_path
645
        self._renamed_r = {} # new_path => old_path
646
        self._new_id = {} # new_path => new_id
647
        self._new_id_r = {} # new_id => new_path
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
648
        self._kinds = {} # new_id => kind
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
649
        self._last_changed_revision_ids = {} # new_id => revision_id
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
650
        self.patches = {}
0.5.48 by aaron.bentley at utoronto
Implemented deletion for ChangesetTrees
651
        self.deleted = []
0.5.52 by aaron.bentley at utoronto
Make contents-addressing configurable
652
        self.contents_by_id = True
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
653
        self._inventory = None
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
654
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
655
    def __str__(self):
656
        return pprint.pformat(self.__dict__)
657
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
658
    def note_rename(self, old_path, new_path):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
659
        """A file/directory has been renamed from old_path => new_path"""
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
660
        assert not self._renamed.has_key(old_path)
661
        assert not self._renamed_r.has_key(new_path)
662
        self._renamed[new_path] = old_path
663
        self._renamed_r[old_path] = new_path
664
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
665
    def note_id(self, new_id, new_path, kind='file'):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
666
        """Files that don't exist in base need a new id."""
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
667
        self._new_id[new_path] = new_id
668
        self._new_id_r[new_id] = new_path
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
669
        self._kinds[new_id] = kind
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
670
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
671
    def note_last_changed(self, file_id, revision_id):
672
        if (self._last_changed_revision_ids.has_key(file_id)
673
                and self._last_changed_revision_ids[file_id] != revision_id):
674
            raise BzrError('Mismatched last-changed revision for file_id {%s}'
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
675
                    ': %s != %s' % (file_id,
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
676
                                    self._last_changed_revision_ids[file_id],
677
                                    revision_id))
678
        self._last_changed_revision_ids[file_id] = revision_id
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
679
0.5.44 by aaron.bentley at utoronto
Got get_file working for new files
680
    def note_patch(self, new_path, patch):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
681
        """There is a patch for a given filename."""
0.5.44 by aaron.bentley at utoronto
Got get_file working for new files
682
        self.patches[new_path] = patch
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
683
0.5.48 by aaron.bentley at utoronto
Implemented deletion for ChangesetTrees
684
    def note_deletion(self, old_path):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
685
        """The file at old_path has been deleted."""
0.5.48 by aaron.bentley at utoronto
Implemented deletion for ChangesetTrees
686
        self.deleted.append(old_path)
687
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
688
    def old_path(self, new_path):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
689
        """Get the old_path (path in the base_tree) for the file at new_path"""
0.6.1 by Aaron Bentley
Fleshed out MockTree, fixed all test failures
690
        assert new_path[:1] not in ('\\', '/')
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
691
        old_path = self._renamed.get(new_path)
692
        if old_path is not None:
693
            return old_path
694
        dirname,basename = os.path.split(new_path)
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
695
        # dirname is not '' doesn't work, because
696
        # dirname may be a unicode entry, and is
697
        # requires the objects to be identical
698
        if dirname != '':
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
699
            old_dir = self.old_path(dirname)
700
            if old_dir is None:
0.5.42 by aaron.bentley at utoronto
Improved rename handling
701
                old_path = None
702
            else:
703
                old_path = os.path.join(old_dir, basename)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
704
        else:
0.5.42 by aaron.bentley at utoronto
Improved rename handling
705
            old_path = new_path
706
        #If the new path wasn't in renamed, the old one shouldn't be in
707
        #renamed_r
708
        if self._renamed_r.has_key(old_path):
709
            return None
710
        return old_path 
711
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
712
    def new_path(self, old_path):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
713
        """Get the new_path (path in the target_tree) for the file at old_path
714
        in the base tree.
715
        """
0.6.1 by Aaron Bentley
Fleshed out MockTree, fixed all test failures
716
        assert old_path[:1] not in ('\\', '/')
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
717
        new_path = self._renamed_r.get(old_path)
718
        if new_path is not None:
719
            return new_path
720
        if self._renamed.has_key(new_path):
721
            return None
722
        dirname,basename = os.path.split(old_path)
0.5.81 by John Arbash Meinel
Cleaning up from pychecker.
723
        if dirname != '':
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
724
            new_dir = self.new_path(dirname)
725
            if new_dir is None:
0.5.42 by aaron.bentley at utoronto
Improved rename handling
726
                new_path = None
727
            else:
728
                new_path = os.path.join(new_dir, basename)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
729
        else:
0.5.42 by aaron.bentley at utoronto
Improved rename handling
730
            new_path = old_path
731
        #If the old path wasn't in renamed, the new one shouldn't be in
732
        #renamed_r
733
        if self._renamed.has_key(new_path):
734
            return None
735
        return new_path 
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
736
737
    def path2id(self, path):
        """Look up the file id found at ``path`` in the target tree.

        An id registered for ``path`` in ``self._new_id`` takes precedence.
        Otherwise ``path`` is translated with ``self.old_path`` and the
        result is resolved against the base tree.

        Returns None when ``path`` has no old-path counterpart, or when
        that old path is listed in ``self.deleted``.
        """
        new_id = self._new_id.get(path)
        if new_id is not None:
            return new_id

        base_path = self.old_path(path)
        if base_path is None or base_path in self.deleted:
            return None

        # Use the base tree's own path2id when it has one; otherwise go
        # through its inventory.
        if hasattr(self.base_tree, 'path2id'):
            lookup = self.base_tree.path2id
        else:
            lookup = self.base_tree.inventory.path2id
        return lookup(base_path)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
751
752
    def id2path(self, file_id):
        """Return the path in the target tree of the file with id ``file_id``.

        A path registered for ``file_id`` in ``self._new_id_r`` takes
        precedence. Otherwise the base tree's path for the id is
        translated with ``self.new_path``.

        Returns None when the base tree reports no path for the id, or
        when that path is listed in ``self.deleted``.
        """
        known_path = self._new_id_r.get(file_id)
        if known_path is not None:
            return known_path

        base_path = self.base_tree.id2path(file_id)
        if base_path is None or base_path in self.deleted:
            return None
        return self.new_path(base_path)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
763
0.5.52 by aaron.bentley at utoronto
Make contents-addressing configurable
764
    def old_contents_id(self, file_id):
        """Return the id in the base_tree for the given file_id,
        or None if the file did not exist in base.

        When ``self.contents_by_id`` is set, the answer is ``file_id``
        itself if the base tree has that id, and None otherwise.
        When it is not set, the file's path in this tree (``id2path``)
        is looked up in the base tree with ``path2id``.

        FIXME:  Something doesn't seem right here. It seems like this
                function should always return either None or file_id.
                Even for the by-path lookup, an id2path lookup is done
                just to perform the reverse path2id lookup.

        Notice that the path2id lookup is done on a different tree!
        """
        if not self.contents_by_id:
            # By-path addressing: resolve this tree's path in the base tree.
            return self.base_tree.path2id(self.id2path(file_id))

        if self.base_tree.has_id(file_id):
            return file_id
        return None
782
        
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
783
    def get_file(self, file_id):
        """Return a file-like object containing the new contents of the
        file given by file_id.

        The base contents come from ``self.base_tree.get_file`` when
        ``old_contents_id`` finds a base id, and are None otherwise.
        If ``self.patches`` holds no patch for the file's path, the base
        contents are returned as they are (possibly None). Otherwise the
        patch is applied through ``patched_file``.

        TODO:   It might be nice if this actually generated an entry
                in the text-store, so that the file contents would
                then be cached.
        """
        base_id = self.old_contents_id(file_id)
        patch_original = None
        if base_id is not None:
            patch_original = self.base_tree.get_file(base_id)

        # Patches are keyed by the file's path in this tree.
        file_patch = self.patches.get(self.id2path(file_id))
        if file_patch is None:
            return patch_original

        assert not file_patch.startswith('\\'), \
            'Malformed patch for %s, %r' % (file_id, file_patch)
        return patched_file(file_patch, patch_original)
803
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
804
    def get_kind(self, file_id):
        """Return the kind of the entry with id ``file_id``.

        A kind recorded in ``self._kinds`` takes precedence; otherwise
        the kind of the base tree's inventory entry is returned.
        """
        recorded = self._kinds
        if file_id not in recorded:
            return self.base_tree.inventory[file_id].kind
        return recorded[file_id]
808
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
809
    def get_last_changed(self, file_id):
        """Return the last-changed revision id of the entry ``file_id``.

        A value recorded in ``self._last_changed_revision_ids`` takes
        precedence; otherwise the ``revision`` of the base tree's
        inventory entry is returned.
        """
        recorded = self._last_changed_revision_ids
        if file_id not in recorded:
            return self.base_tree.inventory[file_id].revision
        return recorded[file_id]
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
813
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
814
    def get_size_and_sha1(self, file_id):
        """Return the (size, sha1) pair for the given file id.

        Returns (None, None) when the id has no path in this tree.
        When there is no patch for the file's path, the values are taken
        from the base tree's inventory entry rather than re-reading the
        file. Otherwise the patched contents are read and measured.
        """
        target_path = self.id2path(file_id)
        if target_path is None:
            return None, None

        if target_path in self.patches:
            # The file is patched, so size and hash must be computed
            # from the new contents.
            text = self.get_file(file_id).read()
            return len(text), sha_string(text)

        # No patch for this path: the contents must be the same as in
        # the base_tree, so reuse what its inventory recorded.
        entry = self.base_tree.inventory[file_id]
        size = entry.text_size
        if size is not None:
            size = int(size)
        return size, entry.text_sha1
832
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
833
834
    def _get_inventory(self):
        """Build the Inventory describing this ChangesetTree.

        Every (path, file_id) pair from ``sorted_path_id`` except the
        root gets an InventoryEntry carrying its kind, last-changed
        revision and, for non-directories, its text size and sha1.
        Nothing is cached in this method: each call builds a new
        Inventory.

        Raises BzrError if a non-directory entry ends up with a
        text_size of None.
        """
        from os.path import basename, dirname

        assert self.base_tree is not None
        root_id = self.base_tree.inventory.root.file_id
        try:
            # New inventories have a unique root_id
            inv = Inventory(root_id)
        except TypeError:
            # Fall back to the constructor that takes no root id.
            inv = Inventory()

        for _sorted_path, file_id in self.sorted_path_id():
            if file_id == inv.root.file_id:
                continue

            path = self.id2path(file_id)
            if path is None:
                continue

            parent_path = dirname(path)
            if parent_path == '':
                # Entries with no directory component hang off the root.
                parent_id = root_id
            else:
                parent_id = self.path2id(parent_path)

            kind = self.get_kind(file_id)
            revision_id = self.get_last_changed(file_id)

            ie = InventoryEntry(file_id, basename(path), kind, parent_id)
            ie.revision = revision_id

            if kind == 'directory':
                ie.text_size, ie.text_sha1 = None, None
            else:
                ie.text_size, ie.text_sha1 = self.get_size_and_sha1(file_id)
                if ie.text_size is None:
                    raise BzrError('Got a text_size of None for file_id %r' % file_id)
            inv.add(ie)

        return inv
882
883
    # The inherited inventory property has to be re-declared here.
    # property() keeps a reference to the function object it was given
    # when the parent class was defined, so the inherited property would
    # never call the _get_inventory defined in this class.
    inventory = property(fget=_get_inventory)
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
889
0.5.49 by aaron.bentley at utoronto
Implemented iteration over ids
890
    def __iter__(self):
        """Yield the file id of each entry in ``self.inventory``."""
        for _entry_path, ie in self.inventory.iter_entries():
            yield ie.file_id
0.5.49 by aaron.bentley at utoronto
Implemented iteration over ids
893
0.6.1 by Aaron Bentley
Fleshed out MockTree, fixed all test failures
894
    def sorted_path_id(self):
        """Return a sorted list of (path, file_id) pairs for this tree.

        The list starts from the pairs stored in ``self._new_id`` and adds
        one pair for every id in the base tree that ``id2path`` resolves
        to a path. Ids for which ``id2path`` returns None are left out.
        The result is sorted by ordinary tuple comparison, path first.
        """
        # items() yields the same pairs as iteritems() and is available on
        # any mapping.
        pairs = list(self._new_id.items())
        for file_id in self.base_tree:
            path = self.id2path(file_id)
            if path is not None:
                pairs.append((path, file_id))
        pairs.sort()
        return pairs
905
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
906
def patched_file(file_patch, original):
    """Produce a file-like object with the patched version of a text.

    file_patch is the patch text and original is passed to iter_patched
    as the text to patch. An empty patch string gives an empty
    IterableFile without consulting original. Otherwise the patch is
    split into lines (line endings kept) and applied with iter_patched.
    """
    from patches import iter_patched
    from iterablefile import IterableFile

    if file_patch != "":
        patched_lines = iter_patched(original, file_patch.splitlines(True))
    else:
        patched_lines = ()
    return IterableFile(patched_lines)
913