/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
1
#!/usr/bin/env python
2
"""\
3
Read in a changeset output, and process it into a Changeset object.
4
"""
5
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
6
import os
7
import pprint
8
from cStringIO import StringIO
9
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
10
from bzrlib.tree import Tree
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
11
from bzrlib.trace import mutter, warning
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
12
from bzrlib.errors import BzrError
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
13
from bzrlib.xml5 import serializer_v5
14
from bzrlib.osutils import sha_file, sha_string
15
from bzrlib.revision import Revision
0.5.116 by John Arbash Meinel
Fixed a bug based on the new InventoryEntry separation.
16
from bzrlib.inventory import (Inventory, InventoryEntry,
17
                              InventoryDirectory, InventoryFile,
18
                              InventoryLink)
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
19
20
from common import decode, get_header, header_str
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
21
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
22
class BadChangeset(Exception):
    """Root of the error hierarchy used while reading a changeset."""


class MalformedHeader(BadChangeset):
    """The changeset header could not be parsed."""


class MalformedPatches(BadChangeset):
    """Error type reserved for a malformed patch section (by its name)."""


class MalformedFooter(BadChangeset):
    """Error type reserved for a malformed footer section (by its name)."""
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
26
0.5.11 by John Arbash Meinel
Working on properly representing renames.
27
def _unescape(name):
28
    """Now we want to find the filename effected.
29
    Unfortunately the filename is written out as
30
    repr(filename), which means that it surrounds
31
    the name with quotes which may be single or double
32
    (single is preferred unless there is a single quote in
33
    the filename). And some characters will be escaped.
34
35
    TODO:   There has to be some pythonic way of undo-ing the
36
            representation of a string rather than using eval.
37
    """
38
    delimiter = name[0]
39
    if name[-1] != delimiter:
40
        raise BadChangeset('Could not properly parse the'
41
                ' filename: %r' % name)
42
    # We need to handle escaped hexadecimals too.
43
    return name[1:-1].replace('\"', '"').replace("\'", "'")
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
44
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
45
class RevisionInfo(object):
    """Holds the raw fields of one revision entry of a changeset.

    Everything except the revision id starts out unset; the values are
    assigned from outside this class.
    """

    def __init__(self, revision_id):
        self.revision_id = revision_id

        # Who committed, what they said, and when.
        self.committer = None
        self.message = None
        self.date = None
        self.timestamp = None
        self.timezone = None

        # Checksums recorded for the revision and for its inventory.
        self.sha1 = None
        self.inventory_sha1 = None

        # Raw parent entries, and the revision_id => sha1 mapping that
        # as_revision() derives from them.
        self.parents = None
        self.parent_sha1s = {}

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def as_revision(self):
        """Build a Revision object from the collected fields.

        Each entry of self.parents is split on whitespace into a parent
        revision id and its sha1; the id is appended to the new
        revision's parent_ids and the pair is recorded in
        self.parent_sha1s as a side effect.

        :return: The newly created Revision.
        """
        revision = Revision(
            revision_id=self.revision_id,
            committer=self.committer,
            timestamp=float(self.timestamp),
            timezone=int(self.timezone),
            inventory_sha1=self.inventory_sha1,
            message='\n'.join(self.message))

        # A missing or empty parent list simply contributes nothing.
        for entry in self.parents or ():
            parent_id, parent_sha1 = entry.split()
            revision.parent_ids.append(parent_id)
            self.parent_sha1s[parent_id] = parent_sha1

        return revision
79
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
80
class ChangesetInfo(object):
    """This contains the meta information. Stuff that allows you to
    recreate the revision or inventory XML.
    """

    def __init__(self):
        self.committer = None
        self.date = None
        self.message = None
        self.base = None
        self.base_sha1 = None

        # A list of RevisionInfo objects
        self.revisions = []

        self.actions = []

        # The next entries are created during complete_info() and
        # other post-read functions.

        # A list of real Revision objects
        self.real_revisions = []

        self.timestamp = None
        self.timezone = None

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def _unpack_date(self, date):
        """Split a date string into a (timestamp, timezone) pair."""
        # Kept as a function-local import, exactly like the original code,
        # but only triggered when a date really has to be unpacked.
        from common import unpack_highres_date
        return unpack_highres_date(date)

    def complete_info(self):
        """This makes sure that all information is properly
        split up, based on the assumptions that can be made
        when information is missing.

        Fills in missing per-revision timestamp/timezone, message and
        committer from the changeset-wide values, rebuilds
        self.real_revisions from self.revisions, and, when no base was
        given, takes the first parent of the first revision as the base.
        """
        # Put in all of the guessable information.
        if not self.timestamp and self.date:
            self.timestamp, self.timezone = self._unpack_date(self.date)

        self.real_revisions = []
        for rev in self.revisions:
            if rev.timestamp is None:
                if rev.date is not None:
                    rev.timestamp, rev.timezone = self._unpack_date(rev.date)
                else:
                    rev.timestamp = self.timestamp
                    rev.timezone = self.timezone
            if rev.message is None and self.message:
                rev.message = self.message
            if rev.committer is None and self.committer:
                rev.committer = self.committer
            self.real_revisions.append(rev.as_revision())

        # With no revisions at all there is nothing to infer a base from;
        # leave base unset instead of failing with an IndexError.
        if self.base is None and self.real_revisions:
            # When we don't have a base, then the real base
            # is the first parent of the first revision listed
            rev = self.real_revisions[0]
            if not rev.parent_ids:
                # There is no base listed, and
                # the lowest revision doesn't have a parent
                # so this is probably against the empty tree
                # and thus base truly is None
                self.base = None
                self.base_sha1 = None
            else:
                self.base = rev.parent_ids[0]
                self.base_sha1 = self.revisions[0].parent_sha1s[self.base]

    def _get_target(self):
        """Return the target revision.

        This is the id of the last real revision if complete_info() has
        produced any, else the id of the last raw revision entry, else None.
        """
        if self.real_revisions:
            return self.real_revisions[-1].revision_id
        if self.revisions:
            return self.revisions[-1].revision_id
        return None

    target = property(_get_target, doc='The target revision id')
157
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
158
class ChangesetReader(object):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
159
    """This class reads in a changeset from a file, and returns
160
    a Changeset object, which can then be applied against a tree.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
161
    """
162
    def __init__(self, from_file):
        """Parse and validate a changeset as soon as the reader is built.

        :param from_file: A file-like object (must have iterator support).
        """
        object.__init__(self)

        # Input source, plus the one-line lookahead buffer used by _next().
        self.from_file = from_file
        self._next_line = None

        # Meta information accumulated while reading.
        self.info = ChangesetInfo()

        # The actual inventory ids live in the footer so that the patch is
        # easier for humans to read. The price is that everything has to be
        # read before a proper changeset can be created, so do it all now.
        self._read()
        self._validate()
178
179
    def _read(self):
        """Run the three reading stages: header, patches, then footer."""
        for stage in ('_read_header', '_read_patches', '_read_footer'):
            # Looked up one at a time so each stage is resolved only after
            # the previous one has finished.
            getattr(self, stage)()
183
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
184
    def _validate(self):
        """Check that what was read is coherent and matches its checksums.

        First lets the ChangesetInfo fill in whatever was left out (which
        also builds its real_revisions list), then verifies each revision
        against its recorded checksum.
        """
        info = self.info
        info.complete_info()
        self._validate_revisions()
192
193
    def _validate_revisions(self):
        """Make sure all revision entries match their checksum.

        Every real revision is written out with serializer_v5, hashed, and
        the result compared with the sha1 recorded in the matching
        RevisionInfo.

        :raises BzrError: If a measured sha1 differs from the recorded one,
            or if the same revision id is listed more than once.
        """

        # This is a mapping from each revision id to its sha hash
        rev_to_sha1 = {}

        for rev, rev_info in zip(self.info.real_revisions, self.info.revisions):
            # real_revisions is built from revisions, entry for entry.
            assert rev.revision_id == rev_info.revision_id
            sio = StringIO()
            serializer_v5.write_revision(rev, sio)
            sio.seek(0)
            sha1 = sha_file(sio)
            if sha1 != rev_info.sha1:
                raise BzrError('Revision checksum mismatch.'
                    ' For revision_id {%s} supplied sha1 (%s) != measured (%s)'
                    % (rev.revision_id, rev_info.sha1, sha1))
            # 'in' rather than has_key(), matching the membership tests used
            # elsewhere in this class.
            if rev.revision_id in rev_to_sha1:
                raise BzrError('Revision {%s} given twice in the list'
                        % (rev.revision_id,))
            rev_to_sha1[rev.revision_id] = sha1

        # Now that we've checked all the sha1 sums, we can make sure that
        # at least for the small list we have, all of the references are
        # valid.
        ## TODO: Bring this back
        ## for rev in self.info.real_revisions:
        ##     for p_id in rev.parent_ids:
        ##         if p_id in rev_to_sha1:
        ##             if parent.revision_sha1 != rev_to_sha1[p_id]:
        ##                 raise BzrError('Parent revision checksum mismatch.'
        ##                         ' A parent was referenced with an'
        ##                         ' incorrect checksum'
        ##                         ': {%r} %s != %s' % (parent.revision_id,
        ##                                     parent.revision_sha1,
        ##                                     rev_to_sha1[parent.revision_id]))
0.5.63 by John Arbash Meinel
Moving the validation into part of the reading.
228
229
    def _validate_references_from_branch(self, branch):
        """Now that we have a branch which should have some of the
        revisions we care about, go through and validate all of them
        that we can.

        :param branch: Object queried through has_revision(),
            get_revision_sha1() and get_inventory_sha1().
        :raises BzrError: If a revision or inventory sha1 recorded in the
            changeset disagrees with the branch, if one id is referenced
            with two different hashes, or if a null revision id carries a
            non-null hash.
        """
        rev_to_sha = {}
        inv_to_sha = {}

        def add_sha(d, revision_id, sha1):
            """Record revision_id => sha1 in d, rejecting conflicts."""
            if revision_id is None:
                if sha1 is not None:
                    raise BzrError('A Null revision should always'
                        ' have a null sha1 hash')
                return
            if revision_id in d:
                # This really should have been validated as part
                # of _validate_revisions but lets do it again
                if sha1 != d[revision_id]:
                    raise BzrError('** Revision %r referenced with 2 different'
                            ' sha hashes %s != %s' % (revision_id,
                                sha1, d[revision_id]))
            else:
                d[revision_id] = sha1

        add_sha(rev_to_sha, self.info.base, self.info.base_sha1)
        # All of the contained revisions were checked
        # in _validate_revisions
        checked = set()
        for rev_info in self.info.revisions:
            checked.add(rev_info.revision_id)
            add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)

        # Still zipped against real_revisions so that only entries which
        # have a real revision are considered, as before.
        for _rev, rev_info in zip(self.info.real_revisions, self.info.revisions):
            add_sha(inv_to_sha, rev_info.revision_id, rev_info.inventory_sha1)
            for p_id, sha1 in rev_info.parent_sha1s.items():
                add_sha(rev_to_sha, p_id, sha1)

        count = 0
        missing = {}
        for revision_id, sha1 in rev_to_sha.items():
            if branch.has_revision(revision_id):
                local_sha1 = branch.get_revision_sha1(revision_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For revision id {%s}'
                            ' local: %s, cset: %s' % (revision_id, local_sha1, sha1))
                count += 1
            elif revision_id not in checked:
                # Neither in the branch nor contained in this changeset.
                missing[revision_id] = sha1

        for inv_id, sha1 in inv_to_sha.items():
            if branch.has_revision(inv_id):
                # TODO: Currently branch.get_inventory_sha1() just returns the value
                # that is stored in the revision text. Which is *really* bogus, because
                # that means we aren't validating the actual text, just that we wrote
                # and read the string. But for now, what the hell.
                local_sha1 = branch.get_inventory_sha1(inv_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For inventory id {%s}'
                            ' local: %s, cset: %s' % (inv_id, local_sha1, sha1))
                count += 1

        if missing:
            # I don't know if this is an error yet
            warning('Not all revision hashes could be validated.'
                    ' Unable to validate %d hashes' % len(missing))
        mutter('Verified %d sha hashes for the changeset.' % count)
296
297
    def _validate_inventory(self, inv):
        """At this point we should have generated the ChangesetTree,
        so build up an inventory, and make sure the hashes match.

        :param inv: The inventory to check; must not be None.
        :raises BzrError: If the sha1 of the serialized inventory differs
            from the inventory_sha1 of the target revision. In that case
            the serialized inventory is also dumped to ',,bogus-inv' in
            the current directory.
        """

        assert inv is not None

        # Now we should have a complete inventory entry.
        s = serializer_v5.write_inventory_to_string(inv)
        sha1 = sha_string(s)
        # Target revision is the last entry in the real_revisions list
        rev = self.info.real_revisions[-1]
        if sha1 != rev.inventory_sha1:
            # Keep the mismatching text around for inspection, and make
            # sure the handle is closed (and flushed) before raising.
            dump = open(',,bogus-inv', 'wb')
            try:
                dump.write(s)
            finally:
                dump.close()
            raise BzrError('Inventory sha hash mismatch.')
312
0.5.63 by John Arbash Meinel
Moving the validation into part of the reading.
313
        
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
314
    def get_changeset(self, branch):
        """Build the changeset tree on top of the base revision of branch.

        The hashes referenced by the changeset are first checked against
        the branch; then a ChangesetTree is created over the branch's tree
        for the base revision, updated, and its inventory validated.

        :param branch: The branch supplying the base revision tree.
        :return: A (ChangesetInfo, ChangesetTree) pair: the meta
            information and the tree.
        """
        self._validate_references_from_branch(branch)

        base_tree = branch.revision_tree(self.info.base)
        cset_tree = ChangesetTree(base_tree)
        self._update_tree(cset_tree)

        self._validate_inventory(cset_tree.inventory)

        return self.info, cset_tree
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
326
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
327
    def _next(self):
328
        """yield the next line, but secretly
329
        keep 1 extra line for peeking.
330
        """
331
        for line in self.from_file:
332
            last = self._next_line
333
            self._next_line = line
334
            if last is not None:
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
335
                #mutter('yielding line: %r' % last)
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
336
                yield last
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
337
        last = self._next_line
338
        self._next_line = None
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
339
        #mutter('yielding line: %r' % last)
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
340
        yield last
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
341
342
    def _read_header(self):
        """Read the bzr header.

        Lines are skipped until a '#' line beginning with header_str is
        seen; that line and the ones after it must then match the lines
        returned by get_header() one by one. After that, every line up to
        the first blank line is passed to _handle_next().

        :raises MalformedHeader: If no opening header is found, or a
            header line does not match what is expected.
        """
        expected = get_header()
        in_header = False
        for raw in self._next():
            if in_header:
                # not all mailers will keep trailing whitespace
                if raw == '#\n':
                    raw = '# \n'
                well_formed = raw.startswith('# ') and raw.endswith('\n')
                if not well_formed or decode(raw[2:-1]) != expected[0]:
                    raise MalformedHeader('Found a header, but it'
                        ' was improperly formatted')
                expected.pop(0) # We read this line.
                if not expected:
                    break # We found everything.
            elif raw.startswith('#') and raw.endswith('\n'):
                text = decode(raw[1:-1].strip())
                if text[:len(header_str)] == header_str:
                    if text != expected[0]:
                        raise MalformedHeader('Found what looks like'
                                ' a header, but did not match')
                    in_header = True
                    expected.pop(0)
        else:
            # The loop ran dry without ever matching the complete header.
            raise MalformedHeader('Did not find an opening header')

        for raw in self._next():
            # The bzr header is terminated with a blank line
            # which does not start with '#'
            if raw == '\n':
                break
            self._handle_next(raw)
376
377
    def _read_next_entry(self, line, indent=1):
        """Read in a key-value pair

        :param line: A raw header line; must start with '#'.
        :param indent: Number of leading spaces (after the '#') to strip
            when they are present.
        :return: A (key, value) tuple, with spaces in the key replaced by
            underscores. (None, None) is returned for a blank line. When
            nothing follows the colon, the value is whatever
            self._read_many() returns for the following lines.
        :raises MalformedHeader: If the line does not start with '#' or
            contains no colon separator.
        """
        if not line.startswith('#'):
            raise MalformedHeader('Bzr header did not start with #')
        # Remove the '#' and the trailing '\n'. The newline is only cut
        # when it is really there, so a final line without one does not
        # lose its last character.
        line = line[1:]
        if line.endswith('\n'):
            line = line[:-1]
        line = decode(line)
        if line[:indent] == ' '*indent:
            line = line[indent:]
        if not line:
            return None, None # Ignore blank lines

        loc = line.find(': ')
        if loc != -1:
            key = line[:loc]
            value = line[loc+2:]
            if not value:
                value = self._read_many(indent=indent+3)
        elif line[-1:] == ':':
            key = line[:-1]
            value = self._read_many(indent=indent+3)
        else:
            raise MalformedHeader('While looking for key: value pairs,'
                    ' did not find the colon %r' % (line))

        key = key.replace(' ', '_')
        #mutter('found %s: %s' % (key, value))
        return key, value
404
405
    def _handle_next(self, line):
        """Parse one header line and store its value on self.info.

        A 'revision' key is passed to _read_revision(). Any other key must
        name an attribute of self.info whose current value is None.

        :param line: The raw header line to process.
        :raises MalformedHeader: if the key is not an attribute of
                 self.info, or that attribute has already been set.
        """
        key, value = self._read_next_entry(line, indent=1)
        mutter('_handle_next %r => %r' % (key, value))
        if key is None:
            # Blank line, nothing to record.
            return

        if key == 'revision':
            self._read_revision(value)
            return

        if not hasattr(self.info, key):
            # What do we do with a key we don't recognize
            raise MalformedHeader('Unknown Key: %s' % key)
        if getattr(self.info, key) is not None:
            raise MalformedHeader('Duplicated Key: %s' % key)
        setattr(self.info, key, value)
421
        
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
422
    def _read_many(self, indent):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
423
        """If a line ends with no entry, that means that it should be
424
        followed with multiple lines of values.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
425
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
426
        This detects the end of the list, because it will be a line that
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
427
        does not start properly indented.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
428
        """
429
        values = []
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
430
        start = '#' + (' '*indent)
431
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
432
        if self._next_line is None or self._next_line[:len(start)] != start:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
433
            return values
434
435
        for line in self._next():
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
436
            values.append(decode(line[len(start):-1]))
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
437
            if self._next_line is None or self._next_line[:len(start)] != start:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
438
                break
439
        return values
440
441
    def _read_one_patch(self):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
442
        """Read in one patch, return the complete patch, along with
443
        the next line.
444
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
445
        :return: action, lines, do_continue
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
446
        """
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
447
        #mutter('_read_one_patch: %r' % self._next_line)
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
448
        # Peek and see if there are no patches
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
449
        if self._next_line is None or self._next_line.startswith('#'):
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
450
            return None, [], False
451
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
452
        first = True
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
453
        lines = []
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
454
        for line in self._next():
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
455
            if first:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
456
                if not line.startswith('==='):
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
457
                    raise MalformedPatches('The first line of all patches'
0.5.100 by John Arbash Meinel
Switching from *** to ===
458
                        ' should be a bzr meta line "==="'
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
459
                        ': %r' % line)
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
460
                action = decode(line[4:-1])
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
461
            if self._next_line is not None and self._next_line.startswith('==='):
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
462
                return action, lines, True
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
463
            elif self._next_line is None or self._next_line.startswith('#'):
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
464
                return action, lines, False
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
465
466
            if first:
467
                first = False
468
            else:
469
                lines.append(line)
470
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
471
        return action, lines, False
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
472
            
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
473
    def _read_patches(self):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
474
        do_continue = True
475
        while do_continue:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
476
            action, lines, do_continue = self._read_one_patch()
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
477
            if action is not None:
478
                self.info.actions.append((action, lines))
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
479
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
480
    def _read_revision(self, revision_id):
        """Read the indented entries that belong to one revision.

        A RevisionInfo is created for revision_id, filled from the
        following lines, and appended to self.info.revisions. Reading
        stops when the upcoming line is missing or no longer starts with
        '#' followed by four spaces.

        :param revision_id: The value of the 'revision' header entry.
        :raises MalformedHeader: if an entry names an attribute the
                 RevisionInfo does not have, or one that was already set.
        """
        rev_info = RevisionInfo(revision_id)
        start = '#    '
        for line in self._next():
            key, value = self._read_next_entry(line, indent=4)
            # Blank lines come back as (None, None); hasattr() would raise
            # TypeError on a None name, so skip them but still run the
            # end-of-block check below.
            if key is not None:
                if not hasattr(rev_info, key):
                    # What do we do with a key we don't recognize
                    raise MalformedHeader('Unknown Key: %s' % key)
                if getattr(rev_info, key) is not None:
                    raise MalformedHeader('Duplicated Key: %s' % key)
                setattr(rev_info, key, value)

            if self._next_line is None or not self._next_line.startswith(start):
                break

        self.info.revisions.append(rev_info)
502
503
    def _read_footer(self):
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
504
        """Read the rest of the meta information.
505
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
506
        :param first_line:  The previous step iterates past what it
507
                            can handle. That extra line is given here.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
508
        """
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
509
        for line in self._next():
510
            self._handle_next(line)
0.5.118 by John Arbash Meinel
Got most of test_changeset to work. Still needs work for Aaron's test code.
511
            if self._next_line is None or not self._next_line.startswith('#'):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
512
                break
513
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
514
    def _update_tree(self, cset_tree):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
515
        """This fills out a ChangesetTree based on the information
516
        that was read in.
517
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
518
        :param cset_tree: A ChangesetTree to update with the new information.
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
519
        """
520
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
521
        def get_rev_id(info, file_id, kind):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
522
            if info is not None:
0.5.117 by John Arbash Meinel
Almost there. Just need to track down a few remaining bugs.
523
                if not info.startswith('last-changed:'):
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
524
                    raise BzrError("Last changed revision should start with 'last-changed:'"
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
525
                        ': %r' % info)
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
526
                revision_id = decode(info[13:])
0.5.118 by John Arbash Meinel
Got most of test_changeset to work. Still needs work for Aaron's test code.
527
            elif cset_tree._last_changed.has_key(file_id):
528
                return cset_tree._last_changed[file_id]
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
529
            else:
0.5.118 by John Arbash Meinel
Got most of test_changeset to work. Still needs work for Aaron's test code.
530
                revision_id = self.info.target
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
531
            cset_tree.note_last_changed(file_id, revision_id)
532
            return revision_id
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
533
534
        def renamed(kind, extra, lines):
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
535
            info = extra.split(' // ')
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
536
            if len(info) < 2:
537
                raise BzrError('renamed action lines need both a from and to'
538
                        ': %r' % extra)
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
539
            old_path = info[0]
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
540
            if info[1].startswith('=> '):
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
541
                new_path = info[1][3:]
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
542
            else:
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
543
                new_path = info[1]
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
544
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
545
            file_id = cset_tree.path2id(old_path)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
546
            if len(info) > 2:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
547
                revision = get_rev_id(info[2], file_id, kind)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
548
            else:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
549
                revision = get_rev_id(None, file_id, kind)
550
            cset_tree.note_rename(old_path, new_path)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
551
            if lines:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
552
                cset_tree.note_patch(new_path, ''.join(lines))
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
553
554
        def removed(kind, extra, lines):
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
555
            info = extra.split(' // ')
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
556
            if len(info) > 1:
557
                # TODO: in the future we might allow file ids to be
558
                # given for removed entries
559
                raise BzrError('removed action lines should only have the path'
560
                        ': %r' % extra)
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
561
            path = info[0]
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
562
            cset_tree.note_deletion(path)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
563
564
        def added(kind, extra, lines):
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
565
            info = extra.split(' // ')
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
566
            if len(info) <= 1:
567
                raise BzrError('add action lines require the path and file id'
568
                        ': %r' % extra)
569
            elif len(info) > 3:
570
                raise BzrError('add action lines have fewer than 3 entries.'
571
                        ': %r' % extra)
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
572
            path = info[0]
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
573
            if not info[1].startswith('file-id:'):
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
574
                raise BzrError('The file-id should follow the path for an add'
575
                        ': %r' % extra)
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
576
            file_id = info[1][8:]
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
577
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
578
            cset_tree.note_id(file_id, path, kind)
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
579
            if len(info) > 2:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
580
                revision = get_rev_id(info[2], file_id, kind)
0.5.83 by John Arbash Meinel
Tests pass. Now ChangesetTree has it's own inventory.
581
            else:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
582
                revision = get_rev_id(None, file_id, kind)
0.5.117 by John Arbash Meinel
Almost there. Just need to track down a few remaining bugs.
583
            if kind == 'directory':
584
                return
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
585
            cset_tree.note_patch(path, ''.join(lines))
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
586
587
        def modified(kind, extra, lines):
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
588
            info = extra.split(' // ')
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
589
            if len(info) < 1:
590
                raise BzrError('modified action lines have at least'
591
                        'the path in them: %r' % extra)
0.5.87 by John Arbash Meinel
Handling international characters, added more test cases.
592
            path = info[0]
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
593
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
594
            file_id = cset_tree.path2id(path)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
595
            if len(info) > 1:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
596
                revision = get_rev_id(info[1], file_id, kind)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
597
            else:
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
598
                revision = get_rev_id(None, file_id, kind)
599
            cset_tree.note_patch(path, ''.join(lines))
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
600
            
601
602
        valid_actions = {
603
            'renamed':renamed,
604
            'removed':removed,
605
            'added':added,
606
            'modified':modified
607
        }
608
        for action_line, lines in self.info.actions:
609
            first = action_line.find(' ')
610
            if first == -1:
611
                raise BzrError('Bogus action line'
612
                        ' (no opening space): %r' % action_line)
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
613
            second = action_line.find(' ', first+1)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
614
            if second == -1:
615
                raise BzrError('Bogus action line'
616
                        ' (missing second space): %r' % action_line)
617
            action = action_line[:first]
618
            kind = action_line[first+1:second]
619
            if kind not in ('file', 'directory'):
620
                raise BzrError('Bogus action line'
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
621
                        ' (invalid object kind %r): %r' % (kind, action_line))
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
622
            extra = action_line[second+1:]
623
624
            if action not in valid_actions:
625
                raise BzrError('Bogus action line'
626
                        ' (unrecognized action): %r' % action_line)
627
            valid_actions[action](kind, extra, lines)
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
628
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
629
def read_changeset(from_file, branch):
    """Read in a changeset from an iterable object (such as a file object)

    :param from_file: A file-like object to read the changeset information.
    :param branch: This will be used to build the changeset tree, it needs
                   to contain the base of the changeset. (Which you probably
                   won't know about until after the changeset is parsed.)
    :return: Whatever ChangesetReader.get_changeset() returns for branch.
    """
    return ChangesetReader(from_file).get_changeset(branch)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
639
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
640
class ChangesetTree(Tree):
641
    def __init__(self, base_tree):
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
642
        self.base_tree = base_tree
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
643
        self._renamed = {} # Mapping from old_path => new_path
644
        self._renamed_r = {} # new_path => old_path
645
        self._new_id = {} # new_path => new_id
646
        self._new_id_r = {} # new_id => new_path
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
647
        self._kinds = {} # new_id => kind
0.5.118 by John Arbash Meinel
Got most of test_changeset to work. Still needs work for Aaron's test code.
648
        self._last_changed = {} # new_id => revision_id
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
649
        self.patches = {}
0.5.48 by aaron.bentley at utoronto
Implemented deletion for ChangesetTrees
650
        self.deleted = []
0.5.52 by aaron.bentley at utoronto
Make contents-addressing configurable
651
        self.contents_by_id = True
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
652
        self._inventory = None
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
653
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
654
    def __str__(self):
655
        return pprint.pformat(self.__dict__)
656
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
657
    def note_rename(self, old_path, new_path):
        """A file/directory has been renamed from old_path => new_path

        :param old_path: The path before the rename.
        :param new_path: The path after the rename.
        :raises AssertionError: if new_path is already the target of a
                 rename, or old_path has already been renamed.
        """
        # _renamed is keyed by new_path and _renamed_r by old_path, so the
        # duplicate checks must look at those keys. Raised explicitly so
        # the checks are not stripped under -O.
        if new_path in self._renamed:
            raise AssertionError(new_path)
        if old_path in self._renamed_r:
            raise AssertionError(old_path)
        self._renamed[new_path] = old_path
        self._renamed_r[old_path] = new_path
663
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
664
    def note_id(self, new_id, new_path, kind='file'):
        """Record the id, path and kind of a file that is not in base.

        :param new_id: The file id to register.
        :param new_path: The path the id is registered for.
        :param kind: The kind stored for new_id.
        """
        id_by_path = self._new_id
        id_by_path[new_path] = new_id
        path_by_id = self._new_id_r
        path_by_id[new_id] = new_path
        kind_by_id = self._kinds
        kind_by_id[new_id] = kind
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
669
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
670
    def note_last_changed(self, file_id, revision_id):
        """Record the revision in which file_id was last changed.

        :param file_id: The file id the revision applies to.
        :param revision_id: The last-changed revision to store.
        :raises BzrError: if a different revision has already been
                 recorded for file_id.
        """
        if (file_id in self._last_changed
                and self._last_changed[file_id] != revision_id):
            raise BzrError('Mismatched last-changed revision for file_id {%s}'
                    ': %s != %s' % (file_id,
                                    self._last_changed[file_id],
                                    revision_id))
        self._last_changed[file_id] = revision_id
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
678
0.5.44 by aaron.bentley at utoronto
Got get_file working for new files
679
    def note_patch(self, new_path, patch):
        """Store the patch that applies to the file at new_path.

        :param new_path: The path the patch is stored under.
        :param patch: The patch to store; replaces any earlier one.
        """
        patch_by_path = self.patches
        patch_by_path[new_path] = patch
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
682
0.5.48 by aaron.bentley at utoronto
Implemented deletion for ChangesetTrees
683
    def note_deletion(self, old_path):
        """Remember that the file at old_path has been deleted.

        :param old_path: The path to add to the deleted list.
        """
        removed_paths = self.deleted
        removed_paths.append(old_path)
686
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
687
    def old_path(self, new_path):
        """Get the old_path (path in the base_tree) for the file at new_path

        :param new_path: A path that must not start with '/' or a
                         backslash.
        :return: The matching path, or None when the parent directory
                 resolves to None or the computed path has itself been
                 renamed to something else.
        """
        assert new_path[:1] not in ('\\', '/')
        old_path = self._renamed.get(new_path)
        if old_path is not None:
            return old_path
        dirname,basename = os.path.split(new_path)
        # dirname is not '' doesn't work, because
        # dirname may be a unicode entry, and 'is'
        # requires the objects to be identical
        if dirname != '':
            # The file was not renamed itself, but a parent directory
            # may have been.
            old_dir = self.old_path(dirname)
            if old_dir is None:
                old_path = None
            else:
                old_path = os.path.join(old_dir, basename)
        else:
            old_path = new_path
        #If the new path wasn't in renamed, the old one shouldn't be in
        #renamed_r
        if old_path in self._renamed_r:
            return None
        return old_path
710
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
711
    def new_path(self, old_path):
        """Get the new_path (path in the target_tree) for the file at old_path
        in the base tree.

        :param old_path: A path that must not start with '/' or a
                         backslash.
        :return: The matching path, or None when the parent directory
                 resolves to None or the computed path is the target of
                 another rename.
        """
        assert old_path[:1] not in ('\\', '/')
        new_path = self._renamed_r.get(old_path)
        if new_path is not None:
            return new_path
        # new_path is None from here on; the final check below handles
        # paths that are the target of another rename.
        dirname,basename = os.path.split(old_path)
        if dirname != '':
            # The file was not renamed itself, but a parent directory
            # may have been.
            new_dir = self.new_path(dirname)
            if new_dir is None:
                new_path = None
            else:
                new_path = os.path.join(new_dir, basename)
        else:
            new_path = old_path
        #If the old path wasn't in renamed_r, the new one shouldn't be in
        #renamed
        if new_path in self._renamed:
            return None
        return new_path
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
735
736
    def path2id(self, path):
        """Return the id of the file present at path in the target tree.

        Ids noted through note_id() win. Otherwise the path is mapped back
        with old_path() and looked up in the base tree, or in its
        inventory when the base tree has no path2id of its own.

        :return: The file id, or None when the path has no old path or
                 that old path is listed in self.deleted.
        """
        added_id = self._new_id.get(path)
        if added_id is not None:
            return added_id

        base_path = self.old_path(path)
        if base_path is None or base_path in self.deleted:
            return None

        lookup = self.base_tree
        if not hasattr(lookup, 'path2id'):
            lookup = lookup.inventory
        return lookup.path2id(base_path)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
750
751
    def id2path(self, file_id):
        """Return the new path in the target tree of the file with id file_id

        Paths registered in self._new_id_r take precedence.  Otherwise the
        base tree's path for the id is mapped through new_path(); None is
        returned when the base tree reports no path or when that path is
        listed in self.deleted.
        """
        added_path = self._new_id_r.get(file_id)
        if added_path is not None:
            return added_path
        base_path = self.base_tree.id2path(file_id)
        if base_path is None or base_path in self.deleted:
            return None
        return self.new_path(base_path)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
762
0.5.52 by aaron.bentley at utoronto
Make contents-addressing configurable
763
    def old_contents_id(self, file_id):
        """Return the id in the base_tree for the given file_id,
        or None if the file did not exist in base.

        When self.contents_by_id is set, the answer is file_id itself if
        the base tree has that id, else None.  Otherwise the target-tree
        path of file_id is looked up by path in the base tree.

        FIXME:  Something doesn't seem right here. It seems like this function
                should always either return None or file_id. Even if
                you are doing the by-path lookup, you are doing a
                id2path lookup, just to do the reverse path2id lookup.

        Notice that you're doing the path2id on a different tree!
        """
        if not self.contents_by_id:
            # By-path addressing: whatever sits at the same path in base.
            return self.base_tree.path2id(self.id2path(file_id))
        if self.base_tree.has_id(file_id):
            return file_id
        return None
781
        
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
782
    def get_file(self, file_id):
        """Return a file-like object containing the new contents of the
        file given by file_id.

        The base text (if old_contents_id() finds one) is fetched from the
        base tree.  If self.patches holds no patch for the file's target
        path, that base text (possibly None) is returned unchanged;
        otherwise the result of patched_file() is returned.

        TODO:   It might be nice if this actually generated an entry
                in the text-store, so that the file contents would
                then be cached.
        """
        base_id = self.old_contents_id(file_id)
        original = None
        if base_id is not None:
            original = self.base_tree.get_file(base_id)
        # Patches are keyed by the path in the target tree.
        patch_text = self.patches.get(self.id2path(file_id))
        if patch_text is None:
            return original

        assert not patch_text.startswith('\\'), \
            'Malformed patch for %s, %r' % (file_id, patch_text)
        return patched_file(patch_text, original)
802
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
803
    def get_kind(self, file_id):
        """Return the kind of the entry with the given file_id.

        A kind recorded in self._kinds wins; otherwise the kind of the
        base tree's inventory entry is used.
        """
        overrides = self._kinds
        if file_id not in overrides:
            return self.base_tree.inventory[file_id].kind
        return overrides[file_id]
807
0.5.115 by John Arbash Meinel
Getting closer to being able to read back the changesets, still broken, though.
808
    def get_last_changed(self, file_id):
        """Return the revision recorded as last changing file_id.

        A value recorded in self._last_changed wins; otherwise the
        ``revision`` of the base tree's inventory entry is used.
        """
        overrides = self._last_changed
        if file_id not in overrides:
            return self.base_tree.inventory[file_id].revision
        return overrides[file_id]
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
812
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
813
    def get_size_and_sha1(self, file_id):
        """Return the size and sha1 hash of the given file id.

        Returns (None, None) when the id has no path in the target tree.
        If no patch is recorded for the path, the values are taken from the
        base tree's inventory entry instead of re-reading the file (the
        size is converted with int() unless it is None).  Otherwise the
        patched text is read and measured.
        """
        target_path = self.id2path(file_id)
        if target_path is None:
            return None, None
        if target_path in self.patches:
            # Modified by this changeset: hash the patched text itself.
            text = self.get_file(file_id).read()
            return len(text), sha_string(text)
        # No patch, so the contents must be the same as in the base_tree.
        entry = self.base_tree.inventory[file_id]
        size = entry.text_size
        if size is not None:
            size = int(size)
        return size, entry.text_sha1
831
0.5.82 by John Arbash Meinel
Lots of changes, changing separators, updating tests, updated ChangesetTree to include text_ids
832
833
    def _get_inventory(self):
        """Build up the inventory for the ChangesetTree.

        A fresh Inventory is built on every call.  It is rooted at the base
        tree's root id (when the Inventory constructor accepts one) and gets
        one entry for every (path, file_id) pair from sorted_path_id(),
        except the root itself.  Entries are added in sorted path order.

        :return: The newly built Inventory.
        :raises BzrError: if an entry has a kind other than 'directory',
            'file' or 'symlink', or if a non-directory entry ends up with a
            text_size of None.
        """
        from os.path import dirname, basename

        assert self.base_tree is not None
        base_inv = self.base_tree.inventory
        root_id = base_inv.root.file_id
        try:
            # New inventories have a unique root_id
            inv = Inventory(root_id)
        except TypeError:
            inv = Inventory()

        def add_entry(file_id):
            """Create the inventory entry for file_id and add it to inv."""
            path = self.id2path(file_id)
            if path is None:
                # No path in the target tree: nothing to add.
                return
            parent_path = dirname(path)
            if parent_path == u'':
                parent_id = root_id
            else:
                parent_id = self.path2id(parent_path)

            kind = self.get_kind(file_id)
            revision_id = self.get_last_changed(file_id)

            name = basename(path)
            if kind == 'directory':
                ie = InventoryDirectory(file_id, name, parent_id)
            elif kind == 'file':
                ie = InventoryFile(file_id, name, parent_id)
            elif kind == 'symlink':
                ie = InventoryLink(file_id, name, parent_id)
            else:
                # Without this branch an unexpected kind would surface as
                # an UnboundLocalError on ``ie`` just below.
                raise BzrError('Unknown kind %r for file_id %r'
                               % (kind, file_id))
            ie.revision = revision_id

            if kind == 'directory':
                ie.text_size, ie.text_sha1 = None, None
            else:
                ie.text_size, ie.text_sha1 = self.get_size_and_sha1(file_id)
                if ie.text_size is None:
                    raise BzrError('Got a text_size of None for file_id %r' % file_id)
            inv.add(ie)

        for path, file_id in self.sorted_path_id():
            if file_id == inv.root.file_id:
                # The root entry already exists in the new inventory.
                continue
            add_entry(file_id)

        return inv
886
887
    # The inherited ``inventory`` property has to be redefined here.
    # According to the docs, a property() object does not dispatch to
    # overriding methods: it keeps using the getter function exactly as
    # it was defined when the property was created, so the parent's
    # property would only ever call the parent's _get_inventory.
    inventory = property(fget=_get_inventory)
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
893
0.5.49 by aaron.bentley at utoronto
Implemented iteration over ids
894
    def __iter__(self):
        """Yield the file_id of each entry from self.inventory.iter_entries()."""
        for _entry_path, ie in self.inventory.iter_entries():
            yield ie.file_id
0.5.49 by aaron.bentley at utoronto
Implemented iteration over ids
897
0.6.1 by Aaron Bentley
Fleshed out MockTree, fixed all test failures
898
    def sorted_path_id(self):
        """Return a sorted list of (path, file_id) pairs.

        The list holds every item of self._new_id plus, for each id
        yielded by iterating the base tree, its id2path() result paired
        with the id; ids for which id2path() returns None are left out.
        """
        entries = list(self._new_id.items())
        for file_id in self.base_tree:
            target_path = self.id2path(file_id)
            if target_path is not None:
                entries.append((target_path, file_id))
        entries.sort()
        return entries
909
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
910
def patched_file(file_patch, original):
    """Produce a file-like object with the patched version of a text

    An empty patch string yields an IterableFile over no lines at all;
    otherwise the patch is split into lines (keeping line endings) and
    applied to ``original`` via iter_patched().
    """
    from patches import iter_patched
    from iterablefile import IterableFile
    if file_patch != "":
        patch_lines = file_patch.splitlines(True)
        return IterableFile(iter_patched(original, patch_lines))
    return IterableFile(())
917