/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
1
#!/usr/bin/env python
2
"""\
3
Read in a changeset output, and process it into a Changeset object.
4
"""
5
6
import bzrlib, bzrlib.changeset
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
7
import pprint
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
8
import common
9
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
10
from bzrlib.trace import mutter
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
11
from bzrlib.errors import BzrError
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
12
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
13
class BadChangeset(Exception):
    """Base class for the errors raised while parsing a changeset."""


class MalformedHeader(BadChangeset):
    """The header section of the changeset could not be parsed."""


class MalformedPatches(BadChangeset):
    """The patch section of the changeset could not be parsed."""


class MalformedFooter(BadChangeset):
    """Error type reserved for problems in the changeset footer."""
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
17
0.5.11 by John Arbash Meinel
Working on properly representing renames.
18
def _unescape(name):
19
    """Now we want to find the filename effected.
20
    Unfortunately the filename is written out as
21
    repr(filename), which means that it surrounds
22
    the name with quotes which may be single or double
23
    (single is preferred unless there is a single quote in
24
    the filename). And some characters will be escaped.
25
26
    TODO:   There has to be some pythonic way of undo-ing the
27
            representation of a string rather than using eval.
28
    """
29
    delimiter = name[0]
30
    if name[-1] != delimiter:
31
        raise BadChangeset('Could not properly parse the'
32
                ' filename: %r' % name)
33
    # We need to handle escaped hexadecimals too.
34
    return name[1:-1].replace('\"', '"').replace("\'", "'")
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
35
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
36
class RevisionInfo(object):
    """Holds the raw fields read from a changeset for one revision.

    Every field except rev_id starts out as None and is filled in
    later, as the corresponding entries are read.
    """

    def __init__(self, rev_id):
        """Create an otherwise blank record for revision rev_id."""
        self.rev_id = rev_id
        for field in ('sha1', 'committer', 'date',
                      'timestamp', 'timezone',
                      'inventory_id', 'inventory_sha1',
                      'parents', 'message'):
            setattr(self, field, None)

    def __str__(self):
        """Pretty-print all the attributes of this record."""
        return pprint.pformat(self.__dict__)

    def as_revision(self):
        """Build a bzrlib Revision out of the collected fields.

        The message (a list of lines) is joined with newlines, and each
        entry of parents, a '<revision id>\\t<sha1>' string, becomes a
        RevisionReference appended to the new revision's parents.

        :return: The new Revision object.
        """
        from bzrlib.revision import Revision, RevisionReference

        fields = dict(
            revision_id=self.rev_id,
            committer=self.committer,
            timestamp=float(self.timestamp),
            timezone=int(self.timezone),
            inventory_id=self.inventory_id,
            inventory_sha1=self.inventory_sha1,
            message='\n'.join(self.message),
        )
        revision = Revision(**fields)

        # parents stays None (or empty) when there is nothing to add.
        for entry in self.parents or ():
            parent_id, parent_sha1 = entry.split('\t')
            revision.parents.append(
                RevisionReference(parent_id, parent_sha1))

        return revision
71
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
72
class ChangesetInfo(object):
    """This contains the meta information. Stuff that allows you to
    recreate the revision or inventory XML.
    """
    def __init__(self):
        self.committer = None
        self.date = None
        self.message = None
        self.base = None
        self.base_sha1 = None

        # A list of RevisionInfo objects
        self.revisions = []

        self.actions = []

        # The next entries are created during complete_info() and
        # other post-read functions.

        # A list of real Revision objects
        self.real_revisions = []
        self.text_ids = {} # file_id => text_id

        self.timestamp = None
        self.timezone = None

    def __str__(self):
        """Pretty-print all the attributes of this object."""
        return pprint.pformat(self.__dict__)

    def complete_info(self):
        """This makes sure that all information is properly
        split up, based on the assumptions that can be made
        when information is missing.

        Revisions without their own timestamp, message or committer
        inherit the changeset-wide value, a missing inventory_id
        defaults to the revision id, and real_revisions is rebuilt from
        the completed entries. When no base was given, it is taken from
        the first parent of the first revision.

        :raises BzrError: If a base sha1 was supplied that does not
                          match the sha1 recorded for the inferred base.
        """
        # Put in all of the guessable information.
        if not self.timestamp and self.date:
            self.timestamp, self.timezone = common.unpack_highres_date(self.date)

        self.real_revisions = []
        for rev in self.revisions:
            if rev.timestamp is None:
                if rev.date is not None:
                    rev.timestamp, rev.timezone = \
                            common.unpack_highres_date(rev.date)
                else:
                    rev.timestamp = self.timestamp
                    rev.timezone = self.timezone
            if rev.message is None and self.message:
                rev.message = self.message
            if rev.committer is None and self.committer:
                rev.committer = self.committer
            if rev.inventory_id is None:
                rev.inventory_id = rev.rev_id
            self.real_revisions.append(rev.as_revision())

        # Without any revision there is nothing to infer the base from,
        # so leave it alone instead of failing on real_revisions[0].
        if self.base is None and self.real_revisions:
            # When we don't have a base, then the real base
            # is the first parent of the first revision listed
            rev = self.real_revisions[0]
            if not rev.parents:
                # There is no base listed, and
                # the lowest revision doesn't have a parent
                # so this is probably against the empty tree
                # and thus base truly is None
                self.base = None
                self.base_sha1 = None
            else:
                first_parent = rev.parents[0]
                self.base = first_parent.revision_id
                # In general, if self.base is None, self.base_sha1 should
                # also be None
                # This is validation of input data, so it must not be an
                # assert, which would vanish when running with -O.
                if (self.base_sha1 is not None
                        and self.base_sha1 != first_parent.revision_sha1):
                    raise BzrError('Base sha1 mismatch.'
                        ' For base {%s} supplied sha1 (%s) != parent sha1 (%s)'
                        % (self.base, self.base_sha1,
                           first_parent.revision_sha1))
                self.base_sha1 = first_parent.revision_sha1

    def _get_target(self):
        """Return the id of the last revision listed, or None."""
        if len(self.real_revisions) > 0:
            return self.real_revisions[-1].revision_id
        elif len(self.revisions) > 0:
            return self.revisions[-1].rev_id
        return None

    target = property(_get_target, doc='The target revision id')
154
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
155
class ChangesetReader(object):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
156
    """This class reads in a changeset from a file, and returns
157
    a Changeset object, which can then be applied against a tree.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
158
    """
159
    def __init__(self, from_file):
        """Read and validate the changeset held in from_file.

        :param from_file: A file-like object (must have iterator support).
        """
        object.__init__(self)
        # The single line of look-ahead maintained by _next().
        self._next_line = None
        self.from_file = from_file
        self.info = ChangesetInfo()

        # The actual inventory ids are stored in the footer so that the
        # patch is easier to read for humans. The price is that the
        # whole file has to be read before a proper changeset can be
        # created, so reading and validating happen right away.
        self._read()
        self._validate()
175
176
    def _read(self):
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
177
        self._read_header()
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
178
        self._read_patches()
179
        self._read_footer()
180
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
181
    def _validate(self):
182
        """Make sure that the information read in makes sense
183
        and passes appropriate checksums.
184
        """
185
        # Fill in all the missing blanks for the revisions
186
        # and generate the real_revisions list.
187
        self.info.complete_info()
188
        self._validate_revisions()
189
190
    def _validate_revisions(self):
        """Make sure all revision entries match their checksum.

        Each real revision is serialized with pack_xml and hashed; the
        result must equal the sha1 read from the changeset. Parents
        that are themselves part of the changeset must be referenced
        with the sha1 measured here.

        :raises BzrError: On a checksum mismatch, on a revision that is
                          listed twice, or on a parent referenced with
                          the wrong checksum.
        """
        from bzrlib.xml import pack_xml
        from cStringIO import StringIO
        from bzrlib.osutils import sha_file

        # This is a mapping from each revision id to its sha hash
        rev_to_sha1 = {}

        for rev, rev_info in zip(self.info.real_revisions, self.info.revisions):
            # real_revisions is generated from revisions, so the two
            # lists are expected to line up entry by entry.
            assert rev.revision_id == rev_info.rev_id
            sio = StringIO()
            pack_xml(rev, sio)
            sio.seek(0)
            sha1 = sha_file(sio)
            if sha1 != rev_info.sha1:
                raise BzrError('Revision checksum mismatch.'
                    ' For rev_id {%s} supplied sha1 (%s) != measured (%s)'
                    % (rev.revision_id, rev_info.sha1, sha1))
            if rev.revision_id in rev_to_sha1:
                raise BzrError('Revision {%s} given twice in the list'
                        % (rev.revision_id,))
            rev_to_sha1[rev.revision_id] = sha1

        # Now that we've checked all the sha1 sums, we can make sure that
        # at least for the small list we have, all of the references are
        # valid.
        for rev in self.info.real_revisions:
            for parent in rev.parents:
                if parent.revision_id in rev_to_sha1:
                    if parent.revision_sha1 != rev_to_sha1[parent.revision_id]:
                        raise BzrError('Parent revision checksum mismatch.'
                                ' A parent was referenced with an'
                                ' incorrect checksum'
                                ': {%r} %s != %s' % (parent.revision_id,
                                            parent.revision_sha1,
                                            rev_to_sha1[parent.revision_id]))
227
228
    def _validate_references_from_branch(self, branch):
        """Now that we have a branch which should have some of the
        revisions we care about, go through and validate all of them
        that we can.

        The sha1 recorded in the changeset for the base, for every
        revision, for every parent and for every inventory is compared
        with the value the branch reports for the ids it has stored.

        :param branch: The branch to check against. Its revision_store,
                       inventory_store, get_revision_sha1() and
                       get_inventory_sha1() are used.
        :raises BzrError: If an id is referenced with two different
                          hashes, if a null revision has a hash, or if
                          a hash differs from the one in the branch.
        """
        rev_to_sha = {}
        inv_to_sha = {}

        def add_sha(d, rev_id, sha1):
            """Record sha1 for rev_id in d, rejecting contradictions."""
            if rev_id is None:
                if sha1 is not None:
                    raise BzrError('A Null revision should always'
                        ' have a null sha1 hash')
                return
            if rev_id in d:
                # This really should have been validated as part
                # of _validate_revisions but lets do it again
                if sha1 != d[rev_id]:
                    raise BzrError('** Revision %r referenced with 2 different'
                            ' sha hashes %s != %s' % (rev_id,
                                sha1, d[rev_id]))
            else:
                d[rev_id] = sha1

        add_sha(rev_to_sha, self.info.base, self.info.base_sha1)
        # All of the contained revisions were checked
        # in _validate_revisions
        checked = {}
        for rev_info in self.info.revisions:
            checked[rev_info.rev_id] = True
            add_sha(rev_to_sha, rev_info.rev_id, rev_info.sha1)

        # Pair every real revision with the entry it was built from, so
        # that the inventory of each revision is registered, not just
        # the inventory of whichever entry the previous loop ended on.
        for rev, rev_info in zip(self.info.real_revisions, self.info.revisions):
            add_sha(inv_to_sha, rev_info.inventory_id, rev_info.inventory_sha1)
            for parent in rev.parents:
                add_sha(rev_to_sha, parent.revision_id, parent.revision_sha1)

        count = 0
        missing = {}
        for rev_id, sha1 in rev_to_sha.items():
            if rev_id in branch.revision_store:
                local_sha1 = branch.get_revision_sha1(rev_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For revision id {%s}'
                            ' local: %s, cset: %s' % (rev_id, local_sha1, sha1))
                else:
                    count += 1
            elif rev_id not in checked:
                # Neither in the branch nor in the changeset itself.
                missing[rev_id] = sha1

        for inv_id, sha1 in inv_to_sha.items():
            if inv_id in branch.inventory_store:
                local_sha1 = branch.get_inventory_sha1(inv_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For inventory id {%s}'
                            ' local: %s, cset: %s' % (inv_id, local_sha1, sha1))
                else:
                    count += 1

        if len(missing) > 0:
            # I don't know if this is an error yet
            from bzrlib.trace import warning
            warning('Not all revision hashes could be validated.'
                    ' Unable to validate %d hashes' % len(missing))
        mutter('Verified %d sha hashes for the changeset.' % count)
292
293
    def _create_inventory(self, tree):
        """Construct an Inventory describing the given ChangesetTree.

        TODO: This sort of thing should probably end up part of
        ChangesetTree, but since it doesn't handle meta-information
        yet, we need to do it here. (We need the ChangesetInfo,
        specifically the text_ids)

        :param tree: The tree to describe. It is iterated for file ids
                     and asked for paths, kinds, sizes and sha1s.
        :return: The populated Inventory.
        :raises BzrError: If an entry that is not a directory has no
                          text size.
        """
        from os.path import dirname, basename
        from bzrlib.inventory import Inventory, InventoryEntry, ROOT_ID

        # TODO: deal with trees having a unique ROOT_ID
        root_id = ROOT_ID
        inv = Inventory()
        for file_id in tree:
            # The root itself never gets an entry of its own.
            if file_id == root_id:
                continue

            path = tree.id2path(file_id)
            parent_path = dirname(path)
            # NOTE(review): the test is on path, not on parent_path, so
            # top-level entries are resolved via tree.path2id('') --
            # confirm that this is what was intended.
            if path != '':
                parent_id = tree.path2id(parent_path)
            else:
                parent_id = root_id

            if file_id in self.info.text_ids:
                text_id = self.info.text_ids[file_id]
            else:
                # No text_id in the local map means the file was not
                # touched by the changeset, so the text_id of the base
                # tree is still the right one.
                text_id = tree.base_tree.inventory[file_id].text_id

            name = basename(path)
            kind = tree.get_kind(file_id)
            entry = InventoryEntry(file_id, name, kind, parent_id,
                                   text_id=text_id)
            entry.text_size, entry.text_sha1 = tree.get_size_and_sha1(file_id)
            if kind != 'directory' and entry.text_size is None:
                raise BzrError('Got a text_size of None for file_id %r' % file_id)
            inv.add(entry)
        return inv
332
333
    def _validate_inventory(self, inv):
        """At this point we should have generated the ChangesetTree,
        so build up an inventory, and make sure the hashes match.

        :param inv: The inventory built for the target revision.
        :raises BzrError: If the changeset lists no revision to check
                          against, or if the sha1 of the serialized
                          inventory differs from the recorded one.
        """
        from bzrlib.xml import pack_xml
        from cStringIO import StringIO
        from bzrlib.osutils import sha_file

        if not self.info.real_revisions:
            # Report the real problem rather than an IndexError below.
            raise BzrError('Cannot validate the inventory:'
                    ' the changeset does not list any revisions')

        # Now we should have a complete inventory entry.
        sio = StringIO()
        pack_xml(inv, sio)
        sio.seek(0)
        sha1 = sha_file(sio)
        # Target revision is the last entry in the real_revisions list
        rev = self.info.real_revisions[-1]
        if sha1 != rev.inventory_sha1:
            raise BzrError('Inventory sha hash mismatch.')
350
0.5.63 by John Arbash Meinel
Moving the validation into part of the reading.
351
        
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
352
    def get_info_tree_inv(self, branch):
        """Validate the changeset against branch and build its tree.

        The references are checked against the branch first. Then a
        ChangesetTree is created on top of the base revision tree and
        updated, and the inventory derived from it is validated.

        :param branch: The branch the changeset is to be used with.
        :return: A tuple (info, tree, inv): the meta information, the
                 ChangesetTree and its validated inventory.
        """
        self._validate_references_from_branch(branch)

        base_tree = branch.revision_tree(self.info.base)
        tree = ChangesetTree(base_tree)
        self._update_tree(tree)

        inv = self._create_inventory(tree)
        self._validate_inventory(inv)

        return (self.info, tree, inv)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
364
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
365
    def _next(self):
366
        """yield the next line, but secretly
367
        keep 1 extra line for peeking.
368
        """
369
        for line in self.from_file:
370
            last = self._next_line
371
            self._next_line = line
372
            if last is not None:
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
373
                #mutter('yielding line: %r' % last)
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
374
                yield last
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
375
        last = self._next_line
376
        self._next_line = None
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
377
        #mutter('yielding line: %r' % last)
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
378
        yield last
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
379
380
    def _read_header(self):
        """Find the bzr header, check it, and read the entries after it.

        Lines are skipped until one is found that starts with '#' and,
        once stripped, begins with common.header_str. That line and the
        ones after it must match the lines of common.get_header(). The
        entries following the header are passed to _handle_next() until
        a blank line is reached.

        :raises MalformedHeader: If no header is found or it does not
                                 match the expected lines.
        """
        expected = common.get_header()
        in_header = False
        for raw in self._next():
            if not in_header:
                # Still looking for the line that opens the header.
                if raw[:1] != '#' or raw[-1:] != '\n':
                    continue
                text = raw[1:-1].strip()
                if not text.startswith(common.header_str):
                    continue
                if text != expected[0]:
                    raise MalformedHeader('Found what looks like'
                            ' a header, but did not match')
                in_header = True
                expected.pop(0)
                continue

            # not all mailers will keep trailing whitespace
            if raw == '#\n':
                raw = '# \n'
            well_formed = (raw[:2] == '# ' and raw[-1:] == '\n'
                           and raw[2:-1] == expected[0])
            if not well_formed:
                raise MalformedHeader('Found a header, but it'
                    ' was improperly formatted')
            expected.pop(0) # This line has been consumed.
            if not expected:
                break # The complete header has been seen.
        else:
            raise MalformedHeader('Did not find an opening header')

        for raw in self._next():
            # A blank line, one without even a '#', ends the header
            # entries.
            if raw == '\n':
                break
            self._handle_next(raw)
414
415
    def _read_next_entry(self, line, indent=1):
416
        """Read in a key-value pair
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
417
        """
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
418
        if line[:1] != '#':
419
            raise MalformedHeader('Bzr header did not start with #')
420
        line = line[1:-1] # Remove the '#' and '\n'
421
        if line[:indent] == ' '*indent:
422
            line = line[indent:]
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
423
        if not line:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
424
            return None, None# Ignore blank lines
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
425
426
        loc = line.find(': ')
427
        if loc != -1:
428
            key = line[:loc]
429
            value = line[loc+2:]
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
430
            if not value:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
431
                value = self._read_many(indent=indent+3)
432
        elif line[-1:] == ':':
433
            key = line[:-1]
434
            value = self._read_many(indent=indent+3)
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
435
        else:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
436
            raise MalformedHeader('While looking for key: value pairs,'
437
                    ' did not find the colon %r' % (line))
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
438
439
        key = key.replace(' ', '_')
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
440
        #mutter('found %s: %s' % (key, value))
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
441
        return key, value
442
443
    def _handle_next(self, line):
444
        key, value = self._read_next_entry(line, indent=1)
445
        if key is None:
446
            return
447
448
        if key == 'revision':
449
            self._read_revision(value)
450
        elif hasattr(self.info, key):
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
451
            if getattr(self.info, key) is None:
452
                setattr(self.info, key, value)
453
            else:
454
                raise MalformedHeader('Duplicated Key: %s' % key)
455
        else:
456
            # What do we do with a key we don't recognize
457
            raise MalformedHeader('Unknown Key: %s' % key)
458
        
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
459
    def _read_many(self, indent):
        """Collect the values of an entry that spans several lines.

        If a line ends with no entry, it should be followed by multiple
        lines of values. The list ends at the first upcoming line that does
        not start with ``'#'`` followed by ``indent`` spaces.

        :param indent: Number of spaces expected after the leading ``'#'``.
        :return: A list with one string per value line; the prefix and the
            final character of each line are removed.
        """
        prefix = '#' + (' ' * indent)
        width = len(prefix)

        def another_value_follows():
            # Peek at the line the reader will hand out next.
            upcoming = self._next_line
            return upcoming is not None and upcoming[:width] == prefix

        collected = []
        if not another_value_follows():
            return collected

        for raw in self._next():
            collected.append(raw[width:-1])
            if not another_value_follows():
                break
        return collected
477
478
    def _read_one_patch(self):
        """Read in one patch, return the complete patch, along with
        whether another patch follows.

        A patch starts with a bzr meta line beginning with ``'***'`` and
        runs until the next such line, a line starting with ``'#'``, or the
        end of the input.

        :return: action, lines, do_continue
        :raise MalformedPatches: If the first line read does not start
            with ``'***'``.
        """
        def patches_are_over():
            # Either the input is exhausted or a '#' line is coming up.
            upcoming = self._next_line
            return upcoming is None or upcoming[:1] == '#'

        # Peek and see if there are no patches
        if patches_are_over():
            return None, [], False

        header = self._next().next()
        if header[:3] != '***':
            raise MalformedPatches('The first line of all patches'
                ' should be a bzr meta line "***"'
                ': %r' % header)
        # Drop the leading '*** ' and the final character of the line.
        action = header[4:-1]

        if patches_are_over():
            return action, [], False

        body = []
        for text in self._next():
            body.append(text)

            upcoming = self._next_line
            if upcoming is not None and upcoming[:3] == '***':
                # Another patch header follows immediately.
                return action, body, True
            if patches_are_over():
                return action, body, False
        return action, body, False
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
507
            
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
508
    def _read_patches(self):
        """Read every patch and append it to ``self.info.actions``.

        Each stored entry is an ``(action, lines)`` tuple. Reading stops as
        soon as ``_read_one_patch`` reports that no further patch follows.
        """
        while True:
            action, lines, more = self._read_one_patch()
            if action is not None:
                self.info.actions.append((action, lines))
            if not more:
                break
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
514
515
    def _read_revision(self, rev_id):
        """Revision entries have extra information associated.

        Reads the lines that follow a ``revision`` entry, stores each
        key/value pair on a new ``RevisionInfo`` and appends that object to
        ``self.info.revisions``. Reading stops once the upcoming line no
        longer starts with ``'#'`` followed by four spaces.

        :param rev_id: The revision id the following entries belong to.
        :raise MalformedHeader: If a key is not an attribute of
            ``RevisionInfo`` or has already been given a value.
        """
        rev_info = RevisionInfo(rev_id)
        start = '#    '
        for line in self._next():
            key, value = self._read_next_entry(line, indent=4)
            # A None key means there is nothing to store for this line
            # (_handle_next treats it the same way). Passing None on to
            # hasattr() would raise TypeError, so skip the bookkeeping but
            # still fall through to the end-of-block check below.
            if key is not None:
                if not hasattr(rev_info, key):
                    # What do we do with a key we don't recognize
                    raise MalformedHeader('Unknown Key: %s' % key)
                if getattr(rev_info, key) is not None:
                    raise MalformedHeader('Duplicated Key: %s' % key)
                setattr(rev_info, key, value)

            if self._next_line is None or self._next_line[:len(start)] != start:
                break

        self.info.revisions.append(rev_info)
537
538
    def _read_footer(self):
        """Read the rest of the meta information.

        Every line is passed to ``_handle_next``. Reading stops when the
        input is exhausted or the upcoming line does not start with
        ``'#'``.
        """
        for line in self._next():
            self._handle_next(line)
            upcoming = self._next_line
            if upcoming is not None and upcoming[:1] == '#':
                # Still inside the footer; keep going.
                continue
            break
548
549
    def _update_tree(self, tree):
        """This fills out a ChangesetTree based on the information
        that was read in.

        Each entry of ``self.info.actions`` is an ``(action_line, lines)``
        pair. The action line has the form ``<action> <kind> <extra>``,
        where ``<extra>`` is a tab separated list whose layout depends on
        the action. Text ids found (or guessed) along the way are recorded
        in ``self.info.text_ids``.

        :param tree: A ChangesetTree to update with the new information.
        :raise BzrError: If an action line is malformed, or a text id
            conflicts with one that was already recorded.
        """
        from common import decode

        def get_text_id(info, file_id):
            """Work out, validate and record the text id for file_id.

            :param info: The raw ``text-id:...`` field, or None if the
                action line did not carry one.
            :return: The text id for file_id.
            """
            if info is not None:
                if info[:8] != 'text-id:':
                    raise BzrError("Text ids should be prefixed with 'text-id:'"
                        ': %r' % info)
                text_id = decode(info[8:])
            elif self.info.text_ids.has_key(file_id):
                return self.info.text_ids[file_id]
            else:
                # If text_id was not explicitly supplied
                # then it should be whatever we would guess it to be
                # based on the base revision, and what we know about
                # the target revision
                text_id = common.guess_text_id(tree.base_tree,
                        file_id, self.info.base, True)
            if (self.info.text_ids.has_key(file_id)
                    and self.info.text_ids[file_id] != text_id):
                raise BzrError('Mismatched text_ids for file_id {%s}'
                        ': %s != %s' % (file_id,
                                        self.info.text_ids[file_id],
                                        text_id))
            # The Info object makes more sense for where
            # to store something like text_id, since it is
            # what will be used to generate stored inventory
            # entries.
            # The problem is that we are parsing the
            # ChangesetTree right now, but we are really modifying
            # the ChangesetInfo object
            self.info.text_ids[file_id] = text_id
            return text_id

        def renamed(kind, extra, lines):
            """Handle ``renamed``: old path, new path, optional text id."""
            info = extra.split('\t')
            if len(info) < 2:
                raise BzrError('renamed action lines need both a from and to'
                        ': %r' % extra)
            old_path = decode(info[0])
            if info[1][:3] == '=> ':
                new_path = decode(info[1][3:])
            else:
                # No '=> ' marker: the whole field is the path. Slicing
                # here as well would cut off its first three characters.
                new_path = decode(info[1])

            file_id = tree.path2id(new_path)
            # Called for its side effect of validating and recording
            # the text id.
            if len(info) > 2:
                get_text_id(info[2], file_id)
            else:
                get_text_id(None, file_id)
            tree.note_rename(old_path, new_path)
            if lines:
                tree.note_patch(new_path, ''.join(lines))

        def removed(kind, extra, lines):
            """Handle ``removed``: the path only."""
            info = extra.split('\t')
            if len(info) > 1:
                # TODO: in the future we might allow file ids to be
                # given for removed entries
                raise BzrError('removed action lines should only have the path'
                        ': %r' % extra)
            path = decode(info[0])
            tree.note_deletion(path)

        def added(kind, extra, lines):
            """Handle ``added``: path, file id, optional text id."""
            info = extra.split('\t')
            if len(info) <= 1:
                raise BzrError('add action lines require the path and file id'
                        ': %r' % extra)
            elif len(info) > 3:
                raise BzrError('add action lines have fewer than 3 entries.'
                        ': %r' % extra)
            path = decode(info[0])
            if info[1][:8] != 'file-id:':
                raise BzrError('The file-id should follow the path for an add'
                        ': %r' % extra)
            file_id = decode(info[1][8:])

            # Called for its side effect of validating and recording
            # the text id.
            if len(info) > 2:
                get_text_id(info[2], file_id)
            else:
                get_text_id(None, file_id)
            tree.note_id(file_id, path, kind)
            tree.note_patch(path, ''.join(lines))

        def modified(kind, extra, lines):
            """Handle ``modified``: path, optional text id."""
            info = extra.split('\t')
            if len(info) < 1:
                raise BzrError('modified action lines have at least'
                        ' the path in them: %r' % extra)
            path = decode(info[0])

            file_id = tree.path2id(path)
            # Called for its side effect of validating and recording
            # the text id.
            if len(info) > 1:
                get_text_id(info[1], file_id)
            else:
                get_text_id(None, file_id)
            tree.note_patch(path, ''.join(lines))

        valid_actions = {
            'renamed':renamed,
            'removed':removed,
            'added':added,
            'modified':modified
        }
        for action_line, lines in self.info.actions:
            # Split '<action> <kind> <extra>' on the first two spaces only;
            # <extra> may itself contain spaces.
            first = action_line.find(' ')
            if first == -1:
                raise BzrError('Bogus action line'
                        ' (no opening space): %r' % action_line)
            second = action_line.find(' ', first+1)
            if second == -1:
                raise BzrError('Bogus action line'
                        ' (missing second space): %r' % action_line)
            action = action_line[:first]
            kind = action_line[first+1:second]
            if kind not in ('file', 'directory'):
                raise BzrError('Bogus action line'
                        ' (invalid object kind %r): %r' % (kind, action_line))
            extra = action_line[second+1:]

            if action not in valid_actions:
                raise BzrError('Bogus action line'
                        ' (unrecognized action): %r' % action_line)
            valid_actions[action](kind, extra, lines)
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
680
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
681
def read_changeset(from_file, branch):
    """Read in a changeset from an iterable object (such as a file object)

    :param from_file: A file-like object to read the changeset information.
    :param branch: This will be used to build the changeset tree, it needs
                   to contain the base of the changeset. (Which you probably
                   won't know about until after the changeset is parsed.)
    :return: Whatever ``ChangesetReader.get_info_tree_inv(branch)`` returns.
    """
    return ChangesetReader(from_file).get_info_tree_inv(branch)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
691
692
class ChangesetTree:
    """A view of the target tree: a base tree plus the recorded changes.

    Renames, additions, deletions and patches are registered through the
    ``note_*`` methods; the lookup methods then answer questions about the
    target tree by combining that information with ``base_tree``.
    """

    def __init__(self, base_tree=None):
        """Create an empty ChangesetTree on top of base_tree."""
        self.base_tree = base_tree
        self._renamed = {} # Mapping from new_path => old_path
        self._renamed_r = {} # old_path => new_path
        self._new_id = {} # new_path => new_id
        self._new_id_r = {} # new_id => new_path
        self._kinds = {} # new_id => kind
        self.patches = {} # new_path => patch text
        self.deleted = [] # paths (in the base tree) that were removed
        # When true, old_contents_id looks files up by id rather than by path
        self.contents_by_id = True

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def note_rename(self, old_path, new_path):
        """A file/directory has been renamed from old_path => new_path

        :raise AssertionError: If new_path is already the target of a
            rename, or old_path has already been renamed.
        """
        # _renamed is keyed by the new path and _renamed_r by the old one,
        # so these are the keys that must not be present yet. Raised
        # explicitly so the check also runs under ``python -O``.
        if new_path in self._renamed:
            raise AssertionError('%r is already the target of a rename'
                                 % (new_path,))
        if old_path in self._renamed_r:
            raise AssertionError('%r has already been renamed'
                                 % (old_path,))
        self._renamed[new_path] = old_path
        self._renamed_r[old_path] = new_path

    def note_id(self, new_id, new_path, kind='file'):
        """Files that don't exist in base need a new id."""
        self._new_id[new_path] = new_id
        self._new_id_r[new_id] = new_path
        self._kinds[new_id] = kind

    def note_patch(self, new_path, patch):
        """There is a patch for a given filename."""
        self.patches[new_path] = patch

    def note_deletion(self, old_path):
        """The file at old_path has been deleted."""
        self.deleted.append(old_path)

    def old_path(self, new_path):
        """Get the old_path (path in the base_tree) for the file at new_path

        :return: The path in the base tree, or None if it cannot be
            mapped back.
        """
        import os.path
        old_path = self._renamed.get(new_path)
        if old_path is not None:
            return old_path
        dirname,basename = os.path.split(new_path)
        # dirname is not '' doesn't work, because
        # dirname may be a unicode entry, and 'is'
        # requires the objects to be identical
        if dirname != '':
            # The entry itself was not renamed, but a parent directory
            # may have been.
            old_dir = self.old_path(dirname)
            if old_dir is None:
                old_path = None
            else:
                old_path = os.path.join(old_dir, basename)
        else:
            old_path = new_path
        #If the new path wasn't in renamed, the old one shouldn't be in
        #renamed_r
        if old_path in self._renamed_r:
            return None
        return old_path

    def new_path(self, old_path):
        """Get the new_path (path in the target_tree) for the file at old_path
        in the base tree.

        :return: The path in the target tree, or None if it cannot be
            mapped forward.
        """
        import os.path
        new_path = self._renamed_r.get(old_path)
        if new_path is not None:
            return new_path
        dirname,basename = os.path.split(old_path)
        # Compare by value: dirname may be a unicode object, and an
        # identity test against '' is then true even for an empty dirname.
        if dirname != '':
            # The entry itself was not renamed, but a parent directory
            # may have been.
            new_dir = self.new_path(dirname)
            if new_dir is None:
                new_path = None
            else:
                new_path = os.path.join(new_dir, basename)
        else:
            new_path = old_path
        #If the old path wasn't in renamed_r, the new one shouldn't be in
        #renamed
        if new_path in self._renamed:
            return None
        return new_path

    def path2id(self, path):
        """Return the id of the file present at path in the target tree."""
        file_id = self._new_id.get(path)
        if file_id is not None:
            return file_id
        old_path = self.old_path(path)
        if old_path is None:
            return None
        if old_path in self.deleted:
            return None
        # Use the base tree's own path2id when it has one, otherwise fall
        # back to its inventory.
        if hasattr(self.base_tree, 'path2id'):
            return self.base_tree.path2id(old_path)
        else:
            return self.base_tree.inventory.path2id(old_path)

    def id2path(self, file_id):
        """Return the new path in the target tree of the file with id file_id"""
        path = self._new_id_r.get(file_id)
        if path is not None:
            return path
        old_path = self.base_tree.id2path(file_id)
        if old_path is None:
            return None
        if old_path in self.deleted:
            return None
        return self.new_path(old_path)

    def old_contents_id(self, file_id):
        """Return the id in the base_tree for the given file_id,
        or None if the file did not exist in base.

        FIXME:  Something doesn't seem right here. It seems like this function
                should always either return None or file_id. Even if
                you are doing the by-path lookup, you are doing a
                id2path lookup, just to do the reverse path2id lookup.
        """
        if self.contents_by_id:
            if self.base_tree.has_id(file_id):
                return file_id
            else:
                return None
        new_path = self.id2path(file_id)
        return self.base_tree.path2id(new_path)

    def get_file(self, file_id):
        """Return a file-like object containing the new contents of the
        file given by file_id.

        If no patch is recorded for the file, the base tree's file (or
        None when there is no base file) is returned unchanged.

        TODO:   It might be nice if this actually generated an entry
                in the text-store, so that the file contents would
                then be cached.
        """
        base_id = self.old_contents_id(file_id)
        if base_id is not None:
            patch_original = self.base_tree.get_file(base_id)
        else:
            patch_original = None
        file_patch = self.patches.get(self.id2path(file_id))
        if file_patch is None:
            return patch_original
        return patched_file(file_patch, patch_original)

    def get_kind(self, file_id):
        """Return the kind recorded for file_id, falling back to the kind
        stored in the base tree's inventory.
        """
        if file_id in self._kinds:
            return self._kinds[file_id]
        return self.base_tree.inventory[file_id].kind

    def get_size_and_sha1(self, file_id):
        """Return the size and sha1 hash of the given file id.
        If the file was not locally modified, this is extracted
        from the base_tree. Rather than re-reading the file.

        :return: (size, sha1), or (None, None) if the file has no path in
            the target tree.
        """
        from bzrlib.osutils import sha_string

        new_path = self.id2path(file_id)
        if new_path is None:
            return None, None
        if new_path not in self.patches:
            # If the entry does not have a patch, then the
            # contents must be the same as in the base_tree
            ie = self.base_tree.inventory[file_id]
            if ie.text_size is None:
                return ie.text_size, ie.text_sha1
            return int(ie.text_size), ie.text_sha1
        content = self.get_file(file_id).read()
        return len(content), sha_string(content)

    def __iter__(self):
        """Yield the newly added ids, then every base tree id that still
        has a path in the target tree.
        """
        for file_id in self._new_id_r:
            yield file_id
        for path, entry in self.base_tree.inventory.iter_entries():
            if self.id2path(entry.file_id) is None:
                continue
            yield entry.file_id
0.5.49 by aaron.bentley at utoronto
Implemented iteration over ids
873
874
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
875
def patched_file(file_patch, original):
    """Apply file_patch to original and return the patched contents.

    The original contents are written to a file in a temporary directory,
    ``bzrlib.patch.patch`` is run on it, and the result is read back into
    memory. The temporary directory is always removed.

    :param file_patch: The patch to apply, passed straight to
        ``bzrlib.patch.patch``.
    :param original: A file-like object holding the original contents, or
        None to patch against an empty file.
    :return: A StringIO positioned at the start of the patched contents.
    :raise AssertionError: If the patch call does not return 0.
    """
    from bzrlib.patch import patch
    from tempfile import mkdtemp
    from shutil import rmtree
    from StringIO import StringIO
    from bzrlib.osutils import pumpfile
    import os.path
    temp_dir = mkdtemp()
    try:
        original_path = os.path.join(temp_dir, "originalfile")
        temp_original = open(original_path, "wb")
        try:
            if original is not None:
                pumpfile(original, temp_original)
        finally:
            # Close even if copying fails, so the handle is not leaked.
            temp_original.close()
        patched_path = os.path.join(temp_dir, "patchfile")
        # Call patch outside of an assert statement: asserts are removed
        # under ``python -O``, which would skip applying the patch.
        status = patch(file_patch, original_path, patched_path)
        if status != 0:
            raise AssertionError('patch returned %r' % (status,))
        result = StringIO()
        temp_patched = open(patched_path, "rb")
        try:
            pumpfile(temp_patched, result)
        finally:
            temp_patched.close()
        result.seek(0,0)

    finally:
        rmtree(temp_dir)

    return result
901