/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
1
#!/usr/bin/env python
2
"""\
3
Read in a changeset output, and process it into a Changeset object.
4
"""
5
6
import bzrlib, bzrlib.changeset
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
7
import pprint
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
8
import common
9
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
10
from bzrlib.trace import mutter
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
11
from bzrlib.errors import BzrError
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
12
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
13
class BadChangeset(Exception):
    """Root of the changeset parsing errors defined in this module."""


class MalformedHeader(BadChangeset):
    """Raised when the changeset header cannot be parsed."""


class MalformedPatches(BadChangeset):
    """Raised when the patch section of a changeset cannot be parsed."""


class MalformedFooter(BadChangeset):
    """Changeset error category reserved for problems in the footer."""
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
17
0.5.11 by John Arbash Meinel
Working on properly representing renames.
18
def _unescape(name):
19
    """Now we want to find the filename effected.
20
    Unfortunately the filename is written out as
21
    repr(filename), which means that it surrounds
22
    the name with quotes which may be single or double
23
    (single is preferred unless there is a single quote in
24
    the filename). And some characters will be escaped.
25
26
    TODO:   There has to be some pythonic way of undo-ing the
27
            representation of a string rather than using eval.
28
    """
29
    delimiter = name[0]
30
    if name[-1] != delimiter:
31
        raise BadChangeset('Could not properly parse the'
32
                ' filename: %r' % name)
33
    # We need to handle escaped hexadecimals too.
34
    return name[1:-1].replace('\"', '"').replace("\'", "'")
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
35
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
36
class RevisionInfo(object):
    """Gets filled out for each revision object that is read.

    Every field except rev_id starts out as None and is assigned later
    by whoever parses the changeset.
    """
    def __init__(self, rev_id):
        self.rev_id = rev_id
        self.sha1 = None
        self.committer = None
        self.date = None
        self.timestamp = None
        self.timezone = None
        self.inventory_id = None
        self.inventory_sha1 = None

        self.parents = None
        self.message = None

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def _message_text(self):
        """Return the commit message as one string.

        A header value is either a plain string (``key: value`` on one
        line) or a list of lines (indented continuation lines).  Joining
        a plain string with newlines would split it into single
        characters, so only sequences of lines are joined.  Any other
        value, including None, is returned unchanged.
        """
        if isinstance(self.message, (list, tuple)):
            return '\n'.join(self.message)
        return self.message

    def as_revision(self):
        """Build a bzrlib Revision from the collected fields.

        timestamp and timezone are converted with float() and int(), so
        they must be set before this is called.  Each entry of
        self.parents is expected to be 'rev_id<TAB>sha1' and becomes a
        RevisionReference on the returned revision.

        :return: The new Revision object.
        """
        from bzrlib.revision import Revision, RevisionReference
        rev = Revision(revision_id=self.rev_id,
            committer=self.committer,
            timestamp=float(self.timestamp),
            timezone=int(self.timezone),
            inventory_id=self.inventory_id,
            inventory_sha1=self.inventory_sha1,
            message=self._message_text())

        # parents stays None when the revision lists no parents
        for parent in self.parents or ():
            rev_id, sha1 = parent.split('\t')
            rev.parents.append(RevisionReference(rev_id, sha1))

        return rev
71
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
72
class ChangesetInfo(object):
    """This contains the meta information. Stuff that allows you to
    recreate the revision or inventory XML.
    """
    def __init__(self):
        self.committer = None
        self.date = None
        self.message = None
        self.base = None
        self.base_sha1 = None

        # A list of RevisionInfo objects
        self.revisions = []

        self.actions = []

        # The next entries are created during complete_info() and
        # other post-read functions.

        # A list of real Revision objects
        self.real_revisions = []
        self.text_ids = {} # file_id => text_id

        self.timestamp = None
        self.timezone = None

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def complete_info(self):
        """Fill in everything that can be derived from what was read.

        Changeset-wide values (timestamp/timezone, message, committer)
        are copied onto revisions that did not supply their own, the
        real_revisions list is rebuilt from self.revisions, and a
        missing base is taken from the first parent of the first
        revision.

        :raises BzrError: If a base sha1 was supplied that differs from
            the sha1 recorded for the derived base revision.
        """
        # Put in all of the guessable information.
        if not self.timestamp and self.date:
            self.timestamp, self.timezone = common.unpack_highres_date(self.date)

        self.real_revisions = []
        for rev in self.revisions:
            if rev.timestamp is None:
                if rev.date is not None:
                    rev.timestamp, rev.timezone = \
                            common.unpack_highres_date(rev.date)
                else:
                    rev.timestamp = self.timestamp
                    rev.timezone = self.timezone
            if rev.message is None and self.message:
                rev.message = self.message
            if rev.committer is None and self.committer:
                rev.committer = self.committer
            if rev.inventory_id is None:
                rev.inventory_id = rev.rev_id
            self.real_revisions.append(rev.as_revision())

        # Without any revision there is nothing to derive a base from;
        # indexing real_revisions[0] would raise IndexError.
        if self.base is None and self.real_revisions:
            # When we don't have a base, then the real base
            # is the first parent of the first revision listed
            rev = self.real_revisions[0]
            if not rev.parents:
                # There is no base listed, and
                # the lowest revision doesn't have a parent
                # so this is probably against the empty tree
                # and thus base truly is None
                self.base = None
                self.base_sha1 = None
            else:
                first_parent = rev.parents[0]
                # In general, if self.base is None, self.base_sha1 should
                # also be None.  Raise instead of assert so the check
                # still runs under python -O.
                if (self.base_sha1 is not None
                        and self.base_sha1 != first_parent.revision_sha1):
                    raise BzrError('Base revision checksum mismatch.'
                        ' For base {%s} supplied sha1 (%s)'
                        ' != parent reference sha1 (%s)'
                        % (first_parent.revision_id, self.base_sha1,
                           first_parent.revision_sha1))
                self.base = first_parent.revision_id
                self.base_sha1 = first_parent.revision_sha1

    def _get_target(self):
        """Return the id of the last revision, or None if there is none.

        The real Revision objects are preferred; before complete_info()
        has built them the raw RevisionInfo list is used instead.
        """
        if len(self.real_revisions) > 0:
            return self.real_revisions[-1].revision_id
        elif len(self.revisions) > 0:
            return self.revisions[-1].rev_id
        return None

    target = property(_get_target, doc='The target revision id')
154
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
155
class ChangesetReader(object):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
156
    """This class reads in a changeset from a file, and returns
157
    a Changeset object, which can then be applied against a tree.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
158
    """
159
    def __init__(self, from_file):
        """Read in the changeset from the file.

        Construction does all of the work: the whole input is read and
        then validated before __init__ returns.

        :param from_file: A file-like object (must have iterator support).
        """
        object.__init__(self)
        # Single line of lookahead maintained by _next(); None means
        # nothing has been peeked yet.
        self._next_line = None
        self.from_file = from_file

        self.info = ChangesetInfo()
        # The actual inventory ids live in the footer so that the patch
        # stays easy for humans to read.  The price is that everything
        # must be read before a proper changeset can be created.
        self._read()
        self._validate()
175
176
    def _read(self):
        """Read the three sections in file order: header, patches, footer."""
        sections = (self._read_header, self._read_patches, self._read_footer)
        for read_section in sections:
            read_section()
180
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
181
    def _validate(self):
        """Check that what was read is consistent and passes its checksums.

        complete_info() runs first because it fills in the missing
        blanks for the revisions and generates the real_revisions list
        that the checksum validation works on.
        """
        info = self.info
        info.complete_info()
        self._validate_revisions()
189
190
    def _validate_revisions(self):
        """Make sure all revision entries match their checksum.

        Every real revision is serialised with pack_xml and hashed with
        sha_file; the result must equal the sha1 read from the
        changeset.  Afterwards each parent reference that points at a
        revision contained in this changeset must carry that revision's
        measured sha1.

        :raises BzrError: On an id mismatch between the paired lists, a
            checksum mismatch, a revision listed twice, or a parent
            referenced with the wrong checksum.
        """
        from bzrlib.xml import pack_xml
        from cStringIO import StringIO
        from bzrlib.osutils import sha_file

        # This is a mapping from each revision id to its sha hash
        rev_to_sha1 = {}

        for rev, rev_info in zip(self.info.real_revisions, self.info.revisions):
            # Raise instead of assert so the check survives python -O.
            if rev.revision_id != rev_info.rev_id:
                raise BzrError('Revision id mismatch.'
                    ' Expected {%s} but built {%s}'
                    % (rev_info.rev_id, rev.revision_id))
            sio = StringIO()
            pack_xml(rev, sio)
            sio.seek(0)
            sha1 = sha_file(sio)
            if sha1 != rev_info.sha1:
                raise BzrError('Revision checksum mismatch.'
                    ' For rev_id {%s} supplied sha1 (%s) != measured (%s)'
                    % (rev.revision_id, rev_info.sha1, sha1))
            if rev.revision_id in rev_to_sha1:
                raise BzrError('Revision {%s} given twice in the list'
                        % (rev.revision_id,))
            rev_to_sha1[rev.revision_id] = sha1

        # Now that we've checked all the sha1 sums, we can make sure that
        # at least for the small list we have, all of the references are
        # valid.
        for rev in self.info.real_revisions:
            for parent in rev.parents:
                if parent.revision_id in rev_to_sha1:
                    if parent.revision_sha1 != rev_to_sha1[parent.revision_id]:
                        raise BzrError('Parent revision checksum mismatch.'
                                ' A parent was referenced with an'
                                ' incorrect checksum'
                                ': {%r} %s != %s' % (parent.revision_id,
                                            parent.revision_sha1,
                                            rev_to_sha1[parent.revision_id]))
227
228
    def _validate_references_from_branch(self, branch):
        """Validate every hash we can against what the branch already has.

        Collects the sha1 given for the base, for each contained
        revision, for each parent reference and for each revision's
        inventory.  Any id that is present in the branch's
        revision_store / inventory_store must have the same sha1 there.
        Referenced revisions that are neither in the branch nor in the
        changeset cannot be checked and only produce a warning.

        :param branch: The branch to compare against.
        :raises BzrError: If one id is referenced with two different
            hashes, a null id carries a hash, or a hash disagrees with
            the branch.
        """
        rev_to_sha = {}
        inv_to_sha = {}
        def add_sha(d, rev_id, sha1):
            # Record sha1 for rev_id in d, refusing conflicting values.
            if rev_id is None:
                if sha1 is not None:
                    raise BzrError('A Null revision should always'
                        ' have a null sha1 hash')
                return
            if rev_id in d:
                # This really should have been validated as part
                # of _validate_revisions but lets do it again
                if sha1 != d[rev_id]:
                    raise BzrError('** Revision %r referenced with 2 different'
                            ' sha hashes %s != %s' % (rev_id,
                                sha1, d[rev_id]))
            else:
                d[rev_id] = sha1

        add_sha(rev_to_sha, self.info.base, self.info.base_sha1)
        # All of the contained revisions were checked
        # in _validate_revisions
        checked = {}
        for rev_info in self.info.revisions:
            checked[rev_info.rev_id] = True
            add_sha(rev_to_sha, rev_info.rev_id, rev_info.sha1)

        for rev in self.info.real_revisions:
            # Use the revision being iterated; reading the leftover
            # rev_info from the loop above would register only the last
            # revision's inventory, once per revision.
            add_sha(inv_to_sha, rev.inventory_id, rev.inventory_sha1)
            for parent in rev.parents:
                add_sha(rev_to_sha, parent.revision_id, parent.revision_sha1)

        count = 0
        missing = {}
        for rev_id, sha1 in rev_to_sha.items():
            if rev_id in branch.revision_store:
                local_sha1 = branch.get_revision_sha1(rev_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For revision id {%s}'
                            ' local: %s, cset: %s' % (rev_id, local_sha1, sha1))
                else:
                    count += 1
            elif rev_id not in checked:
                missing[rev_id] = sha1

        for inv_id, sha1 in inv_to_sha.items():
            if inv_id in branch.inventory_store:
                local_sha1 = branch.get_inventory_sha1(inv_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For inventory id {%s}'
                            ' local: %s, cset: %s' % (inv_id, local_sha1, sha1))
                else:
                    count += 1

        if len(missing) > 0:
            # I don't know if this is an error yet
            from bzrlib.trace import warning
            warning('Not all revision hashes could be validated.'
                    ' Unable to validate %d hashes' % len(missing))
        mutter('Verified %d sha hashes for the changeset.' % count)
292
293
    def _create_inventory(self, tree):
        """Build up the inventory entry for the ChangesetTree.

        Every file id in the tree except the root becomes an
        InventoryEntry carrying the size and sha1 reported by the tree.

        TODO: This sort of thing should probably end up part of
        ChangesetTree, but since it doesn't handle meta-information
        yet, we need to do it here. (We need the ChangesetInfo,
        specifically the text_ids)

        :param tree: The tree to describe; it is iterated for file ids
            and queried through id2path, path2id, get_kind,
            get_size_and_sha1 and base_tree.
        :return: The new Inventory.
        :raises BzrError: If the tree reports a text size of None.
        """
        from os.path import dirname, basename
        from bzrlib.inventory import Inventory, InventoryEntry, ROOT_ID

        # TODO: deal with trees having a unique ROOT_ID
        root_id = ROOT_ID
        inv = Inventory()
        for file_id in tree:
            if file_id == root_id:
                continue
            path = tree.id2path(file_id)
            parent_path = dirname(path)
            # An entry whose directory part is empty sits directly under
            # the root.  Testing path itself could never match here,
            # because the root entry was skipped above.
            if parent_path == '':
                parent_id = root_id
            else:
                parent_id = tree.path2id(parent_path)

            if file_id in self.info.text_ids:
                text_id = self.info.text_ids[file_id]
            else:
                # If we don't have the text_id in the local map
                # that means the file didn't exist in the changeset
                # so we just use the old text_id.
                text_id = tree.base_tree.inventory[file_id].text_id
            name = basename(path)
            kind = tree.get_kind(file_id)
            ie = InventoryEntry(file_id, name, kind, parent_id, text_id=text_id)
            ie.text_size, ie.text_sha1 = tree.get_size_and_sha1(file_id)
            if ie.text_size is None:
                raise BzrError('Got a text_size of None for file_id %r' % file_id)
            inv.add(ie)
        return inv
332
333
    def _validate_inventory(self, inv):
        """Check the generated inventory against the target revision.

        The inventory is serialised with pack_xml and hashed; the hash
        has to equal the inventory_sha1 of the last entry in
        real_revisions, which is the target revision.

        :param inv: The inventory built for the ChangesetTree.
        :raises BzrError: If the two hashes differ.
        """
        from bzrlib.xml import pack_xml
        from cStringIO import StringIO
        from bzrlib.osutils import sha_file, pumpfile

        # Serialise into memory, then rewind so the hash covers it all.
        buf = StringIO()
        pack_xml(inv, buf)
        buf.seek(0)
        measured_sha1 = sha_file(buf)

        target_rev = self.info.real_revisions[-1]
        if measured_sha1 == target_rev.inventory_sha1:
            return
        raise BzrError('Inventory sha hash mismatch.')
350
0.5.63 by John Arbash Meinel
Moving the validation into part of the reading.
351
        
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
352
    def get_info_tree_inv(self, branch):
        """Produce the meta information, changeset tree and inventory.

        The references are validated against branch first.  A
        ChangesetTree is then created on top of the branch's tree for
        the base revision and updated, and the inventory built from it
        is validated before anything is returned.

        :param branch: The branch the changeset is read against.
        :return: A tuple (info, tree, inv).
        """
        self._validate_references_from_branch(branch)

        base_tree = branch.revision_tree(self.info.base)
        tree = ChangesetTree(base_tree)
        self._update_tree(tree)

        inv = self._create_inventory(tree)
        self._validate_inventory(inv)

        return self.info, tree, inv
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
364
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
365
    def _next(self):
366
        """yield the next line, but secretly
367
        keep 1 extra line for peeking.
368
        """
369
        for line in self.from_file:
370
            last = self._next_line
371
            self._next_line = line
372
            if last is not None:
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
373
                #mutter('yielding line: %r' % last)
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
374
                yield last
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
375
        last = self._next_line
376
        self._next_line = None
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
377
        #mutter('yielding line: %r' % last)
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
378
        yield last
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
379
380
    def _read_header(self):
        """Read the bzr header.

        Lines are skipped until one matches the first expected header
        line; the remaining expected lines must then follow
        immediately.  After that, key/value lines are passed to
        _handle_next() until the blank line that ends the header.

        :raises MalformedHeader: If no opening header is found or the
            header lines do not match what is expected.
        """
        # Work on a copy: matched lines are popped off, and the list
        # returned by common.get_header() may be shared with other callers.
        header = list(common.get_header())
        found = False
        for line in self._next():
            if line is None:
                # _next() yields None when the input is already
                # exhausted, e.g. for an empty file.
                raise MalformedHeader('Did not find an opening header')
            if found:
                if (line[:2] != '# ' or line[-1:] != '\n'
                        or line[2:-1] != header[0]):
                    raise MalformedHeader('Found a header, but it'
                        ' was improperly formatted')
                header.pop(0) # We read this line.
                if not header:
                    break # We found everything.
            elif (line[:1] == '#' and line[-1:] == '\n'):
                line = line[1:-1].strip()
                if line[:len(common.header_str)] == common.header_str:
                    if line == header[0]:
                        found = True
                    else:
                        raise MalformedHeader('Found what looks like'
                                ' a header, but did not match')
                    header.pop(0)
                    if not header:
                        # A one-line header is complete right here;
                        # going on would index the empty list.
                        break
        else:
            raise MalformedHeader('Did not find an opening header')

        for line in self._next():
            # The bzr header is terminated with a blank line
            # which does not start with '#'.  None means the input
            # ended, which is treated like running out of lines.
            if line is None or line == '\n':
                break
            self._handle_next(line)
411
412
    def _read_next_entry(self, line, indent=1):
413
        """Read in a key-value pair
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
414
        """
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
415
        if line[:1] != '#':
416
            raise MalformedHeader('Bzr header did not start with #')
417
        line = line[1:-1] # Remove the '#' and '\n'
418
        if line[:indent] == ' '*indent:
419
            line = line[indent:]
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
420
        if not line:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
421
            return None, None# Ignore blank lines
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
422
423
        loc = line.find(': ')
424
        if loc != -1:
425
            key = line[:loc]
426
            value = line[loc+2:]
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
427
            if not value:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
428
                value = self._read_many(indent=indent+3)
429
        elif line[-1:] == ':':
430
            key = line[:-1]
431
            value = self._read_many(indent=indent+3)
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
432
        else:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
433
            raise MalformedHeader('While looking for key: value pairs,'
434
                    ' did not find the colon %r' % (line))
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
435
436
        key = key.replace(' ', '_')
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
437
        #mutter('found %s: %s' % (key, value))
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
438
        return key, value
439
440
    def _handle_next(self, line):
441
        key, value = self._read_next_entry(line, indent=1)
442
        if key is None:
443
            return
444
445
        if key == 'revision':
446
            self._read_revision(value)
447
        elif hasattr(self.info, key):
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
448
            if getattr(self.info, key) is None:
449
                setattr(self.info, key, value)
450
            else:
451
                raise MalformedHeader('Duplicated Key: %s' % key)
452
        else:
453
            # What do we do with a key we don't recognize
454
            raise MalformedHeader('Unknown Key: %s' % key)
455
        
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
456
    def _read_many(self, indent):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
457
        """If a line ends with no entry, that means that it should be
458
        followed with multiple lines of values.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
459
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
460
        This detects the end of the list, because it will be a line that
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
461
        does not start properly indented.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
462
        """
463
        values = []
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
464
        start = '#' + (' '*indent)
465
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
466
        if self._next_line is None or self._next_line[:len(start)] != start:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
467
            return values
468
469
        for line in self._next():
470
            values.append(line[len(start):-1])
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
471
            if self._next_line is None or self._next_line[:len(start)] != start:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
472
                break
473
        return values
474
475
    def _read_one_patch(self):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
476
        """Read in one patch, return the complete patch, along with
477
        the next line.
478
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
479
        :return: action, lines, do_continue
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
480
        """
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
481
        #mutter('_read_one_patch: %r' % self._next_line)
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
482
        # Peek and see if there are no patches
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
483
        if self._next_line is None or self._next_line[:1] == '#':
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
484
            return None, [], False
485
486
        line = self._next().next()
487
        if line[:3] != '***':
488
            raise MalformedPatches('The first line of all patches'
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
489
                ' should be a bzr meta line "***"'
490
                ': %r' % line)
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
491
        action = line[4:-1]
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
492
0.5.59 by John Arbash Meinel
Several fixes for handling the case where you are doing a changeset against revno=0 (Null base)
493
        if self._next_line is None or self._next_line[:1] == '#':
494
            return action, [], False
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
495
        lines = []
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
496
        for line in self._next():
497
            lines.append(line)
498
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
499
            if self._next_line is not None and self._next_line[:3] == '***':
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
500
                return action, lines, True
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
501
            elif self._next_line is None or self._next_line[:1] == '#':
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
502
                return action, lines, False
503
        return action, lines, False
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
504
            
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
505
    def _read_patches(self):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
506
        do_continue = True
507
        while do_continue:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
508
            action, lines, do_continue = self._read_one_patch()
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
509
            if action is not None:
510
                self.info.actions.append((action, lines))
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
511
512
    def _read_revision(self, rev_id):
        """Read the indented entries that describe one revision.

        Each line is parsed with ``_read_next_entry(line, indent=4)`` and
        the resulting value is stored on a new RevisionInfo under the
        attribute named by the key.  Reading stops once the peeked line
        no longer starts with '#' followed by four spaces.  The finished
        RevisionInfo is appended to ``self.info.revisions``.

        :param rev_id: Passed to the RevisionInfo constructor.
        :raises MalformedHeader: if a key is not an attribute of
            RevisionInfo, or if that attribute already has a value.
        """
        rev_info = RevisionInfo(rev_id)
        prefix = '#    '
        for line in self._next():
            key, value = self._read_next_entry(line, indent=4)
            if not hasattr(rev_info, key):
                # What do we do with a key we don't recognize
                raise MalformedHeader('Unknown Key: %s' % key)
            if getattr(rev_info, key) is not None:
                raise MalformedHeader('Duplicated Key: %s' % key)
            setattr(rev_info, key, value)

            upcoming = self._next_line
            if upcoming is None or upcoming[:len(prefix)] != prefix:
                break

        self.info.revisions.append(rev_info)
534
535
    def _read_footer(self):
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
536
        """Read the rest of the meta information.
537
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
538
        :param first_line:  The previous step iterates past what it
539
                            can handle. That extra line is given here.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
540
        """
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
541
        for line in self._next():
542
            self._handle_next(line)
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
543
            if self._next_line is None or self._next_line[:1] != '#':
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
544
                break
545
546
    def _update_tree(self, tree):
547
        """This fills out a ChangesetTree based on the information
548
        that was read in.
549
550
        :param tree: A ChangesetTree to update with the new information.
551
        """
552
        from common import decode
553
554
        def get_text_id(info, file_id):
555
            if info is not None:
556
                if info[:8] != 'text-id:':
557
                    raise BzrError("Text ids should be prefixed with 'text-id:'"
558
                        ': %r' % info)
559
                text_id = decode(info[8:])
560
            elif self.info.text_ids.has_key(file_id):
561
                return self.info.text_ids[file_id]
562
            else:
563
                # If text_id was not explicitly supplied
564
                # then it should be whatever we would guess it to be
565
                # based on the base revision, and what we know about
566
                # the target revision
567
                text_id = common.guess_text_id(tree.base_tree, 
568
                        file_id, self.info.base, True)
569
            if (self.info.text_ids.has_key(file_id)
570
                    and self.info.text_ids[file_id] != text_id):
571
                raise BzrError('Mismatched text_ids for file_id {%s}'
572
                        ': %s != %s' % (file_id,
573
                                        self.info.text_ids[file_id],
574
                                        text_id))
575
            # The Info object makes more sense for where
576
            # to store something like text_id, since it is
577
            # what will be used to generate stored inventory
578
            # entries.
579
            # The problem is that we are parsing the
580
            # ChangesetTree right now, we really modifying
581
            # the ChangesetInfo object
582
            self.info.text_ids[file_id] = text_id
583
            return text_id
584
585
        def renamed(kind, extra, lines):
586
            info = extra.split('\t')
587
            if len(info) < 2:
588
                raise BzrError('renamed action lines need both a from and to'
589
                        ': %r' % extra)
590
            old_path = decode(info[0])
591
            if info[1][:3] == '=> ':
592
                new_path = decode(info[1][3:])
593
            else:
594
                new_path = decode(info[1][3:])
595
596
            file_id = tree.path2id(new_path)
597
            if len(info) > 2:
598
                text_id = get_text_id(info[2], file_id)
599
            else:
600
                text_id = get_text_id(None, file_id)
601
            tree.note_rename(old_path, new_path)
602
            if lines:
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
603
                tree.note_patch(new_path, ''.join(lines))
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
604
605
        def removed(kind, extra, lines):
606
            info = extra.split('\t')
607
            if len(info) > 1:
608
                # TODO: in the future we might allow file ids to be
609
                # given for removed entries
610
                raise BzrError('removed action lines should only have the path'
611
                        ': %r' % extra)
612
            path = decode(info[0])
613
            tree.note_deletion(path)
614
615
        def added(kind, extra, lines):
616
            info = extra.split('\t')
617
            if len(info) <= 1:
618
                raise BzrError('add action lines require the path and file id'
619
                        ': %r' % extra)
620
            elif len(info) > 3:
621
                raise BzrError('add action lines have fewer than 3 entries.'
622
                        ': %r' % extra)
623
            path = decode(info[0])
0.5.59 by John Arbash Meinel
Several fixes for handling the case where you are doing a changeset against revno=0 (Null base)
624
            if info[1][:8] != 'file-id:':
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
625
                raise BzrError('The file-id should follow the path for an add'
626
                        ': %r' % extra)
627
            file_id = decode(info[1][8:])
628
629
            if len(info) > 2:
630
                text_id = get_text_id(info[2], file_id)
631
            else:
632
                text_id = get_text_id(None, file_id)
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
633
            tree.note_id(file_id, path, kind)
634
            tree.note_patch(path, ''.join(lines))
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
635
636
        def modified(kind, extra, lines):
637
            info = extra.split('\t')
638
            if len(info) < 1:
639
                raise BzrError('modified action lines have at least'
640
                        'the path in them: %r' % extra)
641
            path = decode(info[0])
642
643
            file_id = tree.path2id(path)
644
            if len(info) > 1:
645
                text_id = get_text_id(info[1], file_id)
646
            else:
647
                text_id = get_text_id(None, file_id)
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
648
            tree.note_patch(path, ''.join(lines))
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
649
            
650
651
        valid_actions = {
652
            'renamed':renamed,
653
            'removed':removed,
654
            'added':added,
655
            'modified':modified
656
        }
657
        for action_line, lines in self.info.actions:
658
            first = action_line.find(' ')
659
            if first == -1:
660
                raise BzrError('Bogus action line'
661
                        ' (no opening space): %r' % action_line)
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
662
            second = action_line.find(' ', first+1)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
663
            if second == -1:
664
                raise BzrError('Bogus action line'
665
                        ' (missing second space): %r' % action_line)
666
            action = action_line[:first]
667
            kind = action_line[first+1:second]
668
            if kind not in ('file', 'directory'):
669
                raise BzrError('Bogus action line'
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
670
                        ' (invalid object kind %r): %r' % (kind, action_line))
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
671
            extra = action_line[second+1:]
672
673
            if action not in valid_actions:
674
                raise BzrError('Bogus action line'
675
                        ' (unrecognized action): %r' % action_line)
676
            valid_actions[action](kind, extra, lines)
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
677
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
678
def read_changeset(from_file, branch):
    """Parse a changeset and return what the reader builds from it.

    :param from_file: A file-like (iterable) object holding the
                      changeset text; handed to ChangesetReader.
    :param branch: This will be used to build the changeset tree, it needs
                   to contain the base of the changeset. (Which you probably
                   won't know about until after the changeset is parsed.)
    :return: Whatever ``ChangesetReader.get_info_tree_inv(branch)``
             returns.
    """
    return ChangesetReader(from_file).get_info_tree_inv(branch)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
688
689
class ChangesetTree:
    """The target tree of a changeset, expressed as changes to a base tree.

    Renames, additions, deletions and patches are recorded with the
    ``note_*`` methods.  The query methods then translate paths and file
    ids between ``base_tree`` and the target tree and produce the patched
    contents of files.
    """

    def __init__(self, base_tree=None):
        """:param base_tree: The tree the recorded changes apply to."""
        self.base_tree = base_tree
        self._renamed = {} # Mapping from new_path => old_path
        self._renamed_r = {} # old_path => new_path
        self._new_id = {} # new_path => new_id
        self._new_id_r = {} # new_id => new_path
        self._kinds = {} # new_id => kind
        self.patches = {} # new_path => patch text
        self.deleted = [] # old paths that were removed
        # True: base contents are looked up by file id, False: by path
        self.contents_by_id = True

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def note_rename(self, old_path, new_path):
        """A file/directory has been renamed from old_path => new_path

        :raises AssertionError: if new_path is already the target of a
            rename, or old_path is already the source of one.
        """
        # Raised explicitly so the checks survive "python -O", and keyed
        # the same way the two maps are actually filled below.
        if new_path in self._renamed:
            raise AssertionError('%r is already the target of a rename'
                                 % (new_path,))
        if old_path in self._renamed_r:
            raise AssertionError('%r has already been renamed'
                                 % (old_path,))
        self._renamed[new_path] = old_path
        self._renamed_r[old_path] = new_path

    def note_id(self, new_id, new_path, kind='file'):
        """Files that don't exist in base need a new id."""
        self._new_id[new_path] = new_id
        self._new_id_r[new_id] = new_path
        self._kinds[new_id] = kind

    def note_patch(self, new_path, patch):
        """There is a patch for a given filename."""
        self.patches[new_path] = patch

    def note_deletion(self, old_path):
        """The file at old_path has been deleted."""
        self.deleted.append(old_path)

    def old_path(self, new_path):
        """Get the old_path (path in the base_tree) for the file at new_path

        :return: The path in the base tree, or None when the path found
                 there was itself renamed away.
        """
        import os.path
        old_path = self._renamed.get(new_path)
        if old_path is not None:
            return old_path
        dirname,basename = os.path.split(new_path)
        # "dirname is not ''" doesn't work, because
        # dirname may be a unicode entry, and "is"
        # requires the objects to be identical
        if dirname != '':
            # The containing directory may have been renamed.
            old_dir = self.old_path(dirname)
            if old_dir is None:
                old_path = None
            else:
                old_path = os.path.join(old_dir, basename)
        else:
            old_path = new_path
        #If the new path wasn't in renamed, the old one shouldn't be in
        #renamed_r
        if old_path in self._renamed_r:
            return None
        return old_path

    def new_path(self, old_path):
        """Get the new_path (path in the target_tree) for the file at old_path
        in the base tree.

        :return: The path in the target tree, or None when another entry
                 was renamed onto the path found there.
        """
        import os.path
        new_path = self._renamed_r.get(old_path)
        if new_path is not None:
            return new_path
        dirname,basename = os.path.split(old_path)
        # Compare by value: an identity test against '' fails for an
        # equal but distinct (e.g. unicode) empty string and would then
        # recurse on '' forever.
        if dirname != '':
            # The containing directory may have been renamed.
            new_dir = self.new_path(dirname)
            if new_dir is None:
                new_path = None
            else:
                new_path = os.path.join(new_dir, basename)
        else:
            new_path = old_path
        #If the old path wasn't in renamed_r, the new one shouldn't be in
        #renamed
        if new_path in self._renamed:
            return None
        return new_path

    def path2id(self, path):
        """Return the id of the file present at path in the target tree."""
        file_id = self._new_id.get(path)
        if file_id is not None:
            return file_id
        old_path = self.old_path(path)
        if old_path is None:
            return None
        if old_path in self.deleted:
            return None
        # Not every base tree offers path2id itself; fall back to its
        # inventory.
        if hasattr(self.base_tree, 'path2id'):
            return self.base_tree.path2id(old_path)
        else:
            return self.base_tree.inventory.path2id(old_path)

    def id2path(self, file_id):
        """Return the new path in the target tree of the file with id file_id"""
        path = self._new_id_r.get(file_id)
        if path is not None:
            return path
        old_path = self.base_tree.id2path(file_id)
        if old_path is None:
            return None
        if old_path in self.deleted:
            return None
        return self.new_path(old_path)

    def old_contents_id(self, file_id):
        """Return the id in the base_tree for the given file_id,
        or None if the file did not exist in base.

        FIXME:  Something doesn't seem right here. It seems like this function
                should always either return None or file_id. Even if
                you are doing the by-path lookup, you are doing a
                id2path lookup, just to do the reverse path2id lookup.
        """
        if self.contents_by_id:
            if self.base_tree.has_id(file_id):
                return file_id
            else:
                return None
        new_path = self.id2path(file_id)
        return self.base_tree.path2id(new_path)

    def get_file(self, file_id):
        """Return a file-like object containing the new contents of the
        file given by file_id.

        When no patch is recorded for the file, the base tree's file (or
        None if there is none) is returned unchanged.

        TODO:   It might be nice if this actually generated an entry
                in the text-store, so that the file contents would
                then be cached.
        """
        base_id = self.old_contents_id(file_id)
        if base_id is not None:
            patch_original = self.base_tree.get_file(base_id)
        else:
            patch_original = None
        file_patch = self.patches.get(self.id2path(file_id))
        if file_patch is None:
            return patch_original
        return patched_file(file_patch, patch_original)

    def get_kind(self, file_id):
        """Return the kind noted for file_id, else the kind recorded in
        the base tree's inventory."""
        if file_id in self._kinds:
            return self._kinds[file_id]
        return self.base_tree.inventory[file_id].kind

    def get_size_and_sha1(self, file_id):
        """Return the size and sha1 hash of the given file id.
        If the file has no patch, this is extracted
        from the base_tree. Rather than re-reading the file.

        :return: (size, sha1), or (None, None) when the file has no path
                 in the target tree.
        """
        from bzrlib.osutils import sha_string

        new_path = self.id2path(file_id)
        if new_path is None:
            return None, None
        if new_path not in self.patches:
            # If the entry does not have a patch, then the
            # contents must be the same as in the base_tree
            ie = self.base_tree.inventory[file_id]
            return int(ie.text_size), ie.text_sha1
        content = self.get_file(file_id).read()
        return len(content), sha_string(content)

    def __iter__(self):
        """Yield the ids noted with note_id, then every base tree id
        that still has a path in the target tree."""
        for file_id in self._new_id_r:
            yield file_id
        for file_id in self.base_tree:
            if self.id2path(file_id) is None:
                continue
            yield file_id
868
869
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
870
def patched_file(file_patch, original):
    """Apply file_patch to original and return the patched contents.

    The original is written to a file in a fresh temporary directory,
    ``bzrlib.patch.patch`` is run on it, and the output is read back
    into memory.  The temporary directory is always removed.

    :param file_patch: The patch, passed as-is to ``bzrlib.patch.patch``.
    :param original: A file-like object with the base contents, or None
                     to patch against an empty file.
    :return: A StringIO holding the patched contents, positioned at the
             start.
    :raises AssertionError: if patch returns a non-zero status.
    """
    from bzrlib.patch import patch
    from tempfile import mkdtemp
    from shutil import rmtree
    from StringIO import StringIO
    from bzrlib.osutils import pumpfile
    import os.path
    temp_dir = mkdtemp()
    try:
        original_path = os.path.join(temp_dir, "originalfile")
        temp_original = open(original_path, "wb")
        try:
            if original is not None:
                pumpfile(original, temp_original)
        finally:
            # Close even on error, so the handle is not leaked.
            temp_original.close()
        patched_path = os.path.join(temp_dir, "patchfile")
        # The call must not live inside an assert statement: under
        # "python -O" asserts are dropped and the patch would never run.
        status = patch(file_patch, original_path, patched_path)
        if status != 0:
            raise AssertionError('patch returned status %r' % (status,))
        result = StringIO()
        temp_patched = open(patched_path, "rb")
        try:
            pumpfile(temp_patched, result)
        finally:
            temp_patched.close()
        result.seek(0,0)

    finally:
        rmtree(temp_dir)

    return result
896