/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
1
#!/usr/bin/env python
2
"""\
3
Read in a changeset output, and process it into a Changeset object.
4
"""
5
6
import bzrlib, bzrlib.changeset
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
7
import pprint
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
8
import common
9
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
10
from bzrlib.trace import mutter
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
11
from bzrlib.errors import BzrError
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
12
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
13
class BadChangeset(Exception):
    """Base class for the changeset parsing errors defined in this module."""


class MalformedHeader(BadChangeset):
    """The header section of the changeset could not be parsed."""


class MalformedPatches(BadChangeset):
    """The patch section of the changeset could not be parsed."""


class MalformedFooter(BadChangeset):
    """Companion to the two above for the footer section.

    NOTE(review): named for footer errors; confirm against its raise sites.
    """
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
17
0.5.11 by John Arbash Meinel
Working on properly representing renames.
18
def _unescape(name):
19
    """Now we want to find the filename effected.
20
    Unfortunately the filename is written out as
21
    repr(filename), which means that it surrounds
22
    the name with quotes which may be single or double
23
    (single is preferred unless there is a single quote in
24
    the filename). And some characters will be escaped.
25
26
    TODO:   There has to be some pythonic way of undo-ing the
27
            representation of a string rather than using eval.
28
    """
29
    delimiter = name[0]
30
    if name[-1] != delimiter:
31
        raise BadChangeset('Could not properly parse the'
32
                ' filename: %r' % name)
33
    # We need to handle escaped hexadecimals too.
34
    return name[1:-1].replace('\"', '"').replace("\'", "'")
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
35
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
36
class RevisionInfo(object):
    """Holder for the fields read from the changeset for one revision.

    Everything except rev_id starts out as None and is expected to be
    filled in by whoever parses the changeset.
    """

    def __init__(self, rev_id):
        self.rev_id = rev_id
        # All remaining fields are unknown until they are read.
        for field in ('sha1', 'committer', 'date', 'timestamp', 'timezone',
                      'inventory_id', 'inventory_sha1', 'parents',
                      'message'):
            setattr(self, field, None)

    def __str__(self):
        return pprint.pformat(vars(self))

    def as_revision(self):
        """Build a bzrlib Revision from the collected fields.

        timestamp is converted with float(), timezone with int(), and the
        message lines are joined with newlines.  Each entry of parents is
        split on a tab into a revision id and a sha1.

        :return: The newly created Revision.
        """
        from bzrlib.revision import Revision, RevisionReference

        revision = Revision(
            revision_id=self.rev_id,
            committer=self.committer,
            timestamp=float(self.timestamp),
            timezone=int(self.timezone),
            inventory_id=self.inventory_id,
            inventory_sha1=self.inventory_sha1,
            message='\n'.join(self.message))

        # parents may be None or empty, in which case nothing is added.
        for entry in self.parents or ():
            parent_id, parent_sha1 = entry.split('\t')
            revision.parents.append(
                RevisionReference(parent_id, parent_sha1))

        return revision
71
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
72
class ChangesetInfo(object):
    """This contains the meta information. Stuff that allows you to
    recreate the revision or inventory XML.
    """
    def __init__(self):
        self.committer = None
        self.date = None
        self.message = None
        self.base = None
        self.base_sha1 = None

        # A list of RevisionInfo objects
        self.revisions = []

        self.actions = []

        # The next entries are created during complete_info() and
        # other post-read functions.

        # A list of real Revision objects
        self.real_revisions = []
        self.text_ids = {} # file_id => text_id

        self.timestamp = None
        self.timezone = None

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def complete_info(self):
        """This makes sure that all information is properly
        split up, based on the assumptions that can be made
        when information is missing.

        Revisions without their own timestamp, message or committer
        inherit the changeset-wide values, a missing inventory_id defaults
        to the revision id, and real_revisions is rebuilt from revisions.
        When no base was given, it is taken from the first parent of the
        first revision.

        :raises BadChangeset: If there is neither a base nor any revision
            to derive one from.
        :raises BzrError: If a base sha1 was supplied that disagrees with
            the one recorded by the first revision for its first parent.
        """
        # Put in all of the guessable information.
        if not self.timestamp and self.date:
            self.timestamp, self.timezone = common.unpack_highres_date(self.date)

        self.real_revisions = []
        for rev in self.revisions:
            if rev.timestamp is None:
                if rev.date is not None:
                    rev.timestamp, rev.timezone = \
                            common.unpack_highres_date(rev.date)
                else:
                    rev.timestamp = self.timestamp
                    rev.timezone = self.timezone
            if rev.message is None and self.message:
                rev.message = self.message
            if rev.committer is None and self.committer:
                rev.committer = self.committer
            if rev.inventory_id is None:
                rev.inventory_id = rev.rev_id
            self.real_revisions.append(rev.as_revision())

        if self.base is None:
            # When we don't have a base, then the real base
            # is the first parent of the first revision listed
            if not self.real_revisions:
                raise BadChangeset('Changeset lists neither a base'
                        ' nor any revisions')
            rev = self.real_revisions[0]
            if not rev.parents:
                # There is no base listed, and
                # the lowest revision doesn't have a parent
                # so this is probably against the empty tree
                # and thus base truly is None
                self.base = None
                self.base_sha1 = None
            else:
                parent = rev.parents[0]
                self.base = parent.revision_id
                # In general, if self.base is None, self.base_sha1 should
                # also be None.  This is input validation, so it must not
                # be an assert, which disappears under python -O.
                if (self.base_sha1 is not None
                        and self.base_sha1 != parent.revision_sha1):
                    raise BzrError('Base revision checksum mismatch.'
                            ' For rev_id {%s} supplied sha1 (%s)'
                            ' != parent reference (%s)'
                            % (self.base, self.base_sha1,
                               parent.revision_sha1))
                self.base_sha1 = parent.revision_sha1
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
145
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
146
class ChangesetReader(object):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
147
    """This class reads in a changeset from a file, and returns
148
    a Changeset object, which can then be applied against a tree.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
149
    """
150
    def __init__(self, from_file):
        """Parse and validate the changeset while constructing the reader.

        :param from_file: A file-like object (must have iterator support).
        """
        object.__init__(self)
        # Single line of look-ahead; maintained by _next().
        self._next_line = None
        self.from_file = from_file

        self.info = ChangesetInfo()

        # The actual inventory ids are kept in the footer so that the
        # patch stays easy for humans to read.  The price is that the
        # whole file has to be consumed before a proper changeset exists,
        # so reading and validation happen right here.
        self._read()
        self._validate()
166
167
    def _read(self):
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
168
        self._read_header()
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
169
        self._read_patches()
170
        self._read_footer()
171
0.5.62 by John Arbash Meinel
Doing some internal validation before allowing processing to continue, additional checks at the command level.
172
    def _validate(self):
173
        """Make sure that the information read in makes sense
174
        and passes appropriate checksums.
175
        """
176
        # Fill in all the missing blanks for the revisions
177
        # and generate the real_revisions list.
178
        self.info.complete_info()
179
        self._validate_revisions()
180
181
    def _validate_revisions(self):
        """Make sure all revision entries match their checksum.

        Every entry of info.real_revisions is serialised with pack_xml and
        hashed with sha_file; the result must equal the sha1 recorded in
        the matching info.revisions entry.  Afterwards, every parent
        reference that points at a revision contained in this changeset is
        checked against the measured hash.

        :raises BzrError: On a checksum mismatch, when a revision id is
            listed twice, or when a parent reference carries the wrong
            sha1.
        """
        from bzrlib.xml import pack_xml
        from cStringIO import StringIO
        from bzrlib.osutils import sha_file

        # This is a mapping from each revision id to its sha hash
        rev_to_sha1 = {}

        for rev, rev_info in zip(self.info.real_revisions, self.info.revisions):
            assert rev.revision_id == rev_info.rev_id
            sio = StringIO()
            pack_xml(rev, sio)
            sio.seek(0)
            sha1 = sha_file(sio)
            if sha1 != rev_info.sha1:
                raise BzrError('Revision checksum mismatch.'
                    ' For rev_id {%s} supplied sha1 (%s) != measured (%s)'
                    % (rev.revision_id, rev_info.sha1, sha1))
            if rev.revision_id in rev_to_sha1:
                raise BzrError('Revision {%s} given twice in the list'
                        % (rev.revision_id,))
            rev_to_sha1[rev.revision_id] = sha1

        # Now that we've checked all the sha1 sums, we can make sure that
        # at least for the small list we have, all of the references are
        # valid.
        for rev in self.info.real_revisions:
            for parent in rev.parents:
                if parent.revision_id in rev_to_sha1:
                    if parent.revision_sha1 != rev_to_sha1[parent.revision_id]:
                        raise BzrError('Parent revision checksum mismatch.'
                                ' A parent was referenced with an'
                                ' incorrect checksum'
                                ': {%r} %s != %s' % (parent.revision_id,
                                            parent.revision_sha1,
                                            rev_to_sha1[parent.revision_id]))
218
219
    def _validate_references_from_branch(self, branch):
        """Now that we have a branch which should have some of the
        revisions we care about, go through and validate all of them
        that we can.

        Every revision and inventory sha1 mentioned by the changeset is
        collected, and each one whose id is present in the branch's
        revision_store / inventory_store is compared with the branch's
        own hash.

        :param branch: The branch to compare against.  It is used through
            revision_store, inventory_store, get_revision_sha1() and
            get_inventory_sha1().
        :raises BzrError: If an id is referenced with two different
            hashes, a null revision carries a hash, or a hash disagrees
            with the branch.
        """
        rev_to_sha = {}
        inv_to_sha = {}
        def add_sha(d, rev_id, sha1):
            if rev_id is None:
                if sha1 is not None:
                    raise BzrError('A Null revision should always'
                        ' have a null sha1 hash')
                return
            if rev_id in d:
                # This really should have been validated as part
                # of _validate_revisions but lets do it again
                if sha1 != d[rev_id]:
                    raise BzrError('** Revision %r referenced with 2 different'
                            ' sha hashes %s != %s' % (rev_id,
                                sha1, d[rev_id]))
            else:
                d[rev_id] = sha1

        add_sha(rev_to_sha, self.info.base, self.info.base_sha1)
        # All of the contained revisions were checked
        # in _validate_revisions
        checked = {}
        for rev_info in self.info.revisions:
            checked[rev_info.rev_id] = True
            add_sha(rev_to_sha, rev_info.rev_id, rev_info.sha1)
            # Record the inventory hash of *every* revision, while the
            # matching rev_info is the current loop variable.
            add_sha(inv_to_sha, rev_info.inventory_id, rev_info.inventory_sha1)

        for rev in self.info.real_revisions:
            for parent in rev.parents:
                add_sha(rev_to_sha, parent.revision_id, parent.revision_sha1)

        count = 0
        missing = {}
        for rev_id, sha1 in rev_to_sha.items():
            if rev_id in branch.revision_store:
                local_sha1 = branch.get_revision_sha1(rev_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For revision id {%s}'
                            ' local: %s, cset: %s' % (rev_id, local_sha1, sha1))
                else:
                    count += 1
            elif rev_id not in checked:
                missing[rev_id] = sha1

        for inv_id, sha1 in inv_to_sha.items():
            if inv_id in branch.inventory_store:
                local_sha1 = branch.get_inventory_sha1(inv_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For inventory id {%s}'
                            ' local: %s, cset: %s' % (inv_id, local_sha1, sha1))
                else:
                    count += 1

        if len(missing) > 0:
            # I don't know if this is an error yet
            from bzrlib.trace import warning
            warning('Not all revision hashes could be validated.'
                    ' Unable validate %d hashes' % len(missing))
        mutter('Verified %d sha hashes for the changeset.' % count)
283
284
    def _create_inventory(self, tree):
        """Build up the inventory entry for the ChangesetTree.

        TODO: This sort of thing should probably end up part of
        ChangesetTree, but since it doesn't handle meta-information
        yet, we need to do it here. (We need the ChangesetInfo,
        specifically the text_ids)

        :param tree: The tree to describe.  It is iterated for file ids
            and queried through id2path, path2id, get_kind,
            get_size_and_sha1 and base_tree.inventory.
        :return: A new Inventory with one entry per non-root file id.
        :raises BzrError: If the tree reports a text size of None.
        """
        from os.path import dirname, basename
        from bzrlib.inventory import Inventory, InventoryEntry, ROOT_ID

        # TODO: deal with trees having a unique ROOT_ID
        root_id = ROOT_ID
        inv = Inventory()
        for file_id in tree:
            if file_id == root_id:
                continue
            path = tree.id2path(file_id)
            parent_path = dirname(path)
            # An entry whose containing directory is '' sits directly
            # below the root, so its parent is the root itself.
            if parent_path == '':
                parent_id = root_id
            else:
                parent_id = tree.path2id(parent_path)

            if file_id in self.info.text_ids:
                text_id = self.info.text_ids[file_id]
            else:
                # If we don't have the text_id in the local map
                # that means the file didn't exist in the changeset
                # so we just use the old text_id.
                text_id = tree.base_tree.inventory[file_id].text_id
            name = basename(path)
            kind = tree.get_kind(file_id)
            ie = InventoryEntry(file_id, name, kind, parent_id, text_id=text_id)
            ie.text_size, ie.text_sha1 = tree.get_size_and_sha1(file_id)
            if ie.text_size is None:
                raise BzrError('Got a text_size of None for file_id %r' % file_id)
            inv.add(ie)
        return inv
323
324
    def _validate_inventory(self, inv):
        """At this point we should have generated the ChangesetTree,
        so build up an inventory, and make sure the hashes match.

        :param inv: The inventory to serialise with pack_xml and hash.
        :raises BzrError: If the measured hash differs from the
            inventory_sha1 of the last entry in info.real_revisions.
        """
        from bzrlib.xml import pack_xml
        from cStringIO import StringIO
        from bzrlib.osutils import sha_file

        # Now we should have a complete inventory entry.
        sio = StringIO()
        pack_xml(inv, sio)
        sio.seek(0)
        sha1 = sha_file(sio)
        # Target revision is the last entry in the real_revisions list
        rev = self.info.real_revisions[-1]
        if sha1 != rev.inventory_sha1:
            raise BzrError('Inventory sha hash mismatch.')
341
0.5.63 by John Arbash Meinel
Moving the validation into part of the reading.
342
        
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
343
    def get_info_tree_inv(self, branch):
        """Produce the meta information, a ChangesetTree and its inventory.

        The references are first validated against branch, then a
        ChangesetTree is layered over the branch's tree for the base
        revision and updated, and finally an inventory is built from it
        and checked.

        :param branch: The branch the changeset is interpreted against.
        :return: A tuple (info, tree, inv).
        """
        self._validate_references_from_branch(branch)

        base_tree = branch.revision_tree(self.info.base)
        tree = ChangesetTree(base_tree)
        self._update_tree(tree)

        inv = self._create_inventory(tree)
        self._validate_inventory(inv)

        result = (self.info, tree, inv)
        return result
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
355
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
356
    def _next(self):
357
        """yield the next line, but secretly
358
        keep 1 extra line for peeking.
359
        """
360
        for line in self.from_file:
361
            last = self._next_line
362
            self._next_line = line
363
            if last is not None:
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
364
                #mutter('yielding line: %r' % last)
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
365
                yield last
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
366
        last = self._next_line
367
        self._next_line = None
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
368
        #mutter('yielding line: %r' % last)
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
369
        yield last
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
370
371
    def _read_header(self):
        """Read the bzr header.

        Lines are skipped until a '#' line starting with
        common.header_str is seen; it must equal the first expected header
        line.  Every following line must then be '# ' plus the next
        expected header line, until all expected lines are used up.  After
        that, lines are passed to _handle_next() until a bare newline
        ends the header.

        :raises MalformedHeader: If no opening header is found, or a
            header line does not match what is expected.
        """
        expected = common.get_header()
        in_header = False
        for line in self._next():
            if in_header:
                well_formed = (line[:2] == '# ' and line[-1:] == '\n'
                               and line[2:-1] == expected[0])
                if not well_formed:
                    raise MalformedHeader('Found a header, but it'
                        ' was improperly formatted')
                expected.pop(0) # We read this line.
                if not expected:
                    break # We found everything.
                continue

            # Still searching for the opening line.
            if not (line[:1] == '#' and line[-1:] == '\n'):
                continue
            line = line[1:-1].strip()
            if line[:len(common.header_str)] != common.header_str:
                continue
            if line != expected[0]:
                raise MalformedHeader('Found what looks like'
                        ' a header, but did not match')
            in_header = True
            expected.pop(0)
        else:
            raise MalformedHeader('Did not find an opening header')

        for line in self._next():
            # The bzr header is terminated with a blank line
            # which does not start with '#'
            if line == '\n':
                break
            self._handle_next(line)
402
403
    def _read_next_entry(self, line, indent=1):
404
        """Read in a key-value pair
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
405
        """
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
406
        if line[:1] != '#':
407
            raise MalformedHeader('Bzr header did not start with #')
408
        line = line[1:-1] # Remove the '#' and '\n'
409
        if line[:indent] == ' '*indent:
410
            line = line[indent:]
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
411
        if not line:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
412
            return None, None# Ignore blank lines
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
413
414
        loc = line.find(': ')
415
        if loc != -1:
416
            key = line[:loc]
417
            value = line[loc+2:]
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
418
            if not value:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
419
                value = self._read_many(indent=indent+3)
420
        elif line[-1:] == ':':
421
            key = line[:-1]
422
            value = self._read_many(indent=indent+3)
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
423
        else:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
424
            raise MalformedHeader('While looking for key: value pairs,'
425
                    ' did not find the colon %r' % (line))
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
426
427
        key = key.replace(' ', '_')
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
428
        #mutter('found %s: %s' % (key, value))
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
429
        return key, value
430
431
    def _handle_next(self, line):
432
        key, value = self._read_next_entry(line, indent=1)
433
        if key is None:
434
            return
435
436
        if key == 'revision':
437
            self._read_revision(value)
438
        elif hasattr(self.info, key):
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
439
            if getattr(self.info, key) is None:
440
                setattr(self.info, key, value)
441
            else:
442
                raise MalformedHeader('Duplicated Key: %s' % key)
443
        else:
444
            # What do we do with a key we don't recognize
445
            raise MalformedHeader('Unknown Key: %s' % key)
446
        
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
447
    def _read_many(self, indent):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
448
        """If a line ends with no entry, that means that it should be
449
        followed with multiple lines of values.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
450
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
451
        This detects the end of the list, because it will be a line that
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
452
        does not start properly indented.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
453
        """
454
        values = []
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
455
        start = '#' + (' '*indent)
456
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
457
        if self._next_line is None or self._next_line[:len(start)] != start:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
458
            return values
459
460
        for line in self._next():
461
            values.append(line[len(start):-1])
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
462
            if self._next_line is None or self._next_line[:len(start)] != start:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
463
                break
464
        return values
465
466
    def _read_one_patch(self):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
467
        """Read in one patch, return the complete patch, along with
468
        the next line.
469
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
470
        :return: action, lines, do_continue
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
471
        """
0.5.57 by John Arbash Meinel
Simplified the header, only output base if it is not the expected one.
472
        #mutter('_read_one_patch: %r' % self._next_line)
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
473
        # Peek and see if there are no patches
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
474
        if self._next_line is None or self._next_line[:1] == '#':
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
475
            return None, [], False
476
477
        line = self._next().next()
478
        if line[:3] != '***':
479
            raise MalformedPatches('The first line of all patches'
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
480
                ' should be a bzr meta line "***"'
481
                ': %r' % line)
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
482
        action = line[4:-1]
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
483
0.5.59 by John Arbash Meinel
Several fixes for handling the case where you are doing a changeset against revno=0 (Null base)
484
        if self._next_line is None or self._next_line[:1] == '#':
485
            return action, [], False
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
486
        lines = []
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
487
        for line in self._next():
488
            lines.append(line)
489
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
490
            if self._next_line is not None and self._next_line[:3] == '***':
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
491
                return action, lines, True
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
492
            elif self._next_line is None or self._next_line[:1] == '#':
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
493
                return action, lines, False
494
        return action, lines, False
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
495
            
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
496
    def _read_patches(self):
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
497
        do_continue = True
498
        while do_continue:
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
499
            action, lines, do_continue = self._read_one_patch()
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
500
            if action is not None:
501
                self.info.actions.append((action, lines))
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
502
503
    def _read_revision(self, rev_id):
        """Read the indented entries that belong to one revision.

        Each line is parsed with _read_next_entry(line, indent=4) and its
        value stored on a fresh RevisionInfo under the parsed key.  Reading
        stops once the peeked line no longer starts with '#' followed by
        four spaces.  The RevisionInfo is appended to self.info.revisions.

        :raises MalformedHeader: if a key is not an attribute of the
            RevisionInfo, or if that attribute already holds a value.
        """
        prefix = '#    '
        prefix_len = len(prefix)
        revision = RevisionInfo(rev_id)
        for raw_line in self._next():
            key, value = self._read_next_entry(raw_line, indent=4)
            if not hasattr(revision, key):
                # What do we do with a key we don't recognize
                raise MalformedHeader('Unknown Key: %s' % key)
            if getattr(revision, key) is not None:
                raise MalformedHeader('Duplicated Key: %s' % key)
            setattr(revision, key, value)

            peek = self._next_line
            if peek is None or peek[:prefix_len] != prefix:
                break

        self.info.revisions.append(revision)
525
526
    def _read_footer(self):
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
527
        """Read the rest of the meta information.
528
0.5.9 by John Arbash Meinel
Now adding the patch information to the ChangesetInfo
529
        :param first_line:  The previous step iterates past what it
530
                            can handle. That extra line is given here.
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
531
        """
0.5.36 by John Arbash Meinel
Updated so that read_changeset is able to parse the output
532
        for line in self._next():
533
            self._handle_next(line)
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
534
            if self._next_line is None or self._next_line[:1] != '#':
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
535
                break
536
537
    def _update_tree(self, tree):
538
        """This fills out a ChangesetTree based on the information
539
        that was read in.
540
541
        :param tree: A ChangesetTree to update with the new information.
542
        """
543
        from common import decode
544
545
        def get_text_id(info, file_id):
546
            if info is not None:
547
                if info[:8] != 'text-id:':
548
                    raise BzrError("Text ids should be prefixed with 'text-id:'"
549
                        ': %r' % info)
550
                text_id = decode(info[8:])
551
            elif self.info.text_ids.has_key(file_id):
552
                return self.info.text_ids[file_id]
553
            else:
554
                # If text_id was not explicitly supplied
555
                # then it should be whatever we would guess it to be
556
                # based on the base revision, and what we know about
557
                # the target revision
558
                text_id = common.guess_text_id(tree.base_tree, 
559
                        file_id, self.info.base, True)
560
            if (self.info.text_ids.has_key(file_id)
561
                    and self.info.text_ids[file_id] != text_id):
562
                raise BzrError('Mismatched text_ids for file_id {%s}'
563
                        ': %s != %s' % (file_id,
564
                                        self.info.text_ids[file_id],
565
                                        text_id))
566
            # The Info object makes more sense for where
567
            # to store something like text_id, since it is
568
            # what will be used to generate stored inventory
569
            # entries.
570
            # The problem is that we are parsing the
571
            # ChangesetTree right now, we really modifying
572
            # the ChangesetInfo object
573
            self.info.text_ids[file_id] = text_id
574
            return text_id
575
576
        def renamed(kind, extra, lines):
577
            info = extra.split('\t')
578
            if len(info) < 2:
579
                raise BzrError('renamed action lines need both a from and to'
580
                        ': %r' % extra)
581
            old_path = decode(info[0])
582
            if info[1][:3] == '=> ':
583
                new_path = decode(info[1][3:])
584
            else:
585
                new_path = decode(info[1][3:])
586
587
            file_id = tree.path2id(new_path)
588
            if len(info) > 2:
589
                text_id = get_text_id(info[2], file_id)
590
            else:
591
                text_id = get_text_id(None, file_id)
592
            tree.note_rename(old_path, new_path)
593
            if lines:
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
594
                tree.note_patch(new_path, ''.join(lines))
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
595
596
        def removed(kind, extra, lines):
597
            info = extra.split('\t')
598
            if len(info) > 1:
599
                # TODO: in the future we might allow file ids to be
600
                # given for removed entries
601
                raise BzrError('removed action lines should only have the path'
602
                        ': %r' % extra)
603
            path = decode(info[0])
604
            tree.note_deletion(path)
605
606
        def added(kind, extra, lines):
607
            info = extra.split('\t')
608
            if len(info) <= 1:
609
                raise BzrError('add action lines require the path and file id'
610
                        ': %r' % extra)
611
            elif len(info) > 3:
612
                raise BzrError('add action lines have fewer than 3 entries.'
613
                        ': %r' % extra)
614
            path = decode(info[0])
0.5.59 by John Arbash Meinel
Several fixes for handling the case where you are doing a changeset against revno=0 (Null base)
615
            if info[1][:8] != 'file-id:':
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
616
                raise BzrError('The file-id should follow the path for an add'
617
                        ': %r' % extra)
618
            file_id = decode(info[1][8:])
619
620
            if len(info) > 2:
621
                text_id = get_text_id(info[2], file_id)
622
            else:
623
                text_id = get_text_id(None, file_id)
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
624
            tree.note_id(file_id, path, kind)
625
            tree.note_patch(path, ''.join(lines))
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
626
627
        def modified(kind, extra, lines):
628
            info = extra.split('\t')
629
            if len(info) < 1:
630
                raise BzrError('modified action lines have at least'
631
                        'the path in them: %r' % extra)
632
            path = decode(info[0])
633
634
            file_id = tree.path2id(path)
635
            if len(info) > 1:
636
                text_id = get_text_id(info[1], file_id)
637
            else:
638
                text_id = get_text_id(None, file_id)
0.5.64 by John Arbash Meinel
SUCCESS, we now are able to validate the inventory XML.
639
            tree.note_patch(path, ''.join(lines))
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
640
            
641
642
        valid_actions = {
643
            'renamed':renamed,
644
            'removed':removed,
645
            'added':added,
646
            'modified':modified
647
        }
648
        for action_line, lines in self.info.actions:
649
            first = action_line.find(' ')
650
            if first == -1:
651
                raise BzrError('Bogus action line'
652
                        ' (no opening space): %r' % action_line)
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
653
            second = action_line.find(' ', first+1)
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
654
            if second == -1:
655
                raise BzrError('Bogus action line'
656
                        ' (missing second space): %r' % action_line)
657
            action = action_line[:first]
658
            kind = action_line[first+1:second]
659
            if kind not in ('file', 'directory'):
660
                raise BzrError('Bogus action line'
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
661
                        ' (invalid object kind %r): %r' % (kind, action_line))
0.5.55 by John Arbash Meinel
Lots of updates. Using a minimized annotations for changesets.
662
            extra = action_line[second+1:]
663
664
            if action not in valid_actions:
665
                raise BzrError('Bogus action line'
666
                        ' (unrecognized action): %r' % action_line)
667
            valid_actions[action](kind, extra, lines)
0.5.7 by John Arbash Meinel
Added a bunch more information about changesets. Can now read back in all of the meta information.
668
0.5.56 by John Arbash Meinel
A couple more fixups, it seems actually capable now of writing out a changeset, and reading it back.
669
def read_changeset(from_file, branch):
    """Parse a changeset from an iterable object (such as a file object).

    A ChangesetReader is built over from_file and the result of its
    get_info_tree_inv(branch) call is returned unchanged.

    :param from_file: A file-like object to read the changeset information.
    :param branch: This will be used to build the changeset tree, it needs
                   to contain the base of the changeset. (Which you probably
                   won't know about until after the changeset is parsed.)
    """
    return ChangesetReader(from_file).get_info_tree_inv(branch)
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
679
680
class ChangesetTree:
    """The target tree of a changeset, described relative to a base tree.

    Renames, new ids, deletions and per-path patches are recorded with the
    note_* methods.  The lookup methods answer questions about the target
    tree from those records plus base_tree.
    """

    def __init__(self, base_tree=None):
        self.base_tree = base_tree
        self._renamed = {} # Mapping from new_path => old_path
        self._renamed_r = {} # old_path => new_path
        self._new_id = {} # new_path => new_id
        self._new_id_r = {} # new_id => new_path
        self._kinds = {} # new_id => kind
        self.patches = {} # new_path => patch text
        self.deleted = [] # old paths that were removed
        # When True, old_contents_id() answers by file id instead of by path
        self.contents_by_id = True

    def __str__(self):
        return pprint.pformat(self.__dict__)

    def note_rename(self, old_path, new_path):
        """A file/directory has been renamed from old_path => new_path"""
        # _renamed is keyed by new path and _renamed_r by old path, so those
        # are the keys that must still be free.  Checking them this way
        # round also lets two paths swap names.
        assert new_path not in self._renamed
        assert old_path not in self._renamed_r
        self._renamed[new_path] = old_path
        self._renamed_r[old_path] = new_path

    def note_id(self, new_id, new_path, kind='file'):
        """Files that don't exist in base need a new id."""
        self._new_id[new_path] = new_id
        self._new_id_r[new_id] = new_path
        self._kinds[new_id] = kind

    def note_patch(self, new_path, patch):
        """There is a patch for a given filename."""
        self.patches[new_path] = patch

    def note_deletion(self, old_path):
        """The file at old_path has been deleted."""
        self.deleted.append(old_path)

    def old_path(self, new_path):
        """Get the old_path (path in the base_tree) for the file at new_path

        :return: The old path, or None when the path computed for it was
            itself renamed away (or a parent directory has no old path).
        """
        import os.path
        old_path = self._renamed.get(new_path)
        if old_path is not None:
            return old_path
        dirname,basename = os.path.split(new_path)
        # dirname is not '' doesn't work, because
        # dirname may be a unicode entry, and 'is'
        # requires the objects to be identical
        if dirname != '':
            # The file itself was not renamed, but one of its parent
            # directories may have been.
            old_dir = self.old_path(dirname)
            if old_dir is None:
                old_path = None
            else:
                old_path = os.path.join(old_dir, basename)
        else:
            old_path = new_path
        #If the new path wasn't in renamed, the old one shouldn't be in
        #renamed_r
        if old_path in self._renamed_r:
            return None
        return old_path

    def new_path(self, old_path):
        """Get the new_path (path in the target_tree) for the file at old_path
        in the base tree.

        :return: The new path, or None when the path computed for it is
            the target of another rename (or a parent directory has no
            new path).
        """
        import os.path
        new_path = self._renamed_r.get(old_path)
        if new_path is not None:
            return new_path
        dirname,basename = os.path.split(old_path)
        # Compare by value: an identity test against '' is never satisfied
        # by an empty unicode dirname, which would recurse without end.
        if dirname != '':
            new_dir = self.new_path(dirname)
            if new_dir is None:
                new_path = None
            else:
                new_path = os.path.join(new_dir, basename)
        else:
            new_path = old_path
        #If the old path wasn't in renamed_r, the new one shouldn't be in
        #renamed
        if new_path in self._renamed:
            return None
        return new_path

    def path2id(self, path):
        """Return the id of the file present at path in the target tree."""
        file_id = self._new_id.get(path)
        if file_id is not None:
            return file_id
        old_path = self.old_path(path)
        if old_path is None:
            return None
        if old_path in self.deleted:
            return None
        # Use base_tree.path2id when it exists, else go through its inventory
        if hasattr(self.base_tree, 'path2id'):
            return self.base_tree.path2id(old_path)
        else:
            return self.base_tree.inventory.path2id(old_path)

    def id2path(self, file_id):
        """Return the new path in the target tree of the file with id file_id"""
        path = self._new_id_r.get(file_id)
        if path is not None:
            return path
        old_path = self.base_tree.id2path(file_id)
        if old_path is None:
            return None
        if old_path in self.deleted:
            return None
        return self.new_path(old_path)

    def old_contents_id(self, file_id):
        """Return the id in the base_tree for the given file_id,
        or None if the file did not exist in base.

        FIXME:  Something doesn't seem right here. It seems like this function
                should always either return None or file_id. Even if
                you are doing the by-path lookup, you are doing a
                id2path lookup, just to do the reverse path2id lookup.
        """
        if self.contents_by_id:
            if self.base_tree.has_id(file_id):
                return file_id
            else:
                return None
        new_path = self.id2path(file_id)
        return self.base_tree.path2id(new_path)

    def get_file(self, file_id):
        """Return a file-like object containing the new contents of the
        file given by file_id.

        When no patch is recorded for the file's path, the base tree's
        file object (or None if there is no base file) is returned as is.

        TODO:   It might be nice if this actually generated an entry
                in the text-store, so that the file contents would
                then be cached.
        """
        base_id = self.old_contents_id(file_id)
        if base_id is not None:
            patch_original = self.base_tree.get_file(base_id)
        else:
            patch_original = None
        file_patch = self.patches.get(self.id2path(file_id))
        if file_patch is None:
            return patch_original
        return patched_file(file_patch, patch_original)

    def get_kind(self, file_id):
        """Return the kind noted for a new id, else the base inventory's."""
        if file_id in self._kinds:
            return self._kinds[file_id]
        return self.base_tree.inventory[file_id].kind

    def get_size_and_sha1(self, file_id):
        """Return the size and sha1 hash of the given file id.
        If the file was not locally modified, this is extracted
        from the base_tree. Rather than re-reading the file.

        :return: (size, sha1), or (None, None) when the id has no path in
            the target tree.
        """
        from bzrlib.osutils import sha_string

        new_path = self.id2path(file_id)
        if new_path is None:
            return None, None
        if new_path not in self.patches:
            # If the entry does not have a patch, then the
            # contents must be the same as in the base_tree
            ie = self.base_tree.inventory[file_id]
            return int(ie.text_size), ie.text_sha1
        content = self.get_file(file_id).read()
        return len(content), sha_string(content)

    def __iter__(self):
        """Yield the new ids, then the base tree ids that still have a path."""
        for file_id in self._new_id_r:
            yield file_id
        for file_id in self.base_tree:
            if self.id2path(file_id) is None:
                continue
            yield file_id
859
860
0.5.41 by aaron.bentley at utoronto
Added non-working ChangesetTree
861
def patched_file(file_patch, original):
    """Apply file_patch to the contents of original and return the result.

    The original contents are written to a file in a fresh temporary
    directory, bzrlib.patch.patch is run on it, and the patched output is
    read back into memory.  The temporary directory is always removed.

    :param file_patch: The patch, passed as-is to bzrlib.patch.patch.
    :param original: A file-like object holding the contents to patch, or
        None to patch against an empty file.
    :return: A StringIO with the patched contents, positioned at the start.
    :raises AssertionError: if patch returns anything other than 0.
    """
    from bzrlib.patch import patch
    from tempfile import mkdtemp
    from shutil import rmtree
    from StringIO import StringIO
    from bzrlib.osutils import pumpfile
    import os.path
    temp_dir = mkdtemp()
    try:
        original_path = os.path.join(temp_dir, "originalfile")
        temp_original = open(original_path, "wb")
        try:
            if original is not None:
                pumpfile(original, temp_original)
        finally:
            # Close even if the copy fails, so the handle is not leaked.
            temp_original.close()
        patched_path = os.path.join(temp_dir, "patchfile")
        # The call must not live inside an assert statement: asserts are
        # removed under python -O, which would skip the patching itself.
        status = patch(file_patch, original_path, patched_path)
        if status != 0:
            raise AssertionError('patch returned %r' % (status,))
        result = StringIO()
        temp_patched = open(patched_path, "rb")
        try:
            pumpfile(temp_patched, result)
        finally:
            temp_patched.close()
        result.seek(0,0)

    finally:
        rmtree(temp_dir)

    return result
887