3
Read in a changeset output, and process it into a Changeset object.
8
from cStringIO import StringIO
10
from bzrlib.tree import Tree
11
from bzrlib.trace import mutter, warning
12
from bzrlib.errors import BzrError
13
from bzrlib.xml5 import serializer_v5
14
from bzrlib.osutils import sha_file, sha_string
15
from bzrlib.revision import Revision
16
from bzrlib.inventory import (Inventory, InventoryEntry,
17
InventoryDirectory, InventoryFile,
20
from common import decode, get_header, header_str
22
class BadChangeset(Exception):
    """Root of the errors raised for an unparseable changeset."""


class MalformedHeader(BadChangeset):
    """The header section of the changeset is not well formed."""


class MalformedPatches(BadChangeset):
    """The patch section of the changeset is not well formed."""


class MalformedFooter(BadChangeset):
    """Error category for the footer section of the changeset."""
28
"""Now we want to find the filename effected.
29
Unfortunately the filename is written out as
30
repr(filename), which means that it surrounds
31
the name with quotes which may be single or double
32
(single is preferred unless there is a single quote in
33
the filename). And some characters will be escaped.
35
TODO: There has to be some pythonic way of undo-ing the
36
representation of a string rather than using eval.
39
if name[-1] != delimiter:
40
raise BadChangeset('Could not properly parse the'
41
' filename: %r' % name)
42
# We need to handle escaped hexadecimals too.
43
return name[1:-1].replace('\"', '"').replace("\'", "'")
45
class RevisionInfo(object):
46
"""Gets filled out for each revision object that is read.
48
def __init__(self, revision_id):
49
self.revision_id = revision_id
55
self.inventory_sha1 = None
58
self.parent_sha1s = {}
62
return pprint.pformat(self.__dict__)
64
def as_revision(self):
65
rev = Revision(revision_id=self.revision_id,
66
committer=self.committer,
67
timestamp=float(self.timestamp),
68
timezone=int(self.timezone),
69
inventory_sha1=self.inventory_sha1,
70
message='\n'.join(self.message))
73
for parent in self.parents:
74
revision_id, sha1 = parent.split()
75
rev.parent_ids.append(revision_id)
76
self.parent_sha1s[revision_id] = sha1
80
class ChangesetInfo(object):
81
"""This contains the meta information. Stuff that allows you to
82
recreate the revision or inventory XML.
91
# A list of RevisionInfo objects
96
# The next entries are created during complete_info() and
97
# other post-read functions.
99
# A list of real Revision objects
100
self.real_revisions = []
102
self.timestamp = None
106
return pprint.pformat(self.__dict__)
108
def complete_info(self):
109
"""This makes sure that all information is properly
110
split up, based on the assumptions that can be made
111
when information is missing.
113
from common import unpack_highres_date
114
# Put in all of the guessable information.
115
if not self.timestamp and self.date:
116
self.timestamp, self.timezone = unpack_highres_date(self.date)
118
self.real_revisions = []
119
for rev in self.revisions:
120
if rev.timestamp is None:
121
if rev.date is not None:
122
rev.timestamp, rev.timezone = \
123
unpack_highres_date(rev.date)
125
rev.timestamp = self.timestamp
126
rev.timezone = self.timezone
127
if rev.message is None and self.message:
128
rev.message = self.message
129
if rev.committer is None and self.committer:
130
rev.committer = self.committer
131
self.real_revisions.append(rev.as_revision())
133
if self.base is None:
134
# When we don't have a base, then the real base
135
# is the first parent of the first revision listed
136
rev = self.real_revisions[0]
137
if len(rev.parent_ids) == 0:
138
# There is no base listed, and
139
# the lowest revision doesn't have a parent
140
# so this is probably against the empty tree
141
# and thus base truly is None
143
self.base_sha1 = None
145
self.base = rev.parent_ids[0]
146
self.base_sha1 = self.revisions[0].parent_sha1s[self.base]
148
def _get_target(self):
149
"""Return the target revision."""
150
if len(self.real_revisions) > 0:
151
return self.real_revisions[-1].revision_id
152
elif len(self.revisions) > 0:
153
return self.revisions[-1].revision_id
156
target = property(_get_target, doc='The target revision id')
158
class ChangesetReader(object):
159
"""This class reads in a changeset from a file, and returns
160
a Changeset object, which can then be applied against a tree.
162
def __init__(self, from_file):
163
"""Read in the changeset from the file.
165
:param from_file: A file-like object (must have iterator support).
167
object.__init__(self)
168
self.from_file = from_file
169
self._next_line = None
171
self.info = ChangesetInfo()
172
# We put the actual inventory ids in the footer, so that the patch
173
# is easier to read for humans.
174
# Unfortunately, that means we need to read everything before we
175
# can create a proper changeset.
185
"""Make sure that the information read in makes sense
186
and passes appropriate checksums.
188
# Fill in all the missing blanks for the revisions
189
# and generate the real_revisions list.
190
self.info.complete_info()
191
self._validate_revisions()
193
def _validate_revisions(self):
194
"""Make sure all revision entries match their checksum."""
196
# This is a mapping from each revision id to its sha hash
199
for rev, rev_info in zip(self.info.real_revisions, self.info.revisions):
200
assert rev.revision_id == rev_info.revision_id
202
serializer_v5.write_revision(rev, sio)
205
if sha1 != rev_info.sha1:
206
raise BzrError('Revision checksum mismatch.'
207
' For revision_id {%s} supplied sha1 (%s) != measured (%s)'
208
% (rev.revision_id, rev_info.sha1, sha1))
209
if rev_to_sha1.has_key(rev.revision_id):
210
raise BzrError('Revision {%s} given twice in the list'
212
rev_to_sha1[rev.revision_id] = sha1
214
# Now that we've checked all the sha1 sums, we can make sure that
215
# at least for the small list we have, all of the references are
217
## TODO: Bring this back
218
## for rev in self.info.real_revisions:
219
## for p_id in rev.parent_ids:
220
## if p_id in rev_to_sha1:
221
## if parent.revision_sha1 != rev_to_sha1[p_id]:
222
## raise BzrError('Parent revision checksum mismatch.'
223
## ' A parent was referenced with an'
224
## ' incorrect checksum'
225
## ': {%r} %s != %s' % (parent.revision_id,
226
## parent.revision_sha1,
227
## rev_to_sha1[parent.revision_id]))
229
def _validate_references_from_branch(self, branch):
230
"""Now that we have a branch which should have some of the
231
revisions we care about, go through and validate all of them
236
def add_sha(d, revision_id, sha1):
237
if revision_id is None:
239
raise BzrError('A Null revision should always'
240
'have a null sha1 hash')
243
# This really should have been validated as part
244
# of _validate_revisions but lets do it again
245
if sha1 != d[revision_id]:
246
raise BzrError('** Revision %r referenced with 2 different'
247
' sha hashes %s != %s' % (revision_id,
248
sha1, d[revision_id]))
250
d[revision_id] = sha1
252
add_sha(rev_to_sha, self.info.base, self.info.base_sha1)
253
# All of the contained revisions were checked
254
# in _validate_revisions
256
for rev_info in self.info.revisions:
257
checked[rev_info.revision_id] = True
258
add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)
260
for (rev, rev_info) in zip(self.info.real_revisions, self.info.revisions):
261
add_sha(inv_to_sha, rev_info.revision_id, rev_info.inventory_sha1)
262
for p_id, sha1 in rev_info.parent_sha1s.iteritems():
263
add_sha(rev_to_sha, p_id, sha1)
267
for revision_id, sha1 in rev_to_sha.iteritems():
268
if branch.has_revision(revision_id):
269
local_sha1 = branch.get_revision_sha1(revision_id)
270
if sha1 != local_sha1:
271
raise BzrError('sha1 mismatch. For revision id {%s}'
272
'local: %s, cset: %s' % (revision_id, local_sha1, sha1))
275
elif revision_id not in checked:
276
missing[revision_id] = sha1
278
for inv_id, sha1 in inv_to_sha.iteritems():
279
if branch.has_revision(inv_id):
280
# TODO: Currently branch.get_inventory_sha1() just returns the value
281
# that is stored in the revision text. Which is *really* bogus, because
282
# that means we aren't validating the actual text, just that we wrote
283
# and read the string. But for now, what the hell.
284
local_sha1 = branch.get_inventory_sha1(inv_id)
285
if sha1 != local_sha1:
286
raise BzrError('sha1 mismatch. For inventory id {%s}'
287
'local: %s, cset: %s' % (inv_id, local_sha1, sha1))
292
# I don't know if this is an error yet
293
warning('Not all revision hashes could be validated.'
294
' Unable validate %d hashes' % len(missing))
295
mutter('Verified %d sha hashes for the changeset.' % count)
297
def _validate_inventory(self, inv):
298
"""At this point we should have generated the ChangesetTree,
299
so build up an inventory, and make sure the hashes match.
302
assert inv is not None
304
# Now we should have a complete inventory entry.
305
s = serializer_v5.write_inventory_to_string(inv)
307
# Target revision is the last entry in the real_revisions list
308
rev = self.info.real_revisions[-1]
309
if sha1 != rev.inventory_sha1:
310
open(',,bogus-inv', 'wb').write(s)
311
raise BzrError('Inventory sha hash mismatch.')
314
def get_changeset(self, branch):
315
"""Return the meta information, and a Changeset tree which can
316
be used to populate the local stores and working tree, respectively.
318
self._validate_references_from_branch(branch)
319
cset_tree = ChangesetTree(branch.revision_tree(self.info.base))
320
self._update_tree(cset_tree)
322
inv = cset_tree.inventory
323
self._validate_inventory(inv)
325
return self.info, cset_tree
328
"""yield the next line, but secretly
329
keep 1 extra line for peeking.
331
for line in self.from_file:
332
last = self._next_line
333
self._next_line = line
335
#mutter('yielding line: %r' % last)
337
last = self._next_line
338
self._next_line = None
339
#mutter('yielding line: %r' % last)
342
def _read_header(self):
343
"""Read the bzr header"""
344
header = get_header()
346
for line in self._next():
348
# not all mailers will keep trailing whitespace
351
if (not line.startswith('# ') or not line.endswith('\n')
352
or decode(line[2:-1]) != header[0]):
353
raise MalformedHeader('Found a header, but it'
354
' was improperly formatted')
355
header.pop(0) # We read this line.
357
break # We found everything.
358
elif (line.startswith('#') and line.endswith('\n')):
359
line = decode(line[1:-1].strip())
360
if line[:len(header_str)] == header_str:
361
if line == header[0]:
364
raise MalformedHeader('Found what looks like'
365
' a header, but did not match')
368
raise MalformedHeader('Did not find an opening header')
370
for line in self._next():
371
# The bzr header is terminated with a blank line
372
# which does not start with '#'
375
self._handle_next(line)
377
def _read_next_entry(self, line, indent=1):
378
"""Read in a key-value pair
380
if not line.startswith('#'):
381
raise MalformedHeader('Bzr header did not start with #')
382
line = decode(line[1:-1]) # Remove the '#' and '\n'
383
if line[:indent] == ' '*indent:
386
return None, None# Ignore blank lines
388
loc = line.find(': ')
393
value = self._read_many(indent=indent+3)
394
elif line[-1:] == ':':
396
value = self._read_many(indent=indent+3)
398
raise MalformedHeader('While looking for key: value pairs,'
399
' did not find the colon %r' % (line))
401
key = key.replace(' ', '_')
402
#mutter('found %s: %s' % (key, value))
405
def _handle_next(self, line):
406
key, value = self._read_next_entry(line, indent=1)
407
mutter('_handle_next %r => %r' % (key, value))
411
if key == 'revision':
412
self._read_revision(value)
413
elif hasattr(self.info, key):
414
if getattr(self.info, key) is None:
415
setattr(self.info, key, value)
417
raise MalformedHeader('Duplicated Key: %s' % key)
419
# What do we do with a key we don't recognize
420
raise MalformedHeader('Unknown Key: %s' % key)
422
def _read_many(self, indent):
423
"""If a line ends with no entry, that means that it should be
424
followed with multiple lines of values.
426
This detects the end of the list, because it will be a line that
427
does not start properly indented.
430
start = '#' + (' '*indent)
432
if self._next_line is None or self._next_line[:len(start)] != start:
435
for line in self._next():
436
values.append(decode(line[len(start):-1]))
437
if self._next_line is None or self._next_line[:len(start)] != start:
441
def _read_one_patch(self):
442
"""Read in one patch, return the complete patch, along with
445
:return: action, lines, do_continue
447
#mutter('_read_one_patch: %r' % self._next_line)
448
# Peek and see if there are no patches
449
if self._next_line is None or self._next_line.startswith('#'):
450
return None, [], False
454
for line in self._next():
456
if not line.startswith('==='):
457
raise MalformedPatches('The first line of all patches'
458
' should be a bzr meta line "==="'
460
action = decode(line[4:-1])
461
if self._next_line is not None and self._next_line.startswith('==='):
462
return action, lines, True
463
elif self._next_line is None or self._next_line.startswith('#'):
464
return action, lines, False
471
return action, lines, False
473
def _read_patches(self):
476
action, lines, do_continue = self._read_one_patch()
477
if action is not None:
478
self.info.actions.append((action, lines))
480
def _read_revision(self, revision_id):
481
"""Revision entries have extra information associated.
483
rev_info = RevisionInfo(revision_id)
485
for line in self._next():
486
key,value = self._read_next_entry(line, indent=4)
489
if hasattr(rev_info, key):
490
if getattr(rev_info, key) is None:
491
setattr(rev_info, key, value)
493
raise MalformedHeader('Duplicated Key: %s' % key)
495
# What do we do with a key we don't recognize
496
raise MalformedHeader('Unknown Key: %s' % key)
498
if self._next_line is None or not self._next_line.startswith(start):
501
self.info.revisions.append(rev_info)
503
def _read_footer(self):
504
"""Read the rest of the meta information.
506
:param first_line: The previous step iterates past what it
507
can handle. That extra line is given here.
509
for line in self._next():
510
self._handle_next(line)
511
if self._next_line is None or not self._next_line.startswith('#'):
514
def _update_tree(self, cset_tree):
515
"""This fills out a ChangesetTree based on the information
518
:param cset_tree: A ChangesetTree to update with the new information.
521
def get_rev_id(info, file_id, kind):
523
if not info.startswith('last-changed:'):
524
raise BzrError("Last changed revision should start with 'last-changed:'"
526
revision_id = decode(info[13:])
527
elif cset_tree._last_changed.has_key(file_id):
528
return cset_tree._last_changed[file_id]
530
revision_id = self.info.target
531
cset_tree.note_last_changed(file_id, revision_id)
534
def renamed(kind, extra, lines):
535
info = extra.split(' // ')
537
raise BzrError('renamed action lines need both a from and to'
540
if info[1].startswith('=> '):
541
new_path = info[1][3:]
545
file_id = cset_tree.path2id(old_path)
547
revision = get_rev_id(info[2], file_id, kind)
549
revision = get_rev_id(None, file_id, kind)
550
cset_tree.note_rename(old_path, new_path)
552
cset_tree.note_patch(new_path, ''.join(lines))
554
def removed(kind, extra, lines):
555
info = extra.split(' // ')
557
# TODO: in the future we might allow file ids to be
558
# given for removed entries
559
raise BzrError('removed action lines should only have the path'
562
cset_tree.note_deletion(path)
564
def added(kind, extra, lines):
565
info = extra.split(' // ')
567
raise BzrError('add action lines require the path and file id'
570
raise BzrError('add action lines have fewer than 3 entries.'
573
if not info[1].startswith('file-id:'):
574
raise BzrError('The file-id should follow the path for an add'
576
file_id = info[1][8:]
578
cset_tree.note_id(file_id, path, kind)
580
revision = get_rev_id(info[2], file_id, kind)
582
revision = get_rev_id(None, file_id, kind)
583
if kind == 'directory':
585
cset_tree.note_patch(path, ''.join(lines))
587
def modified(kind, extra, lines):
588
info = extra.split(' // ')
590
raise BzrError('modified action lines have at least'
591
'the path in them: %r' % extra)
594
file_id = cset_tree.path2id(path)
596
revision = get_rev_id(info[1], file_id, kind)
598
revision = get_rev_id(None, file_id, kind)
599
cset_tree.note_patch(path, ''.join(lines))
608
for action_line, lines in self.info.actions:
609
first = action_line.find(' ')
611
raise BzrError('Bogus action line'
612
' (no opening space): %r' % action_line)
613
second = action_line.find(' ', first+1)
615
raise BzrError('Bogus action line'
616
' (missing second space): %r' % action_line)
617
action = action_line[:first]
618
kind = action_line[first+1:second]
619
if kind not in ('file', 'directory'):
620
raise BzrError('Bogus action line'
621
' (invalid object kind %r): %r' % (kind, action_line))
622
extra = action_line[second+1:]
624
if action not in valid_actions:
625
raise BzrError('Bogus action line'
626
' (unrecognized action): %r' % action_line)
627
valid_actions[action](kind, extra, lines)
629
def read_changeset(from_file, branch):
    """Parse a changeset from an iterable object (such as a file object).

    :param from_file: A file-like object to read the changeset information.
    :param branch: Used to build the changeset tree; it needs to contain
                   the base of the changeset. (Which you probably won't
                   know about until after the changeset is parsed.)
    :return: Whatever ChangesetReader.get_changeset() returns for branch.
    """
    return ChangesetReader(from_file).get_changeset(branch)
640
class ChangesetTree(Tree):
641
def __init__(self, base_tree):
642
self.base_tree = base_tree
643
self._renamed = {} # Mapping from old_path => new_path
644
self._renamed_r = {} # new_path => old_path
645
self._new_id = {} # new_path => new_id
646
self._new_id_r = {} # new_id => new_path
647
self._kinds = {} # new_id => kind
648
self._last_changed = {} # new_id => revision_id
651
self.contents_by_id = True
652
self._inventory = None
655
return pprint.pformat(self.__dict__)
657
def note_rename(self, old_path, new_path):
    """Record that a file/directory has been renamed old_path => new_path.

    ``_renamed`` is keyed by the new path and ``_renamed_r`` by the old
    path, so the duplicate checks are made against those same keys.

    :param old_path: Path of the entry in the base tree.
    :param new_path: Path of the entry in the target tree.
    :raises AssertionError: If new_path is already the destination of a
        recorded rename, or old_path is already the source of one.
    """
    # Check each mapping with the key it is actually indexed by; checking
    # the opposite key rejects valid swaps and misses real duplicates.
    assert new_path not in self._renamed, \
        '%r is already the destination of a rename' % (new_path,)
    assert old_path not in self._renamed_r, \
        '%r has already been renamed' % (old_path,)
    self._renamed[new_path] = old_path
    self._renamed_r[old_path] = new_path
664
def note_id(self, new_id, new_path, kind='file'):
    """Register the id and kind of an entry that is absent from base."""
    # The path->id and id->path tables are kept in lock-step.
    path_to_id = self._new_id
    path_to_id[new_path] = new_id
    id_to_path = self._new_id_r
    id_to_path[new_id] = new_path
    self._kinds[new_id] = kind
670
def note_last_changed(self, file_id, revision_id):
671
if (self._last_changed.has_key(file_id)
672
and self._last_changed[file_id] != revision_id):
673
raise BzrError('Mismatched last-changed revision for file_id {%s}'
674
': %s != %s' % (file_id,
675
self._last_changed[file_id],
677
self._last_changed[file_id] = revision_id
679
def note_patch(self, new_path, patch):
    """Remember the patch that applies to the file at new_path."""
    patch_table = self.patches
    patch_table[new_path] = patch
683
def note_deletion(self, old_path):
    """Record that the file at old_path has been deleted."""
    removed_paths = self.deleted
    removed_paths.append(old_path)
687
def old_path(self, new_path):
688
"""Get the old_path (path in the base_tree) for the file at new_path"""
689
assert new_path[:1] not in ('\\', '/')
690
old_path = self._renamed.get(new_path)
691
if old_path is not None:
693
dirname,basename = os.path.split(new_path)
694
# dirname is not '' doesn't work, because
695
# dirname may be a unicode entry, and is
696
# requires the objects to be identical
698
old_dir = self.old_path(dirname)
702
old_path = os.path.join(old_dir, basename)
705
#If the new path wasn't in renamed, the old one shouldn't be in
707
if self._renamed_r.has_key(old_path):
711
def new_path(self, old_path):
712
"""Get the new_path (path in the target_tree) for the file at old_path
715
assert old_path[:1] not in ('\\', '/')
716
new_path = self._renamed_r.get(old_path)
717
if new_path is not None:
719
if self._renamed.has_key(new_path):
721
dirname,basename = os.path.split(old_path)
723
new_dir = self.new_path(dirname)
727
new_path = os.path.join(new_dir, basename)
730
#If the old path wasn't in renamed, the new one shouldn't be in
732
if self._renamed.has_key(new_path):
736
def path2id(self, path):
737
"""Return the id of the file present at path in the target tree."""
738
file_id = self._new_id.get(path)
739
if file_id is not None:
741
old_path = self.old_path(path)
744
if old_path in self.deleted:
746
if hasattr(self.base_tree, 'path2id'):
747
return self.base_tree.path2id(old_path)
749
return self.base_tree.inventory.path2id(old_path)
751
def id2path(self, file_id):
752
"""Return the new path in the target tree of the file with id file_id"""
753
path = self._new_id_r.get(file_id)
756
old_path = self.base_tree.id2path(file_id)
759
if old_path in self.deleted:
761
return self.new_path(old_path)
763
def old_contents_id(self, file_id):
764
"""Return the id in the base_tree for the given file_id,
765
or None if the file did not exist in base.
767
FIXME: Something doesn't seem right here. It seems like this function
768
should always either return None or file_id. Even if
769
you are doing the by-path lookup, you are doing a
770
id2path lookup, just to do the reverse path2id lookup.
772
Notice that you're doing the path2id on a different tree!
774
if self.contents_by_id:
775
if self.base_tree.has_id(file_id):
779
new_path = self.id2path(file_id)
780
return self.base_tree.path2id(new_path)
782
def get_file(self, file_id):
783
"""Return a file-like object containing the new contents of the
784
file given by file_id.
786
TODO: It might be nice if this actually generated an entry
787
in the text-store, so that the file contents would
790
base_id = self.old_contents_id(file_id)
791
if base_id is not None:
792
patch_original = self.base_tree.get_file(base_id)
794
patch_original = None
795
file_patch = self.patches.get(self.id2path(file_id))
796
if file_patch is None:
797
return patch_original
799
assert not file_patch.startswith('\\'), \
800
'Malformed patch for %s, %r' % (file_id, file_patch)
801
return patched_file(file_patch, patch_original)
803
def get_kind(self, file_id):
    """Return the kind of file_id.

    A kind recorded in ``_kinds`` wins; otherwise the kind is read from
    the matching entry of the base tree's inventory.
    """
    recorded = self._kinds
    if file_id not in recorded:
        return self.base_tree.inventory[file_id].kind
    return recorded[file_id]
808
def get_last_changed(self, file_id):
    """Return the revision id in which file_id last changed.

    A value recorded in ``_last_changed`` wins; otherwise the revision
    is read from the matching entry of the base tree's inventory.
    """
    recorded = self._last_changed
    if file_id not in recorded:
        return self.base_tree.inventory[file_id].revision
    return recorded[file_id]
813
def get_size_and_sha1(self, file_id):
814
"""Return the size and sha1 hash of the given file id.
815
If the file was not locally modified, this is extracted
816
from the base_tree. Rather than re-reading the file.
818
new_path = self.id2path(file_id)
821
if new_path not in self.patches:
822
# If the entry does not have a patch, then the
823
# contents must be the same as in the base_tree
824
ie = self.base_tree.inventory[file_id]
825
if ie.text_size is None:
826
return ie.text_size, ie.text_sha1
827
return int(ie.text_size), ie.text_sha1
828
fileobj = self.get_file(file_id)
829
content = fileobj.read()
830
return len(content), sha_string(content)
833
def _get_inventory(self):
834
"""Build up the inventory entry for the ChangesetTree.
836
This need to be called before ever accessing self.inventory
838
from os.path import dirname, basename
840
assert self.base_tree is not None
841
base_inv = self.base_tree.inventory
842
root_id = base_inv.root.file_id
844
# New inventories have a unique root_id
845
inv = Inventory(root_id)
849
def add_entry(file_id):
850
path = self.id2path(file_id)
853
parent_path = dirname(path)
854
if parent_path == u'':
857
parent_id = self.path2id(parent_path)
859
kind = self.get_kind(file_id)
860
revision_id = self.get_last_changed(file_id)
862
name = basename(path)
863
if kind == 'directory':
864
ie = InventoryDirectory(file_id, name, parent_id)
866
ie = InventoryFile(file_id, name, parent_id)
867
elif kind == 'symlink':
868
ie = InventoryLink(file_id, name, parent_id)
869
ie.revision = revision_id
871
if kind == 'directory':
872
ie.text_size, ie.text_sha1 = None, None
874
ie.text_size, ie.text_sha1 = self.get_size_and_sha1(file_id)
875
if (ie.text_size is None) and (kind != 'directory'):
876
raise BzrError('Got a text_size of None for file_id %r' % file_id)
879
sorted_entries = self.sorted_path_id()
880
for path, file_id in sorted_entries:
881
if file_id == inv.root.file_id:
887
# Have to overload the inherited inventory property
888
# because _get_inventory is only called in the parent.
889
# Reading the docs, property() objects do not use
890
# overloading, they use the function as it was defined
892
inventory = property(_get_inventory)
895
for path, entry in self.inventory.iter_entries():
898
def sorted_path_id(self):
900
for result in self._new_id.iteritems():
902
for id in self.base_tree:
903
path = self.id2path(id)
906
paths.append((path, id))
910
def patched_file(file_patch, original):
911
"""Produce a file-like object with the patched version of a text"""
912
from patches import iter_patched
913
from iterablefile import IterableFile
915
return IterableFile(())
916
return IterableFile(iter_patched(original, file_patch.splitlines(True)))