# Copyright (C) 2006 by Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
"""Read in a bundle stream, and process it into a BundleReader object."""
20
import base64
from cStringIO import StringIO
import os
import pprint

from bzrlib.errors import (TestamentMismatch, BzrError,
                           MalformedHeader, MalformedPatches, NotABundle)
from bzrlib.inventory import (Inventory, InventoryEntry,
                              InventoryDirectory, InventoryFile,
                              InventoryLink, ROOT_ID)
from bzrlib.osutils import sha_file, sha_string, pathjoin
from bzrlib.revision import Revision, NULL_REVISION
from bzrlib.testament import StrictTestament
from bzrlib.trace import mutter, warning
import bzrlib.transport
from bzrlib.tree import Tree
import bzrlib.urlutils
from bzrlib.xml5 import serializer_v5
40
class RevisionInfo(object):
    """Gets filled out for each revision object that is read.

    Every field other than ``revision_id`` starts out as None; the code
    that reads the bundle (and BundleInfo.complete_info()) is expected to
    fill them in before as_revision() is called.
    """

    def __init__(self, revision_id):
        """Create an empty record for the revision named revision_id."""
        self.revision_id = revision_id
        # Testament sha1 of the revision, compared during validation.
        self.sha1 = None
        self.committer = None
        # Textual date; converted to timestamp/timezone when those are unset.
        self.date = None
        self.timestamp = None
        self.timezone = None
        self.inventory_sha1 = None

        self.parent_ids = None
        self.base_id = None
        # Sequence of message lines; joined with '\n' by as_revision().
        self.message = None
        # Iterable of 'key: value' strings.
        self.properties = None
        # Iterable of (action_line, lines) pairs describing tree changes.
        self.tree_actions = None

    def __str__(self):
        """Return a pretty-printed dump of every attribute."""
        return pprint.pformat(self.__dict__)

    def as_revision(self):
        """Build a Revision object from the collected fields.

        timestamp, timezone and message must already be set (they are
        passed through float(), int() and '\\n'.join() respectively).

        :return: The new Revision, with parent ids and properties copied
            over when they are present.
        :raises BzrError: If a property string contains no 'key: value'
            separator and is not of the form 'key:'.
        """
        rev = Revision(revision_id=self.revision_id,
                       committer=self.committer,
                       timestamp=float(self.timestamp),
                       timezone=int(self.timezone),
                       inventory_sha1=self.inventory_sha1,
                       message='\n'.join(self.message))

        if self.parent_ids:
            rev.parent_ids.extend(self.parent_ids)

        if self.properties:
            for prop in self.properties:
                key_end = prop.find(': ')
                if key_end == -1:
                    # str.find() reports "not found" as -1 (never None).
                    # Slicing with -1 would silently produce a garbage
                    # key/value pair, so handle it explicitly.
                    # NOTE(review): 'key:' is treated as a property with an
                    # empty value (trailing space lost) -- confirm against
                    # the bundle serializer.
                    if not prop.endswith(':'):
                        raise BzrError('Malformed revision property'
                                       ' (no ": " separator): %r' % (prop,))
                    key = prop[:-1].encode('utf-8')
                    value = ''
                else:
                    key = prop[:key_end].encode('utf-8')
                    value = prop[key_end+2:].encode('utf-8')
                rev.properties[key] = value

        return rev
83
class BundleInfo(object):
    """This contains the meta information. Stuff that allows you to
    recreate the revision or inventory XML.
    """

    def __init__(self):
        """Create an empty bundle description."""
        # Bundle-wide defaults, used by complete_info() for revisions
        # that do not carry their own value.
        self.committer = None
        self.date = None
        self.message = None

        # A list of RevisionInfo objects
        self.revisions = []

        # The next entries are created during complete_info() and
        # other post-read functions.

        # A list of real Revision objects
        self.real_revisions = []

        self.timestamp = None
        self.timezone = None

    def __str__(self):
        """Return a pretty-printed dump of every attribute."""
        return pprint.pformat(self.__dict__)

    def complete_info(self):
        """This makes sure that all information is properly
        split up, based on the assumptions that can be made
        when information is missing.

        Revisions without a timestamp get one from their own date, or
        failing that from the bundle-wide timestamp; missing messages and
        committers fall back to the bundle-wide values.  self.real_revisions
        is rebuilt from scratch with one Revision per RevisionInfo.
        """
        from bzrlib.bundle.serializer import unpack_highres_date
        # Put in all of the guessable information.
        if not self.timestamp and self.date:
            self.timestamp, self.timezone = unpack_highres_date(self.date)

        self.real_revisions = []
        for rev in self.revisions:
            if rev.timestamp is None:
                if rev.date is not None:
                    rev.timestamp, rev.timezone = \
                            unpack_highres_date(rev.date)
                else:
                    rev.timestamp = self.timestamp
                    rev.timezone = self.timezone
            if rev.message is None and self.message:
                rev.message = self.message
            if rev.committer is None and self.committer:
                rev.committer = self.committer
            self.real_revisions.append(rev.as_revision())

    def get_base(self, revision):
        """Return the id of the revision that `revision` is a delta against.

        :param revision: An object with revision_id and parent_ids.
        :return: The explicitly recorded base id, else the last parent id;
            None when the base is NULL_REVISION or there are no parents.
        :raises KeyError: If the revision is not described by this bundle.
        """
        revision_info = self.get_revision_info(revision.revision_id)
        if revision_info.base_id is not None:
            if revision_info.base_id == NULL_REVISION:
                return None
            else:
                return revision_info.base_id
        if len(revision.parent_ids) == 0:
            # There is no base listed, and
            # the lowest revision doesn't have a parent
            # so this is probably against the empty tree
            # and thus base truly is None
            return None
        else:
            return revision.parent_ids[-1]

    def _get_target(self):
        """Return the target revision."""
        if len(self.real_revisions) > 0:
            return self.real_revisions[0].revision_id
        elif len(self.revisions) > 0:
            return self.revisions[0].revision_id
        return None

    target = property(_get_target, doc='The target revision id')

    def get_revision(self, revision_id):
        """Return the real Revision with the given id.

        :raises KeyError: If no entry of self.real_revisions matches.
        """
        for r in self.real_revisions:
            if r.revision_id == revision_id:
                return r
        raise KeyError(revision_id)

    def get_revision_info(self, revision_id):
        """Return the RevisionInfo with the given id.

        :raises KeyError: If no entry of self.revisions matches.
        """
        for r in self.revisions:
            if r.revision_id == revision_id:
                return r
        raise KeyError(revision_id)

    def revision_tree(self, repository, revision_id, base=None):
        """Build and validate the BundleTree for revision_id.

        :param repository: Supplies the base revision tree and is used to
            cross-check the hashes referenced by the bundle.
        :param revision_id: The bundled revision to reconstruct.
        :param base: Ignored; the base is always recomputed with get_base().
        :return: The populated BundleTree.
        """
        revision = self.get_revision(revision_id)
        base = self.get_base(revision)
        assert base != revision_id
        self._validate_references_from_repository(repository)
        # Looked up only so that an unknown revision fails here (KeyError).
        revision_info = self.get_revision_info(revision_id)
        inventory_revision_id = revision_id
        bundle_tree = BundleTree(repository.revision_tree(base),
                                 inventory_revision_id)
        self._update_tree(bundle_tree, revision_id)

        inv = bundle_tree.inventory
        self._validate_inventory(inv, revision_id)
        self._validate_revision(inv, revision_id)

        return bundle_tree

    def _validate_references_from_repository(self, repository):
        """Now that we have a repository which should have some of the
        revisions we care about, go through and validate all of them
        that we can.

        :raises BzrError: If a hash recorded in the bundle disagrees with
            the one computed from the repository, or if one revision id is
            recorded with two different hashes.
        """
        rev_to_sha = {}
        inv_to_sha = {}

        def add_sha(d, revision_id, sha1):
            """Record revision_id => sha1 in d, rejecting contradictions."""
            if revision_id is None:
                if sha1 is not None:
                    raise BzrError('A Null revision should always'
                        'have a null sha1 hash')
                return
            if revision_id in d:
                # This really should have been validated as part
                # of _validate_revisions but lets do it again
                if sha1 != d[revision_id]:
                    raise BzrError('** Revision %r referenced with 2 different'
                            ' sha hashes %s != %s' % (revision_id,
                                sha1, d[revision_id]))
            else:
                d[revision_id] = sha1

        # All of the contained revisions were checked
        # in _validate_revisions
        checked = {}
        for rev_info in self.revisions:
            checked[rev_info.revision_id] = True
            add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)

        for (rev, rev_info) in zip(self.real_revisions, self.revisions):
            add_sha(inv_to_sha, rev_info.revision_id, rev_info.inventory_sha1)

        count = 0
        missing = {}
        for revision_id, sha1 in rev_to_sha.iteritems():
            if repository.has_revision(revision_id):
                testament = StrictTestament.from_revision(repository,
                                                          revision_id)
                local_sha1 = testament.as_sha1()
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For revision id {%s}'
                            'local: %s, bundle: %s' % (revision_id, local_sha1, sha1))
                else:
                    count += 1
            elif revision_id not in checked:
                missing[revision_id] = sha1

        for inv_id, sha1 in inv_to_sha.iteritems():
            if repository.has_revision(inv_id):
                # Note: branch.get_inventory_sha1() just returns the value that
                # is stored in the revision text, and that value may be out
                # of date. This is bogus, because that means we aren't
                # validating the actual text, just that we wrote and read the
                # string. But for now, what the hell.
                local_sha1 = repository.get_inventory_sha1(inv_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For inventory id {%s}'
                                   'local: %s, bundle: %s' %
                                   (inv_id, local_sha1, sha1))
                else:
                    count += 1

        if len(missing) > 0:
            # I don't know if this is an error yet
            warning('Not all revision hashes could be validated.'
                    ' Unable validate %d hashes' % len(missing))
        mutter('Verified %d sha hashes for the bundle.' % count)

    def _validate_inventory(self, inv, revision_id):
        """At this point we should have generated the BundleTree,
        so build up an inventory, and make sure the hashes match.

        A mismatch is only reported with warning(); the serialized
        inventory is also written to ',,bogus-inv' for inspection.
        """

        assert inv is not None

        # Now we should have a complete inventory entry.
        s = serializer_v5.write_inventory_to_string(inv)
        sha1 = sha_string(s)
        # Target revision is the last entry in the real_revisions list
        rev = self.get_revision(revision_id)
        assert rev.revision_id == revision_id
        if sha1 != rev.inventory_sha1:
            # Close the debug dump explicitly instead of relying on the
            # garbage collector to flush and release the handle.
            dump = open(',,bogus-inv', 'wb')
            try:
                dump.write(s)
            finally:
                dump.close()
            warning('Inventory sha hash mismatch for revision %s. %s'
                    ' != %s' % (revision_id, sha1, rev.inventory_sha1))

    def _validate_revision(self, inventory, revision_id):
        """Make sure all revision entries match their checksum.

        :raises TestamentMismatch: If the testament sha1 computed from the
            revision and inventory differs from the one in the bundle.
        """

        # This is a mapping from each revision id to it's sha hash
        rev_to_sha1 = {}

        rev = self.get_revision(revision_id)
        rev_info = self.get_revision_info(revision_id)
        assert rev.revision_id == rev_info.revision_id
        assert rev.revision_id == revision_id
        sha1 = StrictTestament(rev, inventory).as_sha1()
        if sha1 != rev_info.sha1:
            raise TestamentMismatch(rev.revision_id, rev_info.sha1, sha1)
        # NOTE(review): rev_to_sha1 is local and empty at this point, so
        # this duplicate check cannot currently trigger.
        if rev.revision_id in rev_to_sha1:
            raise BzrError('Revision {%s} given twice in the list'
                    % (rev.revision_id))
        rev_to_sha1[rev.revision_id] = sha1

    def _update_tree(self, bundle_tree, revision_id):
        """This fills out a BundleTree based on the information
        that was read in.

        :param bundle_tree: A BundleTree to update with the new information.
        :param revision_id: The revision whose tree_actions are replayed;
            also the default last-changed revision for touched entries.
        :raises BzrError: If an action line is malformed.
        """

        def get_rev_id(last_changed, path, kind):
            """Record and return the last-changed revision id for path."""
            if last_changed is not None:
                changed_revision_id = last_changed.decode('utf-8')
            else:
                changed_revision_id = revision_id
            bundle_tree.note_last_changed(path, changed_revision_id)
            return changed_revision_id

        def extra_info(info, new_path):
            """Apply the 'name:value' items that follow the path(s).

            :return: (last_changed, encoding); either may be None.
            """
            last_changed = None
            encoding = None
            for info_item in info:
                try:
                    name, value = info_item.split(':', 1)
                except ValueError:
                    # The original raised a bare string here; string
                    # exceptions are not valid exceptions in Python 2.6+
                    # (raising one is itself a TypeError).
                    raise BzrError('Value %r has no colon' % info_item)
                if name == 'last-changed':
                    last_changed = value
                elif name == 'executable':
                    assert value in ('yes', 'no'), value
                    val = (value == 'yes')
                    bundle_tree.note_executable(new_path, val)
                elif name == 'target':
                    bundle_tree.note_target(new_path, value)
                elif name == 'encoding':
                    encoding = value
            return last_changed, encoding

        def do_patch(path, lines, encoding):
            """Store the (optionally base64-encoded) patch text for path."""
            if encoding is not None:
                assert encoding == 'base64'
                patch = base64.decodestring(''.join(lines))
            else:
                patch = ''.join(lines)
            bundle_tree.note_patch(path, patch)

        def renamed(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) < 2:
                raise BzrError('renamed action lines need both a from and to'
                        ': %r' % extra)
            old_path = info[0]
            if info[1].startswith('=> '):
                new_path = info[1][3:]
            else:
                new_path = info[1]

            bundle_tree.note_rename(old_path, new_path)
            last_modified, encoding = extra_info(info[2:], new_path)
            get_rev_id(last_modified, new_path, kind)
            if lines:
                do_patch(new_path, lines, encoding)

        def removed(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) > 1:
                # TODO: in the future we might allow file ids to be
                # given for removed entries
                raise BzrError('removed action lines should only have the path'
                        ': %r' % extra)
            path = info[0]
            bundle_tree.note_deletion(path)

        def added(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) <= 1:
                raise BzrError('add action lines require the path and file id'
                        ': %r' % extra)
            elif len(info) > 5:
                raise BzrError('add action lines have fewer than 5 entries.'
                        ': %r' % extra)
            path = info[0]
            if not info[1].startswith('file-id:'):
                raise BzrError('The file-id should follow the path for an add'
                        ': %r' % extra)
            file_id = info[1][8:]

            bundle_tree.note_id(file_id, path, kind)
            # this will be overridden in extra_info if executable is specified.
            bundle_tree.note_executable(path, False)
            last_changed, encoding = extra_info(info[2:], path)
            get_rev_id(last_changed, path, kind)
            if kind == 'directory':
                return
            do_patch(path, lines, encoding)

        def modified(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) < 1:
                raise BzrError('modified action lines have at least'
                        'the path in them: %r' % extra)
            path = info[0]

            last_modified, encoding = extra_info(info[1:], path)
            get_rev_id(last_modified, path, kind)
            if lines:
                do_patch(path, lines, encoding)

        valid_actions = {
            'renamed': renamed,
            'removed': removed,
            'added': added,
            'modified': modified,
        }
        # Each action line has the shape '<action> <kind> <extra>'.
        for action_line, lines in \
                self.get_revision_info(revision_id).tree_actions:
            first = action_line.find(' ')
            if first == -1:
                raise BzrError('Bogus action line'
                        ' (no opening space): %r' % action_line)
            second = action_line.find(' ', first+1)
            if second == -1:
                raise BzrError('Bogus action line'
                        ' (missing second space): %r' % action_line)
            action = action_line[:first]
            kind = action_line[first+1:second]
            if kind not in ('file', 'directory', 'symlink'):
                raise BzrError('Bogus action line'
                        ' (invalid object kind %r): %r' % (kind, action_line))
            extra = action_line[second+1:]

            if action not in valid_actions:
                raise BzrError('Bogus action line'
                        ' (unrecognized action): %r' % action_line)
            valid_actions[action](kind, extra, lines)
426
class BundleTree(Tree):
    """A tree described as a base tree plus the changes noted from a bundle.

    The note_*() methods record renames, additions, deletions, patches and
    metadata; the query methods answer from those records first and fall
    back to base_tree.
    """

    def __init__(self, base_tree, revision_id):
        """Wrap base_tree; revision_id is used for the built inventory."""
        self.base_tree = base_tree
        self._renamed = {} # Mapping from old_path => new_path
        self._renamed_r = {} # new_path => old_path
        self._new_id = {} # new_path => new_id
        self._new_id_r = {} # new_id => new_path
        self._kinds = {} # new_id => kind
        self._last_changed = {} # new_id => revision_id
        self._executable = {} # new_id => executable value
        self.patches = {}
        self._targets = {} # new path => new symlink target
        self.deleted = []
        self.contents_by_id = True
        self.revision_id = revision_id
        self._inventory = None

    @staticmethod
    def _true_path(path):
        """Normalize a path read from the bundle.

        NOTE(review): the body of this helper is not visible in the source
        this was reconstructed from.  _get_inventory looks the tree root up
        as self._new_id[''], so the root must normalize to ''; the spelling
        used for the root in bundles ('.' or './') is an assumption --
        confirm against the serializer.
        """
        if path in ('.', './'):
            return ''
        return path

    def __str__(self):
        """Return a pretty-printed dump of every attribute."""
        return pprint.pformat(self.__dict__)

    def note_rename(self, old_path, new_path):
        """A file/directory has been renamed from old_path => new_path"""
        new_path = self._true_path(new_path)
        old_path = self._true_path(old_path)
        assert new_path not in self._renamed
        assert old_path not in self._renamed_r
        self._renamed[new_path] = old_path
        self._renamed_r[old_path] = new_path

    def note_id(self, new_id, new_path, kind='file'):
        """Files that don't exist in base need a new id."""
        new_path = self._true_path(new_path)
        self._new_id[new_path] = new_id
        self._new_id_r[new_id] = new_path
        self._kinds[new_id] = kind

    def note_last_changed(self, file_id, revision_id):
        """Record the last-changed revision for an entry.

        The key is stored as given; get_last_changed() looks entries up
        by path.

        :raises BzrError: If a different revision was already recorded.
        """
        if (file_id in self._last_changed
                and self._last_changed[file_id] != revision_id):
            raise BzrError('Mismatched last-changed revision for file_id {%s}'
                    ': %s != %s' % (file_id,
                                    self._last_changed[file_id],
                                    revision_id))
        self._last_changed[file_id] = revision_id

    def note_patch(self, new_path, patch):
        """There is a patch for a given filename."""
        self.patches[self._true_path(new_path)] = patch

    def note_target(self, new_path, target):
        """The symlink at the new path has the given target"""
        self._targets[self._true_path(new_path)] = target

    def note_deletion(self, old_path):
        """The file at old_path has been deleted."""
        self.deleted.append(self._true_path(old_path))

    def note_executable(self, new_path, executable):
        """Record the executable flag of the file at new_path."""
        self._executable[self._true_path(new_path)] = executable

    def old_path(self, new_path):
        """Get the old_path (path in the base_tree) for the file at new_path

        Returns None when the path has no counterpart in the base tree.
        """
        new_path = self._true_path(new_path)
        assert new_path[:1] not in ('\\', '/')
        old_path = self._renamed.get(new_path)
        if old_path is not None:
            return old_path
        dirname, basename = os.path.split(new_path)
        # dirname is not '' doesn't work, because
        # dirname may be a unicode entry, and is
        # requires the objects to be identical
        if dirname != '':
            # The entry itself was not renamed, but a parent may have been.
            old_dir = self.old_path(dirname)
            if old_dir is None:
                old_path = None
            else:
                old_path = pathjoin(old_dir, basename)
        else:
            old_path = new_path
        #If the new path wasn't in renamed, the old one shouldn't be in
        #renamed_r
        if old_path in self._renamed_r:
            return None
        return old_path

    def new_path(self, old_path):
        """Get the new_path (path in the target_tree) for the file at old_path
        in the base tree.

        Returns None when the path has no counterpart in the target tree.
        """
        old_path = self._true_path(old_path)
        assert old_path[:1] not in ('\\', '/')
        new_path = self._renamed_r.get(old_path)
        if new_path is not None:
            return new_path
        # NOTE(review): new_path is always None at this point, so this only
        # fires if None is a key of _renamed; kept as found -- confirm
        # whether old_path was intended.
        if new_path in self._renamed:
            return None
        dirname, basename = os.path.split(old_path)
        if dirname != '':
            new_dir = self.new_path(dirname)
            if new_dir is None:
                new_path = None
            else:
                new_path = pathjoin(new_dir, basename)
        else:
            new_path = old_path
        #If the old path wasn't in renamed, the new one shouldn't be in
        #renamed
        if new_path in self._renamed:
            return None
        return new_path

    def path2id(self, path):
        """Return the id of the file present at path in the target tree."""
        file_id = self._new_id.get(path)
        if file_id is not None:
            return file_id
        old_path = self.old_path(path)
        if old_path is None:
            return None
        if old_path in self.deleted:
            return None
        if getattr(self.base_tree, 'path2id', None) is not None:
            return self.base_tree.path2id(old_path)
        else:
            return self.base_tree.inventory.path2id(old_path)

    def id2path(self, file_id):
        """Return the new path in the target tree of the file with id file_id"""
        path = self._new_id_r.get(file_id)
        if path is not None:
            return path
        old_path = self.base_tree.id2path(file_id)
        if old_path is None:
            return None
        if old_path in self.deleted:
            return None
        return self.new_path(old_path)

    def old_contents_id(self, file_id):
        """Return the id in the base_tree for the given file_id.
        Return None if the file did not exist in base.
        """
        if self.contents_by_id:
            if self.base_tree.has_id(file_id):
                return file_id
            else:
                return None
        new_path = self.id2path(file_id)
        return self.base_tree.path2id(new_path)

    def get_file(self, file_id):
        """Return a file-like object containing the new contents of the
        file given by file_id.

        TODO:   It might be nice if this actually generated an entry
                in the text-store, so that the file contents would
                then be cached.
        """
        base_id = self.old_contents_id(file_id)
        if base_id is not None:
            patch_original = self.base_tree.get_file(base_id)
        else:
            patch_original = None
        file_patch = self.patches.get(self.id2path(file_id))
        if file_patch is None:
            if (patch_original is None and
                self.get_kind(file_id) == 'directory'):
                # Directories have no text; hand back an empty file.
                return StringIO()
            assert patch_original is not None, "None: %s" % file_id
            return patch_original

        assert not file_patch.startswith('\\'), \
            'Malformed patch for %s, %r' % (file_id, file_patch)
        return patched_file(file_patch, patch_original)

    def get_symlink_target(self, file_id):
        """Return the symlink target, preferring one noted from the bundle."""
        new_path = self.id2path(file_id)
        try:
            return self._targets[new_path]
        except KeyError:
            return self.base_tree.get_symlink_target(file_id)

    def get_kind(self, file_id):
        """Return the kind noted for file_id, else the base tree's kind."""
        if file_id in self._kinds:
            return self._kinds[file_id]
        return self.base_tree.inventory[file_id].kind

    def is_executable(self, file_id):
        """Return the noted executable flag, else the base tree's."""
        path = self.id2path(file_id)
        if path in self._executable:
            return self._executable[path]
        else:
            return self.base_tree.inventory[file_id].executable

    def get_last_changed(self, file_id):
        """Return the noted last-changed revision, else the base tree's."""
        path = self.id2path(file_id)
        if path in self._last_changed:
            return self._last_changed[path]
        return self.base_tree.inventory[file_id].revision

    def get_size_and_sha1(self, file_id):
        """Return the size and sha1 hash of the given file id.
        If the file was not locally modified, this is extracted
        from the base_tree. Rather than re-reading the file.
        """
        new_path = self.id2path(file_id)
        if new_path is None:
            return None, None
        if new_path not in self.patches:
            # If the entry does not have a patch, then the
            # contents must be the same as in the base_tree
            ie = self.base_tree.inventory[file_id]
            if ie.text_size is None:
                return ie.text_size, ie.text_sha1
            return int(ie.text_size), ie.text_sha1
        fileobj = self.get_file(file_id)
        content = fileobj.read()
        return len(content), sha_string(content)

    def _get_inventory(self):
        """Build up the inventory entry for the BundleTree.

        This need to be called before ever accessing self.inventory
        """
        from os.path import dirname, basename

        assert self.base_tree is not None
        base_inv = self.base_tree.inventory

        if base_inv.root is not None:
            root_id = base_inv.root.file_id
        else:
            # The base tree has no root, so the bundle must have added one.
            root_id = self._new_id['']

        try:
            # New inventories have a unique root_id
            inv = Inventory(root_id, self.revision_id)
        except TypeError:
            inv = Inventory(revision_id=self.revision_id)
        inv.root.revision = self.get_last_changed(root_id)

        def add_entry(file_id):
            """Create the inventory entry for file_id and add it to inv."""
            path = self.id2path(file_id)
            if path is None:
                return
            parent_path = dirname(path)
            if parent_path == u'':
                parent_id = root_id
            else:
                parent_id = self.path2id(parent_path)

            kind = self.get_kind(file_id)
            revision_id = self.get_last_changed(file_id)

            name = basename(path)
            if kind == 'directory':
                ie = InventoryDirectory(file_id, name, parent_id)
            elif kind == 'file':
                ie = InventoryFile(file_id, name, parent_id)
                ie.executable = self.is_executable(file_id)
            elif kind == 'symlink':
                ie = InventoryLink(file_id, name, parent_id)
                ie.symlink_target = self.get_symlink_target(file_id)
            ie.revision = revision_id

            if kind in ('directory', 'symlink'):
                ie.text_size, ie.text_sha1 = None, None
            else:
                ie.text_size, ie.text_sha1 = self.get_size_and_sha1(file_id)
            if (ie.text_size is None) and (kind == 'file'):
                raise BzrError('Got a text_size of None for file_id %r' % file_id)
            inv.add(ie)

        # Sorted by path, so parents are added before their children.
        sorted_entries = self.sorted_path_id()
        for path, file_id in sorted_entries:
            if file_id == inv.root.file_id:
                continue
            add_entry(file_id)

        return inv

    # Have to overload the inherited inventory property
    # because _get_inventory is only called in the parent.
    # Reading the docs, property() objects do not use
    # overloading, they use the function as it was defined
    # at that instant
    inventory = property(_get_inventory)

    def __iter__(self):
        """Yield the file id of every entry in the inventory."""
        for path, entry in self.inventory.iter_entries():
            yield entry.file_id

    def sorted_path_id(self):
        """Return a sorted list of (path, file_id) for the target tree.

        Combines the entries added by the bundle with the base tree entries
        that still have a path in the target tree.
        """
        paths = []
        for result in self._new_id.iteritems():
            paths.append(result)
        for file_id in self.base_tree:
            path = self.id2path(file_id)
            if path is None:
                continue
            paths.append((path, file_id))
        paths.sort()
        return paths
739
def patched_file(file_patch, original):
    """Produce a file-like object with the patched version of a text

    :param file_patch: The patch text as a single string.
    :param original: The original text, passed to iter_patched as-is.
    :return: An IterableFile over the patched lines; an empty one when
        file_patch is the empty string.
    """
    from bzrlib.patches import iter_patched
    from bzrlib.iterablefile import IterableFile
    if file_patch == "":
        return IterableFile(())
    # string.splitlines(True) also splits on '\r', but the iter_patched code
    # only expects to iterate over '\n' style lines
    return IterableFile(iter_patched(original,
                StringIO(file_patch).readlines()))