# Copyright (C) 2006 by Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Read in a bundle stream, and process it into a BundleReader object."""

import base64
from cStringIO import StringIO
import os
import pprint

from bzrlib.errors import (TestamentMismatch, BzrError,
                           MalformedHeader, MalformedPatches, NotABundle)
from bzrlib.inventory import (Inventory, InventoryEntry,
                              InventoryDirectory, InventoryFile,
                              InventoryLink)
from bzrlib.osutils import sha_file, sha_string, pathjoin
from bzrlib.revision import Revision, NULL_REVISION
from bzrlib.testament import StrictTestament
from bzrlib.trace import mutter, warning
import bzrlib.transport
from bzrlib.tree import Tree
import bzrlib.urlutils
from bzrlib.xml5 import serializer_v5

40
class RevisionInfo(object):
    """Gets filled out for each revision object that is read.

    Every field except ``revision_id`` starts out as None and is assigned
    later by the code that parses the bundle.  ``as_revision()`` turns the
    collected values into a ``Revision`` object.
    """

    def __init__(self, revision_id):
        """Create an empty record for the revision ``revision_id``."""
        self.revision_id = revision_id
        self.sha1 = None
        self.committer = None
        self.date = None
        self.timestamp = None
        self.timezone = None
        self.inventory_sha1 = None

        self.parent_ids = None
        self.base_id = None
        self.message = None
        self.properties = None
        self.tree_actions = None

    def __str__(self):
        """Return a pretty-printed dump of every attribute."""
        return pprint.pformat(self.__dict__)

    def as_revision(self):
        """Build a ``Revision`` from the values collected so far.

        ``timestamp`` is converted with float(), ``timezone`` with int(),
        and ``message`` (a sequence of lines) is joined with newlines.
        Parent ids and properties are only copied when they are set.

        :return: The newly created Revision.
        :raises ValueError: If a property entry is neither ``key: value``
            nor a bare ``key:``.
        """
        rev = Revision(revision_id=self.revision_id,
                       committer=self.committer,
                       timestamp=float(self.timestamp),
                       timezone=int(self.timezone),
                       inventory_sha1=self.inventory_sha1,
                       message='\n'.join(self.message))

        # Both attributes default to None, so guard before using them.
        if self.parent_ids:
            rev.parent_ids.extend(self.parent_ids)

        if self.properties:
            for prop in self.properties:
                key_end = prop.find(': ')
                if key_end == -1:
                    # str.find() reports "not found" as -1, never None.
                    # NOTE(review): a bare "key:" is treated as a property
                    # with an empty value, presumably because trailing
                    # whitespace was stripped when the bundle was written
                    # -- confirm against the bundle serializer.
                    if not prop.endswith(':'):
                        raise ValueError('Malformed revision property: %r'
                                         % (prop,))
                    key = prop[:-1]
                    value = ''
                else:
                    key = prop[:key_end]
                    value = prop[key_end + 2:]
                rev.properties[key.encode('utf-8')] = value.encode('utf-8')

        return rev
83
class BundleInfo(object):
    """This contains the meta information. Stuff that allows you to
    recreate the revision or inventory XML.
    """

    def __init__(self):
        """Create an empty bundle description."""
        self.committer = None
        self.date = None
        self.message = None

        # A list of RevisionInfo objects
        self.revisions = []

        # The next entries are created during complete_info() and
        # other post-read functions.

        # A list of real Revision objects
        self.real_revisions = []

        self.timestamp = None
        self.timezone = None

    def __str__(self):
        """Return a pretty-printed dump of every attribute."""
        return pprint.pformat(self.__dict__)

    def complete_info(self):
        """This makes sure that all information is properly
        split up, based on the assumptions that can be made
        when information is missing.

        Bundle-level values (timestamp, timezone, message, committer) are
        used as defaults for any revision that does not carry its own, and
        ``self.real_revisions`` is rebuilt from ``self.revisions``.
        """
        from bzrlib.bundle.serializer import unpack_highres_date
        # Put in all of the guessable information.
        if not self.timestamp and self.date:
            self.timestamp, self.timezone = unpack_highres_date(self.date)
            mutter('bundle date %s => %s, %s',
                   self.date, self.timestamp, self.timezone)

        self.real_revisions = []
        for rev in self.revisions:
            if rev.timestamp is None:
                if rev.date is not None:
                    rev.timestamp, rev.timezone = \
                        unpack_highres_date(rev.date)
                    mutter('date %s => %s, %s',
                           rev.date, rev.timestamp, rev.timezone)
                else:
                    rev.timestamp = self.timestamp
                    rev.timezone = self.timezone
            if rev.message is None and self.message:
                rev.message = self.message
            if rev.committer is None and self.committer:
                rev.committer = self.committer
            self.real_revisions.append(rev.as_revision())

    def get_base(self, revision):
        """Return the id of the revision that ``revision`` is based on.

        An explicit ``base_id`` recorded for the revision wins; otherwise
        the last parent is used.

        :param revision: An object with ``revision_id`` and ``parent_ids``.
        :return: The base revision id, or None when the base is
            NULL_REVISION or the revision has no parents.
        :raises KeyError: If the revision is not part of this bundle.
        """
        revision_info = self.get_revision_info(revision.revision_id)
        if revision_info.base_id is not None:
            if revision_info.base_id == NULL_REVISION:
                return None
            return revision_info.base_id
        if not revision.parent_ids:
            # There is no base listed, and
            # the lowest revision doesn't have a parent
            # so this is probably against the empty tree
            # and thus base truly is None
            return None
        return revision.parent_ids[-1]

    def _get_target(self):
        """Return the target revision."""
        if self.real_revisions:
            return self.real_revisions[0].revision_id
        if self.revisions:
            return self.revisions[0].revision_id
        return None

    target = property(_get_target, doc='The target revision id')

    def get_revision(self, revision_id):
        """Return the entry of ``real_revisions`` with this revision id.

        :raises KeyError: If no such revision exists.
        """
        for r in self.real_revisions:
            if r.revision_id == revision_id:
                return r
        raise KeyError(revision_id)

    def get_revision_info(self, revision_id):
        """Return the entry of ``revisions`` with this revision id.

        :raises KeyError: If no such revision exists.
        """
        for r in self.revisions:
            if r.revision_id == revision_id:
                return r
        raise KeyError(revision_id)

    def revision_tree(self, repository, revision_id, base=None):
        """Build and validate the BundleTree for ``revision_id``.

        :param repository: Supplies the base tree and is used to validate
            the sha hashes referenced by the bundle.
        :param revision_id: The revision to reconstruct.
        :param base: Ignored; the base is always computed with get_base().
        :return: The populated BundleTree.
        :raises KeyError: If ``revision_id`` is not in the bundle.
        """
        revision = self.get_revision(revision_id)
        base = self.get_base(revision)
        assert base != revision_id
        self._validate_references_from_repository(repository)
        bundle_tree = BundleTree(repository.revision_tree(base),
                                 revision_id)
        self._update_tree(bundle_tree, revision_id)

        inv = bundle_tree.inventory
        self._validate_inventory(inv, revision_id)
        self._validate_revision(inv, revision_id)

        return bundle_tree

    def _validate_references_from_repository(self, repository):
        """Now that we have a repository which should have some of the
        revisions we care about, go through and validate all of them
        that we can.

        :raises BzrError: If a hash recorded in the bundle disagrees with
            another hash in the bundle or with the repository.
        """
        rev_to_sha = {}
        inv_to_sha = {}

        def add_sha(d, revision_id, sha1):
            # Record sha1 for revision_id in d, refusing conflicting values.
            if revision_id is None:
                if sha1 is not None:
                    raise BzrError('A Null revision should always'
                                   ' have a null sha1 hash')
                return
            if revision_id in d:
                # This really should have been validated as part
                # of _validate_revisions but lets do it again
                if sha1 != d[revision_id]:
                    raise BzrError('** Revision %r referenced with 2'
                                   ' different sha hashes %s != %s'
                                   % (revision_id, sha1, d[revision_id]))
            else:
                d[revision_id] = sha1

        # All of the contained revisions were checked
        # in _validate_revisions
        checked = {}
        for rev_info in self.revisions:
            checked[rev_info.revision_id] = True
            add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)

        # zip() stops at the shorter list, so inventory hashes are only
        # collected for revisions that have a real Revision object.
        for (rev, rev_info) in zip(self.real_revisions, self.revisions):
            add_sha(inv_to_sha, rev_info.revision_id,
                    rev_info.inventory_sha1)

        count = 0
        missing = {}
        for revision_id, sha1 in rev_to_sha.items():
            if repository.has_revision(revision_id):
                testament = StrictTestament.from_revision(repository,
                                                          revision_id)
                local_sha1 = testament.as_sha1()
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For revision id {%s}'
                                   ' local: %s, bundle: %s'
                                   % (revision_id, local_sha1, sha1))
                count += 1
            elif revision_id not in checked:
                missing[revision_id] = sha1

        for inv_id, sha1 in inv_to_sha.items():
            if repository.has_revision(inv_id):
                # Note: branch.get_inventory_sha1() just returns the value that
                # is stored in the revision text, and that value may be out
                # of date. This is bogus, because that means we aren't
                # validating the actual text, just that we wrote and read the
                # string. But for now, what the hell.
                local_sha1 = repository.get_inventory_sha1(inv_id)
                if sha1 != local_sha1:
                    raise BzrError('sha1 mismatch. For inventory id {%s}'
                                   ' local: %s, bundle: %s' %
                                   (inv_id, local_sha1, sha1))
                count += 1

        if missing:
            # I don't know if this is an error yet
            warning('Not all revision hashes could be validated.'
                    ' Unable validate %d hashes' % len(missing))
        mutter('Verified %d sha hashes for the bundle.' % count)

    def _validate_inventory(self, inv, revision_id):
        """At this point we should have generated the BundleTree,
        so build up an inventory, and make sure the hashes match.

        A mismatch only produces a warning; the serialized inventory is
        written to ``,,bogus-inv`` in the current directory for inspection.
        """
        assert inv is not None

        # Now we should have a complete inventory entry.
        s = serializer_v5.write_inventory_to_string(inv)
        sha1 = sha_string(s)
        # Target revision is the last entry in the real_revisions list
        rev = self.get_revision(revision_id)
        assert rev.revision_id == revision_id
        if sha1 != rev.inventory_sha1:
            # Close the file explicitly rather than relying on the
            # garbage collector to flush it.
            f = open(',,bogus-inv', 'wb')
            try:
                f.write(s)
            finally:
                f.close()
            warning('Inventory sha hash mismatch for revision %s. %s'
                    ' != %s' % (revision_id, sha1, rev.inventory_sha1))

    def _validate_revision(self, inventory, revision_id):
        """Make sure the revision entry matches its checksum.

        :raises TestamentMismatch: If the strict testament sha1 computed
            from the revision and ``inventory`` differs from the sha1
            recorded in the bundle.
        """
        rev = self.get_revision(revision_id)
        rev_info = self.get_revision_info(revision_id)
        assert rev.revision_id == rev_info.revision_id
        assert rev.revision_id == revision_id
        sha1 = StrictTestament(rev, inventory).as_sha1()
        if sha1 != rev_info.sha1:
            raise TestamentMismatch(rev.revision_id, rev_info.sha1, sha1)

    def _update_tree(self, bundle_tree, revision_id):
        """This fills out a BundleTree based on the information
        that was read in.

        Each tree action is a pair ``(action_line, lines)``.  The action
        line has the form ``<action> <kind> <extra>``, where ``<extra>``
        holds fields separated by ``' // '``, and ``lines`` is the patch
        text belonging to the action.

        :param bundle_tree: A BundleTree to update with the new information.
        :param revision_id: The revision whose tree actions are applied.
        :raises BzrError: If an action line is malformed.
        """

        def get_rev_id(last_changed, path, kind):
            # Entries without an explicit last-changed value were last
            # changed by the revision being applied.
            if last_changed is not None:
                changed_revision_id = last_changed.decode('utf-8')
            else:
                changed_revision_id = revision_id
            bundle_tree.note_last_changed(path, changed_revision_id)
            return changed_revision_id

        def extra_info(info, new_path):
            # Apply the optional "name:value" fields of an action line and
            # return the two that the caller has to handle itself.
            last_changed = None
            encoding = None
            for info_item in info:
                try:
                    name, value = info_item.split(':', 1)
                except ValueError:
                    # This used to raise a plain string, which is not a
                    # valid exception object.
                    raise BzrError('Value %r has no colon' % info_item)
                if name == 'last-changed':
                    last_changed = value
                elif name == 'executable':
                    assert value in ('yes', 'no'), value
                    val = (value == 'yes')
                    bundle_tree.note_executable(new_path, val)
                elif name == 'target':
                    bundle_tree.note_target(new_path, value)
                elif name == 'encoding':
                    encoding = value
            return last_changed, encoding

        def do_patch(path, lines, encoding):
            if encoding is not None:
                assert encoding == 'base64'
                patch = base64.decodestring(''.join(lines))
            else:
                patch = ''.join(lines)
            bundle_tree.note_patch(path, patch)

        def renamed(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) < 2:
                raise BzrError('renamed action lines need both a from and to'
                               ': %r' % extra)
            old_path = info[0]
            if info[1].startswith('=> '):
                new_path = info[1][3:]
            else:
                new_path = info[1]

            bundle_tree.note_rename(old_path, new_path)
            last_modified, encoding = extra_info(info[2:], new_path)
            get_rev_id(last_modified, new_path, kind)
            if lines:
                do_patch(new_path, lines, encoding)

        def removed(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) > 1:
                # TODO: in the future we might allow file ids to be
                # given for removed entries
                raise BzrError('removed action lines should only have the path'
                               ': %r' % extra)
            path = info[0]
            bundle_tree.note_deletion(path)

        def added(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) <= 1:
                raise BzrError('add action lines require the path and file id'
                               ': %r' % extra)
            elif len(info) > 5:
                # The old text claimed "fewer than 5" for this check.
                raise BzrError('add action lines have at most 5 entries'
                               ': %r' % extra)
            path = info[0]
            if not info[1].startswith('file-id:'):
                raise BzrError('The file-id should follow the path for an add'
                               ': %r' % extra)
            file_id = info[1][8:]

            bundle_tree.note_id(file_id, path, kind)
            # this will be overridden in extra_info if executable is specified.
            bundle_tree.note_executable(path, False)
            last_changed, encoding = extra_info(info[2:], path)
            get_rev_id(last_changed, path, kind)
            if kind == 'directory':
                # Directories carry no content to patch.
                return
            do_patch(path, lines, encoding)

        def modified(kind, extra, lines):
            info = extra.split(' // ')
            if len(info) < 1:
                raise BzrError('modified action lines have at least'
                               ' the path in them: %r' % extra)
            path = info[0]

            last_modified, encoding = extra_info(info[1:], path)
            get_rev_id(last_modified, path, kind)
            if lines:
                do_patch(path, lines, encoding)

        valid_actions = {
            'renamed': renamed,
            'removed': removed,
            'added': added,
            'modified': modified,
        }
        for action_line, lines in \
                self.get_revision_info(revision_id).tree_actions:
            first = action_line.find(' ')
            if first == -1:
                raise BzrError('Bogus action line'
                               ' (no opening space): %r' % action_line)
            second = action_line.find(' ', first + 1)
            if second == -1:
                raise BzrError('Bogus action line'
                               ' (missing second space): %r' % action_line)
            action = action_line[:first]
            kind = action_line[first + 1:second]
            if kind not in ('file', 'directory', 'symlink'):
                raise BzrError('Bogus action line'
                               ' (invalid object kind %r): %r'
                               % (kind, action_line))
            extra = action_line[second + 1:]

            if action not in valid_actions:
                raise BzrError('Bogus action line'
                               ' (unrecognized action): %r' % action_line)
            valid_actions[action](kind, extra, lines)
428
class BundleTree(Tree):
    """A tree described as a base tree plus the changes noted on it.

    The ``note_*`` methods record renames, additions, deletions, patches
    and per-path metadata; the query methods combine those records with
    ``base_tree`` to answer questions about the resulting tree.
    """

    def __init__(self, base_tree, revision_id):
        """Create a tree that is initially identical to ``base_tree``.

        :param base_tree: The tree the recorded changes apply to.
        :param revision_id: The revision id given to the built inventory.
        """
        self.base_tree = base_tree
        self._renamed = {}       # new_path => old_path
        self._renamed_r = {}     # old_path => new_path
        self._new_id = {}        # new_path => new_id
        self._new_id_r = {}      # new_id => new_path
        self._kinds = {}         # new_id => kind
        self._last_changed = {}  # key passed to note_last_changed => rev id
        self._executable = {}    # new_path => executable value
        self.patches = {}        # new_path => patch text
        self._targets = {}       # new path => new symlink target
        self.deleted = []        # old paths that were removed
        self.contents_by_id = True
        self.revision_id = revision_id
        self._inventory = None

    def __str__(self):
        """Return a pretty-printed dump of every attribute."""
        return pprint.pformat(self.__dict__)

    def note_rename(self, old_path, new_path):
        """A file/directory has been renamed from old_path => new_path"""
        assert new_path not in self._renamed
        assert old_path not in self._renamed_r
        self._renamed[new_path] = old_path
        self._renamed_r[old_path] = new_path

    def note_id(self, new_id, new_path, kind='file'):
        """Files that don't exist in base need a new id."""
        self._new_id[new_path] = new_id
        self._new_id_r[new_id] = new_path
        self._kinds[new_id] = kind

    def note_last_changed(self, file_id, revision_id):
        """Record the revision that last changed an entry.

        get_last_changed() looks entries up by path, so despite its name
        ``file_id`` has to be the new path of the entry.

        :raises BzrError: If a different revision was already recorded.
        """
        if (file_id in self._last_changed
                and self._last_changed[file_id] != revision_id):
            raise BzrError('Mismatched last-changed revision for file_id {%s}'
                           ': %s != %s' % (file_id,
                                           self._last_changed[file_id],
                                           revision_id))
        self._last_changed[file_id] = revision_id

    def note_patch(self, new_path, patch):
        """There is a patch for a given filename."""
        self.patches[new_path] = patch

    def note_target(self, new_path, target):
        """The symlink at the new path has the given target"""
        self._targets[new_path] = target

    def note_deletion(self, old_path):
        """The file at old_path has been deleted."""
        self.deleted.append(old_path)

    def note_executable(self, new_path, executable):
        """Record the executable flag of the file at new_path."""
        self._executable[new_path] = executable

    def old_path(self, new_path):
        """Get the old_path (path in the base_tree) for the file at new_path

        :return: The path in the base tree, or None if that path is now
            occupied by something that was renamed away from it.
        """
        assert new_path[:1] not in ('\\', '/')
        old_path = self._renamed.get(new_path)
        if old_path is not None:
            return old_path
        dirname, basename = os.path.split(new_path)
        # dirname is not '' doesn't work, because
        # dirname may be a unicode entry, and is
        # requires the objects to be identical
        if dirname != '':
            # A parent directory may have been renamed instead.
            old_dir = self.old_path(dirname)
            if old_dir is None:
                old_path = None
            else:
                old_path = pathjoin(old_dir, basename)
        else:
            old_path = new_path
        # If the new path wasn't in renamed, the old one shouldn't be in
        # renamed_r
        if old_path in self._renamed_r:
            return None
        return old_path

    def new_path(self, old_path):
        """Get the new_path (path in the target_tree) for the file at old_path
        in the base tree.

        :return: The path in the target tree, or None if that path is
            taken by the destination of a rename.
        """
        assert old_path[:1] not in ('\\', '/')
        new_path = self._renamed_r.get(old_path)
        if new_path is not None:
            return new_path
        # A check of new_path against self._renamed used to sit here; at
        # this point new_path is always None, so it could never match.
        dirname, basename = os.path.split(old_path)
        if dirname != '':
            # A parent directory may have been renamed instead.
            new_dir = self.new_path(dirname)
            if new_dir is None:
                new_path = None
            else:
                new_path = pathjoin(new_dir, basename)
        else:
            new_path = old_path
        # If the old path wasn't in renamed, the new one shouldn't be in
        # renamed_r
        if new_path in self._renamed:
            return None
        return new_path

    def path2id(self, path):
        """Return the id of the file present at path in the target tree."""
        file_id = self._new_id.get(path)
        if file_id is not None:
            return file_id
        old_path = self.old_path(path)
        if old_path is None:
            return None
        if old_path in self.deleted:
            return None
        if hasattr(self.base_tree, 'path2id'):
            return self.base_tree.path2id(old_path)
        return self.base_tree.inventory.path2id(old_path)

    def id2path(self, file_id):
        """Return the new path in the target tree of the file with id file_id"""
        path = self._new_id_r.get(file_id)
        if path is not None:
            return path
        old_path = self.base_tree.id2path(file_id)
        if old_path is None:
            return None
        if old_path in self.deleted:
            return None
        return self.new_path(old_path)

    def old_contents_id(self, file_id):
        """Return the id in the base_tree for the given file_id.
        Return None if the file did not exist in base.
        """
        if self.contents_by_id:
            if self.base_tree.has_id(file_id):
                return file_id
            return None
        new_path = self.id2path(file_id)
        return self.base_tree.path2id(new_path)

    def get_file(self, file_id):
        """Return a file-like object containing the new contents of the
        file given by file_id.

        TODO:   It might be nice if this actually generated an entry
                in the text-store, so that the file contents would
                be cached.
        """
        base_id = self.old_contents_id(file_id)
        if base_id is not None:
            patch_original = self.base_tree.get_file(base_id)
        else:
            patch_original = None
        file_patch = self.patches.get(self.id2path(file_id))
        if file_patch is None:
            if (patch_original is None and
                    self.get_kind(file_id) == 'directory'):
                # Directories have no content; hand back an empty file.
                return StringIO()
            assert patch_original is not None, "None: %s" % file_id
            return patch_original

        assert not file_patch.startswith('\\'), \
            'Malformed patch for %s, %r' % (file_id, file_patch)
        return patched_file(file_patch, patch_original)

    def get_symlink_target(self, file_id):
        """Return the symlink target noted for file_id, else the base's."""
        new_path = self.id2path(file_id)
        try:
            return self._targets[new_path]
        except KeyError:
            return self.base_tree.get_symlink_target(file_id)

    def get_kind(self, file_id):
        """Return the kind noted for file_id, else the base tree's kind."""
        if file_id in self._kinds:
            return self._kinds[file_id]
        return self.base_tree.inventory[file_id].kind

    def is_executable(self, file_id):
        """Return the executable flag noted for file_id, else the base's."""
        path = self.id2path(file_id)
        if path in self._executable:
            return self._executable[path]
        return self.base_tree.inventory[file_id].executable

    def get_last_changed(self, file_id):
        """Return the last-changed revision noted for file_id's path,
        else the revision recorded in the base tree.
        """
        path = self.id2path(file_id)
        if path in self._last_changed:
            return self._last_changed[path]
        return self.base_tree.inventory[file_id].revision

    def get_size_and_sha1(self, file_id):
        """Return the size and sha1 hash of the given file id.
        If the file was not locally modified, this is extracted
        from the base_tree. Rather than re-reading the file.

        :return: A ``(size, sha1)`` pair; ``(None, None)`` when the file
            has no path in this tree.
        """
        new_path = self.id2path(file_id)
        if new_path is None:
            return None, None
        if new_path not in self.patches:
            # If the entry does not have a patch, then the
            # contents must be the same as in the base_tree
            ie = self.base_tree.inventory[file_id]
            if ie.text_size is None:
                return ie.text_size, ie.text_sha1
            return int(ie.text_size), ie.text_sha1
        fileobj = self.get_file(file_id)
        content = fileobj.read()
        return len(content), sha_string(content)

    def _get_inventory(self):
        """Build up the inventory entry for the BundleTree.

        This need to be called before ever accessing self.inventory

        A new Inventory is built on every call.

        :raises BzrError: If an entry has an unknown kind, or a file ends
            up without a text size.
        """
        from os.path import dirname, basename

        assert self.base_tree is not None
        base_inv = self.base_tree.inventory
        root_id = base_inv.root.file_id
        try:
            # New inventories have a unique root_id
            inv = Inventory(root_id, self.revision_id)
        except TypeError:
            inv = Inventory(revision_id=self.revision_id)

        def add_entry(file_id):
            # Create the inventory entry for file_id and add it to inv.
            path = self.id2path(file_id)
            if path is None:
                # The entry does not exist in this tree.
                return
            parent_path = dirname(path)
            if parent_path == u'':
                parent_id = root_id
            else:
                parent_id = self.path2id(parent_path)

            kind = self.get_kind(file_id)
            revision_id = self.get_last_changed(file_id)

            name = basename(path)
            if kind == 'directory':
                ie = InventoryDirectory(file_id, name, parent_id)
            elif kind == 'file':
                ie = InventoryFile(file_id, name, parent_id)
                ie.executable = self.is_executable(file_id)
            elif kind == 'symlink':
                ie = InventoryLink(file_id, name, parent_id)
                ie.symlink_target = self.get_symlink_target(file_id)
            else:
                # Without this branch an unknown kind surfaced as a
                # NameError on the unbound entry below.
                raise BzrError('Unknown kind %r for file_id %r'
                               % (kind, file_id))
            ie.revision = revision_id

            if kind in ('directory', 'symlink'):
                ie.text_size, ie.text_sha1 = None, None
            else:
                ie.text_size, ie.text_sha1 = self.get_size_and_sha1(file_id)
            if (ie.text_size is None) and (kind == 'file'):
                raise BzrError('Got a text_size of None for file_id %r'
                               % file_id)
            inv.add(ie)

        # Entries are added in path order, so a directory is added before
        # the entries inside it.
        for path, file_id in self.sorted_path_id():
            if file_id == inv.root.file_id:
                continue
            add_entry(file_id)

        return inv

    # Have to overload the inherited inventory property
    # because _get_inventory is only called in the parent.
    # Reading the docs, property() objects do not use
    # overloading, they use the function as it was defined
    # at that instant
    inventory = property(_get_inventory)

    def __iter__(self):
        """Iterate over the file ids of every entry in the inventory."""
        for path, entry in self.inventory.iter_entries():
            yield entry.file_id

    def sorted_path_id(self):
        """Return the sorted ``(path, file_id)`` pairs of this tree.

        The pairs cover the newly added entries plus every base tree entry
        that still has a path here.
        """
        paths = list(self._new_id.items())
        for file_id in self.base_tree:
            path = self.id2path(file_id)
            if path is None:
                continue
            paths.append((path, file_id))
        paths.sort()
        return paths
722
def patched_file(file_patch, original):
    """Produce a file-like object with the patched version of a text

    :param file_patch: The patch text; an empty string yields empty
        content.
    :param original: The original content, passed to iter_patched.
    :return: An IterableFile over the patched lines.
    """
    from bzrlib.patches import iter_patched
    from bzrlib.iterablefile import IterableFile
    if file_patch == "":
        # Without this guard the patched result below is never reached.
        return IterableFile(())
    return IterableFile(iter_patched(original, file_patch.splitlines(True)))