1
# Copyright (C) 2005-2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Read in a bundle stream, and process it into a BundleReader object."""
19
from __future__ import absolute_import
22
from io import BytesIO
31
from . import apply_bundle
32
from ..errors import (
36
from ..bzr.inventory import (
42
from ..osutils import sha_string, pathjoin
43
from ..revision import Revision, NULL_REVISION
44
from ..sixish import (
47
from ..testament import StrictTestament
48
from ..trace import mutter, warning
49
from ..tree import Tree
50
from ..bzr.xml5 import serializer_v5
53
class RevisionInfo(object):
54
"""Gets filled out for each revision object that is read.
57
def __init__(self, revision_id):
58
self.revision_id = revision_id
64
self.inventory_sha1 = None
66
self.parent_ids = None
69
self.properties = None
70
self.tree_actions = None
73
return pprint.pformat(self.__dict__)
75
def as_revision(self):
76
rev = Revision(revision_id=self.revision_id,
77
committer=self.committer,
78
timestamp=float(self.timestamp),
79
timezone=int(self.timezone),
80
inventory_sha1=self.inventory_sha1,
81
message='\n'.join(self.message))
84
rev.parent_ids.extend(self.parent_ids)
87
for property in self.properties:
88
key_end = property.find(': ')
90
if not property.endswith(':'):
91
raise ValueError(property)
92
key = str(property[:-1])
95
key = str(property[:key_end])
96
value = property[key_end+2:]
97
rev.properties[key] = value
102
def from_revision(revision):
103
revision_info = RevisionInfo(revision.revision_id)
104
date = timestamp.format_highres_date(revision.timestamp,
106
revision_info.date = date
107
revision_info.timezone = revision.timezone
108
revision_info.timestamp = revision.timestamp
109
revision_info.message = revision.message.split('\n')
110
revision_info.properties = [': '.join(p) for p in
111
viewitems(revision.properties)]
115
class BundleInfo(object):
116
"""This contains the meta information. Stuff that allows you to
117
recreate the revision or inventory XML.
119
def __init__(self, bundle_format=None):
120
self.bundle_format = None
121
self.committer = None
125
# A list of RevisionInfo objects
128
# The next entries are created during complete_info() and
129
# other post-read functions.
131
# A list of real Revision objects
132
self.real_revisions = []
134
self.timestamp = None
137
# Have we checked the repository yet?
138
self._validated_revisions_against_repo = False
141
return pprint.pformat(self.__dict__)
143
def complete_info(self):
144
"""This makes sure that all information is properly
145
split up, based on the assumptions that can be made
146
when information is missing.
148
from breezy.timestamp import unpack_highres_date
149
# Put in all of the guessable information.
150
if not self.timestamp and self.date:
151
self.timestamp, self.timezone = unpack_highres_date(self.date)
153
self.real_revisions = []
154
for rev in self.revisions:
155
if rev.timestamp is None:
156
if rev.date is not None:
157
rev.timestamp, rev.timezone = \
158
unpack_highres_date(rev.date)
160
rev.timestamp = self.timestamp
161
rev.timezone = self.timezone
162
if rev.message is None and self.message:
163
rev.message = self.message
164
if rev.committer is None and self.committer:
165
rev.committer = self.committer
166
self.real_revisions.append(rev.as_revision())
168
def get_base(self, revision):
169
revision_info = self.get_revision_info(revision.revision_id)
170
if revision_info.base_id is not None:
171
return revision_info.base_id
172
if len(revision.parent_ids) == 0:
173
# There is no base listed, and
174
# the lowest revision doesn't have a parent
175
# so this is probably against the empty tree
176
# and thus base truly is NULL_REVISION
179
return revision.parent_ids[-1]
181
def _get_target(self):
    """Return the target revision id, or None if no revisions are known.

    The first entry of ``self.real_revisions`` is preferred; when that
    list is empty the first entry of ``self.revisions`` is used instead.
    """
    if len(self.real_revisions) > 0:
        return self.real_revisions[0].revision_id
    elif len(self.revisions) > 0:
        return self.revisions[0].revision_id
    # Neither list has an entry: make the fall-through result explicit.
    return None

target = property(_get_target, doc='The target revision id')
191
def get_revision(self, revision_id):
192
for r in self.real_revisions:
193
if r.revision_id == revision_id:
195
raise KeyError(revision_id)
197
def get_revision_info(self, revision_id):
198
for r in self.revisions:
199
if r.revision_id == revision_id:
201
raise KeyError(revision_id)
203
def revision_tree(self, repository, revision_id, base=None):
204
revision = self.get_revision(revision_id)
205
base = self.get_base(revision)
206
if base == revision_id:
207
raise AssertionError()
208
if not self._validated_revisions_against_repo:
209
self._validate_references_from_repository(repository)
210
revision_info = self.get_revision_info(revision_id)
211
inventory_revision_id = revision_id
212
bundle_tree = BundleTree(repository.revision_tree(base),
213
inventory_revision_id)
214
self._update_tree(bundle_tree, revision_id)
216
inv = bundle_tree.inventory
217
self._validate_inventory(inv, revision_id)
218
self._validate_revision(bundle_tree, revision_id)
222
def _validate_references_from_repository(self, repository):
223
"""Now that we have a repository which should have some of the
224
revisions we care about, go through and validate all of them
229
def add_sha(d, revision_id, sha1):
230
if revision_id is None:
232
raise BzrError('A Null revision should always'
233
'have a null sha1 hash')
236
# This really should have been validated as part
237
# of _validate_revisions but lets do it again
238
if sha1 != d[revision_id]:
239
raise BzrError('** Revision %r referenced with 2 different'
240
' sha hashes %s != %s' % (revision_id,
241
sha1, d[revision_id]))
243
d[revision_id] = sha1
245
# All of the contained revisions were checked
246
# in _validate_revisions
248
for rev_info in self.revisions:
249
checked[rev_info.revision_id] = True
250
add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)
252
for (rev, rev_info) in zip(self.real_revisions, self.revisions):
253
add_sha(inv_to_sha, rev_info.revision_id, rev_info.inventory_sha1)
257
for revision_id, sha1 in viewitems(rev_to_sha):
258
if repository.has_revision(revision_id):
259
testament = StrictTestament.from_revision(repository,
261
local_sha1 = self._testament_sha1_from_revision(repository,
263
if sha1 != local_sha1:
264
raise BzrError('sha1 mismatch. For revision id {%s}'
265
'local: %s, bundle: %s' % (revision_id, local_sha1, sha1))
268
elif revision_id not in checked:
269
missing[revision_id] = sha1
272
# I don't know if this is an error yet
273
warning('Not all revision hashes could be validated.'
274
' Unable validate %d hashes' % len(missing))
275
mutter('Verified %d sha hashes for the bundle.' % count)
276
self._validated_revisions_against_repo = True
278
def _validate_inventory(self, inv, revision_id):
279
"""At this point we should have generated the BundleTree,
280
so build up an inventory, and make sure the hashes match.
282
# Now we should have a complete inventory entry.
283
s = serializer_v5.write_inventory_to_string(inv)
285
# Target revision is the last entry in the real_revisions list
286
rev = self.get_revision(revision_id)
287
if rev.revision_id != revision_id:
288
raise AssertionError()
289
if sha1 != rev.inventory_sha1:
290
with open(',,bogus-inv', 'wb') as f:
292
warning('Inventory sha hash mismatch for revision %s. %s'
293
' != %s' % (revision_id, sha1, rev.inventory_sha1))
295
def _validate_revision(self, tree, revision_id):
296
"""Make sure all revision entries match their checksum."""
298
# This is a mapping from each revision id to its sha hash
301
rev = self.get_revision(revision_id)
302
rev_info = self.get_revision_info(revision_id)
303
if not (rev.revision_id == rev_info.revision_id):
304
raise AssertionError()
305
if not (rev.revision_id == revision_id):
306
raise AssertionError()
307
sha1 = self._testament_sha1(rev, tree)
308
if sha1 != rev_info.sha1:
309
raise TestamentMismatch(rev.revision_id, rev_info.sha1, sha1)
310
if rev.revision_id in rev_to_sha1:
311
raise BzrError('Revision {%s} given twice in the list'
313
rev_to_sha1[rev.revision_id] = sha1
315
def _update_tree(self, bundle_tree, revision_id):
316
"""This fills out a BundleTree based on the information
319
:param bundle_tree: A BundleTree to update with the new information.
322
def get_rev_id(last_changed, path, kind):
323
if last_changed is not None:
324
# last_changed will be a Unicode string because of how it was
325
# read. Convert it back to utf8.
326
changed_revision_id = cache_utf8.encode(last_changed)
328
changed_revision_id = revision_id
329
bundle_tree.note_last_changed(path, changed_revision_id)
330
return changed_revision_id
332
def extra_info(info, new_path):
335
for info_item in info:
337
name, value = info_item.split(':', 1)
339
raise ValueError('Value %r has no colon' % info_item)
340
if name == 'last-changed':
342
elif name == 'executable':
343
val = (value == 'yes')
344
bundle_tree.note_executable(new_path, val)
345
elif name == 'target':
346
bundle_tree.note_target(new_path, value)
347
elif name == 'encoding':
349
return last_changed, encoding
351
def do_patch(path, lines, encoding):
352
if encoding == 'base64':
353
patch = base64.decodestring(b''.join(lines))
354
elif encoding is None:
355
patch = b''.join(lines)
357
raise ValueError(encoding)
358
bundle_tree.note_patch(path, patch)
360
def renamed(kind, extra, lines):
361
info = extra.split(' // ')
363
raise BzrError('renamed action lines need both a from and to'
366
if info[1].startswith('=> '):
367
new_path = info[1][3:]
371
bundle_tree.note_rename(old_path, new_path)
372
last_modified, encoding = extra_info(info[2:], new_path)
373
revision = get_rev_id(last_modified, new_path, kind)
375
do_patch(new_path, lines, encoding)
377
def removed(kind, extra, lines):
378
info = extra.split(' // ')
380
# TODO: in the future we might allow file ids to be
381
# given for removed entries
382
raise BzrError('removed action lines should only have the path'
385
bundle_tree.note_deletion(path)
387
def added(kind, extra, lines):
388
info = extra.split(' // ')
390
raise BzrError('add action lines require the path and file id'
393
raise BzrError('add action lines have fewer than 5 entries.'
396
if not info[1].startswith('file-id:'):
397
raise BzrError('The file-id should follow the path for an add'
399
# This will be Unicode because of how the stream is read. Turn it
400
# back into a utf8 file_id
401
file_id = cache_utf8.encode(info[1][8:])
403
bundle_tree.note_id(file_id, path, kind)
404
# this will be overridden in extra_info if executable is specified.
405
bundle_tree.note_executable(path, False)
406
last_changed, encoding = extra_info(info[2:], path)
407
revision = get_rev_id(last_changed, path, kind)
408
if kind == 'directory':
410
do_patch(path, lines, encoding)
412
def modified(kind, extra, lines):
413
info = extra.split(' // ')
415
raise BzrError('modified action lines have at least'
416
'the path in them: %r' % extra)
419
last_modified, encoding = extra_info(info[1:], path)
420
revision = get_rev_id(last_modified, path, kind)
422
do_patch(path, lines, encoding)
430
for action_line, lines in \
431
self.get_revision_info(revision_id).tree_actions:
432
first = action_line.find(' ')
434
raise BzrError('Bogus action line'
435
' (no opening space): %r' % action_line)
436
second = action_line.find(' ', first+1)
438
raise BzrError('Bogus action line'
439
' (missing second space): %r' % action_line)
440
action = action_line[:first]
441
kind = action_line[first+1:second]
442
if kind not in ('file', 'directory', 'symlink'):
443
raise BzrError('Bogus action line'
444
' (invalid object kind %r): %r' % (kind, action_line))
445
extra = action_line[second+1:]
447
if action not in valid_actions:
448
raise BzrError('Bogus action line'
449
' (unrecognized action): %r' % action_line)
450
valid_actions[action](kind, extra, lines)
452
def install_revisions(self, target_repo, stream_input=True):
453
"""Install revisions and return the target revision
455
:param target_repo: The repository to install into
456
:param stream_input: Ignored by this implementation.
458
apply_bundle.install_bundle(target_repo, self)
461
def get_merge_request(self, target_repo):
    """Provide data for performing a merge.

    :param target_repo: Accepted for interface compatibility; it is not
        read by this implementation.
    :return: A 3-tuple of (suggested base, suggested target, patch
        verification status). This implementation always returns
        ``(None, self.target, 'inapplicable')``.
    """
    return None, self.target, 'inapplicable'
469
class BundleTree(Tree):
471
def __init__(self, base_tree, revision_id):
472
self.base_tree = base_tree
473
self._renamed = {} # Mapping from new_path => old_path
474
self._renamed_r = {} # old_path => new_path
475
self._new_id = {} # new_path => new_id
476
self._new_id_r = {} # new_id => new_path
477
self._kinds = {} # new_path => kind
478
self._last_changed = {} # new_id => revision_id
479
self._executable = {} # new_path => executable value
481
self._targets = {} # new path => new symlink target
483
self.contents_by_id = True
484
self.revision_id = revision_id
485
self._inventory = None
488
return pprint.pformat(self.__dict__)
490
def note_rename(self, old_path, new_path):
    """Record that a file/directory has been renamed from old_path => new_path.

    The forward map ``self._renamed`` is keyed by the new path and stores
    the old path; ``self._renamed_r`` is the reverse (old path => new path).

    :param old_path: The path before the rename.
    :param new_path: The path after the rename.
    :raises AssertionError: If ``new_path`` is already a recorded rename
        destination, or ``old_path`` is already a recorded rename source.
    """
    if new_path in self._renamed:
        raise AssertionError(new_path)
    if old_path in self._renamed_r:
        raise AssertionError(old_path)
    self._renamed[new_path] = old_path
    self._renamed_r[old_path] = new_path
499
def note_id(self, new_id, new_path, kind='file'):
    """Register a file id for a path that does not exist in the base tree.

    :param new_id: The file id to associate with ``new_path``.
    :param new_path: The path the id belongs to.
    :param kind: The kind recorded for ``new_path`` (defaults to 'file').
    """
    self._new_id[new_path] = new_id
    self._new_id_r[new_id] = new_path
    self._kinds[new_path] = kind
505
def note_last_changed(self, file_id, revision_id):
506
if (file_id in self._last_changed
507
and self._last_changed[file_id] != revision_id):
508
raise BzrError('Mismatched last-changed revision for file_id {%s}'
509
': %s != %s' % (file_id,
510
self._last_changed[file_id],
512
self._last_changed[file_id] = revision_id
514
def note_patch(self, new_path, patch):
    """Record the patch for the file at new_path.

    A later call for the same path replaces the earlier patch.

    :param new_path: The path the patch applies to.
    :param patch: The patch content to store in ``self.patches``.
    """
    self.patches[new_path] = patch
518
def note_target(self, new_path, target):
    """Record that the symlink at new_path has the given target.

    :param new_path: The path of the symlink.
    :param target: The symlink target to store in ``self._targets``.
    """
    self._targets[new_path] = target
522
def note_deletion(self, old_path):
    """Record that the file at old_path has been deleted.

    :param old_path: The path to append to ``self.deleted``.
    """
    self.deleted.append(old_path)
526
def note_executable(self, new_path, executable):
    """Record the executable flag for the file at new_path.

    :param new_path: The path whose flag is being recorded.
    :param executable: The value to store in ``self._executable``.
    """
    self._executable[new_path] = executable
529
def old_path(self, new_path):
530
"""Get the old_path (path in the base_tree) for the file at new_path"""
531
if new_path[:1] in ('\\', '/'):
532
raise ValueError(new_path)
533
old_path = self._renamed.get(new_path)
534
if old_path is not None:
536
dirname, basename = os.path.split(new_path)
537
# dirname is not '' doesn't work, because
538
# dirname may be a unicode entry, and is
539
# requires the objects to be identical
541
old_dir = self.old_path(dirname)
545
old_path = pathjoin(old_dir, basename)
548
#If the new path wasn't in renamed, the old one shouldn't be in
550
if old_path in self._renamed_r:
554
def new_path(self, old_path):
555
"""Get the new_path (path in the target_tree) for the file at old_path
558
if old_path[:1] in ('\\', '/'):
559
raise ValueError(old_path)
560
new_path = self._renamed_r.get(old_path)
561
if new_path is not None:
563
if new_path in self._renamed:
565
dirname, basename = os.path.split(old_path)
567
new_dir = self.new_path(dirname)
571
new_path = pathjoin(new_dir, basename)
574
#If the old path wasn't in renamed, the new one shouldn't be in
576
if new_path in self._renamed:
580
def get_root_id(self):
    """Return the file id of the tree root.

    This is whatever ``self.path2id`` reports for the empty path ``''``.
    """
    return self.path2id('')
583
def path2id(self, path):
584
"""Return the id of the file present at path in the target tree."""
585
file_id = self._new_id.get(path)
586
if file_id is not None:
588
old_path = self.old_path(path)
591
if old_path in self.deleted:
593
return self.base_tree.path2id(old_path)
595
def id2path(self, file_id):
596
"""Return the new path in the target tree of the file with id file_id"""
597
path = self._new_id_r.get(file_id)
600
old_path = self.base_tree.id2path(file_id)
603
if old_path in self.deleted:
605
return self.new_path(old_path)
607
def old_contents_id(self, file_id):
608
"""Return the id in the base_tree for the given file_id.
609
Return None if the file did not exist in base.
611
if self.contents_by_id:
612
if self.base_tree.has_id(file_id):
616
new_path = self.id2path(file_id)
617
return self.base_tree.path2id(new_path)
619
def get_file(self, path, file_id=None):
620
"""Return a file-like object containing the new contents of the
621
file given by file_id.
623
TODO: It might be nice if this actually generated an entry
624
in the text-store, so that the file contents would
628
file_id = self.path2id(path)
629
base_id = self.old_contents_id(file_id)
630
if (base_id is not None and
631
base_id != self.base_tree.get_root_id()):
632
old_path = self.old_path(path)
633
patch_original = self.base_tree.get_file(
636
patch_original = None
637
file_patch = self.patches.get(path)
638
if file_patch is None:
639
if (patch_original is None and
640
self.kind(path, file_id) == 'directory'):
642
if patch_original is None:
643
raise AssertionError("None: %s" % file_id)
644
return patch_original
646
if file_patch.startswith(b'\\'):
648
'Malformed patch for %s, %r' % (file_id, file_patch))
649
return patched_file(file_patch, patch_original)
651
def get_symlink_target(self, path, file_id=None):
653
return self._targets[path]
655
old_path = self.old_path(path)
656
return self.base_tree.get_symlink_target(old_path, file_id)
658
def kind(self, path, file_id=None):
660
return self._kinds[path]
662
old_path = self.old_path(path)
663
return self.base_tree.kind(old_path, file_id)
665
def get_file_revision(self, path, file_id=None):
666
if path in self._last_changed:
667
return self._last_changed[path]
669
old_path = self.old_path(path)
670
return self.base_tree.get_file_revision(old_path, file_id)
672
def is_executable(self, path, file_id=None):
673
if path in self._executable:
674
return self._executable[path]
676
old_path = self.old_path(path)
677
return self.base_tree.is_executable(old_path, file_id)
679
def get_last_changed(self, path, file_id=None):
    """Return the revision id in which the file at path was last changed.

    If a value was recorded for ``path`` in ``self._last_changed`` it is
    returned; otherwise the question is delegated to the base tree using
    the path the file had there.

    :param path: The path in this tree.
    :param file_id: Passed through to ``base_tree.get_file_revision``.
    """
    if path in self._last_changed:
        return self._last_changed[path]
    # Not recorded for this tree: ask the base tree, using the old path.
    old_path = self.old_path(path)
    return self.base_tree.get_file_revision(old_path, file_id)
685
def get_size_and_sha1(self, new_path, file_id=None):
686
"""Return the size and sha1 hash of the given file id.
687
If the file was not locally modified, this is extracted
688
from the base_tree. Rather than re-reading the file.
692
if new_path not in self.patches:
693
# If the entry does not have a patch, then the
694
# contents must be the same as in the base_tree
695
base_path = self.old_path(new_path)
696
text_size = self.base_tree.get_file_size(base_path, file_id)
697
text_sha1 = self.base_tree.get_file_sha1(base_path, file_id)
698
return text_size, text_sha1
699
fileobj = self.get_file(new_path, file_id)
700
content = fileobj.read()
701
return len(content), sha_string(content)
703
def _get_inventory(self):
704
"""Build up the inventory entry for the BundleTree.
706
This need to be called before ever accessing self.inventory
708
from os.path import dirname, basename
709
inv = Inventory(None, self.revision_id)
711
def add_entry(path, file_id):
715
parent_path = dirname(path)
716
parent_id = self.path2id(parent_path)
718
kind = self.kind(path, file_id)
719
revision_id = self.get_last_changed(path, file_id)
721
name = basename(path)
722
if kind == 'directory':
723
ie = InventoryDirectory(file_id, name, parent_id)
725
ie = InventoryFile(file_id, name, parent_id)
726
ie.executable = self.is_executable(path, file_id)
727
elif kind == 'symlink':
728
ie = InventoryLink(file_id, name, parent_id)
729
ie.symlink_target = self.get_symlink_target(path, file_id)
730
ie.revision = revision_id
733
ie.text_size, ie.text_sha1 = self.get_size_and_sha1(
735
if ie.text_size is None:
737
'Got a text_size of None for file_id %r' % file_id)
740
sorted_entries = self.sorted_path_id()
741
for path, file_id in sorted_entries:
742
add_entry(path, file_id)
746
# Have to overload the inherited inventory property
747
# because _get_inventory is only called in the parent.
748
# Reading the docs, property() objects do not use
749
# overloading, they use the function as it was defined
751
inventory = property(_get_inventory)
753
root_inventory = property(_get_inventory)
755
def all_file_ids(self):
    """Return the set of file ids found in this tree's inventory.

    Collects ``entry.file_id`` for every ``(path, entry)`` pair yielded by
    ``self.inventory.iter_entries()``.
    """
    return {entry.file_id for path, entry in self.inventory.iter_entries()}
758
def all_versioned_paths(self):
    """Return the set of paths found in this tree's inventory.

    Collects the path of every ``(path, entry)`` pair yielded by
    ``self.inventory.iter_entries()``.
    """
    return {path for path, entry in self.inventory.iter_entries()}
761
def list_files(self, include_root=False, from_dir=None, recursive=True):
762
# The only files returned by this are those from the version
767
from_dir_id = inv.path2id(from_dir)
768
if from_dir_id is None:
769
# Directory not versioned
771
entries = inv.iter_entries(from_dir=from_dir_id, recursive=recursive)
772
if inv.root is not None and not include_root and from_dir is None:
773
# skip the root for compatibility with the current apis.
775
for path, entry in entries:
776
yield path, 'V', entry.kind, entry.file_id, entry
778
def sorted_path_id(self):
780
for result in viewitems(self._new_id):
782
for id in self.base_tree.all_file_ids():
783
path = self.id2path(id)
786
paths.append((path, id))
791
def patched_file(file_patch, original):
    """Produce a file-like object with the patched version of a text.

    :param file_patch: The patch as a bytestring. An empty patch yields an
        empty file-like object.
    :param original: The original text, handed to ``iter_patched`` as-is.
    :return: An ``IterableFile`` wrapping the patched lines.
    """
    from breezy.patches import iter_patched
    from breezy.iterablefile import IterableFile
    if file_patch == b"":
        return IterableFile(())
    # string.splitlines(True) also splits on '\r', but the iter_patched code
    # only expects to iterate over '\n' style lines
    return IterableFile(iter_patched(original,
                                     BytesIO(file_patch).readlines()))