1
# Copyright (C) 2006 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Read in a bundle stream, and process it into a BundleReader object."""
3
Read in a bundle stream, and process it into a BundleReader object.
20
7
from cStringIO import StringIO
29
from bzrlib.bundle import apply_bundle
30
from bzrlib.errors import (TestamentMismatch, BzrError,
31
MalformedHeader, MalformedPatches, NotABundle)
11
from bzrlib.errors import TestamentMismatch, BzrError
12
from bzrlib.bundle.common import get_header, header_str
32
13
from bzrlib.inventory import (Inventory, InventoryEntry,
33
14
InventoryDirectory, InventoryFile,
35
from bzrlib.osutils import sha_file, sha_string, pathjoin
16
from bzrlib.osutils import sha_file, sha_string
36
17
from bzrlib.revision import Revision, NULL_REVISION
37
18
from bzrlib.testament import StrictTestament
38
19
from bzrlib.trace import mutter, warning
39
import bzrlib.transport
40
20
from bzrlib.tree import Tree
41
import bzrlib.urlutils
42
21
from bzrlib.xml5 import serializer_v5
24
class BadBundle(Exception): pass
25
class MalformedHeader(BadBundle): pass
26
class MalformedPatches(BadBundle): pass
27
class MalformedFooter(BadBundle): pass
45
30
class RevisionInfo(object):
46
31
"""Gets filled out for each revision object that is read.
192
158
raise KeyError(revision_id)
194
def revision_tree(self, repository, revision_id, base=None):
195
revision = self.get_revision(revision_id)
196
base = self.get_base(revision)
197
if base == revision_id:
198
raise AssertionError()
199
if not self._validated_revisions_against_repo:
200
self._validate_references_from_repository(repository)
201
revision_info = self.get_revision_info(revision_id)
202
inventory_revision_id = revision_id
203
bundle_tree = BundleTree(repository.revision_tree(base),
204
inventory_revision_id)
205
self._update_tree(bundle_tree, revision_id)
207
inv = bundle_tree.inventory
208
self._validate_inventory(inv, revision_id)
209
self._validate_revision(inv, revision_id)
161
class BundleReader(object):
162
"""This class reads in a bundle from a file, and returns
163
a Bundle object, which can then be applied against a tree.
165
def __init__(self, from_file):
166
"""Read in the bundle from the file.
168
:param from_file: A file-like object (must have iterator support).
170
object.__init__(self)
171
self.from_file = iter(from_file)
172
self._next_line = None
174
self.info = BundleInfo()
175
# We put the actual inventory ids in the footer, so that the patch
176
# is easier to read for humans.
177
# Unfortunately, that means we need to read everything before we
178
# can create a proper bundle.
184
while self._next_line is not None:
185
self._read_revision_header()
186
if self._next_line is None:
192
"""Make sure that the information read in makes sense
193
and passes appropriate checksums.
195
# Fill in all the missing blanks for the revisions
196
# and generate the real_revisions list.
197
self.info.complete_info()
199
def _validate_revision(self, inventory, revision_id):
200
"""Make sure all revision entries match their checksum."""
202
# This is a mapping from each revision id to it's sha hash
205
rev = self.info.get_revision(revision_id)
206
rev_info = self.info.get_revision_info(revision_id)
207
assert rev.revision_id == rev_info.revision_id
208
assert rev.revision_id == revision_id
209
sha1 = StrictTestament(rev, inventory).as_sha1()
210
if sha1 != rev_info.sha1:
211
raise TestamentMismatch(rev.revision_id, rev_info.sha1, sha1)
212
if rev_to_sha1.has_key(rev.revision_id):
213
raise BzrError('Revision {%s} given twice in the list'
215
rev_to_sha1[rev.revision_id] = sha1
213
217
def _validate_references_from_repository(self, repository):
214
218
"""Now that we have a repository which should have some of the
236
240
# All of the contained revisions were checked
237
241
# in _validate_revisions
239
for rev_info in self.revisions:
243
for rev_info in self.info.revisions:
240
244
checked[rev_info.revision_id] = True
241
245
add_sha(rev_to_sha, rev_info.revision_id, rev_info.sha1)
243
for (rev, rev_info) in zip(self.real_revisions, self.revisions):
247
for (rev, rev_info) in zip(self.info.real_revisions, self.info.revisions):
244
248
add_sha(inv_to_sha, rev_info.revision_id, rev_info.inventory_sha1)
248
252
for revision_id, sha1 in rev_to_sha.iteritems():
249
253
if repository.has_revision(revision_id):
250
testament = StrictTestament.from_revision(repository,
254
testament = StrictTestament.from_revision(repository,
252
local_sha1 = self._testament_sha1_from_revision(repository,
256
local_sha1 = testament.as_sha1()
254
257
if sha1 != local_sha1:
255
raise BzrError('sha1 mismatch. For revision id {%s}'
258
raise BzrError('sha1 mismatch. For revision id {%s}'
256
259
'local: %s, bundle: %s' % (revision_id, local_sha1, sha1))
259
262
elif revision_id not in checked:
260
263
missing[revision_id] = sha1
265
for inv_id, sha1 in inv_to_sha.iteritems():
266
if repository.has_revision(inv_id):
267
# Note: branch.get_inventory_sha1() just returns the value that
268
# is stored in the revision text, and that value may be out
269
# of date. This is bogus, because that means we aren't
270
# validating the actual text, just that we wrote and read the
271
# string. But for now, what the hell.
272
local_sha1 = repository.get_inventory_sha1(inv_id)
273
if sha1 != local_sha1:
274
raise BzrError('sha1 mismatch. For inventory id {%s}'
275
'local: %s, bundle: %s' %
276
(inv_id, local_sha1, sha1))
262
280
if len(missing) > 0:
263
281
# I don't know if this is an error yet
264
282
warning('Not all revision hashes could be validated.'
265
283
' Unable validate %d hashes' % len(missing))
266
284
mutter('Verified %d sha hashes for the bundle.' % count)
267
self._validated_revisions_against_repo = True
269
286
def _validate_inventory(self, inv, revision_id):
270
287
"""At this point we should have generated the BundleTree,
271
288
so build up an inventory, and make sure the hashes match.
291
assert inv is not None
273
293
# Now we should have a complete inventory entry.
274
294
s = serializer_v5.write_inventory_to_string(inv)
275
295
sha1 = sha_string(s)
276
296
# Target revision is the last entry in the real_revisions list
277
rev = self.get_revision(revision_id)
278
if rev.revision_id != revision_id:
279
raise AssertionError()
297
rev = self.info.get_revision(revision_id)
298
assert rev.revision_id == revision_id
280
299
if sha1 != rev.inventory_sha1:
281
300
open(',,bogus-inv', 'wb').write(s)
282
301
warning('Inventory sha hash mismatch for revision %s. %s'
283
302
' != %s' % (revision_id, sha1, rev.inventory_sha1))
285
def _validate_revision(self, inventory, revision_id):
286
"""Make sure all revision entries match their checksum."""
288
# This is a mapping from each revision id to it's sha hash
291
rev = self.get_revision(revision_id)
292
rev_info = self.get_revision_info(revision_id)
293
if not (rev.revision_id == rev_info.revision_id):
294
raise AssertionError()
295
if not (rev.revision_id == revision_id):
296
raise AssertionError()
297
sha1 = self._testament_sha1(rev, inventory)
298
if sha1 != rev_info.sha1:
299
raise TestamentMismatch(rev.revision_id, rev_info.sha1, sha1)
300
if rev.revision_id in rev_to_sha1:
301
raise BzrError('Revision {%s} given twice in the list'
303
rev_to_sha1[rev.revision_id] = sha1
304
def get_bundle(self, repository):
305
"""Return the meta information, and a Bundle tree which can
306
be used to populate the local stores and working tree, respectively.
308
return self.info, self.revision_tree(repository, self.info.target)
310
def revision_tree(self, repository, revision_id, base=None):
311
revision = self.info.get_revision(revision_id)
312
base = self.info.get_base(revision)
313
assert base != revision_id
314
self._validate_references_from_repository(repository)
315
revision_info = self.info.get_revision_info(revision_id)
316
inventory_revision_id = revision_id
317
bundle_tree = BundleTree(repository.revision_tree(base),
318
inventory_revision_id)
319
self._update_tree(bundle_tree, revision_id)
321
inv = bundle_tree.inventory
322
self._validate_inventory(inv, revision_id)
323
self._validate_revision(inv, revision_id)
328
"""yield the next line, but secretly
329
keep 1 extra line for peeking.
331
for line in self.from_file:
332
last = self._next_line
333
self._next_line = line
335
#mutter('yielding line: %r' % last)
337
last = self._next_line
338
self._next_line = None
339
#mutter('yielding line: %r' % last)
342
def _read_header(self):
343
"""Read the bzr header"""
344
header = get_header()
346
for line in self._next():
348
# not all mailers will keep trailing whitespace
351
if (not line.startswith('# ') or not line.endswith('\n')
352
or line[2:-1].decode('utf-8') != header[0]):
353
raise MalformedHeader('Found a header, but it'
354
' was improperly formatted')
355
header.pop(0) # We read this line.
357
break # We found everything.
358
elif (line.startswith('#') and line.endswith('\n')):
359
line = line[1:-1].strip().decode('utf-8')
360
if line[:len(header_str)] == header_str:
361
if line == header[0]:
364
raise MalformedHeader('Found what looks like'
365
' a header, but did not match')
368
raise MalformedHeader('Did not find an opening header')
370
def _read_revision_header(self):
371
self.info.revisions.append(RevisionInfo(None))
372
for line in self._next():
373
# The bzr header is terminated with a blank line
374
# which does not start with '#'
375
if line is None or line == '\n':
377
self._handle_next(line)
379
def _read_next_entry(self, line, indent=1):
380
"""Read in a key-value pair
382
if not line.startswith('#'):
383
raise MalformedHeader('Bzr header did not start with #')
384
line = line[1:-1].decode('utf-8') # Remove the '#' and '\n'
385
if line[:indent] == ' '*indent:
388
return None, None# Ignore blank lines
390
loc = line.find(': ')
395
value = self._read_many(indent=indent+2)
396
elif line[-1:] == ':':
398
value = self._read_many(indent=indent+2)
400
raise MalformedHeader('While looking for key: value pairs,'
401
' did not find the colon %r' % (line))
403
key = key.replace(' ', '_')
404
#mutter('found %s: %s' % (key, value))
407
def _handle_next(self, line):
410
key, value = self._read_next_entry(line, indent=1)
411
mutter('_handle_next %r => %r' % (key, value))
415
revision_info = self.info.revisions[-1]
416
if hasattr(revision_info, key):
417
if getattr(revision_info, key) is None:
418
setattr(revision_info, key, value)
420
raise MalformedHeader('Duplicated Key: %s' % key)
422
# What do we do with a key we don't recognize
423
raise MalformedHeader('Unknown Key: "%s"' % key)
425
def _read_many(self, indent):
426
"""If a line ends with no entry, that means that it should be
427
followed with multiple lines of values.
429
This detects the end of the list, because it will be a line that
430
does not start properly indented.
433
start = '#' + (' '*indent)
435
if self._next_line is None or self._next_line[:len(start)] != start:
438
for line in self._next():
439
values.append(line[len(start):-1].decode('utf-8'))
440
if self._next_line is None or self._next_line[:len(start)] != start:
444
def _read_one_patch(self):
445
"""Read in one patch, return the complete patch, along with
448
:return: action, lines, do_continue
450
#mutter('_read_one_patch: %r' % self._next_line)
451
# Peek and see if there are no patches
452
if self._next_line is None or self._next_line.startswith('#'):
453
return None, [], False
457
for line in self._next():
459
if not line.startswith('==='):
460
raise MalformedPatches('The first line of all patches'
461
' should be a bzr meta line "==="'
463
action = line[4:-1].decode('utf-8')
464
elif line.startswith('... '):
465
action += line[len('... '):-1].decode('utf-8')
467
if (self._next_line is not None and
468
self._next_line.startswith('===')):
469
return action, lines, True
470
elif self._next_line is None or self._next_line.startswith('#'):
471
return action, lines, False
475
elif not line.startswith('... '):
478
return action, lines, False
480
def _read_patches(self):
482
revision_actions = []
484
action, lines, do_continue = self._read_one_patch()
485
if action is not None:
486
revision_actions.append((action, lines))
487
assert self.info.revisions[-1].tree_actions is None
488
self.info.revisions[-1].tree_actions = revision_actions
490
def _read_footer(self):
491
"""Read the rest of the meta information.
493
:param first_line: The previous step iterates past what it
494
can handle. That extra line is given here.
496
for line in self._next():
497
self._handle_next(line)
498
if not self._next_line.startswith('#'):
501
if self._next_line is None:
305
504
def _update_tree(self, bundle_tree, revision_id):
306
505
"""This fills out a BundleTree based on the information