# Copyright (C) 2005 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""bzr upgrade logic."""
# TODO: change upgrade from .bzr to create a '.bzr-new', then do a bait and
# switch.

# To make this properly useful
#
# 1. assign text version ids, and put those text versions into
#    the inventory as they're converted.
#
# 2. keep track of the previous version of each file, rather than
#    just using the last one imported
#
# 3. assign entry versions when files are added, renamed or moved.
#
# 4. when merged-in versions are observed, walk down through them
#    to discover everything, then commit bottom-up
#
# 5. track ancestry as things are merged in, and commit that in each
#    revision
#
# Perhaps it's best to first walk the whole graph and make a plan for
# what should be imported in what order?  Need a kind of topological
# sort of all revisions.  (Or do we, can we just before doing a revision
# see that all its parents have either been converted or abandoned?)

# Cannot import a revision until all its parents have been
# imported.  in other words, we can only import revisions whose
# parents have all been imported.  the first step must be to
# import a revision with no parents, of which there must be at
# least one.  (So perhaps it's useful to store forward pointers
# from a list of parents to their children?)
#
# Another (equivalent?) approach is to build up the ordered
# ancestry list for the last revision, and walk through that.  We
# are going to need that.
#
# We don't want to have to recurse all the way back down the list.
#
# Suppose we keep a queue of the revisions able to be processed at
# any point.  This starts out with all the revisions having no
# parents; as parents are converted, their children become eligible.
#
# This seems like a generally useful algorithm...
#
# The current algorithm is dumb (O(n**2)?) but will do the job, and
# takes less than a second on the bzr.dev branch.

# This currently does a kind of lazy conversion of file texts, where a
# new text is written in every version.  That's unnecessary but for
# the moment saves us having to worry about when files need new
# versions.
from cStringIO import StringIO
import os
from stat import S_ISDIR

import bzrlib
from bzrlib.branch import Branch
from bzrlib.bzrdir import BzrDirFormat, BzrDirFormat4, BzrDirFormat5, BzrDirFormat6
from bzrlib.errors import BzrError, NoSuchFile, UpgradeReadonly
import bzrlib.hashcache as hashcache
from bzrlib.lockable_files import LockableFiles
from bzrlib.osutils import sha_strings, sha_string, pathjoin, abspath
from bzrlib.store.text import TextStore
from bzrlib.store.weave import WeaveStore
from bzrlib.trace import mutter, note, warning
from bzrlib.transactions import PassThroughTransaction
from bzrlib.transport import get_transport
from bzrlib.transport.local import LocalTransport
from bzrlib.ui import ui_factory
from bzrlib.weave import Weave
from bzrlib.weavefile import read_weave, write_weave
from bzrlib.xml4 import serializer_v4
from bzrlib.xml5 import serializer_v5
97
class Convert(object):
99
def __init__(self, transport):
100
self.base = transport.base
101
self.converted_revs = set()
102
self.absent_revisions = set()
105
self.transport = transport
106
if self.transport.is_readonly():
107
raise UpgradeReadonly
108
self.control_files = LockableFiles(transport.clone(bzrlib.BZRDIR), 'branch-lock')
109
# Lock the branch (soon to be meta dir) to prevent anyone racing with us
110
# This is currently windows incompatible, it will deadlock. When the upgrade
111
# logic becomes format specific, then we can have the format know how to pass this
112
# on. Also note that we probably have an 'upgrade meta' which upgrades the constituent
114
print "FIXME: control files reuse"
115
self.control_files.lock_write()
119
self.control_files.unlock()
122
if not self._open_branch():
124
note('starting upgrade of %s', self.base)
125
self._backup_control_dir()
126
self.pb = ui_factory.progress_bar()
127
if isinstance(self.old_format, BzrDirFormat4):
128
note('starting upgrade from format 4 to 5')
129
self._convert_to_weaves()
130
if isinstance(self.old_format, BzrDirFormat5):
131
note('starting upgrade from format 5 to 6')
132
self._convert_to_prefixed()
133
if isinstance(self.transport, LocalTransport):
134
cache = hashcache.HashCache(abspath(self.base))
139
def _convert_to_prefixed(self):
140
from bzrlib.store import hash_prefix
141
bzr_transport = self.transport.clone('.bzr')
142
bzr_transport.delete('branch-format')
143
for store_name in ["weaves", "revision-store"]:
144
note("adding prefixes to %s" % store_name)
145
store_transport = bzr_transport.clone(store_name)
146
for filename in store_transport.list_dir('.'):
147
if (filename.endswith(".weave") or
148
filename.endswith(".gz") or
149
filename.endswith(".sig")):
150
file_id = os.path.splitext(filename)[0]
153
prefix_dir = hash_prefix(file_id)
154
# FIXME keep track of the dirs made RBC 20060121
156
store_transport.move(filename, prefix_dir + '/' + filename)
157
except NoSuchFile: # catches missing dirs strangely enough
158
store_transport.mkdir(prefix_dir)
159
store_transport.move(filename, prefix_dir + '/' + filename)
160
self.old_format = BzrDirFormat6()
161
self._set_new_format(self.old_format.get_format_string())
162
self.bzrdir = self.old_format.open(self.transport)
163
self.branch = self.bzrdir.open_branch()
165
def _convert_to_weaves(self):
166
note('note: upgrade may be faster if all store files are ungzipped first')
167
bzr_transport = self.transport.clone('.bzr')
170
stat = bzr_transport.stat('weaves')
171
if not S_ISDIR(stat.st_mode):
172
bzr_transport.delete('weaves')
173
bzr_transport.mkdir('weaves')
175
bzr_transport.mkdir('weaves')
176
self.inv_weave = Weave('inventory')
177
# holds in-memory weaves for all files
178
self.text_weaves = {}
179
bzr_transport.delete('branch-format')
180
self._convert_working_inv()
181
rev_history = self.branch.revision_history()
182
# to_read is a stack holding the revisions we still need to process;
183
# appending to it adds new highest-priority revisions
184
self.known_revisions = set(rev_history)
185
self.to_read = rev_history[-1:]
187
rev_id = self.to_read.pop()
188
if (rev_id not in self.revisions
189
and rev_id not in self.absent_revisions):
190
self._load_one_rev(rev_id)
192
to_import = self._make_order()
193
for i, rev_id in enumerate(to_import):
194
self.pb.update('converting revision', i, len(to_import))
195
self._convert_one_rev(rev_id)
197
self._write_all_weaves()
198
self._write_all_revs()
199
note('upgraded to weaves:')
200
note(' %6d revisions and inventories' % len(self.revisions))
201
note(' %6d revisions not present' % len(self.absent_revisions))
202
note(' %6d texts' % self.text_count)
203
self._cleanup_spare_files_after_format4()
204
self.old_format = BzrDirFormat5()
205
self._set_new_format(self.old_format.get_format_string())
206
self.bzrdir = self.old_format.open(self.transport)
207
self.branch = self.bzrdir.open_branch()
209
def _open_branch(self):
210
self.old_format = BzrDirFormat.find_format(self.transport)
211
self.bzrdir = self.old_format.open(self.transport)
212
self.branch = self.bzrdir.open_branch()
213
if isinstance(self.old_format, BzrDirFormat6):
214
note('this branch is in the most current format (%s)', self.old_format)
216
if (not isinstance(self.old_format, BzrDirFormat4) and
217
not isinstance(self.old_format, BzrDirFormat5)):
218
raise BzrError("cannot upgrade from branch format %s" %
219
self.branch._branch_format)
222
def _set_new_format(self, format):
223
self.branch.control_files.put_utf8('branch-format', format)
225
def _cleanup_spare_files_after_format4(self):
226
transport = self.transport.clone('.bzr')
227
print "FIXME working tree upgrade foo."
228
for n in 'merged-patches', 'pending-merged-patches':
230
## assert os.path.getsize(p) == 0
234
transport.delete_tree('inventory-store')
235
transport.delete_tree('text-store')
237
def _backup_control_dir(self):
238
note('making backup of tree history')
239
self.transport.copy_tree('.bzr', '.bzr.backup')
240
note('%s.bzr has been backed up to %s.bzr.backup',
243
note('if conversion fails, you can move this directory back to .bzr')
244
note('if it succeeds, you can remove this directory if you wish')
246
def _convert_working_inv(self):
248
inv = serializer_v4.read_inventory(branch.control_files.get('inventory'))
249
new_inv_xml = serializer_v5.write_inventory_to_string(inv)
250
print "fixme inventory is a working tree change."
251
branch.control_files.put('inventory', new_inv_xml)
253
def _write_all_weaves(self):
254
bzr_transport = self.transport.clone('.bzr')
255
controlweaves = WeaveStore(bzr_transport, prefixed=False)
256
weave_transport = bzr_transport.clone('weaves')
257
weaves = WeaveStore(weave_transport, prefixed=False)
258
transaction = PassThroughTransaction()
260
controlweaves.put_weave('inventory', self.inv_weave, transaction)
263
for file_id, file_weave in self.text_weaves.items():
264
self.pb.update('writing weave', i, len(self.text_weaves))
265
weaves.put_weave(file_id, file_weave, transaction)
270
def _write_all_revs(self):
271
"""Write all revisions out in new form."""
272
transport = self.transport.clone('.bzr')
273
transport.delete_tree('revision-store')
274
transport.mkdir('revision-store')
275
revision_transport = transport.clone('revision-store')
277
revision_store = TextStore(revision_transport,
281
for i, rev_id in enumerate(self.converted_revs):
282
self.pb.update('write revision', i, len(self.converted_revs))
284
serializer_v5.write_revision(self.revisions[rev_id], rev_tmp)
286
revision_store.add(rev_tmp, rev_id)
291
def _load_one_rev(self, rev_id):
292
"""Load a revision object into memory.
294
Any parents not either loaded or abandoned get queued to be
296
self.pb.update('loading revision',
298
len(self.known_revisions))
299
if not self.branch.repository.revision_store.has_id(rev_id):
301
note('revision {%s} not present in branch; '
302
'will be converted as a ghost',
304
self.absent_revisions.add(rev_id)
306
rev_xml = self.branch.repository.revision_store.get(rev_id).read()
307
rev = serializer_v4.read_revision_from_string(rev_xml)
308
for parent_id in rev.parent_ids:
309
self.known_revisions.add(parent_id)
310
self.to_read.append(parent_id)
311
self.revisions[rev_id] = rev
314
def _load_old_inventory(self, rev_id):
315
assert rev_id not in self.converted_revs
316
old_inv_xml = self.branch.repository.inventory_store.get(rev_id).read()
317
inv = serializer_v4.read_inventory_from_string(old_inv_xml)
318
rev = self.revisions[rev_id]
319
if rev.inventory_sha1:
320
assert rev.inventory_sha1 == sha_string(old_inv_xml), \
321
'inventory sha mismatch for {%s}' % rev_id
325
def _load_updated_inventory(self, rev_id):
326
assert rev_id in self.converted_revs
327
inv_xml = self.inv_weave.get_text(rev_id)
328
inv = serializer_v5.read_inventory_from_string(inv_xml)
332
def _convert_one_rev(self, rev_id):
333
"""Convert revision and all referenced objects to new format."""
334
rev = self.revisions[rev_id]
335
inv = self._load_old_inventory(rev_id)
336
present_parents = [p for p in rev.parent_ids
337
if p not in self.absent_revisions]
338
self._convert_revision_contents(rev, inv, present_parents)
339
self._store_new_weave(rev, inv, present_parents)
340
self.converted_revs.add(rev_id)
343
def _store_new_weave(self, rev, inv, present_parents):
344
# the XML is now updated with text versions
348
if ie.kind == 'root_directory':
350
assert hasattr(ie, 'revision'), \
351
'no revision on {%s} in {%s}' % \
352
(file_id, rev.revision_id)
353
new_inv_xml = serializer_v5.write_inventory_to_string(inv)
354
new_inv_sha1 = sha_string(new_inv_xml)
355
self.inv_weave.add(rev.revision_id,
357
new_inv_xml.splitlines(True),
359
rev.inventory_sha1 = new_inv_sha1
361
def _convert_revision_contents(self, rev, inv, present_parents):
362
"""Convert all the files within a revision.
364
Also upgrade the inventory to refer to the text revision ids."""
365
rev_id = rev.revision_id
366
mutter('converting texts of revision {%s}',
368
parent_invs = map(self._load_updated_inventory, present_parents)
371
self._convert_file_version(rev, ie, parent_invs)
373
def _convert_file_version(self, rev, ie, parent_invs):
374
"""Convert one version of one file.
376
The file needs to be added into the weave if it is a merge
377
of >=2 parents or if it's changed from its parent.
379
if ie.kind == 'root_directory':
382
rev_id = rev.revision_id
383
w = self.text_weaves.get(file_id)
386
self.text_weaves[file_id] = w
388
previous_entries = ie.find_previous_heads(parent_invs, w)
389
for old_revision in previous_entries:
390
# if this fails, its a ghost ?
391
assert old_revision in self.converted_revs
392
self.snapshot_ie(previous_entries, ie, w, rev_id)
394
assert getattr(ie, 'revision', None) is not None
396
def snapshot_ie(self, previous_revisions, ie, w, rev_id):
397
# TODO: convert this logic, which is ~= snapshot to
398
# a call to:. This needs the path figured out. rather than a work_tree
399
# a v4 revision_tree can be given, or something that looks enough like
400
# one to give the file content to the entry if it needs it.
401
# and we need something that looks like a weave store for snapshot to
403
#ie.snapshot(rev, PATH, previous_revisions, REVISION_TREE, InMemoryWeaveStore(self.text_weaves))
404
if len(previous_revisions) == 1:
405
previous_ie = previous_revisions.values()[0]
406
if ie._unchanged(previous_ie):
407
ie.revision = previous_ie.revision
409
parent_indexes = map(w.lookup, previous_revisions)
411
text = self.branch.repository.text_store.get(ie.text_id)
412
file_lines = text.readlines()
413
assert sha_strings(file_lines) == ie.text_sha1
414
assert sum(map(len, file_lines)) == ie.text_size
415
w.add(rev_id, parent_indexes, file_lines, ie.text_sha1)
418
w.add(rev_id, parent_indexes, [], None)
420
##mutter('import text {%s} of {%s}',
421
## ie.text_id, file_id)
423
def _make_order(self):
424
"""Return a suitable order for importing revisions.
426
The order must be such that an revision is imported after all
427
its (present) parents.
429
todo = set(self.revisions.keys())
430
done = self.absent_revisions.copy()
433
# scan through looking for a revision whose parents
435
for rev_id in sorted(list(todo)):
436
rev = self.revisions[rev_id]
437
parent_ids = set(rev.parent_ids)
438
if parent_ids.issubset(done):
439
# can take this one now
447
t = get_transport(url)