3
# Copyright (C) 2005 Canonical Ltd
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU General Public License for more details.
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
"""Experiment in converting existing bzr branches to weaves."""
21
# To make this properly useful
23
# 1. assign text version ids, and put those text versions into
24
# the inventory as they're converted.
26
# 2. keep track of the previous version of each file, rather than
27
# just using the last one imported
29
# 3. assign entry versions when files are added, renamed or moved.
31
# 4. when merged-in versions are observed, walk down through them
32
# to discover everything, then commit bottom-up
34
# 5. track ancestry as things are merged in, and commit that in each
37
# Perhaps it's best to first walk the whole graph and make a plan for
38
# what should be imported in what order? Need a kind of topological
39
# sort of all revisions. (Or do we, can we just before doing a revision
40
# see that all its parents have either been converted or abandoned?)
43
# Cannot import a revision until all its parents have been
44
# imported. in other words, we can only import revisions whose
45
# parents have all been imported. the first step must be to
46
# import a revision with no parents, of which there must be at
47
# least one. (So perhaps it's useful to store forward pointers
48
# from a list of parents to their children?)
50
# Another (equivalent?) approach is to build up the ordered
51
# ancestry list for the last revision, and walk through that. We
52
# are going to need that.
54
# We don't want to have to recurse all the way back down the list.
56
# Suppose we keep a queue of the revisions able to be processed at
57
# any point. This starts out with all the revisions having no
60
# This seems like a generally useful algorithm...
62
# The current algorithm is dumb (O(n**2)?) but will do the job, and
63
# takes less than a second on the bzr.dev branch.
65
# This currently does a kind of lazy conversion of file texts, where a
66
# new text is written in every version. That's unnecessary but for
67
# the moment saves us having to worry about when files need new
70
# TODO: Check that the working directory is clean before converting
83
import hotshot, hotshot.stats
88
from bzrlib.branch import Branch, find_branch, BZR_BRANCH_FORMAT_5
89
from bzrlib.revfile import Revfile
90
from bzrlib.weave import Weave
91
from bzrlib.weavefile import read_weave, write_weave
92
from bzrlib.progress import ProgressBar
93
from bzrlib.atomicfile import AtomicFile
94
from bzrlib.xml4 import serializer_v4
95
from bzrlib.xml5 import serializer_v5
96
from bzrlib.trace import mutter, note, warning, enable_default_logging
97
from bzrlib.osutils import sha_strings, sha_string
98
from bzrlib.commit import merge_ancestry_lines
101
class Convert(object):
103
self.converted_revs = set()
104
self.absent_revisions = set()
107
self.inventories = {}
113
self._backup_control_dir()
114
self.pb = ProgressBar()
115
if not os.path.isdir('.bzr/weaves'):
116
os.mkdir('.bzr/weaves')
117
self.inv_weave = Weave('__inventory')
118
self.anc_weave = Weave('__ancestry')
120
# holds in-memory weaves for all files
121
self.text_weaves = {}
122
self.branch = Branch('.', relax_version_check=True)
123
os.remove(self.branch.controlfilename('branch-format'))
124
self._convert_working_inv()
125
self._cleanup_spare_files()
126
rev_history = self.branch.revision_history()[:300]
127
# to_read is a stack holding the revisions we still need to process;
128
# appending to it adds new highest-priority revisions
129
self.known_revisions = set(rev_history)
130
self.to_read = [rev_history[-1]]
132
rev_id = self.to_read.pop()
133
if (rev_id not in self.revisions
134
and rev_id not in self.absent_revisions):
135
self._load_one_rev(rev_id)
137
to_import = self._make_order()
138
for i, rev_id in enumerate(to_import):
139
self.pb.update('converting revision', i, len(to_import))
140
self._convert_one_rev(rev_id)
142
note('upgraded to weaves:')
143
note(' %6d revisions and inventories' % len(self.revisions))
144
note(' %6d absent revisions removed' % len(self.absent_revisions))
145
note(' %6d texts' % self.text_count)
146
self._write_all_weaves()
147
self._write_all_revs()
148
f = self.branch.controlfile('branch-format', 'wb')
150
f.write(BZR_BRANCH_FORMAT_5)
155
def _cleanup_spare_files(self):
156
for n in 'merged-patches', 'pending-merged-patches':
157
p = self.branch.controlfilename(n)
158
if not os.path.exists(p):
160
assert os.path.getsize(p) == 0
164
def _backup_control_dir(self):
165
shutil.copytree('.bzr', '.bzr.backup')
166
note('.bzr has been backed up to .bzr.backup')
167
note('if conversion fails, you can move this directory back to .bzr')
168
note('if it succeeds, you can remove this directory if you wish')
171
def _convert_working_inv(self):
173
inv = serializer_v4.read_inventory(branch.controlfile('inventory', 'rb'))
174
serializer_v5.write_inventory(inv, branch.controlfile('inventory', 'wb'))
178
def _write_all_weaves(self):
179
write_a_weave(self.inv_weave, '.bzr/inventory.weave')
180
write_a_weave(self.anc_weave, '.bzr/ancestry.weave')
183
for file_id, file_weave in self.text_weaves.items():
184
self.pb.update('writing weave', i, len(self.text_weaves))
185
write_a_weave(file_weave, '.bzr/weaves/%s.weave' % file_id)
191
def _write_all_revs(self):
192
"""Write all revisions out in new form."""
194
for i, rev_id in enumerate(self.converted_revs):
195
self.pb.update('write revision', i, len(self.converted_revs))
196
f = file('new-revisions/%s' % rev_id, 'wb')
198
serializer_v5.write_revision(self.revisions[rev_id], f)
205
def _load_one_rev(self, rev_id):
206
"""Load a revision object into memory.
208
Any parents not either loaded or abandoned get queued to be
210
self.pb.update('loading revision',
212
len(self.known_revisions))
213
if rev_id not in self.branch.revision_store:
215
note('revision {%s} not present in branch; '
216
'will not be converted',
218
self.absent_revisions.add(rev_id)
220
rev_xml = self.branch.revision_store[rev_id].read()
221
rev = serializer_v4.read_revision_from_string(rev_xml)
222
for parent_id in rev.parent_ids:
223
self.known_revisions.add(parent_id)
224
self.to_read.append(parent_id)
225
self.revisions[rev_id] = rev
226
old_inv_xml = self.branch.inventory_store[rev_id].read()
227
inv = serializer_v4.read_inventory_from_string(old_inv_xml)
228
assert rev.inventory_sha1 == sha_string(old_inv_xml)
229
self.inventories[rev_id] = inv
232
def _convert_one_rev(self, rev_id):
233
"""Convert revision and all referenced objects to new format."""
234
rev = self.revisions[rev_id]
235
inv = self.inventories[rev_id]
236
for parent_id in rev.parent_ids[:]:
237
if parent_id in self.absent_revisions:
238
rev.parent_ids.remove(parent_id)
240
note('remove {%s} as parent of {%s}', parent_id, rev_id)
241
self._convert_revision_contents(rev, inv)
242
# the XML is now updated with text versions
243
new_inv_xml = serializer_v5.write_inventory_to_string(inv)
244
new_inv_sha1 = sha_string(new_inv_xml)
245
self.inv_weave.add(rev_id, rev.parent_ids,
246
new_inv_xml.splitlines(True),
248
# TODO: Upgrade revision XML and write that out
249
rev.inventory_sha1 = new_inv_sha1
250
self._make_rev_ancestry(rev)
251
self.converted_revs.add(rev_id)
254
def _make_rev_ancestry(self, rev):
255
rev_id = rev.revision_id
256
for parent_id in rev.parent_ids:
257
assert parent_id in self.converted_revs
259
lines = list(self.anc_weave.mash_iter(rev.parent_ids))
262
lines.append(rev_id + '\n')
264
parent_ancestries = [self.ancestries[p] for p in rev.parent_ids]
265
new_lines = merge_ancestry_lines(rev_id, parent_ancestries)
266
assert set(lines) == set(new_lines)
267
self.ancestries[rev_id] = new_lines
268
self.anc_weave.add(rev_id, rev.parent_ids, lines)
271
def _convert_revision_contents(self, rev, inv):
272
"""Convert all the files within a revision.
274
Also upgrade the inventory to refer to the text revision ids."""
275
rev_id = rev.revision_id
276
mutter('converting texts of revision {%s}',
280
self._set_name_version(rev, ie)
281
if ie.kind != 'file':
283
self._convert_file_version(rev, ie)
286
def _set_name_version(self, rev, ie):
287
"""Set name version for a file.
289
Done in a slightly lazy way: if the file is renamed or in a merge revision
290
it gets a new version, otherwise the same as before.
293
if len(rev.parent_ids) != 1:
294
ie.name_version = rev.revision_id
296
old_inv = self.inventories[rev.parent_ids[0]]
297
if not old_inv.has_id(file_id):
298
ie.name_version = rev.revision_id
300
old_ie = old_inv[file_id]
301
if (old_ie.parent_id != ie.parent_id
302
or old_ie.name != ie.name):
303
ie.name_version = rev.revision_id
305
ie.name_version = old_ie.name_version
309
def _convert_file_version(self, rev, ie):
310
"""Convert one version of one file.
312
The file needs to be added into the weave if it is a merge
313
of >=2 parents or if it's changed from its parent.
316
rev_id = rev.revision_id
317
w = self.text_weaves.get(file_id)
320
self.text_weaves[file_id] = w
321
file_lines = self.branch.text_store[ie.text_id].readlines()
322
assert sha_strings(file_lines) == ie.text_sha1
323
assert sum(map(len, file_lines)) == ie.text_size
326
for parent_id in rev.parent_ids:
327
##if parent_id in self.absent_revisions:
329
assert parent_id in self.converted_revs, \
330
'parent {%s} not converted' % parent_id
331
parent_inv = self.inventories[parent_id]
332
if parent_inv.has_id(file_id):
333
parent_ie = parent_inv[file_id]
334
old_text_version = parent_ie.text_version
335
assert old_text_version in self.converted_revs
336
if old_text_version not in file_parents:
337
file_parents.append(old_text_version)
338
if parent_ie.text_sha1 != ie.text_sha1:
340
if len(file_parents) != 1 or text_changed:
341
w.add(rev_id, file_parents, file_lines, ie.text_sha1)
342
ie.text_version = rev_id
344
##mutter('import text {%s} of {%s}',
345
## ie.text_id, file_id)
347
##mutter('text of {%s} unchanged from parent', file_id)
348
ie.text_version = file_parents[0]
353
def _make_order(self):
354
"""Return a suitable order for importing revisions.
356
The order must be such that an revision is imported after all
357
its (present) parents.
359
todo = set(self.revisions.keys())
360
done = self.absent_revisions.copy()
363
# scan through looking for a revision whose parents
365
for rev_id in sorted(list(todo)):
366
rev = self.revisions[rev_id]
367
parent_ids = set(rev.parent_ids)
368
if parent_ids.issubset(done):
369
# can take this one now
376
def write_a_weave(weave, filename):
377
inv_wf = file(filename, 'wb')
379
write_weave(weave, inv_wf)
386
def profile_convert():
387
prof_f = tempfile.NamedTemporaryFile()
389
prof = hotshot.Profile(prof_f.name)
391
prof.runcall(Convert)
394
stats = hotshot.stats.load(prof_f.name)
396
stats.sort_stats('time')
397
# XXX: Might like to write to stderr or the trace file instead but
398
# print_stats seems hardcoded to stdout
399
stats.print_stats(100)
402
if __name__ == '__main__':
403
enable_default_logging()
405
if '-p' in sys.argv[1:]: