3
# Copyright (C) 2005 Canonical Ltd
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU General Public License for more details.
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
"""Experiment in converting existing bzr branches to weaves."""
21
# To make this properly useful
23
# 1. assign text version ids, and put those text versions into
24
# the inventory as they're converted.
26
# 2. keep track of the previous version of each file, rather than
27
# just using the last one imported
29
# 3. assign entry versions when files are added, renamed or moved.
31
# 4. when merged-in versions are observed, walk down through them
32
# to discover everything, then commit bottom-up
34
# 5. track ancestry as things are merged in, and commit that in each
37
# Perhaps it's best to first walk the whole graph and make a plan for
38
# what should be imported in what order? Need a kind of topological
39
# sort of all revisions. (Or do we, can we just before doing a revision
40
# see that all its parents have either been converted or abandoned?)
43
# Cannot import a revision until all its parents have been
44
# imported. in other words, we can only import revisions whose
45
# parents have all been imported. the first step must be to
46
# import a revision with no parents, of which there must be at
47
# least one. (So perhaps it's useful to store forward pointers
48
# from a list of parents to their children?)
50
# Another (equivalent?) approach is to build up the ordered
51
# ancestry list for the last revision, and walk through that. We
52
# are going to need that.
54
# We don't want to have to recurse all the way back down the list.
56
# Suppose we keep a queue of the revisions able to be processed at
57
# any point. This starts out with all the revisions having no
60
# This seems like a generally useful algorithm...
62
# The current algorithm is dumb (O(n**2)?) but will do the job, and
63
# takes less than a second on the bzr.dev branch.
65
# This currently does a kind of lazy conversion of file texts, where a
66
# new text is written in every version. That's unnecessary but for
67
# the moment saves us having to worry about when files need new
70
# TODO: Check that the working directory is clean before converting
83
import hotshot, hotshot.stats
88
from bzrlib.branch import Branch, find_branch
89
from bzrlib.revfile import Revfile
90
from bzrlib.weave import Weave
91
from bzrlib.weavefile import read_weave, write_weave
92
from bzrlib.progress import ProgressBar
93
from bzrlib.atomicfile import AtomicFile
94
from bzrlib.xml4 import serializer_v4
95
from bzrlib.xml5 import serializer_v5
96
from bzrlib.trace import mutter, note, warning, enable_default_logging
97
from bzrlib.osutils import sha_strings, sha_string
98
from bzrlib.commit import merge_ancestry_lines
101
class Convert(object):
103
self.converted_revs = set()
104
self.absent_revisions = set()
107
self.inventories = {}
113
enable_default_logging()
114
self._backup_control_dir()
115
self.pb = ProgressBar()
116
if not os.path.isdir('.bzr/weaves'):
117
os.mkdir('.bzr/weaves')
118
self.inv_weave = Weave('__inventory')
119
self.anc_weave = Weave('__ancestry')
121
# holds in-memory weaves for all files
122
self.text_weaves = {}
123
self.branch = Branch('.', relax_version_check=True)
124
self._convert_working_inv()
125
rev_history = self.branch.revision_history()[:300]
126
# to_read is a stack holding the revisions we still need to process;
127
# appending to it adds new highest-priority revisions
128
self.known_revisions = set(rev_history)
129
self.to_read = [rev_history[-1]]
131
rev_id = self.to_read.pop()
132
if (rev_id not in self.revisions
133
and rev_id not in self.absent_revisions):
134
self._load_one_rev(rev_id)
136
to_import = self._make_order()
137
for i, rev_id in enumerate(to_import):
138
self.pb.update('converting revision', i, len(to_import))
139
self._convert_one_rev(rev_id)
141
note('upgraded to weaves:')
142
note(' %6d revisions and inventories' % len(self.revisions))
143
note(' %6d absent revisions removed' % len(self.absent_revisions))
144
note(' %6d texts' % self.text_count)
145
self._write_all_weaves()
146
self._write_all_revs()
149
def _backup_control_dir(self):
150
shutil.copytree('.bzr', '.bzr.backup')
151
note('.bzr has been backed up to .bzr.backup')
152
note('if conversion fails, you can move this directory back to .bzr')
153
note('if it succeeds, you can remove this directory if you wish')
156
def _convert_working_inv(self):
158
inv = serializer_v4.read_inventory(branch.controlfile('inventory', 'rb'))
159
serializer_v5.write_inventory(inv, branch.controlfile('new-inventory', 'wb'))
163
def _write_all_weaves(self):
164
write_a_weave(self.inv_weave, '.bzr/inventory.weave')
165
write_a_weave(self.anc_weave, '.bzr/ancestry.weave')
168
for file_id, file_weave in self.text_weaves.items():
169
self.pb.update('writing weave', i, len(self.text_weaves))
170
write_a_weave(file_weave, '.bzr/weaves/%s.weave' % file_id)
176
def _write_all_revs(self):
177
"""Write all revisions out in new form."""
179
for i, rev_id in enumerate(self.converted_revs):
180
self.pb.update('write revision', i, len(self.converted_revs))
181
f = file('new-revisions/%s' % rev_id, 'wb')
183
serializer_v5.write_revision(self.revisions[rev_id], f)
190
def _load_one_rev(self, rev_id):
191
"""Load a revision object into memory.
193
Any parents not either loaded or abandoned get queued to be
195
self.pb.update('loading revision',
197
len(self.known_revisions))
198
if rev_id not in self.branch.revision_store:
200
note('revision {%s} not present in branch; '
201
'will not be converted',
203
self.absent_revisions.add(rev_id)
205
rev_xml = self.branch.revision_store[rev_id].read()
206
rev = serializer_v4.read_revision_from_string(rev_xml)
207
for parent_id in rev.parent_ids:
208
self.known_revisions.add(parent_id)
209
self.to_read.append(parent_id)
210
self.revisions[rev_id] = rev
211
old_inv_xml = self.branch.inventory_store[rev_id].read()
212
inv = serializer_v4.read_inventory_from_string(old_inv_xml)
213
assert rev.inventory_sha1 == sha_string(old_inv_xml)
214
self.inventories[rev_id] = inv
217
def _convert_one_rev(self, rev_id):
218
"""Convert revision and all referenced objects to new format."""
219
rev = self.revisions[rev_id]
220
inv = self.inventories[rev_id]
221
for parent_id in rev.parent_ids[:]:
222
if parent_id in self.absent_revisions:
223
rev.parent_ids.remove(parent_id)
225
note('remove {%s} as parent of {%s}', parent_id, rev_id)
226
self._convert_revision_contents(rev, inv)
227
# the XML is now updated with text versions
228
new_inv_xml = serializer_v5.write_inventory_to_string(inv)
229
new_inv_sha1 = sha_string(new_inv_xml)
230
self.inv_weave.add(rev_id, rev.parent_ids,
231
new_inv_xml.splitlines(True),
233
# TODO: Upgrade revision XML and write that out
234
rev.inventory_sha1 = new_inv_sha1
235
self._make_rev_ancestry(rev)
236
self.converted_revs.add(rev_id)
239
def _make_rev_ancestry(self, rev):
240
rev_id = rev.revision_id
241
for parent_id in rev.parent_ids:
242
assert parent_id in self.converted_revs
244
lines = list(self.anc_weave.mash_iter(rev.parent_ids))
247
lines.append(rev_id + '\n')
249
parent_ancestries = [self.ancestries[p] for p in rev.parent_ids]
250
new_lines = merge_ancestry_lines(rev_id, parent_ancestries)
251
assert set(lines) == set(new_lines)
252
self.ancestries[rev_id] = new_lines
253
self.anc_weave.add(rev_id, rev.parent_ids, lines)
256
def _convert_revision_contents(self, rev, inv):
257
"""Convert all the files within a revision.
259
Also upgrade the inventory to refer to the text revision ids."""
260
rev_id = rev.revision_id
261
mutter('converting texts of revision {%s}',
265
self._set_name_version(rev, ie)
266
if ie.kind != 'file':
268
self._convert_file_version(rev, ie)
271
def _set_name_version(self, rev, ie):
272
"""Set name version for a file.
274
Done in a slightly lazy way: if the file is renamed or in a merge revision
275
it gets a new version, otherwise the same as before.
278
if len(rev.parent_ids) != 1:
279
ie.name_version = rev.revision_id
281
old_inv = self.inventories[rev.parent_ids[0]]
282
if not old_inv.has_id(file_id):
283
ie.name_version = rev.revision_id
285
old_ie = old_inv[file_id]
286
if (old_ie.parent_id != ie.parent_id
287
or old_ie.name != ie.name):
288
ie.name_version = rev.revision_id
290
ie.name_version = old_ie.name_version
294
def _convert_file_version(self, rev, ie):
295
"""Convert one version of one file.
297
The file needs to be added into the weave if it is a merge
298
of >=2 parents or if it's changed from its parent.
301
rev_id = rev.revision_id
302
w = self.text_weaves.get(file_id)
305
self.text_weaves[file_id] = w
306
file_lines = self.branch.text_store[ie.text_id].readlines()
307
assert sha_strings(file_lines) == ie.text_sha1
308
assert sum(map(len, file_lines)) == ie.text_size
311
for parent_id in rev.parent_ids:
312
##if parent_id in self.absent_revisions:
314
assert parent_id in self.converted_revs, \
315
'parent {%s} not converted' % parent_id
316
parent_inv = self.inventories[parent_id]
317
if parent_inv.has_id(file_id):
318
parent_ie = parent_inv[file_id]
319
old_text_version = parent_ie.text_version
320
assert old_text_version in self.converted_revs
321
if old_text_version not in file_parents:
322
file_parents.append(old_text_version)
323
if parent_ie.text_sha1 != ie.text_sha1:
325
if len(file_parents) != 1 or text_changed:
326
w.add(rev_id, file_parents, file_lines, ie.text_sha1)
327
ie.text_version = rev_id
329
##mutter('import text {%s} of {%s}',
330
## ie.text_id, file_id)
332
##mutter('text of {%s} unchanged from parent', file_id)
333
ie.text_version = file_parents[0]
338
def _make_order(self):
339
"""Return a suitable order for importing revisions.
341
The order must be such that an revision is imported after all
342
its (present) parents.
344
todo = set(self.revisions.keys())
345
done = self.absent_revisions.copy()
348
# scan through looking for a revision whose parents
350
for rev_id in sorted(list(todo)):
351
rev = self.revisions[rev_id]
352
parent_ids = set(rev.parent_ids)
353
if parent_ids.issubset(done):
354
# can take this one now
361
def write_a_weave(weave, filename):
362
inv_wf = file(filename, 'wb')
364
write_weave(weave, inv_wf)
371
def profile_convert():
372
prof_f = tempfile.NamedTemporaryFile()
374
prof = hotshot.Profile(prof_f.name)
376
prof.runcall(Convert)
379
stats = hotshot.stats.load(prof_f.name)
381
stats.sort_stats('time')
382
# XXX: Might like to write to stderr or the trace file instead but
383
# print_stats seems hardcoded to stdout
384
stats.print_stats(100)
387
if __name__ == '__main__':
388
enable_default_logging()
390
if '-p' in sys.argv[1:]: