230
234
# revisions to exclude now ...
231
235
if start_rev_id is not None:
232
236
self.note("Calculating the revisions to exclude ...")
233
self.excluded_revisions = set([rev_id for rev_id, _, _, _ in
234
self.branch.iter_merge_sorted_revisions(start_rev_id)])
237
self.excluded_revisions = set(
238
[rev_id for rev_id, _, _, _ in self.branch.iter_merge_sorted_revisions(start_rev_id)])
235
239
if self.baseline:
236
240
# needed so the first relative commit knows its parent
237
241
self.excluded_revisions.remove(start_rev_id)
238
242
view_revisions.insert(0, start_rev_id)
239
243
return list(view_revisions)
245
def emit_commits(self, interesting):
247
revobj = self.branch.repository.get_revision(interesting.pop(0))
248
self.emit_baseline(revobj, self.ref)
249
for i in range(0, len(interesting), REVISIONS_CHUNK_SIZE):
250
chunk = interesting[i:i + REVISIONS_CHUNK_SIZE]
251
history = dict(self.branch.repository.iter_revisions(chunk))
255
trees_needed.update(self.preprocess_commit(revid, history[revid], self.ref))
257
for tree in self._get_revision_trees(trees_needed):
258
trees[tree.get_revision_id()] = tree
261
revobj = history[revid]
262
if len(revobj.parent_ids) == 0:
263
parent = breezy.revision.NULL_REVISION
265
parent = revobj.parent_ids[0]
266
self.emit_commit(revobj, self.ref, trees[parent], trees[revid])
242
269
# Export the data
243
270
with self.branch.repository.lock_read():
326
350
for feature in sorted(commands.FEATURE_NAMES):
327
351
self.print_cmd(commands.FeatureCommand(feature))
329
def emit_baseline(self, revid, ref):
353
def emit_baseline(self, revobj, ref):
330
354
# Emit a full source tree of the first commit's parent
331
revobj = self.branch.repository.get_revision(revid)
333
self.revid_to_mark[revid] = mark
334
file_cmds = self._get_filecommands(
335
breezy.revision.NULL_REVISION, revid)
356
self.revid_to_mark[revobj.revision_id] = mark
357
tree_old = self.branch.repository.revision_tree(
358
breezy.revision.NULL_REVISION)
359
[tree_new] = list(self._get_revision_trees([revobj.revision_id]))
360
file_cmds = self._get_filecommands(tree_old, tree_new)
336
361
self.print_cmd(commands.ResetCommand(ref, None))
337
362
self.print_cmd(self._get_commit_command(ref, mark, revobj, file_cmds))
339
def emit_commit(self, revid, ref):
364
def preprocess_commit(self, revid, revobj, ref):
340
365
if revid in self.revid_to_mark or revid in self.excluded_revisions:
343
# Get the Revision object
345
revobj = self.branch.repository.get_revision(revid)
346
except bazErrors.NoSuchRevision:
347
368
# This is a ghost revision. Mark it as not found and move on to the next one.
348
369
self.revid_to_mark[revid] = -1
351
371
# Get the primary parent
352
372
# TODO: Consider the excluded revisions when deciding the parents.
353
373
# Currently, a commit with parents that are excluded ought to be
354
374
# triggering the ref calculation below (and it is not).
356
ncommits = len(self.revid_to_mark)
357
nparents = len(revobj.parent_ids)
376
if len(revobj.parent_ids) == 0:
359
377
parent = breezy.revision.NULL_REVISION
361
379
parent = revobj.parent_ids[0]
382
mark = len(self.revid_to_mark) + 1
383
self.revid_to_mark[revobj.revision_id] = mark
385
return [parent, revobj.revision_id]
387
def emit_commit(self, revobj, ref, tree_old, tree_new):
363
388
# For parentless commits we need to issue a reset command first, otherwise
364
389
# git-fast-import will assume the previous commit was this one's parent
390
if tree_old.get_revision_id() == breezy.revision.NULL_REVISION:
366
391
self.print_cmd(commands.ResetCommand(ref, None))
370
self.revid_to_mark[revid] = mark
371
file_cmds = self._get_filecommands(parent, revid)
393
file_cmds = self._get_filecommands(tree_old, tree_new)
394
mark = self.revid_to_mark[revobj.revision_id]
372
395
self.print_cmd(self._get_commit_command(ref, mark, revobj, file_cmds))
374
397
# Report progress and checkpoint if it's time for that
398
ncommits = len(self.revid_to_mark)
375
399
self.report_progress(ncommits)
376
400
if (self.checkpoint is not None and self.checkpoint > 0 and ncommits and
377
401
ncommits % self.checkpoint == 0):
452
476
# Build and return the result
453
return commands.CommitCommand(git_ref, mark, author_info,
454
committer_info, revobj.message.encode(
455
"utf-8"), from_, merges, file_cmds,
456
more_authors=more_author_info, properties=properties)
458
def _get_revision_trees(self, parent, revision_id):
460
tree_old = self.branch.repository.revision_tree(parent)
461
except bazErrors.UnexpectedInventoryFormat:
463
"Parent is malformed - diffing against previous parent")
464
# We can't find the old parent. Let's diff against its parent instead.
465
pp = self.branch.repository.get_revision(parent)
466
tree_old = self.branch.repository.revision_tree(pp.parent_ids[0])
469
tree_new = self.branch.repository.revision_tree(revision_id)
470
except bazErrors.UnexpectedInventoryFormat:
471
# We can't really do anything anymore
472
self.warning("Revision %s is malformed - skipping" % revision_id)
473
return tree_old, tree_new
475
def _get_filecommands(self, parent, revision_id):
477
return commands.CommitCommand(
478
git_ref, mark, author_info, committer_info,
479
revobj.message.encode("utf-8"), from_, merges, file_cmds,
480
more_authors=more_author_info, properties=properties)
482
def _get_revision_trees(self, revids):
486
if revid == breezy.revision.NULL_REVISION:
487
by_revid[revid] = self.branch.repository.revision_tree(revid)
488
elif revid not in self.tree_cache:
489
missing.append(revid)
491
for tree in self.branch.repository.revision_trees(missing):
492
by_revid[tree.get_revision_id()] = tree
496
yield self.tree_cache[revid]
498
yield by_revid[revid]
500
for revid, tree in by_revid.items():
501
self.tree_cache[revid] = tree
503
def _get_filecommands(self, tree_old, tree_new):
476
504
"""Get the list of FileCommands for the changes between two revisions."""
477
tree_old, tree_new = self._get_revision_trees(parent, revision_id)
478
if not(tree_old and tree_new):
479
# Something is wrong with this revision - ignore the filecommands
482
505
changes = tree_new.changes_from(tree_old)
484
507
my_modified = list(changes.modified)
486
509
# The potential interaction between renames and deletes is messy.
487
510
# Handle it here ...
488
511
file_cmds, rd_modifies, renamed = self._process_renames_and_deletes(
489
changes.renamed, changes.removed, revision_id, tree_old)
512
changes.renamed, changes.removed, tree_new.get_revision_id(), tree_old)
491
514
for cmd in file_cmds:
494
517
# Map kind changes to a delete followed by an add
495
518
for change in changes.kind_changed:
496
path = self._adjust_path_for_renames(path, renamed, revision_id)
519
path = self._adjust_path_for_renames(
520
path, renamed, tree_new.get_revision_id())
497
521
# IGC: I don't understand why a delete is needed here.
498
522
# In fact, it seems harmful? If you uncomment this line,
499
523
# please file a bug explaining why you needed to.
524
548
self.warning("cannot export '%s' of kind %s yet - ignoring" %
525
549
(change.path[1], change.kind[1]))
526
for (path, mode), chunks in tree_new.iter_files_bytes(
551
# TODO(jelmer): Improve performance on remote repositories
552
# by using Repository.iter_files_bytes for bzr repositories here.
553
for (path, mode), chunks in tree_new.iter_files_bytes(files_to_get):
528
554
yield commands.FileModifyCommand(
529
555
path.encode("utf-8"), mode, None, b''.join(chunks))