1
# Copyright (C) 2008 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Import processor that supports all Bazaar repository formats."""
36
from bzrlib.repofmt import pack_repo
37
from bzrlib.trace import (
41
import bzrlib.util.configobj.configobj as configobj
42
from bzrlib.plugins.fastimport import (
43
errors as plugin_errors,
51
# How many commits before automatically reporting progress
52
_DEFAULT_AUTO_PROGRESS = 1000
54
# How many commits before automatically checkpointing
55
_DEFAULT_AUTO_CHECKPOINT = 10000
57
# How many inventories to cache
58
_DEFAULT_INV_CACHE_SIZE = 10
61
class GenericProcessor(processor.ImportProcessor):
62
"""An import processor that handles basic imports.
64
Current features supported:
66
* blobs are cached in memory
67
* files and symlinks commits are supported
68
* checkpoints automatically happen at a configurable frequency
69
over and above the stream requested checkpoints
70
* timestamped progress reporting, both automatic and stream requested
71
* some basic statistics are dumped on completion.
73
At checkpoints and on completion, the commit-id -> revision-id map is
74
saved to a file called 'fastimport-id-map'. If the import crashes
75
or is interrupted, it can be started again and this file will be
76
used to skip over already loaded revisions. The format of each line
77
is "commit-id revision-id" so commit-ids cannot include spaces.
79
Here are the supported parameters:
81
* info - name of a hints file holding the analysis generated
82
by running the fast-import-info processor in verbose mode. When
83
importing large repositories, this parameter is needed so
84
that the importer knows what blobs to intelligently cache.
86
* trees - update the working trees before completing.
87
By default, the importer updates the repository
88
and branches and the user needs to run 'bzr update' for the
89
branches of interest afterwards.
91
* checkpoint - automatically checkpoint every n commits over and
92
above any checkpoints contained in the import stream.
95
* count - only import this many commits then exit. If not set
96
or negative, all commits are imported.
98
* inv-cache - number of inventories to cache.
99
If not set, the default is 10.
101
* experimental - enable experimental mode, i.e. use features
102
not yet fully tested.
104
* import-marks - name of file to read to load mark information from
106
* export-marks - name of file to write to save mark information to
120
def _import_marks(self, filename):
125
"Could not open import-marks file, not importing marks")
128
firstline = f.readline()
129
match = re.match(r'^format=(\d+)$', firstline)
131
print >>sys.stderr, "%r doesn't look like a mark file" % \
134
elif match.group(1) != '1':
135
print >>sys.stderr, 'format version in mark file not supported'
138
for string in f.readline().rstrip('\n').split('\0'):
141
name, integer = string.rsplit('.', 1)
142
# We really can't do anything with the branch information, so we
145
self.cache_mgr.revision_ids = {}
147
line = line.rstrip('\n')
148
mark, revid = line.split(' ', 1)
149
self.cache_mgr.revision_ids[mark] = revid
152
def export_marks(self, filename):
154
f = file(filename, 'w')
157
"Could not open export-marks file, not exporting marks")
159
f.write('format=1\n')
161
for mark, revid in self.cache_mgr.revision_ids.iteritems():
162
f.write('%s %s\n' % (mark, revid))
165
def pre_process(self):
166
self._start_time = time.time()
167
self._load_info_and_params()
168
self.cache_mgr = GenericCacheManager(self.info, self.verbose,
169
self.inventory_cache_size)
171
if self.params.get("import-marks") is not None:
172
self._import_marks(self.params.get("import-marks"))
173
self.skip_total = False
174
self.first_incremental_commit = True
176
self.first_incremental_commit = False
177
self.skip_total = self._init_id_map()
179
self.note("Found %d commits already loaded - "
180
"skipping over these ...", self.skip_total)
181
self._revision_count = 0
183
# mapping of tag name to revision_id
186
# Create the revision loader needed for committing
187
new_repo_api = hasattr(self.repo, 'revisions')
189
self.loader = revisionloader.RevisionLoader2(self.repo)
190
elif not self._experimental:
191
self.loader = revisionloader.RevisionLoader1(self.repo)
193
def fulltext_when(count):
194
total = self.total_commits
195
if total is not None and count == total:
198
# Create an inventory fulltext every 200 revisions
199
fulltext = count % 200 == 0
201
self.note("%d commits - storing inventory as full-text",
205
self.loader = revisionloader.ImportRevisionLoader1(
206
self.repo, self.inventory_cache_size,
207
fulltext_when=fulltext_when)
209
# Disable autopacking if the repo format supports it.
210
# THIS IS A HACK - there is no sanctioned way of doing this yet.
211
if isinstance(self.repo, pack_repo.KnitPackRepository):
212
self._original_max_pack_count = \
213
self.repo._pack_collection._max_pack_count
214
def _max_pack_count_for_import(total_revisions):
215
return total_revisions + 1
216
self.repo._pack_collection._max_pack_count = \
217
_max_pack_count_for_import
219
self._original_max_pack_count = None
221
# Create a write group. This is committed at the end of the import.
222
# Checkpointing closes the current one and starts a new one.
223
self.repo.start_write_group()
225
def _load_info_and_params(self):
226
self._experimental = bool(self.params.get('experimental', False))
228
# This is currently hard-coded but might be configurable via
229
# parameters one day if that's needed
230
repo_transport = self.repo.control_files._transport
231
self.id_map_path = repo_transport.local_abspath("fastimport-id-map")
233
# Load the info file, if any
234
info_path = self.params.get('info')
235
if info_path is not None:
236
self.info = configobj.ConfigObj(info_path)
240
# Decide how often to automatically report progress
241
# (not a parameter yet)
242
self.progress_every = _DEFAULT_AUTO_PROGRESS
244
self.progress_every = self.progress_every / 10
246
# Decide how often to automatically checkpoint
247
self.checkpoint_every = int(self.params.get('checkpoint',
248
_DEFAULT_AUTO_CHECKPOINT))
250
# Decide how big to make the inventory cache
251
self.inventory_cache_size = int(self.params.get('inv-cache',
252
_DEFAULT_INV_CACHE_SIZE))
254
# Find the maximum number of commits to import (None means all)
255
# and prepare progress reporting. Just in case the info file
256
# has an outdated count of commits, we store the max counts
257
# at which we need to terminate separately to the total used
258
# for progress tracking.
260
self.max_commits = int(self.params['count'])
261
if self.max_commits < 0:
262
self.max_commits = None
264
self.max_commits = None
265
if self.info is not None:
266
self.total_commits = int(self.info['Command counts']['commit'])
267
if (self.max_commits is not None and
268
self.total_commits > self.max_commits):
269
self.total_commits = self.max_commits
271
self.total_commits = self.max_commits
273
def _process(self, command_iter):
274
# if anything goes wrong, abort the write group if any
276
processor.ImportProcessor._process(self, command_iter)
278
if self.repo is not None and self.repo.is_in_write_group():
279
self.repo.abort_write_group()
282
def post_process(self):
283
# Commit the current write group and checkpoint the id map
284
self.repo.commit_write_group()
287
if self.params.get("export-marks") is not None:
288
self.export_marks(self.params.get("export-marks"))
290
# Update the branches
291
self.note("Updating branch information ...")
292
updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
293
helpers.invert_dictset(self.cache_mgr.heads),
294
self.cache_mgr.last_ref, self.tags)
295
branches_updated, branches_lost = updater.update()
296
self._branch_count = len(branches_updated)
298
# Tell the user about branches that were not created
300
if not self.repo.is_shared():
301
self.warning("Cannot import multiple branches into "
302
"an unshared repository")
303
self.warning("Not creating branches for these head revisions:")
304
for lost_info in branches_lost:
305
head_revision = lost_info[1]
306
branch_name = lost_info[0]
307
self.note("\t %s = %s", head_revision, branch_name)
309
# Update the working trees as requested and dump stats
311
remind_about_update = True
312
if self._branch_count == 0:
313
self.note("no branches to update")
314
self.note("no working trees to update")
315
remind_about_update = False
316
elif self.params.get('trees', False):
317
trees = self._get_working_trees(branches_updated)
319
self.note("Updating the working trees ...")
321
report = delta._ChangeReporter()
326
self._tree_count += 1
327
remind_about_update = False
329
self.warning("No working trees available to update")
332
# Finish up by telling the user what to do next.
333
if self._original_max_pack_count:
334
# We earlier disabled autopacking, creating one pack every
335
# checkpoint instead. We now pack the repository to optimise
336
# how data is stored.
337
if self._revision_count > self.checkpoint_every:
338
self.note("Packing repository ...")
340
# To be conservative, packing puts the old packs and
341
# indices in obsolete_packs. We err on the side of
342
# optimism and clear out that directory to save space.
343
self.note("Removing obsolete packs ...")
344
# TODO: Use a public API for this once one exists
345
repo_transport = self.repo._pack_collection.transport
346
repo_transport.clone('obsolete_packs').delete_multi(
347
repo_transport.list_dir('obsolete_packs'))
348
if remind_about_update:
349
# This message is explicitly not timestamped.
350
note("To refresh the working tree for a branch, "
353
def _get_working_trees(self, branches):
354
"""Get the working trees for branches in the repository."""
356
wt_expected = self.repo.make_working_trees()
358
if br == self.branch and br is not None:
359
wt = self.working_tree
362
wt = br.bzrdir.open_workingtree()
363
except errors.NoWorkingTree:
364
self.warning("No working tree for branch %s", br)
371
def dump_stats(self):
372
time_required = progress.str_tdelta(time.time() - self._start_time)
373
rc = self._revision_count - self.skip_total
374
bc = self._branch_count
375
wtc = self._tree_count
376
self.note("Imported %d %s, updating %d %s and %d %s in %s",
377
rc, helpers.single_plural(rc, "revision", "revisions"),
378
bc, helpers.single_plural(bc, "branch", "branches"),
379
wtc, helpers.single_plural(wtc, "tree", "trees"),
382
def _init_id_map(self):
383
"""Load the id-map and check it matches the repository.
385
:return: the number of entries in the map
387
# Currently, we just check the size. In the future, we might
388
# decide to be more paranoid and check that the revision-ids
389
# are identical as well.
390
self.cache_mgr.revision_ids, known = idmapfile.load_id_map(
392
existing_count = len(self.repo.all_revision_ids())
393
if existing_count < known:
394
raise plugin_errors.BadRepositorySize(known, existing_count)
397
def _save_id_map(self):
    """Write the commit-id -> revision-id map out to disk."""
    # The whole map is rewritten on every save.  Should that ever
    # prove too slow, appending just the new entries is the obvious
    # optimisation.
    id_map = self.cache_mgr.revision_ids
    idmapfile.save_id_map(self.id_map_path, id_map)
403
def blob_handler(self, cmd):
404
"""Process a BlobCommand."""
405
if cmd.mark is not None:
408
dataref = osutils.sha_strings(cmd.data)
409
self.cache_mgr.store_blob(dataref, cmd.data)
411
def checkpoint_handler(self, cmd):
412
"""Process a CheckpointCommand."""
413
# Commit the current write group and start a new one
414
self.repo.commit_write_group()
416
self.repo.start_write_group()
418
def commit_handler(self, cmd):
419
"""Process a CommitCommand."""
420
if self.skip_total and self._revision_count < self.skip_total:
421
_track_heads(cmd, self.cache_mgr)
422
# Check that we really do know about this commit-id
423
if not self.cache_mgr.revision_ids.has_key(cmd.id):
424
raise plugin_errors.BadRestart(cmd.id)
425
# Consume the file commands and free any non-sticky blobs
426
for fc in cmd.file_iter():
428
self.cache_mgr._blobs = {}
429
self._revision_count += 1
430
# If we're finished getting back to where we were,
431
# load the file-ids cache
432
if self._revision_count == self.skip_total:
433
self._gen_file_ids_cache()
434
self.note("Generated the file-ids cache - %d entries",
435
len(self.cache_mgr.file_ids.keys()))
437
if self.first_incremental_commit:
438
self.first_incremental_commit = None
439
parents = _track_heads(cmd, self.cache_mgr)
440
self._gen_file_ids_cache(parents)
442
# 'Commit' the revision and report progress
443
handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
444
self.loader, self.verbose, self._experimental)
446
self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
447
self._revision_count += 1
448
self.report_progress("(%s)" % cmd.id)
450
# Check if we should finish up or automatically checkpoint
451
if (self.max_commits is not None and
452
self._revision_count >= self.max_commits):
453
self.note("Stopping after reaching requested count of commits")
455
elif self._revision_count % self.checkpoint_every == 0:
456
self.note("%d commits - automatic checkpoint triggered",
457
self._revision_count)
458
self.checkpoint_handler(None)
460
def _gen_file_ids_cache(self, revs=False):
461
"""Generate the file-id cache by searching repository inventories.
463
# Get the interesting revisions - the heads
467
head_ids = self.cache_mgr.heads.keys()
468
revision_ids = [self.cache_mgr.revision_ids[h] for h in head_ids]
470
# Update the fileid cache
472
for revision_id in revision_ids:
473
inv = self.repo.revision_tree(revision_id).inventory
474
# Cache the inventories while we're at it
475
self.cache_mgr.inventories[revision_id] = inv
476
for path, ie in inv.iter_entries():
477
file_ids[path] = ie.file_id
478
self.cache_mgr.file_ids = file_ids
480
def report_progress(self, details=''):
481
# TODO: use a progress bar with ETA enabled
482
if self._revision_count % self.progress_every == 0:
483
if self.total_commits is not None:
484
counts = "%d/%d" % (self._revision_count, self.total_commits)
485
eta = progress.get_eta(self._start_time, self._revision_count,
487
eta_str = progress.str_tdelta(eta)
488
if eta_str.endswith('--'):
491
eta_str = '[%s] ' % eta_str
493
counts = "%d" % (self._revision_count,)
495
self.note("%s commits processed %s%s" % (counts, eta_str, details))
497
def progress_handler(self, cmd):
498
"""Process a ProgressCommand."""
499
# We could use a progress bar here instead
500
self.note("progress %s" % (cmd.message,))
502
def reset_handler(self, cmd):
503
"""Process a ResetCommand."""
504
if cmd.ref.startswith('refs/tags/'):
505
tag_name = cmd.ref[len('refs/tags/'):]
506
if cmd.from_ is not None:
507
self._set_tag(tag_name, cmd.from_)
509
self.warning("ignoring reset refs/tags/%s - no from clause"
513
# FIXME: cmd.from_ is a committish and thus could reference
514
# another branch. Create a method for resolving commitish's.
515
if cmd.from_ is not None:
516
self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_)
517
# Why is this required now vs at the end?
518
#updater = GenericBranchUpdater(self.repo, self.branch, self.cache_mgr,
519
# helpers.invert_dictset(self.cache_mgr.heads),
520
# self.cache_mgr.last_ref, self.tags)
523
def tag_handler(self, cmd):
    """Process a TagCommand."""
    if cmd.from_ is None:
        # Without a from clause there is no revision to point the tag at
        self.warning("ignoring tag %s - no from clause" % cmd.id)
    else:
        self._set_tag(cmd.id, cmd.from_)
530
def _set_tag(self, name, from_):
531
"""Define a tag given a name and import 'from' reference."""
532
bzr_tag_name = name.decode('utf-8', 'replace')
533
bzr_rev_id = self.cache_mgr.revision_ids[from_]
534
self.tags[bzr_tag_name] = bzr_rev_id
537
class GenericCacheManager(object):
538
"""A manager of caches for the GenericProcessor."""
540
def __init__(self, info, verbose=False, inventory_cache_size=10):
541
"""Create a manager of caches.
543
:param info: a ConfigObj holding the output from
544
the --info processor, or None if no hints are available
546
self.verbose = verbose
548
# dataref -> data. datref is either :mark or the sha-1.
549
# Sticky blobs aren't removed after being referenced.
551
self._sticky_blobs = {}
553
# revision-id -> Inventory cache
554
# these are large and we probably don't need too many as
555
# most parents are recent in history
556
self.inventories = lru_cache.LRUCache(inventory_cache_size)
558
# import commit-ids -> revision-id lookup table
559
# we need to keep all of these but they are small
560
self.revision_ids = {}
562
# path -> file-ids - as generated
565
# Head tracking: last ref, last id per ref & map of commit ids to ref*s*
570
# Work out the blobs to make sticky - None means all
571
self._blobs_to_keep = None
574
self._blobs_to_keep = info['Blob usage tracking']['multi']
576
# info not in file - possible when no blobs used
579
def store_blob(self, id, data):
580
"""Store a blob of data."""
581
if (self._blobs_to_keep is None or data == '' or
582
id in self._blobs_to_keep):
583
self._sticky_blobs[id] = data
585
self._blobs[id] = data
587
def fetch_blob(self, id):
588
"""Fetch a blob of data."""
590
return self._sticky_blobs[id]
592
return self._blobs.pop(id)
594
def _delete_path(self, path):
    """Remove a path from caches.

    Deliberately a no-op for the file-id cache: the file-id a path was
    given should be remembered even after that path is deleted.
    """
    # we actually want to remember what file-id we gave a path,
    # even when that file is deleted, so doing nothing is correct
600
def _rename_path(self, old_path, new_path):
601
"""Rename a path in the caches."""
602
# In this case, we need to forget the file-id we gave a path,
603
# otherwise, we'll get duplicate file-ids in the repository.
604
self.file_ids[new_path] = self.file_ids[old_path]
605
del self.file_ids[old_path]
607
def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
608
if parents is not None:
609
for parent in parents:
610
refs = self.heads.get(parent)
612
refs.discard(cmd_ref)
614
del self.heads[parent]
615
self.heads.setdefault(cmd_id, set()).add(cmd_ref)
616
self.last_ids[cmd_ref] = cmd_id
617
self.last_ref = cmd_ref
620
def _track_heads(cmd, cache_mgr):
621
"""Track the repository heads given a CommitCommand.
623
:return: the list of parents in terms of commit-ids
625
# Get the true set of parents
626
if cmd.from_ is not None:
627
parents = [cmd.from_]
629
last_id = cache_mgr.last_ids.get(cmd.ref)
630
if last_id is not None:
634
parents.extend(cmd.merges)
637
cache_mgr.track_heads_for_ref(cmd.ref, cmd.id, parents)
641
class GenericCommitHandler(processor.CommitHandler):
643
def __init__(self, command, repo, cache_mgr, loader, verbose=False,
644
_experimental=False):
645
processor.CommitHandler.__init__(self, command)
647
self.cache_mgr = cache_mgr
649
self.verbose = verbose
650
self._experimental = _experimental
652
def pre_process_files(self):
653
"""Prepare for committing."""
654
self.revision_id = self.gen_revision_id()
655
# cache of texts for this commit, indexed by file-id
656
self.lines_for_commit = {}
657
if self.repo.supports_rich_root():
658
self.lines_for_commit[inventory.ROOT_ID] = []
660
# Track the heads and get the real parent list
661
parents = _track_heads(self.command, self.cache_mgr)
663
# Convert the parent commit-ids to bzr revision-ids
665
self.parents = [self.cache_mgr.revision_ids[p]
669
self.debug("%s id: %s, parents: %s", self.command.id,
670
self.revision_id, str(self.parents))
672
# Seed the inventory from the previous one
673
if len(self.parents) == 0:
674
self.inventory = self.gen_initial_inventory()
676
# use the bzr_revision_id to lookup the inv cache
677
inv = self.get_inventory(self.parents[0])
678
# TODO: Shallow copy - deep inventory copying is expensive
679
self.inventory = inv.copy()
680
if self.repo.supports_rich_root():
681
self.inventory.revision_id = self.revision_id
683
# In this repository, root entries have no knit or weave. When
684
# serializing out to disk and back in, root.revision is always
685
# the new revision_id.
686
self.inventory.root.revision = self.revision_id
688
# directory-path -> inventory-entry for current inventory
689
self.directory_entries = dict(self.inventory.directories())
691
def post_process_files(self):
692
"""Save the revision."""
693
self.cache_mgr.inventories[self.revision_id] = self.inventory
695
# Load the revision into the repository
697
committer = self.command.committer
698
who = "%s <%s>" % (committer[0],committer[1])
699
author = self.command.author
700
if author is not None:
701
author_id = "%s <%s>" % (author[0],author[1])
703
rev_props['author'] = author_id
704
rev = revision.Revision(
705
timestamp=committer[2],
706
timezone=committer[3],
708
message=helpers.escape_commit_message(self.command.message),
709
revision_id=self.revision_id,
710
properties=rev_props,
711
parent_ids=self.parents)
712
self.loader.load(rev, self.inventory, None,
713
lambda file_id: self._get_lines(file_id),
714
lambda revision_ids: self._get_inventories(revision_ids))
716
def modify_handler(self, filecmd):
    """Process a modify FileCommand by updating the inventory."""
    if filecmd.dataref is not None:
        # Content was supplied earlier as a blob; fetch it from the cache
        data = self.cache_mgr.fetch_blob(filecmd.dataref)
    else:
        # Content is supplied inline with the command
        data = filecmd.data
    self.debug("modifying %s", filecmd.path)
    self._modify_inventory(filecmd.path, filecmd.kind,
        filecmd.is_executable, data)
725
def _delete_recursive(self, path):
726
self.debug("deleting %s", path)
727
fileid = self.bzr_file_id(path)
728
dirname, basename = osutils.split(path)
729
if (fileid in self.inventory and
730
isinstance(self.inventory[fileid], inventory.InventoryDirectory)):
731
for child_path in self.inventory[fileid].children.keys():
732
self._delete_recursive(os.utils.pathjoin(path, child_path))
734
if self.inventory.id2path(fileid) == path:
735
del self.inventory[fileid]
737
# already added by some other name?
738
if dirname in self.cache_mgr.file_ids:
739
parent_id = self.cache_mgr.file_ids[dirname]
740
del self.inventory[parent_id].children[basename]
742
self._warn_unless_in_merges(fileid, path)
743
except errors.NoSuchId:
744
self._warn_unless_in_merges(fileid, path)
745
except AttributeError, ex:
746
if ex.args[0] == 'children':
747
# A directory has changed into a file and then one
748
# of it's children is being deleted!
749
self._warn_unless_in_merges(fileid, path)
753
self.cache_mgr._delete_path(path)
757
def delete_handler(self, filecmd):
    """Process a delete FileCommand."""
    # Recursion ensures that deleting a directory also removes
    # everything beneath it
    self._delete_recursive(filecmd.path)
760
def _warn_unless_in_merges(self, fileid, path):
761
if len(self.parents) <= 1:
763
for parent in self.parents[1:]:
764
if fileid in self.get_inventory(parent):
766
self.warning("ignoring delete of %s as not in parent inventories", path)
768
def copy_handler(self, filecmd):
769
src_path = filecmd.src_path
770
dest_path = filecmd.dest_path
771
self.debug("copying %s to %s", src_path, dest_path)
773
self.warning("ignoring copy of %s to %s - no parent revisions",
776
file_id = self.inventory.path2id(src_path)
778
self.warning("ignoring copy of %s to %s - source does not exist",
781
ie = self.inventory[file_id]
784
content = self._get_content_from_repo(self.parents[0], file_id)
785
self._modify_inventory(dest_path, kind, ie.executable, content)
786
elif kind == 'symlink':
787
self._modify_inventory(dest_path, kind, False, ie.symlink_target)
789
self.warning("ignoring copy of %s %s - feature not yet supported",
792
def _get_content_from_repo(self, revision_id, file_id):
    """Get the content of a file for a revision-id."""
    return self.repo.revision_tree(revision_id).get_file_text(file_id)
797
def rename_handler(self, filecmd):
    """Process a rename FileCommand.

    Moves old_path to new_path under the same file-id, replacing any
    entry already present at the destination, and re-records the moved
    entry's text so the commit can store it.
    """
    old_path = filecmd.old_path
    new_path = filecmd.new_path
    self.debug("renaming %s to %s", old_path, new_path)
    file_id = self.bzr_file_id(old_path)
    # Make sure the destination directory exists before renaming into it
    basename, new_parent_ie = self._ensure_directory(new_path)
    new_parent_id = new_parent_ie.file_id
    # Anything already at the destination is clobbered by the rename
    existing_id = self.inventory.path2id(new_path)
    if existing_id is not None:
        self.inventory.remove_recursive_id(existing_id)
    ie = self.inventory[file_id]
    # Re-record the entry's text under this commit
    lines = self.loader._get_lines(file_id, ie.revision)
    self.lines_for_commit[file_id] = lines
    self.inventory.rename(file_id, new_parent_id, basename)
    # Keep the path -> file-id cache in step with the inventory
    self.cache_mgr._rename_path(old_path, new_path)
    # The renamed entry belongs to the revision being committed
    self.inventory[file_id].revision = self.revision_id
814
def deleteall_handler(self, filecmd):
    """Process a deleteall FileCommand by emptying the inventory."""
    self.debug("deleting all files (and also all directories)")
    # Snapshot the root's children first - removing entries while
    # iterating the live mapping would not be safe.
    # (An inventory.clear() API would make this simpler.)
    for entry in list(self.inventory.root.children.values()):
        self.inventory.remove_recursive_id(entry.file_id)
822
def bzr_file_id_and_new(self, path):
823
"""Get a Bazaar file identifier and new flag for a path.
825
:return: file_id, is_new where
826
is_new = True if the file_id is newly created
829
id = self.cache_mgr.file_ids[path]
832
id = generate_ids.gen_file_id(path)
833
self.cache_mgr.file_ids[path] = id
834
self.debug("Generated new file id %s for '%s'", id, path)
837
def bzr_file_id(self, path):
    """Get a Bazaar file identifier for a path."""
    file_id, _is_new = self.bzr_file_id_and_new(path)
    return file_id
841
def gen_initial_inventory(self):
842
"""Generate an inventory for a parentless revision."""
843
inv = inventory.Inventory(revision_id=self.revision_id)
844
if self.repo.supports_rich_root():
845
# The very first root needs to have the right revision
846
inv.root.revision = self.revision_id
849
def gen_revision_id(self):
850
"""Generate a revision id.
852
Subclasses may override this to produce deterministic ids say.
854
committer = self.command.committer
855
# Perhaps 'who' being the person running the import is ok? If so,
856
# it might be a bit quicker and give slightly better compression?
857
who = "%s <%s>" % (committer[0],committer[1])
858
timestamp = committer[2]
859
return generate_ids.gen_revision_id(who, timestamp)
861
def get_inventory(self, revision_id):
862
"""Get the inventory for a revision id."""
864
inv = self.cache_mgr.inventories[revision_id]
867
self.note("get_inventory cache miss for %s", revision_id)
868
# Not cached so reconstruct from repository
869
inv = self.repo.revision_tree(revision_id).inventory
870
self.cache_mgr.inventories[revision_id] = inv
873
def _get_inventories(self, revision_ids):
874
"""Get the inventories for revision-ids.
876
This is a callback used by the RepositoryLoader to
877
speed up inventory reconstruction.
881
# If an inventory is in the cache, we assume it was
882
# successfully loaded into the repository
883
for revision_id in revision_ids:
885
inv = self.cache_mgr.inventories[revision_id]
886
present.append(revision_id)
889
self.note("get_inventories cache miss for %s", revision_id)
890
# Not cached so reconstruct from repository
891
if self.repo.has_revision(revision_id):
892
rev_tree = self.repo.revision_tree(revision_id)
893
present.append(revision_id)
895
rev_tree = self.repo.revision_tree(None)
896
inv = rev_tree.inventory
897
self.cache_mgr.inventories[revision_id] = inv
898
inventories.append(inv)
899
return present, inventories
901
def _get_lines(self, file_id):
902
"""Get the lines for a file-id."""
903
return self.lines_for_commit[file_id]
905
def _modify_inventory(self, path, kind, is_executable, data):
906
"""Add to or change an item in the inventory."""
907
# Create the new InventoryEntry
908
basename, parent_ie = self._ensure_directory(path)
909
file_id = self.bzr_file_id(path)
910
ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
911
ie.revision = self.revision_id
912
if isinstance(ie, inventory.InventoryFile):
913
ie.executable = is_executable
914
lines = osutils.split_lines(data)
915
ie.text_sha1 = osutils.sha_strings(lines)
916
ie.text_size = sum(map(len, lines))
917
self.lines_for_commit[file_id] = lines
918
elif isinstance(ie, inventory.InventoryLink):
919
ie.symlink_target = data.encode('utf8')
920
# There are no lines stored for a symlink so
921
# make sure the cache used by get_lines knows that
922
self.lines_for_commit[file_id] = []
924
raise errors.BzrError("Cannot import items of kind '%s' yet" %
927
# Record this new inventory entry
928
if file_id in self.inventory:
929
# HACK: no API for this (del+add does more than it needs to)
930
self.inventory._byid[file_id] = ie
931
parent_ie.children[basename] = ie
933
self.inventory.add(ie)
935
def _ensure_directory(self, path):
936
"""Ensure that the containing directory exists for 'path'"""
937
dirname, basename = osutils.split(path)
939
# the root node doesn't get updated
940
return basename, self.inventory.root
942
ie = self.directory_entries[dirname]
944
# We will create this entry, since it doesn't exist
949
# No directory existed, we will just create one, first, make sure
951
dir_basename, parent_ie = self._ensure_directory(dirname)
952
dir_file_id = self.bzr_file_id(dirname)
953
ie = inventory.entry_factory['directory'](dir_file_id,
956
ie.revision = self.revision_id
957
self.directory_entries[dirname] = ie
958
# There are no lines stored for a directory so
959
# make sure the cache used by get_lines knows that
960
self.lines_for_commit[dir_file_id] = []
961
#print "adding dir for %s" % path
962
self.inventory.add(ie)
966
class GenericBranchUpdater(object):
968
def __init__(self, repo, branch, cache_mgr, heads_by_ref, last_ref, tags):
969
"""Create an object responsible for updating branches.
971
:param heads_by_ref: a dictionary where
972
names are git-style references like refs/heads/master;
973
values are one item lists of commits marks.
977
self.cache_mgr = cache_mgr
978
self.heads_by_ref = heads_by_ref
979
self.last_ref = last_ref
983
"""Update the Bazaar branches and tips matching the heads.
985
If the repository is shared, this routine creates branches
986
as required. If it isn't, warnings are produced about the
989
:return: updated, lost_heads where
990
updated = the list of branches updated
991
lost_heads = a list of (bazaar-name,revision) for branches that
992
would have been created had the repository been shared
995
branch_tips, lost_heads = self._get_matching_branches()
996
for br, tip in branch_tips:
997
if self._update_branch(br, tip):
999
return updated, lost_heads
1001
def _get_matching_branches(self):
1002
"""Get the Bazaar branches.
1004
:return: default_tip, branch_tips, lost_heads where
1005
default_tip = the last commit mark for the default branch
1006
branch_tips = a list of (branch,tip) tuples for other branches.
1007
lost_heads = a list of (bazaar-name,revision) for branches that
1008
would have been created had the repository been shared and
1009
everything succeeded
1013
ref_names = self.heads_by_ref.keys()
1014
if self.branch is not None:
1015
trunk = self.select_trunk(ref_names)
1016
default_tip = self.heads_by_ref[trunk][0]
1017
branch_tips.append((self.branch, default_tip))
1018
ref_names.remove(trunk)
1020
# Convert the reference names into Bazaar speak
1021
bzr_names = self._get_bzr_names_from_ref_names(ref_names)
1023
# Policy for locating branches
1024
def dir_under_current(name, ref_name):
1025
# Using the Bazaar name, get a directory under the current one
1027
def dir_sister_branch(name, ref_name):
1028
# Using the Bazaar name, get a sister directory to the branch
1029
return osutils.pathjoin(self.branch.base, "..", name)
1030
if self.branch is not None:
1031
dir_policy = dir_sister_branch
1033
dir_policy = dir_under_current
1035
# Create/track missing branches
1036
shared_repo = self.repo.is_shared()
1037
for name in sorted(bzr_names.keys()):
1038
ref_name = bzr_names[name]
1039
tip = self.heads_by_ref[ref_name][0]
1041
location = dir_policy(name, ref_name)
1043
br = self.make_branch(location)
1044
branch_tips.append((br,tip))
1046
except errors.BzrError, ex:
1047
error("ERROR: failed to create branch %s: %s",
1049
lost_head = self.cache_mgr.revision_ids[tip]
1050
lost_info = (name, lost_head)
1051
lost_heads.append(lost_info)
1052
return branch_tips, lost_heads
1054
def select_trunk(self, ref_names):
1055
"""Given a set of ref names, choose one as the trunk."""
1056
for candidate in ['refs/heads/master']:
1057
if candidate in ref_names:
1059
# Use the last reference in the import stream
1060
return self.last_ref
1062
def make_branch(self, location):
1063
"""Make a branch in the repository if not already there."""
1065
return bzrdir.BzrDir.open(location).open_branch()
1066
except errors.NotBranchError, ex:
1067
return bzrdir.BzrDir.create_branch_convenience(location)
1069
def _get_bzr_names_from_ref_names(self, ref_names):
1070
"""Generate Bazaar branch names from import ref names.
1072
:return: a dictionary with Bazaar names as keys and
1073
the original reference names as values.
1076
for ref_name in sorted(ref_names):
1077
parts = ref_name.split('/')
1078
if parts[0] == 'refs':
1080
full_name = "--".join(parts)
1081
bazaar_name = parts[-1]
1082
if bazaar_name in bazaar_names:
1083
if parts[0] == 'remotes':
1084
bazaar_name += ".remote"
1086
bazaar_name = full_name
1087
bazaar_names[bazaar_name] = ref_name
1090
def _update_branch(self, br, last_mark):
1091
"""Update a branch with last revision and tag information.
1093
:return: whether the branch was changed or not
1095
last_rev_id = self.cache_mgr.revision_ids[last_mark]
1096
revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
1098
existing_revno, existing_last_rev_id = br.last_revision_info()
1100
if revno != existing_revno or last_rev_id != existing_last_rev_id:
1101
br.set_last_revision_info(revno, last_rev_id)
1103
# apply tags known in this branch
1106
for tag,rev in self.tags.items():
1110
br.tags._set_tag_dict(my_tags)
1113
tagno = len(my_tags)
1114
note("\t branch %s now has %d %s and %d %s", br.nick,
1115
revno, helpers.single_plural(revno, "revision", "revisions"),
1116
tagno, helpers.single_plural(tagno, "tag", "tags"))