44
# How many commits before automatically checkpointing
45
_DEFAULT_AUTO_CHECKPOINT = 10000
44
47
def _single_plural(n, single, plural):
45
48
"""Return a single or plural form of a noun based on number."""
55
58
Current features supported:
57
* timestamped progress reporting
58
60
* blobs are cached in memory
59
* commits are processed
61
* file and symlink commits are supported
60
62
* tags are stored in the current branch
63
* checkpoints automatically happen at a configurable frequency
64
over and above the stream requested checkpoints
65
* timestamped progress reporting, both automatic and stream requested
61
66
* LATER: named branch support
62
* checkpoints are ignored
63
67
* some basic statistics are dumped on completion.
65
69
Here are the supported parameters:
67
71
* info - name of a config file holding the analysis generated
68
72
by running the --info processor (this is important for knowing
69
73
what to intelligently cache)
75
* checkpoint - automatically checkpoint every n commits over and
76
above any checkpoints contained in the import stream.
79
* count - only import this many commits then exit. If not set,
80
all commits are imported.
72
known_params = ['info']
83
known_params = ['info', 'checkpoint', 'count']
74
85
def pre_process(self):
75
86
self._start_time = time.time()
87
self._load_info_and_params()
88
self.cache_mgr = GenericCacheManager(self.info, verbose=self.verbose)
89
self.active_branch = self.branch
92
# mapping of tag name to revision_id
95
# Create a write group. This is committed at the end of the import.
96
# Checkpointing closes the current one and starts a new one.
97
self.repo.start_write_group()
99
def _load_info_and_params(self):
77
100
# Load the info file, if any
78
101
info_path = self.params.get('info')
79
102
if info_path is not None:
84
self.cache_mgr = GenericCacheManager(self.info, verbose=self.verbose)
85
self.active_branch = self.branch
87
# mapping of tag name to revision_id
107
# Decide how often to automatically checkpoint
108
self.checkpoint_every = int(self.params.get('checkpoint',
109
_DEFAULT_AUTO_CHECKPOINT))
90
# Prepare progress reporting
111
# Find the maximum number of commits to import (None means all)
112
# and prepare progress reporting. Just in case the info file
113
# has an outdated count of commits, we store the max count
114
# at which we need to terminate separately from the total used
115
# for progress tracking.
117
self.max_commits = int(self.params['count'])
119
self.max_commits = None
91
120
if self.info is not None:
92
121
self.total_commits = int(self.info['Command counts']['commit'])
122
if (self.max_commits is not None and
123
self.total_commits > self.max_commits):
124
self.total_commits = self.max_commits
94
self.total_commits = None
126
self.total_commits = self.max_commits
96
# Create a write group. This is committed at the end of the import.
97
# Checkpointing closes the current one and starts a new one.
98
self.repo.start_write_group()
100
129
def _process(self, command_iter):
101
130
# if anything goes wrong, abort the write group if any
137
166
bc, _single_plural(bc, "branch", "branches"),
138
167
tc, _single_plural(tc, "tag", "tags"))
169
def note(self, msg, *args):
170
"""Output a note but timestamp it."""
171
msg = "%s %s" % (self._time_of_day(), msg)
140
174
def blob_handler(self, cmd):
141
175
"""Process a BlobCommand."""
142
176
if cmd.mark is not None:
163
197
self.cache_mgr.last_revision_ids[self.active_branch] = rev_id
164
198
self._revision_count += 1
165
199
self.report_progress("(:%s)" % cmd.mark)
200
if (self.max_commits is not None and
201
self._revision_count >= self.max_commits):
202
self.note("stopping after reaching requested count of commits")
204
elif self._revision_count % self.checkpoint_every == 0:
205
self.note("%d commits - automatic checkpoint triggered",
206
self._revision_count)
207
self.checkpoint_handler(None)
167
209
def report_progress(self, details=''):
168
210
# TODO: use a progress bar with ETA enabled
176
218
counts = "%d" % (self._revision_count,)
178
note("%s %s commits processed %s%s" % (self._time_of_day(),
179
counts, eta_str, details))
220
self.note("%s commits processed %s%s" % (counts, eta_str, details))
181
222
def progress_handler(self, cmd):
182
223
"""Process a ProgressCommand."""
183
224
# We could use a progress bar here but timestamped messages
184
225
# are more useful for determining when things might complete
185
note("%s progress %s" % (self._time_of_day(), cmd.message))
226
self.note("progress %s" % (cmd.message,))
187
228
def _time_of_day(self):
188
229
"""Time of day as a string."""