bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
|
0.64.1
by Ian Clatworthy
1st cut: gfi parser + --info processing method |
1 |
# Copyright (C) 2008 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
17 |
"""Parser of import data into command objects.
|
|
18 |
||
19 |
In order to reuse existing front-ends, the stream format is a subset of
|
|
20 |
the one used by git-fast-import (as of the 1.5.4 release of git at least).
|
|
21 |
The grammar is:
|
|
22 |
||
23 |
stream ::= cmd*;
|
|
24 |
||
25 |
cmd ::= new_blob
|
|
26 |
| new_commit
|
|
27 |
| new_tag
|
|
28 |
| reset_branch
|
|
29 |
| checkpoint
|
|
30 |
| progress
|
|
31 |
;
|
|
32 |
||
33 |
new_blob ::= 'blob' lf
|
|
34 |
mark?
|
|
35 |
file_content;
|
|
36 |
file_content ::= data;
|
|
37 |
||
38 |
new_commit ::= 'commit' sp ref_str lf
|
|
39 |
mark?
|
|
40 |
('author' sp name '<' email '>' when lf)?
|
|
41 |
'committer' sp name '<' email '>' when lf
|
|
42 |
commit_msg
|
|
43 |
('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
|
|
44 |
('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)*
|
|
45 |
file_change*
|
|
46 |
lf?;
|
|
47 |
commit_msg ::= data;
|
|
48 |
||
49 |
file_change ::= file_clr
|
|
50 |
| file_del
|
|
51 |
| file_rnm
|
|
52 |
| file_cpy
|
|
53 |
| file_obm
|
|
54 |
| file_inm;
|
|
55 |
file_clr ::= 'deleteall' lf;
|
|
56 |
file_del ::= 'D' sp path_str lf;
|
|
57 |
file_rnm ::= 'R' sp path_str sp path_str lf;
|
|
58 |
file_cpy ::= 'C' sp path_str sp path_str lf;
|
|
59 |
file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
|
|
60 |
file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
|
|
61 |
data;
|
|
62 |
||
63 |
new_tag ::= 'tag' sp tag_str lf
|
|
64 |
'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
|
|
65 |
'tagger' sp name '<' email '>' when lf
|
|
66 |
tag_msg;
|
|
67 |
tag_msg ::= data;
|
|
68 |
||
69 |
reset_branch ::= 'reset' sp ref_str lf
|
|
70 |
('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
|
|
71 |
lf?;
|
|
72 |
||
73 |
checkpoint ::= 'checkpoint' lf
|
|
74 |
lf?;
|
|
75 |
||
76 |
progress ::= 'progress' sp not_lf* lf
|
|
77 |
lf?;
|
|
78 |
||
79 |
# note: the first idnum in a stream should be 1 and subsequent
|
|
80 |
# idnums should not have gaps between values as this will cause
|
|
81 |
# the stream parser to reserve space for the gapped values. An
|
|
82 |
# idnum can be updated in the future to a new object by issuing
|
|
83 |
# a new mark directive with the old idnum.
|
|
84 |
#
|
|
85 |
mark ::= 'mark' sp idnum lf;
|
|
86 |
data ::= (delimited_data | exact_data)
|
|
87 |
lf?;
|
|
88 |
||
89 |
# note: delim may be any string but must not contain lf.
|
|
90 |
# data_line may contain any data but must not be exactly
|
|
|
0.88.2
by Samuel Bronson
Implement here-document style input data. |
91 |
# delim. The lf after the final data_line is included in
|
92 |
# the data.
|
|
|
0.64.1
by Ian Clatworthy
1st cut: gfi parser + --info processing method |
93 |
delimited_data ::= 'data' sp '<<' delim lf
|
94 |
(data_line lf)*
|
|
95 |
delim lf;
|
|
96 |
||
97 |
# note: declen indicates the length of binary_data in bytes.
|
|
98 |
# declen does not include the lf preceding the binary data.
|
|
99 |
#
|
|
100 |
exact_data ::= 'data' sp declen lf
|
|
101 |
binary_data;
|
|
102 |
||
103 |
# note: quoted strings are C-style quoting supporting \c for
|
|
104 |
# common escapes of 'c' (e.g. \n, \t, \\, \") or \nnn where nnn
|
|
105 |
# is the signed byte value in octal. Note that the only
|
|
106 |
# characters which must actually be escaped to protect the
|
|
107 |
# stream formatting is: \, " and LF. Otherwise these values
|
|
108 |
# are UTF8.
|
|
109 |
#
|
|
110 |
ref_str ::= ref;
|
|
111 |
sha1exp_str ::= sha1exp;
|
|
112 |
tag_str ::= tag;
|
|
113 |
path_str ::= path | '"' quoted(path) '"' ;
|
|
114 |
mode ::= '100644' | '644'
|
|
115 |
| '100755' | '755'
|
|
116 |
| '120000'
|
|
117 |
;
|
|
118 |
||
119 |
declen ::= # unsigned 32 bit value, ascii base10 notation;
|
|
120 |
bigint ::= # unsigned integer value, ascii base10 notation;
|
|
121 |
binary_data ::= # file content, not interpreted;
|
|
122 |
||
123 |
when ::= raw_when | rfc2822_when;
|
|
124 |
raw_when ::= ts sp tz;
|
|
125 |
rfc2822_when ::= # Valid RFC 2822 date and time;
|
|
126 |
||
127 |
sp ::= # ASCII space character;
|
|
128 |
lf ::= # ASCII newline (LF) character;
|
|
129 |
||
130 |
# note: a colon (':') must precede the numerical value assigned to
|
|
131 |
# an idnum. This is to distinguish it from a ref or tag name as
|
|
132 |
# GIT does not permit ':' in ref or tag strings.
|
|
133 |
#
|
|
134 |
idnum ::= ':' bigint;
|
|
135 |
path ::= # GIT style file path, e.g. "a/b/c";
|
|
136 |
ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
|
|
137 |
tag ::= # GIT tag name, e.g. "FIREFOX_1_5";
|
|
138 |
sha1exp ::= # Any valid GIT SHA1 expression;
|
|
139 |
hexsha1 ::= # SHA1 in hexadecimal format;
|
|
140 |
||
141 |
# note: name and email are UTF8 strings, however name must not
|
|
142 |
# contain '<' or lf and email must not contain any of the
|
|
143 |
# following: '<', '>', lf.
|
|
144 |
#
|
|
145 |
name ::= # valid GIT author/committer name;
|
|
146 |
email ::= # valid GIT author/committer email;
|
|
147 |
ts ::= # time since the epoch in seconds, ascii base10 notation;
|
|
148 |
tz ::= # GIT style timezone;
|
|
149 |
||
150 |
# note: comments may appear anywhere in the input, except
|
|
151 |
# within a data command. Any form of the data command
|
|
152 |
# always escapes the related input from comment processing.
|
|
153 |
#
|
|
154 |
# In case it is not clear, the '#' that starts the comment
|
|
155 |
# must be the first character on that line (an lf must have
|
|
156 |
# preceded it).
|
|
157 |
#
|
|
158 |
comment ::= '#' not_lf* lf;
|
|
159 |
not_lf ::= # Any byte that is not ASCII newline (LF);
|
|
160 |
"""
|
|
161 |
||
162 |
||
163 |
import re |
|
164 |
import sys |
|
165 |
||
166 |
import commands |
|
167 |
import dates |
|
168 |
import errors |
|
169 |
||
170 |
||
171 |
## Stream parsing ##
|
|
172 |
||
173 |
class LineBasedParser(object):
    """A reader over a file-like object that keeps track of line numbers.

    Lines may be pushed back with push_line(); pushed-back lines are
    returned by readline()/next_line() before anything further is read
    from the underlying input. read_bytes() and read_until() bypass the
    push-back buffer and read straight from the input.
    """

    def __init__(self, input):
        """A Parser that keeps track of line numbers.

        :param input: the file-like object to read from
        """
        self.input = input
        self.lineno = 0
        # Lines pushed back onto the input stream (most recent last)
        self._buffer = []

    def abort(self, exception, *args):
        """Raise an exception providing line number information.

        :param exception: the exception class to raise; it is called as
          exception(self.lineno, *args)
        """
        raise exception(self.lineno, *args)

    def readline(self):
        """Get the next line including the newline or '' on EOF.

        Pushed-back lines are returned first. The line counter is
        advanced on every call.
        """
        self.lineno += 1
        if self._buffer:
            return self._buffer.pop()
        return self.input.readline()

    def next_line(self):
        """Get the next line without the newline or None on EOF."""
        line = self.readline()
        if not line:
            return None
        # Only strip a newline that is really there: the last line of a
        # stream may be unterminated and must not lose its final character.
        if line.endswith("\n"):
            return line[:-1]
        return line

    def push_line(self, line):
        """Push line back onto the line buffer.

        :param line: the line with no trailing newline
        """
        self.lineno -= 1
        self._buffer.append(line + "\n")

    def read_bytes(self, count):
        """Read a given number of bytes from the input stream.

        Throws MissingBytes if the bytes are not found.

        Note: This method does not read from the line buffer.

        :return: a string
        """
        result = self.input.read(count)
        found = len(result)
        # Keep the line counter in step with any newlines inside the data
        self.lineno += result.count("\n")
        if found != count:
            self.abort(errors.MissingBytes, count, found)
        return result

    def read_until(self, terminator):
        """Read the input stream until the terminator is found.

        Throws MissingTerminator if the terminator is not found.

        Note: This method does not read from the line buffer.

        :param terminator: the delimiter text, without its trailing newline
        :return: the bytes read up to but excluding the terminator.
        """
        lines = []
        term = terminator + '\n'
        while True:
            line = self.input.readline()
            if not line:
                # EOF: readline() would keep returning '' forever, so stop
                # here instead of looping.
                # NOTE(review): assumes errors.MissingTerminator takes
                # (lineno, terminator) - confirm against the errors module.
                self.abort(errors.MissingTerminator, terminator)
            # Every line consumed here (terminator included) counts
            self.lineno += 1
            if line == term:
                break
            lines.append(line)
        return ''.join(lines)
|
|
0.64.1
by Ian Clatworthy
1st cut: gfi parser + --info processing method |
248 |
|
249 |
||
|
0.64.3
by Ian Clatworthy
tweak parser for better git-fast-export compatibility |
250 |
# Regular expressions used for parsing. (Note: The spec states that the name
# part should be non-empty but git-fast-export doesn't always do that so
# the first group is [^<]*, not [^<]+.) Also git-fast-import code says the
# space before the email is optional.
# _WHO_AND_WHEN_RE captures (name, email, date text); _WHO_RE captures
# just (name, email).
_WHO_AND_WHEN_RE = re.compile(r'([^<]*)<(.*)> (.+)')
_WHO_RE = re.compile(r'([^<]*)<(.*)>')
|
|
0.64.1
by Ian Clatworthy
1st cut: gfi parser + --info processing method |
256 |
|
257 |
||
258 |
class ImportParser(LineBasedParser):
    """Parser that turns a fast-import stream into command objects."""

    def __init__(self, input, verbose=False, output=sys.stdout,
                 user_mapper=None):
        """A Parser of import commands.

        :param input: the file-like object to read from
        :param verbose: display extra information or not
        :param output: the file-like object to write messages to (YAGNI?)
        :param user_mapper: if not None, the UserMapper used to adjust
          user-ids for authors, committers and taggers.
        """
        LineBasedParser.__init__(self, input)
        self.verbose = verbose
        self.output = output
        self.user_mapper = user_mapper
        # We auto-detect the date format when a date is first encountered
        self.date_parser = None

    def warning(self, msg):
        """Write a warning, prefixed with the current line number, to stderr."""
        sys.stderr.write("warning line %d: %s\n" % (self.lineno, msg))

    def iter_commands(self):
        """Iterator returning ImportCommand objects.

        Blank lines and comment lines (starting with '#') are skipped.
        Aborts with InvalidCommand on an unrecognised line.
        """
        while True:
            line = self.next_line()
            if line is None:
                break
            elif len(line) == 0 or line.startswith('#'):
                continue
            # Search for commands in order of likelihood
            elif line.startswith('commit '):
                yield self._parse_commit(line[len('commit '):])
            elif line.startswith('blob'):
                yield self._parse_blob()
            elif line.startswith('progress '):
                yield commands.ProgressCommand(line[len('progress '):])
            elif line.startswith('reset '):
                yield self._parse_reset(line[len('reset '):])
            elif line.startswith('tag '):
                yield self._parse_tag(line[len('tag '):])
            elif line.startswith('checkpoint'):
                yield commands.CheckpointCommand()
            # The prefix tested must be the prefix stripped, otherwise the
            # first character of the feature name could be silently dropped.
            elif line.startswith('feature '):
                yield self._parse_feature(line[len('feature '):])
            else:
                self.abort(errors.InvalidCommand, line)

    def iter_file_commands(self):
        """Iterator returning FileCommand objects.

        If an invalid file command is found, the line is silently
        pushed back and iteration ends.
        """
        while True:
            line = self.next_line()
            if line is None:
                break
            elif len(line) == 0 or line.startswith('#'):
                continue
            # Search for file commands in order of likelihood
            elif line.startswith('M '):
                yield self._parse_file_modify(line[2:])
            elif line.startswith('D '):
                path = self._path(line[2:])
                yield commands.FileDeleteCommand(path)
            elif line.startswith('R '):
                old, new = self._path_pair(line[2:])
                yield commands.FileRenameCommand(old, new)
            elif line.startswith('C '):
                src, dest = self._path_pair(line[2:])
                yield commands.FileCopyCommand(src, dest)
            elif line.startswith('deleteall'):
                yield commands.FileDeleteAllCommand()
            else:
                self.push_line(line)
                break

    def _parse_blob(self):
        """Parse a blob command."""
        # Remember where the command started: the line number is passed
        # on to the command object.
        lineno = self.lineno
        mark = self._get_mark_if_any()
        data = self._get_data('blob')
        return commands.BlobCommand(mark, data, lineno)

    def _parse_commit(self, ref):
        """Parse a commit command.

        :param ref: the text following 'commit ' on the command line
        """
        lineno = self.lineno
        mark = self._get_mark_if_any()
        author = self._get_user_info('commit', 'author', False)
        # Any further author lines are collected separately
        more_authors = []
        while True:
            another_author = self._get_user_info('commit', 'author', False)
            if another_author is None:
                break
            more_authors.append(another_author)
        committer = self._get_user_info('commit', 'committer')
        message = self._decode_or_replace(
            self._get_data('commit', 'message'), 'commit message')
        from_ = self._get_from()
        merges = []
        while True:
            merge = self._get_merge()
            if merge is None:
                break
            # while the spec suggests it's illegal, git-fast-export
            # outputs multiple merges on the one line, e.g.
            # merge :x :y :z
            merges.extend(merge.split(" "))
        properties = {}
        while True:
            name_value = self._get_property()
            if name_value is None:
                break
            name, value = name_value
            properties[name] = value
        # The file commands are not parsed here: the bound iterator method
        # is handed to the command object.
        return commands.CommitCommand(ref, mark, author, committer, message,
            from_, merges, self.iter_file_commands, lineno=lineno,
            more_authors=more_authors, properties=properties)

    def _parse_feature(self, info):
        """Parse a feature command.

        :param info: the text following 'feature ', either 'name' or
          'name=value'
        """
        parts = info.split("=", 1)
        name = parts[0]
        if len(parts) > 1:
            value = self._path(parts[1])
        else:
            value = None
        return commands.FeatureCommand(name, value, lineno=self.lineno)

    def _parse_file_modify(self, info):
        """Parse a filemodify command within a commit.

        :param info: a string in the format "mode dataref path"
          (where dataref might be the hard-coded literal 'inline').
        """
        params = info.split(' ', 2)
        if len(params) != 3:
            # Report a malformed line as a parse error rather than letting
            # an IndexError escape.
            self.abort(errors.BadFormat, 'filemodify', 'params', info)
        path = self._path(params[2])
        is_executable, kind = self._mode(params[0])
        if params[1] == 'inline':
            dataref = None
            data = self._get_data('filemodify')
        else:
            dataref = params[1]
            data = None
        return commands.FileModifyCommand(path, kind, is_executable, dataref,
            data)

    def _parse_reset(self, ref):
        """Parse a reset command."""
        from_ = self._get_from()
        return commands.ResetCommand(ref, from_)

    def _parse_tag(self, name):
        """Parse a tag command."""
        from_ = self._get_from('tag')
        tagger = self._get_user_info('tag', 'tagger', accept_just_who=True)
        # Same non-utf8 fallback as for commit messages
        message = self._decode_or_replace(
            self._get_data('tag', 'message'), 'tag message')
        return commands.TagCommand(name, from_, tagger, message)

    def _get_mark_if_any(self):
        """Parse a mark section.

        :return: the text after 'mark :' or None if the next line is not
          a mark (in which case the line is pushed back) or EOF is reached
        """
        line = self.next_line()
        if line is None:
            return None
        if line.startswith('mark :'):
            return line[len('mark :'):]
        self.push_line(line)
        return None

    def _get_from(self, required_for=None):
        """Parse a from section.

        :param required_for: if set, the name of the command that requires
          a from section; MissingSection is raised when it is absent
        :return: the text after 'from ' or None
        """
        line = self.next_line()
        if line is not None and line.startswith('from '):
            return line[len('from '):]
        if required_for:
            self.abort(errors.MissingSection, required_for, 'from')
        if line is not None:
            self.push_line(line)
        return None

    def _get_merge(self):
        """Parse a merge section.

        :return: the text after 'merge ' or None; a non-merge line is
          pushed back
        """
        line = self.next_line()
        if line is None:
            return None
        if line.startswith('merge '):
            return line[len('merge '):]
        self.push_line(line)
        return None

    def _get_property(self):
        """Parse a property section.

        :return: a (name, value) tuple or None; a non-property line is
          pushed back
        """
        line = self.next_line()
        if line is None:
            return None
        if line.startswith('property '):
            return self._name_value(line[len('property '):])
        self.push_line(line)
        return None

    def _get_user_info(self, cmd, section, required=True,
                       accept_just_who=False):
        """Parse a user section.

        :param cmd: the command being parsed (used in error reports)
        :param section: the section keyword, e.g. 'author'
        :param required: if True, raise MissingSection when absent
        :param accept_just_who: passed through to _who_when
        :return: the result of _who_when or None if the section is absent
          and not required
        """
        line = self.next_line()
        prefix = section + ' '
        if line is not None and line.startswith(prefix):
            return self._who_when(line[len(prefix):], cmd, section,
                accept_just_who=accept_just_who)
        if required:
            self.abort(errors.MissingSection, cmd, section)
        if line is not None:
            self.push_line(line)
        return None

    def _get_data(self, required_for, section='data'):
        """Parse a data section.

        Handles both the delimited ('data <<DELIM') and the byte-count
        ('data N') forms. Raises MissingSection if the next line is not a
        data line (or the stream has ended).

        :return: the data read
        """
        line = self.next_line()
        if line is None or not line.startswith('data '):
            self.abort(errors.MissingSection, required_for, section)
        rest = line[len('data '):]
        if rest.startswith('<<'):
            return self.read_until(rest[2:])
        size = int(rest)
        data = self.read_bytes(size)
        # optional LF after data: swallow it, but give back anything else
        following = self.input.readline()
        if following:
            self.lineno += 1
            if following != "\n":
                # push_line() re-adds the newline, so only strip one that
                # is actually present (the last line may be unterminated)
                if following.endswith("\n"):
                    following = following[:-1]
                self.push_line(following)
        return data

    def _decode_or_replace(self, raw, what):
        """Decode raw as utf8, warning and replacing bad bytes on failure.

        :param raw: the encoded string
        :param what: a description of the item used in the warning
        """
        try:
            return raw.decode('utf_8')
        except UnicodeDecodeError:
            self.warning("%s not in utf8 - replacing unknown characters"
                % (what,))
            return raw.decode('utf_8', 'replace')

    def _who_when(self, s, cmd, section, accept_just_who=False):
        """Parse who and when information from a string.

        :param s: the text following the section keyword
        :param cmd: the command being parsed (used in error reports)
        :param section: the section keyword (used in errors and warnings)
        :param accept_just_who: if True, a missing date is tolerated and
          the 'now' date parser is used instead
        :return: a tuple of (name,email,timestamp,timezone). name may be
            the empty string if only an email address was given.
        """
        match = _WHO_AND_WHEN_RE.search(s)
        if match:
            datestr = match.group(3).lstrip()
            if self.date_parser is None:
                # auto-detect the date format
                if len(datestr.split(' ')) == 2:
                    date_format = 'raw'
                elif datestr == 'now':
                    date_format = 'now'
                else:
                    date_format = 'rfc2822'
                self.date_parser = dates.DATE_PARSERS_BY_NAME[date_format]
            try:
                when = self.date_parser(datestr, self.lineno)
            except ValueError:
                # Diagnostics go to stderr, like warning()
                sys.stderr.write("failed to parse datestr '%s'\n"
                    % (datestr,))
                raise
        else:
            match = _WHO_RE.search(s)
            if accept_just_who and match:
                # HACK around missing time
                # TODO: output a warning here
                when = dates.DATE_PARSERS_BY_NAME['now']('now')
            else:
                self.abort(errors.BadFormat, cmd, section, s)
        name = match.group(1)
        # The space separating the name from '<' is optional; drop it if
        # present, then always decode so the result type is the same
        # either way.
        if name.endswith(" "):
            name = name[:-1]
        # The spec says names are *typically* utf8 encoded
        # but that isn't enforced by git-fast-export (at least)
        name = self._decode_or_replace(name, "%s name" % (section,))
        # While it shouldn't happen, some datasets have email addresses
        # which contain unicode characters. See bug 338186. We sanitize
        # the data at this level just in case.
        email = self._decode_or_replace(match.group(2),
            "%s email" % (section,))
        if self.user_mapper:
            name, email = self.user_mapper.map_name_and_email(name, email)
        return (name, email, when[0], when[1])

    def _name_value(self, s):
        """Parse a (name,value) tuple from 'name value-length value'.

        The value may continue past the end of the line; the remainder is
        then read directly from the input.
        """
        parts = s.split(' ', 2)
        name = parts[0]
        if len(parts) == 1:
            value = None
        else:
            size = int(parts[1])
            # Tolerate 'name size' with nothing after the size
            value = parts[2] if len(parts) > 2 else ''
            still_to_read = size - len(value)
            if still_to_read > 0:
                # The newline stripped by next_line() belongs to the value,
                # so re-insert it; the final byte read is dropped.
                read_bytes = self.read_bytes(still_to_read)
                value += "\n" + read_bytes[:still_to_read - 1]
            value = value.decode('utf8')
        return (name, value)

    def _path(self, s):
        """Parse a path.

        A path starting with a double quote must also end with one and is
        unquoted; any other path is decoded as utf8 when possible.
        """
        if s.startswith('"'):
            # A lone '"' is both the first and last character but is not
            # a properly quoted string.
            if len(s) < 2 or s[-1] != '"':
                self.abort(errors.BadFormat, '?', '?', s)
            else:
                return _unquote_c_string(s[1:-1])
        try:
            return s.decode('utf_8')
        except UnicodeDecodeError:
            # The spec recommends utf8 encoding but that isn't enforced
            return s

    def _path_pair(self, s):
        """Parse two paths separated by a space.

        The first path may contain spaces only if it is quoted. Quotes
        around the second path are optional but must be balanced.

        :return: a list of the two unquoted paths
        """
        if s.startswith('"'):
            parts = s[1:].split('" ', 1)
        else:
            parts = s.split(' ', 1)
        if len(parts) != 2:
            self.abort(errors.BadFormat, '?', '?', s)
        elif parts[1].startswith('"') and parts[1].endswith('"'):
            parts[1] = parts[1][1:-1]
        elif parts[1].startswith('"') or parts[1].endswith('"'):
            self.abort(errors.BadFormat, '?', '?', s)
        return [_unquote_c_string(part) for part in parts]

    def _mode(self, s):
        """Parse a file mode into executable and kind.

        :return (is_executable, kind)
        """
        # Note: Output from git-fast-export slightly different to spec
        if s in ['644', '100644', '0100644']:
            return False, commands.FILE_KIND
        elif s in ['755', '100755', '0100755']:
            return True, commands.FILE_KIND
        elif s in ['040000', '0040000']:
            return False, commands.DIRECTORY_KIND
        elif s in ['120000', '0120000']:
            return False, commands.SYMLINK_KIND
        elif s in ['160000', '0160000']:
            return False, commands.TREE_REFERENCE_KIND
        else:
            self.abort(errors.BadFormat, 'filemodify', 'mode', s)
621 |
||
|
0.64.10
by Ian Clatworthy
1st cut are dequoting paths |
622 |
|
623 |
def _unquote_c_string(s):
    """Replace C-style escape sequences (backslash-n, backslash-quote, etc.)
    with the characters they stand for.

    :param s: the string contents, without the surrounding double quotes
    :return: the result of decoding s with the 'string_escape' codec
    """
    # HACK: Python strings are close enough
    # NOTE(review): 'string_escape' is a Python 2 codec, so this relies on
    # s being a Python 2 byte string.
    return s.decode('string_escape', 'replace')