/brz/remove-bazaar : contents of parser.py at revision 0.64.2

: (revision 0.64.2)

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Parser of import data into command objects.

In order to reuse existing front-ends, the stream format is a subset of
the one used by git-fast-import (as of the 1.5.4 release of git at least).
The grammar is:

  stream ::= cmd*;

  cmd ::= new_blob
        | new_commit
        | new_tag
        | reset_branch
        | checkpoint
        | progress
        ;

  new_blob ::= 'blob' lf
    mark?
    file_content;
  file_content ::= data;

  new_commit ::= 'commit' sp ref_str lf
    mark?
    ('author' sp name '<' email '>' when lf)?
    'committer' sp name '<' email '>' when lf
    commit_msg
    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
    ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)*
    file_change*
    lf?;
  commit_msg ::= data;

  file_change ::= file_clr
    | file_del
    | file_rnm
    | file_cpy
    | file_obm
    | file_inm;
  file_clr ::= 'deleteall' lf;
  file_del ::= 'D' sp path_str lf;
  file_rnm ::= 'R' sp path_str sp path_str lf;
  file_cpy ::= 'C' sp path_str sp path_str lf;
  file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
  file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
    data;

  new_tag ::= 'tag' sp tag_str lf
    'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
    'tagger' sp name '<' email '>' when lf
    tag_msg;
  tag_msg ::= data;

  reset_branch ::= 'reset' sp ref_str lf
    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
    lf?;

  checkpoint ::= 'checkpoint' lf
    lf?;

  progress ::= 'progress' sp not_lf* lf
    lf?;

     # note: the first idnum in a stream should be 1 and subsequent
     # idnums should not have gaps between values as this will cause
     # the stream parser to reserve space for the gapped values.  An
     # idnum can be updated in the future to a new object by issuing
     # a new mark directive with the old idnum.
     #
  mark ::= 'mark' sp idnum lf;
  data ::= (delimited_data | exact_data)
    lf?;

    # note: delim may be any string but must not contain lf.
    # data_line may contain any data but must not be exactly
    # delim.
  delimited_data ::= 'data' sp '<<' delim lf
    (data_line lf)*
    delim lf;

     # note: declen indicates the length of binary_data in bytes.
     # declen does not include the lf preceding the binary data.
     #
  exact_data ::= 'data' sp declen lf
    binary_data;

     # note: quoted strings are C-style quoting supporting \c for
     # common escapes of 'c' (e.g. \n, \t, \\, \") or \nnn where nnn
     # is the signed byte value in octal.  Note that the only
     # characters which must actually be escaped to protect the
     # stream formatting is: \, " and LF.  Otherwise these values
     # are UTF8.
     #
  ref_str     ::= ref;
  sha1exp_str ::= sha1exp;
  tag_str     ::= tag;
  path_str    ::= path    | '"' quoted(path)    '"' ;
  mode        ::= '100644' | '644'
                | '100755' | '755'
                | '120000'
                ;

  declen ::= # unsigned 32 bit value, ascii base10 notation;
  bigint ::= # unsigned integer value, ascii base10 notation;
  binary_data ::= # file content, not interpreted;

  when         ::= raw_when | rfc2822_when;
  raw_when     ::= ts sp tz;
  rfc2822_when ::= # Valid RFC 2822 date and time;

  sp ::= # ASCII space character;
  lf ::= # ASCII newline (LF) character;

     # note: a colon (':') must precede the numerical value assigned to
     # an idnum.  This is to distinguish it from a ref or tag name as
     # GIT does not permit ':' in ref or tag strings.
     #
  idnum   ::= ':' bigint;
  path    ::= # GIT style file path, e.g. "a/b/c";
  ref     ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
  tag     ::= # GIT tag name, e.g. "FIREFOX_1_5";
  sha1exp ::= # Any valid GIT SHA1 expression;
  hexsha1 ::= # SHA1 in hexadecimal format;

     # note: name and email are UTF8 strings, however name must not
     # contain '<' or lf and email must not contain any of the
     # following: '<', '>', lf.
     #
  name  ::= # valid GIT author/committer name;
  email ::= # valid GIT author/committer email;
  ts    ::= # time since the epoch in seconds, ascii base10 notation;
  tz    ::= # GIT style timezone;

     # note: comments may appear anywhere in the input, except
     # within a data command.  Any form of the data command
     # always escapes the related input from comment processing.
     #
     # In case it is not clear, the '#' that starts the comment
     # must be the first character on the line (an lf must have
     # preceded it).
     #
  comment ::= '#' not_lf* lf;
  not_lf  ::= # Any byte that is not ASCII newline (LF);
"""


import re
import sys

import commands
import dates
import errors


## Stream parsing ##

class LineBasedParser(object):
    """A line-oriented reader with line counting and push-back.

    Lines are normally read from ``input`` one at a time.  A line that
    turns out not to belong to the construct being parsed can be handed
    back with push_line() and will be returned by the next readline().
    ``lineno`` is passed as the first argument to every exception raised
    through abort().
    """

    def __init__(self, input):
        """A Parser that keeps track of line numbers.

        :param input: the file-like object to read from
        """
        self.input = input
        self.lineno = 0
        # Lines pushed back onto the input stream
        self._buffer = []

    def abort(self, exception, *args):
        """Raise an exception providing line number information.

        :param exception: the exception class to raise; it is called as
          exception(lineno, *args)
        """
        raise exception(self.lineno, *args)

    def readline(self):
        """Get the next line including the newline or '' on EOF.

        Pushed-back lines are returned (most recently pushed first)
        before anything more is read from the input stream.
        """
        self.lineno += 1
        if self._buffer:
            return self._buffer.pop()
        return self.input.readline()

    def next_line(self):
        """Get the next line without the newline or None on EOF."""
        line = self.readline()
        if not line:
            return None
        # The final line of a stream may lack a trailing newline; only
        # strip a newline that is really there so no data is lost.
        if line.endswith('\n'):
            return line[:-1]
        return line

    def push_line(self, line):
        """Push line back onto the line buffer.

        :param line: the line with no trailing newline
        """
        self.lineno -= 1
        self._buffer.append(line + "\n")

    def read_bytes(self, count):
        """Read a given number of bytes from the input stream.

        Throws MissingBytes if the bytes are not found.

        Note: This method does not read from the line buffer.

        :param count: the exact number of bytes wanted
        :return: the bytes read
        """
        chunks = []
        left = count
        while left > 0:
            chunk = self.input.readline(left)
            if not chunk:
                # EOF before the requested amount was available
                break
            left -= len(chunk)
            chunks.append(chunk)
        result = ''.join(chunks)
        # Keep line numbers reported by abort() in step with the input:
        # every newline consumed here is a line the caller never sees
        # through readline().
        self.lineno += result.count('\n')
        found = len(result)
        if found != count:
            self.abort(errors.MissingBytes, count, found)
        return result

    def read_until(self, terminator):
        """Read the input stream until the terminator is found.

        Throws MissingTerminator if the terminator is not found.

        Note: This method does not read from the line buffer.

        :param terminator: the delimiter line, without its newline
        :return: the bytes read up to but excluding the terminator.
        """
        lines = []
        while True:
            line = self.input.readline()
            if not line:
                # NOTE(review): relies on errors.MissingTerminator, the
                # exception this docstring has always named - confirm it
                # is defined in the errors module.
                self.abort(errors.MissingTerminator, terminator)
            self.lineno += 1
            # Accept the terminator on a final line with no newline too
            if line == terminator + '\n' or line == terminator:
                break
            lines.append(line)
        return ''.join(lines)


# Regular expressions used for parsing

# Splits "name <email> when" into (name, email, when).  The name may
# contain spaces and punctuation (anything except '<') and may be empty,
# the email may be empty, and the single space separating the name from
# '<' is not included in the captured name.
_WHO_AND_WHEN_RE = re.compile(r'^([^<]*?) ?<([^<>]*)> (.+)$')


class ImportParser(LineBasedParser):
    """Parser that turns a fast-import stream into command objects.

    Top-level commands are produced by iter_commands().  The file changes
    belonging to a commit are produced by iter_file_commands(), which
    reads from the same underlying stream.
    """

    # Matches the raw date form "<seconds since epoch> <+|-hhmm>"
    _RAW_DATE_RE = re.compile(r'\d+ [+-]\d{4}$')

    def __init__(self, input, verbose=False, output=sys.stdout):
        """A Parser of import commands.

        :param input: the file-like object to read from
        :param verbose: display extra information or not
        :param output: the file-like object to write messages to (YAGNI?)
        """
        LineBasedParser.__init__(self, input)
        self.verbose = verbose
        self.output = output
        # We auto-detect the date format when a date is first encountered
        self.date_parser = None

    def iter_commands(self):
        """Iterator returning ImportCommand objects.

        Blank lines and comment lines are skipped.  Aborts with
        InvalidCommand on a line that starts no known command.
        """
        while True:
            line = self.next_line()
            if line is None:
                break
            elif not line or line.startswith('#'):
                continue
            # Search for commands in order of likelihood
            elif line.startswith('commit '):
                yield self._parse_commit(line[len('commit '):])
            elif line.startswith('blob'):
                yield self._parse_blob()
            elif line.startswith('progress '):
                yield commands.ProgressCommand(line[len('progress '):])
            elif line.startswith('reset '):
                yield self._parse_reset(line[len('reset '):])
            elif line.startswith('tag '):
                yield self._parse_tag(line[len('tag '):])
            elif line.startswith('checkpoint'):
                yield commands.CheckpointCommand()
            else:
                self.abort(errors.InvalidCommand, line)

    def iter_file_commands(self):
        """Iterator returning FileCommand objects.

        If an invalid file command is found, the line is silently
        pushed back and iteration ends.
        """
        while True:
            line = self.next_line()
            if line is None:
                break
            elif not line or line.startswith('#'):
                continue
            # Search for file commands in order of likelihood
            elif line.startswith('M '):
                yield self._parse_file_modify(line[2:])
            elif line.startswith('D '):
                path = self._path(line[2:])
                yield commands.FileDeleteCommand(path)
            elif line.startswith('R '):
                old, new = self._path_pair(line[2:], 'filerename')
                yield commands.FileRenameCommand(old, new)
            elif line.startswith('C '):
                src, dest = self._path_pair(line[2:], 'filecopy')
                # A 'C' line is a copy, not a rename.
                # NOTE(review): assumes commands.FileCopyCommand exists
                # alongside FileRenameCommand - confirm in commands.py.
                yield commands.FileCopyCommand(src, dest)
            elif line.startswith('deleteall'):
                yield commands.FileDeleteAllCommand()
            else:
                self.push_line(line)
                break

    def _parse_blob(self):
        """Parse a blob command: an optional mark followed by data."""
        mark = self._get_mark_if_any()
        data = self._get_data('blob')
        return commands.BlobCommand(mark, data)

    def _parse_commit(self, ref):
        """Parse a commit command.

        :param ref: the text following 'commit ' on the command line
        :return: a CommitCommand; it is given the iter_file_commands
          method itself, not its result, so no file commands are read
          from the stream here
        """
        mark = self._get_mark_if_any()
        author = self._get_user_info('commit', 'author', False)
        committer = self._get_user_info('commit', 'committer')
        message = self._get_data('commit', 'message')
        parents = []
        from_ = self._get_from()
        if from_ is not None:
            parents.append(from_)
            # NOTE(review): merge lines are only consumed when a from
            # line is present; a commit with merges but no from leaves
            # them unread in the stream.
            merge = self._get_merge()
            while merge is not None:
                parents.append(merge)
                merge = self._get_merge()
        return commands.CommitCommand(ref, mark, author, committer, message,
            parents, self.iter_file_commands)

    def _parse_file_modify(self, info):
        """Parse a filemodify command within a commit.

        Aborts with BadFormat if info does not have all three fields.

        :param info: a string in the format "mode dataref path"
          (where dataref might be the hard-coded literal 'inline').
        """
        params = info.split(' ', 2)
        if len(params) != 3:
            # Report a parse error with a line number rather than
            # letting an IndexError escape.
            self.abort(errors.BadFormat, 'filemodify', 'info', info)
        mode, dataref, raw_path = params
        path = self._path(raw_path)
        is_executable, is_symlink = self._mode(mode)
        if is_symlink:
            kind = commands.SYMLINK_KIND
        else:
            kind = commands.FILE_KIND
        if dataref == 'inline':
            # The content follows immediately as a data section
            dataref = None
            data = self._get_data('filemodify')
        else:
            data = None
        return commands.FileModifyCommand(path, kind, is_executable, dataref,
            data)

    def _parse_reset(self, ref):
        """Parse a reset command; the from section is optional."""
        from_ = self._get_from()
        return commands.ResetCommand(ref, from_)

    def _parse_tag(self, name):
        """Parse a tag command: from, tagger and message are all required."""
        from_ = self._get_from('tag')
        tagger = self._get_user_info('tag', 'tagger')
        message = self._get_data('tag', 'message')
        return commands.TagCommand(name, from_, tagger, message)

    def _get_mark_if_any(self):
        """Parse a mark section.

        :return: the text after 'mark :', or None if the next line is
          not a mark (the line is then pushed back) or the stream ended
        """
        line = self.next_line()
        if line is None:
            # EOF: nothing to push back
            return None
        if line.startswith('mark :'):
            return line[len('mark :'):]
        self.push_line(line)
        return None

    def _get_from(self, required_for=None):
        """Parse a from section.

        :param required_for: if set, the name of the command that
          requires a from section; MissingSection is raised if absent
        :return: the text after 'from ', or None if the section is
          absent and not required
        """
        line = self.next_line()
        if line is not None and line.startswith('from '):
            return line[len('from '):]
        if required_for:
            self.abort(errors.MissingSection, required_for, 'from')
        if line is not None:
            self.push_line(line)
        return None

    def _get_merge(self):
        """Parse a merge section.

        :return: the text after 'merge ', or None if the next line is
          not a merge (the line is then pushed back) or the stream ended
        """
        line = self.next_line()
        if line is None:
            return None
        if line.startswith('merge '):
            return line[len('merge '):]
        self.push_line(line)
        return None

    def _get_user_info(self, cmd, section, required=True):
        """Parse a user section.

        :param cmd: the name of the enclosing command, for error messages
        :param section: the section keyword, e.g. 'author'
        :param required: whether MissingSection is raised when absent
        :return: the result of _who_when(), or None if the section is
          absent and not required
        """
        prefix = section + ' '
        line = self.next_line()
        if line is not None and line.startswith(prefix):
            return self._who_when(line[len(prefix):], cmd, section)
        if required:
            self.abort(errors.MissingSection, cmd, section)
        if line is not None:
            self.push_line(line)
        return None

    def _get_data(self, required_for, section='data'):
        """Parse a data section.

        Handles both 'data <<delim' (read up to the delimiter line) and
        'data <count>' (read exactly count bytes).  Aborts with
        MissingSection if there is no data line and with BadFormat if
        the count is not a non-negative integer.

        :param required_for: the name of the enclosing command
        :param section: the name to report the section as in errors
        """
        line = self.next_line()
        if line is None or not line.startswith('data '):
            self.abort(errors.MissingSection, required_for, section)
        rest = line[len('data '):]
        if rest.startswith('<<'):
            return self.read_until(rest[2:])
        try:
            size = int(rest)
        except ValueError:
            size = -1
        if size < 0:
            self.abort(errors.BadFormat, required_for, section, line)
        return self.read_bytes(size)

    def _who_when(self, s, cmd, section):
        """Parse who and when information from a string.

        The date format is detected from the first date encountered and
        the parser chosen then is reused for every later date.

        :return: a tuple of (who,email,when) where who and email are
          strings and when is whatever the selected date parser returns
          (NOTE(review): presumably a datetime object - confirm against
          the dates module)
        """
        match = _WHO_AND_WHEN_RE.search(s)
        if not match:
            self.abort(errors.BadFormat, cmd, section, s)
        datestr = match.group(3)
        if self.date_parser is None:
            # auto-detect the date format; match on shape rather than
            # length so timestamps of any digit count count as raw
            if self._RAW_DATE_RE.match(datestr):
                date_format = 'raw'
            elif datestr == 'now':
                date_format = 'now'
            else:
                date_format = 'rfc2822'
            self.date_parser = dates.DATE_PARSERS_BY_NAME[date_format]
        when = self.date_parser(datestr)
        return (match.group(1), match.group(2), when)

    def _path(self, s):
        """Parse a path."""
        # TODO: handle quoted paths
        return s

    def _path_pair(self, s, cmd='filerename'):
        """Parse two paths separated by a space.

        Aborts with BadFormat if there is no second path.

        :param s: the text holding both paths
        :param cmd: the command name to report in errors
        :return: a tuple of (first path, second path)
        """
        # TODO: handle quoted paths
        parts = s.split(' ', 1)
        if len(parts) != 2:
            self.abort(errors.BadFormat, cmd, 'paths', s)
        return tuple(parts)

    def _mode(self, s):
        """Parse a file mode into executable and symlink flags.

        Aborts with BadFormat on an unrecognised mode.

        :return (is_executable, is_symlink)
        """
        # Note: Output from git-fast-export slightly different to spec
        if s in ('644', '100644', '0100644'):
            return False, False
        elif s in ('755', '100755', '0100755'):
            return True, False
        elif s == '120000':
            return False, True
        else:
            self.abort(errors.BadFormat, 'filemodify', 'mode', s)


0.64.1 by Ian Clatworthy 1st cut: gfi parser + --info processing method	1	# Copyright (C) 2008 Canonical Ltd
	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
	15	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	16
	17	"""Parser of import data into command objects.
	18
	19	In order to reuse existing front-ends, the stream format is a subset of
	20	the one used by git-fast-import (as of the 1.5.4 release of git at least).
	21	The grammar is:
	22
	23	stream ::= cmd*;
	24
	25	cmd ::= new_blob
	26	\| new_commit
	27	\| new_tag
	28	\| reset_branch
	29	\| checkpoint
	30	\| progress
	31	;
	32
	33	new_blob ::= 'blob' lf
	34	mark?
	35	file_content;
	36	file_content ::= data;
	37
	38	new_commit ::= 'commit' sp ref_str lf
	39	mark?
	40	('author' sp name '<' email '>' when lf)?
	41	'committer' sp name '<' email '>' when lf
	42	commit_msg
	43	('from' sp (ref_str \| hexsha1 \| sha1exp_str \| idnum) lf)?
	44	('merge' sp (ref_str \| hexsha1 \| sha1exp_str \| idnum) lf)*
	45	file_change*
	46	lf?;
	47	commit_msg ::= data;
	48
	49	file_change ::= file_clr
	50	\| file_del
	51	\| file_rnm
	52	\| file_cpy
	53	\| file_obm
	54	\| file_inm;
	55	file_clr ::= 'deleteall' lf;
	56	file_del ::= 'D' sp path_str lf;
	57	file_rnm ::= 'R' sp path_str sp path_str lf;
	58	file_cpy ::= 'C' sp path_str sp path_str lf;
	59	file_obm ::= 'M' sp mode sp (hexsha1 \| idnum) sp path_str lf;
	60	file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
	61	data;
	62
	63	new_tag ::= 'tag' sp tag_str lf
	64	'from' sp (ref_str \| hexsha1 \| sha1exp_str \| idnum) lf
65	'tagger' sp name '<' email '>' when lf
66	tag_msg;
67	tag_msg ::= data;
68
69	reset_branch ::= 'reset' sp ref_str lf
70	('from' sp (ref_str \| hexsha1 \| sha1exp_str \| idnum) lf)?
71	lf?;
72
73	checkpoint ::= 'checkpoint' lf
74	lf?;
75
76	progress ::= 'progress' sp not_lf* lf
77	lf?;
78
79	# note: the first idnum in a stream should be 1 and subsequent
80	# idnums should not have gaps between values as this will cause
81	# the stream parser to reserve space for the gapped values. An
82	# idnum can be updated in the future to a new object by issuing
83	# a new mark directive with the old idnum.
84	#
85	mark ::= 'mark' sp idnum lf;
86	data ::= (delimited_data \| exact_data)
87	lf?;
88
89	# note: delim may be any string but must not contain lf.
90	# data_line may contain any data but must not be exactly
91	# delim.
92	delimited_data ::= 'data' sp '<<' delim lf
93	(data_line lf)*
94	delim lf;
95
96	# note: declen indicates the length of binary_data in bytes.
97	# declen does not include the lf preceeding the binary data.
98	#
99	exact_data ::= 'data' sp declen lf
100	binary_data;
101
102	# note: quoted strings are C-style quoting supporting \c for
103	# common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn
104	# is the signed byte value in octal. Note that the only
105	# characters which must actually be escaped to protect the
106	# stream formatting is: \, " and LF. Otherwise these values
107	# are UTF8.
108	#
109	ref_str ::= ref;
110	sha1exp_str ::= sha1exp;
111	tag_str ::= tag;
112	path_str ::= path \| '"' quoted(path) '"' ;
113	mode ::= '100644' \| '644'
114	\| '100755' \| '755'
115	\| '120000'
116	;
117
118	declen ::= # unsigned 32 bit value, ascii base10 notation;
119	bigint ::= # unsigned integer value, ascii base10 notation;
120	binary_data ::= # file content, not interpreted;
121
122	when ::= raw_when \| rfc2822_when;
123	raw_when ::= ts sp tz;
124	rfc2822_when ::= # Valid RFC 2822 date and time;
125
126	sp ::= # ASCII space character;
127	lf ::= # ASCII newline (LF) character;
128
129	# note: a colon (':') must precede the numerical value assigned to
130	# an idnum. This is to distinguish it from a ref or tag name as
131	# GIT does not permit ':' in ref or tag strings.
132	#
133	idnum ::= ':' bigint;
134	path ::= # GIT style file path, e.g. "a/b/c";
135	ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
136	tag ::= # GIT tag name, e.g. "FIREFOX_1_5";
137	sha1exp ::= # Any valid GIT SHA1 expression;
138	hexsha1 ::= # SHA1 in hexadecimal format;
139
140	# note: name and email are UTF8 strings, however name must not
141	# contain '<' or lf and email must not contain any of the
142	# following: '<', '>', lf.
143	#
144	name ::= # valid GIT author/committer name;
145	email ::= # valid GIT author/committer email;
146	ts ::= # time since the epoch in seconds, ascii base10 notation;
147	tz ::= # GIT style timezone;
148
149	# note: comments may appear anywhere in the input, except
150	# within a data command. Any form of the data command
151	# always escapes the related input from comment processing.
152	#
153	# In case it is not clear, the '#' that starts the comment
154	# must be the first character on that the line (an lf have
155	# preceeded it).
156	#
157	comment ::= '#' not_lf* lf;
158	not_lf ::= # Any byte that is not ASCII newline (LF);
159	"""
160
161
162	import re
163	import sys
164
165	import commands
166	import dates
167	import errors
168
169
170	## Stream parsing ##
171
172	class LineBasedParser(object):
173
174	def __init__(self, input):
175	"""A Parser that keeps track of line numbers.
176
177	:param input: the file-like object to read from
178	"""
179	self.input = input
180	self.lineno = 0
181	# Lines pushed back onto the input stream
182	self._buffer = []
183
184	def abort(self, exception, *args):
185	"""Raise an exception providing line number information."""
186	raise exception(self.lineno, *args)
187
188	def readline(self):
189	"""Get the next line including the newline or '' on EOF."""
190	self.lineno += 1
191	if self._buffer:
192	return self._buffer.pop()
193	else:
194	return self.input.readline()
195
196	def next_line(self):
197	"""Get the next line without the newline or None on EOF."""
198	line = self.readline()
199	if line:
200	return line[:-1]
201	else:
202	return None
203
204	def push_line(self, line):
205	"""Push line back onto the line buffer.
206
207	:param line: the line with no trailing newline
208	"""
209	self.lineno -= 1
210	self._buffer.append(line + "\n")
211
212	def read_bytes(self, count):
213	"""Read a given number of bytes from the input stream.
214
215	Throws MissingBytes if the bytes are not found.
216
217	Note: This method does not read from the line buffer.
218	"""
219	lines = []
220	left = count
221	while left > 0:
222	line = self.input.readline(left)
223	if line:
224	left -= len(line)
225	lines.append(line)
226	else:
227	left = 0
228	result = ''.join(lines)
229	found = len(result)
230	if found != count:
231	self.abort(errors.MissingBytes, count, found)
232	return result
233
234	def read_until(self, terminator):
235	"""Read the input stream until the terminator is found.
236
237	Throws MissingTerminator if the terminator is not found.
238
239	Note: This method does not read from the line buffer.
240
241	:return: the bytes read up to but excluding the terminator.
242	"""
243	raise NotImplementedError(self.read_until)
244
245
246	# Regular expressions used for parsing
247	_WHO_AND_WHEN_RE = re.compile(r'(\w+) <(.+)> (.+)')
248
249
250	class ImportParser(LineBasedParser):
251
252	def __init__(self, input, verbose=False, output=sys.stdout):
253	"""A Parser of import commands.
254
255	:param input: the file-like object to read from
256	:param verbose: display extra information of not
257	:param output: the file-like object to write messages to (YAGNI?)
258	"""
259	LineBasedParser.__init__(self, input)
260	self.verbose = verbose
261	self.output = output
262	# We auto-detect the date format when a date is first encountered
263	self.date_parser = None
264
265	def iter_commands(self):
266	"""Iterator returning ImportCommand objects."""
267	while True:
268	line = self.next_line()
269	if line is None:
270	break
271	elif len(line) == 0 or line.startswith('#'):
272	continue
273	# Search for commands in order of likelihood
274	elif line.startswith('commit '):
275	yield self._parse_commit(line[len('commit '):])
276	elif line.startswith('blob'):
277	yield self._parse_blob()
278	elif line.startswith('progress '):
279	yield commands.ProgressCommand(line[len('progress '):])
280	elif line.startswith('reset '):
281	yield self._parse_reset(line[len('reset '):])
282	elif line.startswith('tag '):
283	yield self._parse_tag(line[len('tag '):])
284	elif line.startswith('checkpoint'):
285	yield commands.CheckpointCommand()
286	else:
287	self.abort(errors.InvalidCommand, line)
288
289	def iter_file_commands(self):
290	"""Iterator returning FileCommand objects.
291
292	If an invalid file command is found, the line is silently
293	pushed back and iteration ends.
294	"""
295	while True:
296	line = self.next_line()
297	if line is None:
298	break
299	elif len(line) == 0 or line.startswith('#'):
300	continue
301	# Search for file commands in order of likelihood
302	elif line.startswith('M '):
303	yield self._parse_file_modify(line[2:])
304	elif line.startswith('D '):
305	path = self._path(line[2:])
306	yield commands.FileDeleteCommand(path)
307	elif line.startswith('R '):
308	old, new = self._path_pair(line[2:])
309	yield commands.FileRenameCommand(old, new)
310	elif line.startswith('C '):
311	src, dest = self._path_pair(line[2:])
312	yield commands.FileRenameCommand(src, dest)
313	elif line.startswith('deleteall'):
314	yield commands.FileDeleteAllCommand()
315	else:
316	self.push_line(line)
317	break
318
319	def _parse_blob(self):
320	"""Parse a blob command."""
321	mark = self._get_mark_if_any()
322	data = self._get_data('blob')
323	return commands.BlobCommand(mark, data)
324
325	def _parse_commit(self, ref):
326	"""Parse a commit command."""
327	mark = self._get_mark_if_any()
328	author = self._get_user_info('commit', 'author', False)
329	committer = self._get_user_info('commit', 'committer')
330	message = self._get_data('commit', 'message')
331	from_ = self._get_from()
332	if from_ is not None:
333	parents = [from_]
334	while True:
335	merge = self._get_merge()
336	if merge is not None:
337	parents.append(merge)
338	else:
339	break
340	else:
341	parents = []
342	return commands.CommitCommand(ref, mark, author, committer, message,
343	parents, self.iter_file_commands)
344
345	def _parse_file_modify(self, info):
346	"""Parse a filemodify command within a commit.
347
348	:param info: a string in the format "mode dataref path"
349	(where dataref might be the hard-coded literal 'inline').
350	"""
351	params = info.split(' ', 2)
352	path = self._path(params[2])
353	is_executable, is_symlink = self._mode(params[0])
354	if is_symlink:
355	kind = commands.SYMLINK_KIND
356	else:
357	kind = commands.FILE_KIND
358	if params[1] == 'inline':
359	dataref = None
360	data = self._get_data('filemodify')
361	else:
362	dataref = params[1]
363	data = None
364	return commands.FileModifyCommand(path, kind, is_executable, dataref,
365	data)
366
367	def _parse_reset(self, ref):
368	"""Parse a reset command."""
369	from_ = self._get_from()
370	return commands.ResetCommand(ref, from_)
371
372	def _parse_tag(self, name):
373	"""Parse a tag command."""
374	from_ = self._get_from('tag')
375	tagger = self._get_user_info('tag', 'tagger')
376	message = self._get_data('tag', 'message')
377	return commands.TagCommand(name, from_, tagger, message)
378
379	def _get_mark_if_any(self):
380	"""Parse a mark section."""
381	line = self.next_line()
382	if line.startswith('mark :'):
383	return line[len('mark :'):]
384	else:
385	self.push_line(line)
386	return None
387
388	def _get_from(self, required_for=None):
389	"""Parse a from section."""
390	line = self.next_line()
391	if line.startswith('from '):
392	return line[len('from '):]
393	elif required_for:
394	self.abort(errors.MissingSection, required_for, 'from')
395	else:
396	self.push_line(line)
397	return None
398
399	def _get_merge(self):
400	"""Parse a merge section."""
401	line = self.next_line()
402	if line.startswith('merge '):
403	return line[len('merge '):]
404	else:
405	self.push_line(line)
406	return None
407
408	def _get_user_info(self, cmd, section, required=True):
409	"""Parse a user section."""
410	line = self.next_line()
411	if line.startswith(section + ' '):
412	return self._who_when(line[len(section + ' '):], cmd, section)
413	elif required:
414	self.abort(errors.MissingSection, cmd, section)
415	else:
416	self.push_line(line)
417	return None
418
419	def _get_data(self, required_for, section='data'):
420	"""Parse a data section."""
421	line = self.next_line()
422	if line.startswith('data '):
423	rest = line[len('data '):]
424	if rest.startswith('<<'):
425	return self.read_until(rest[2:])
426	else:
427	size = int(rest)
428	return self.read_bytes(size)
429	else:
430	self.abort(errors.MissingSection, required_for, section)
431
432	def _who_when(self, s, cmd, section):
433	"""Parse who and when information from a string.
434
435	:return: a tuple of (who,email,when) where who and
436	email are strings and when is a datetime object
437	"""
438	match = _WHO_AND_WHEN_RE.search(s)
439	if match:
440	datestr = match.group(3)
441	if self.date_parser is None:
442	# auto-detect the date format
443	if len(datestr) == 16:
444	format = 'raw'
445	elif datestr == 'now':
446	format = 'now'
447	else:
448	format = 'rfc2822'
449	self.date_parser = dates.DATE_PARSERS_BY_NAME[format]
450	when = self.date_parser(datestr)
451	return (match.group(1), match.group(2), when)
452	else:
453	self.abort(errors.BadFormat, cmd, section, s)
454
455	def _path(self, s):
456	"""Parse a path."""
457	# TODO: handle quoted paths
458	return s
459
460	def _path_pair(self, s):
461	"""Parse two paths separated by a space."""
462	# TODO: handle quoted paths
463	return tuple(s.split(' ', 1))
464
465	def _mode(self, s):
466	"""Parse a file mode into executable and symlink flags.
467
468	:return (is_executable, is_symlink)
469	"""
470	# Note: Output from git-fast-export slightly different to spec
471	if s in ['644', '100644', '0100644']:
472	return False, False
473	elif s in ['755', '100755', '0100755']:
474	return True, False
475	elif s == '120000':
476	return False, True
477	else:
478	self.abort(errors.BadFormat, 'filemodify', 'mode', s)
479