/brz/remove-bazaar : contents of parser.py at revision 0.64.7

: (revision 0.64.7)

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Parser of import data into command objects.

In order to reuse existing front-ends, the stream format is a subset of
the one used by git-fast-import (as of the 1.5.4 release of git at least).
The grammar is:

  stream ::= cmd*;

  cmd ::= new_blob
        | new_commit
        | new_tag
        | reset_branch
        | checkpoint
        | progress
        ;

  new_blob ::= 'blob' lf
    mark?
    file_content;
  file_content ::= data;

  new_commit ::= 'commit' sp ref_str lf
    mark?
    ('author' sp name '<' email '>' when lf)?
    'committer' sp name '<' email '>' when lf
    commit_msg
    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
    ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)*
    file_change*
    lf?;
  commit_msg ::= data;

  file_change ::= file_clr
    | file_del
    | file_rnm
    | file_cpy
    | file_obm
    | file_inm;
  file_clr ::= 'deleteall' lf;
  file_del ::= 'D' sp path_str lf;
  file_rnm ::= 'R' sp path_str sp path_str lf;
  file_cpy ::= 'C' sp path_str sp path_str lf;
  file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
  file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
    data;

  new_tag ::= 'tag' sp tag_str lf
    'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
    'tagger' sp name '<' email '>' when lf
    tag_msg;
  tag_msg ::= data;

  reset_branch ::= 'reset' sp ref_str lf
    ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
    lf?;

  checkpoint ::= 'checkpoint' lf
    lf?;

  progress ::= 'progress' sp not_lf* lf
    lf?;

     # note: the first idnum in a stream should be 1 and subsequent
     # idnums should not have gaps between values as this will cause
     # the stream parser to reserve space for the gapped values.  An
     # idnum can be updated in the future to a new object by issuing
     # a new mark directive with the old idnum.
     #
  mark ::= 'mark' sp idnum lf;
  data ::= (delimited_data | exact_data)
    lf?;

    # note: delim may be any string but must not contain lf.
    # data_line may contain any data but must not be exactly
    # delim.
  delimited_data ::= 'data' sp '<<' delim lf
    (data_line lf)*
    delim lf;

     # note: declen indicates the length of binary_data in bytes.
     # declen does not include the lf preceding the binary data.
     #
  exact_data ::= 'data' sp declen lf
    binary_data;

     # note: quoted strings are C-style quoting supporting \c for
     # common escapes of 'c' (e.g. \n, \t, \\, \") or \nnn where nnn
     # is the signed byte value in octal.  Note that the only
     # characters which must actually be escaped to protect the
     # stream formatting are: \, " and LF.  Otherwise these values
     # are UTF8.
     #
  ref_str     ::= ref;
  sha1exp_str ::= sha1exp;
  tag_str     ::= tag;
  path_str    ::= path    | '"' quoted(path)    '"' ;
  mode        ::= '100644' | '644'
                | '100755' | '755'
                | '120000'
                ;

  declen ::= # unsigned 32 bit value, ascii base10 notation;
  bigint ::= # unsigned integer value, ascii base10 notation;
  binary_data ::= # file content, not interpreted;

  when         ::= raw_when | rfc2822_when;
  raw_when     ::= ts sp tz;
  rfc2822_when ::= # Valid RFC 2822 date and time;

  sp ::= # ASCII space character;
  lf ::= # ASCII newline (LF) character;

     # note: a colon (':') must precede the numerical value assigned to
     # an idnum.  This is to distinguish it from a ref or tag name as
     # GIT does not permit ':' in ref or tag strings.
     #
  idnum   ::= ':' bigint;
  path    ::= # GIT style file path, e.g. "a/b/c";
  ref     ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
  tag     ::= # GIT tag name, e.g. "FIREFOX_1_5";
  sha1exp ::= # Any valid GIT SHA1 expression;
  hexsha1 ::= # SHA1 in hexadecimal format;

     # note: name and email are UTF8 strings, however name must not
     # contain '<' or lf and email must not contain any of the
     # following: '<', '>', lf.
     #
  name  ::= # valid GIT author/committer name;
  email ::= # valid GIT author/committer email;
  ts    ::= # time since the epoch in seconds, ascii base10 notation;
  tz    ::= # GIT style timezone;

     # note: comments may appear anywhere in the input, except
     # within a data command.  Any form of the data command
     # always escapes the related input from comment processing.
     #
     # In case it is not clear, the '#' that starts the comment
     # must be the first character on the line (an lf must have
     # preceded it).
     #
  comment ::= '#' not_lf* lf;
  not_lf  ::= # Any byte that is not ASCII newline (LF);
"""


import re
import sys

import commands
import dates
import errors


## Stream parsing ##

class LineBasedParser(object):
    """Line-oriented reader with push-back and line-number tracking.

    Lines are normally obtained through ``readline``/``next_line``, which
    consult the push-back buffer before the underlying stream.  Raw data
    sections are read with ``read_bytes``/``read_until``, which go straight
    to the underlying stream and bypass the push-back buffer.
    """

    def __init__(self, input):
        """A Parser that keeps track of line numbers.

        :param input: the file-like object to read from
        """
        self.input = input
        # Number of the line most recently handed out (0 before any read)
        self.lineno = 0
        # Lines pushed back onto the input stream (used as a stack)
        self._buffer = []

    def abort(self, exception, *args):
        """Raise an exception providing line number information.

        :param exception: the exception class to raise; it is called with
          the current line number followed by ``args``
        """
        raise exception(self.lineno, *args)

    def readline(self):
        """Get the next line including the newline or '' on EOF.

        Pushed-back lines are returned (most recent first) before any new
        line is read from the underlying stream.
        """
        self.lineno += 1
        if self._buffer:
            return self._buffer.pop()
        return self.input.readline()

    def next_line(self):
        """Get the next line without the newline or None on EOF."""
        line = self.readline()
        if not line:
            return None
        # Only strip a newline that is really there: the final line of a
        # stream may be unterminated and must not lose its last character.
        if line.endswith("\n"):
            return line[:-1]
        return line

    def push_line(self, line):
        """Push line back onto the line buffer.

        :param line: the line with no trailing newline
        """
        self.lineno -= 1
        self._buffer.append(line + "\n")

    def read_bytes(self, count):
        """Read a given number of bytes from the input stream.

        Throws MissingBytes if the bytes are not found.

        Note: This method does not read from the line buffer.

        :param count: the number of bytes wanted
        :return: a string
        """
        chunks = []
        found = 0
        while found < count:
            # readline(limit) returns at most ``limit`` characters, so the
            # loop never reads past the end of the data section.
            chunk = self.input.readline(count - found)
            if not chunk:
                break
            found += len(chunk)
            chunks.append(chunk)
        data = ''.join(chunks)
        # Keep line numbers in step with the newlines swallowed here so
        # that later errors point at the right place in the stream.
        self.lineno += data.count("\n")
        if found != count:
            self.abort(errors.MissingBytes, count, found)
        return data

    def read_until(self, terminator):
        """Read the input stream until the terminator is found.

        The terminator must appear on a line of its own.

        Throws MissingTerminator if the terminator is not found.

        Note: This method does not read from the line buffer.

        :param terminator: the delimiter line, without its newline
        :return: the bytes read up to but excluding the terminator.
        """
        terminated = terminator + "\n"
        lines = []
        while True:
            line = self.input.readline()
            if not line:
                # EOF reached before the terminator line was seen
                self.abort(errors.MissingTerminator, terminator)
            self.lineno += 1
            # An unterminated final line equal to the delimiter also ends
            # the section.
            if line == terminated or line == terminator:
                break
            lines.append(line)
        return ''.join(lines)


# Regular expression used for parsing "name <email> when" strings.
# Group 1 is the name, group 2 the email and group 3 the date text.
# (Note: The spec states that the name part should be non-empty but
# git-fast-export doesn't always do that so the name may be empty.)
# The name is "anything up to the '<'" rather than \w* because real names
# contain spaces and punctuation; the single space separating the name from
# '<' is matched outside the group so it is not part of the captured name.
_WHO_AND_WHEN_RE = re.compile(r'([^<]*?) ?<(.+)> (.+)')


class ImportParser(LineBasedParser):
    """Parser turning a fast-import style stream into command objects.

    Top-level commands are produced by ``iter_commands``.  The file
    changes belonging to a commit are produced by ``iter_file_commands``,
    which is handed to each CommitCommand uncalled so the file commands
    are only read when the consumer asks for them.
    """

    def __init__(self, input, verbose=False, output=sys.stdout):
        """A Parser of import commands.

        :param input: the file-like object to read from
        :param verbose: display extra information or not
        :param output: the file-like object to write messages to (YAGNI?)
        """
        LineBasedParser.__init__(self, input)
        self.verbose = verbose
        self.output = output
        # We auto-detect the date format when a date is first encountered
        self.date_parser = None

    def iter_commands(self):
        """Iterator returning ImportCommand objects.

        Blank lines and comment lines (starting with '#') are skipped.
        An unrecognised line aborts with InvalidCommand.
        """
        while True:
            line = self.next_line()
            if line is None:
                break
            elif not line or line.startswith('#'):
                continue
            # Search for commands in order of likelihood
            elif line.startswith('commit '):
                yield self._parse_commit(line[len('commit '):])
            elif line.startswith('blob'):
                yield self._parse_blob()
            elif line.startswith('progress '):
                yield commands.ProgressCommand(line[len('progress '):])
            elif line.startswith('reset '):
                yield self._parse_reset(line[len('reset '):])
            elif line.startswith('tag '):
                yield self._parse_tag(line[len('tag '):])
            elif line.startswith('checkpoint'):
                yield commands.CheckpointCommand()
            else:
                self.abort(errors.InvalidCommand, line)

    def iter_file_commands(self):
        """Iterator returning FileCommand objects.

        If an invalid file command is found, the line is silently
        pushed back and iteration ends.
        """
        while True:
            line = self.next_line()
            if line is None:
                break
            elif not line or line.startswith('#'):
                continue
            # Search for file commands in order of likelihood
            elif line.startswith('M '):
                yield self._parse_file_modify(line[2:])
            elif line.startswith('D '):
                path = self._path(line[2:])
                yield commands.FileDeleteCommand(path)
            elif line.startswith('R '):
                old, new = self._path_pair(line[2:])
                yield commands.FileRenameCommand(old, new)
            elif line.startswith('C '):
                # 'C' is a copy: the source must survive, so this must not
                # be reported as a rename.
                src, dest = self._path_pair(line[2:])
                yield commands.FileCopyCommand(src, dest)
            elif line.startswith('deleteall'):
                yield commands.FileDeleteAllCommand()
            else:
                # Not a file command: leave it for iter_commands
                self.push_line(line)
                break

    def _parse_blob(self):
        """Parse a blob command (optional mark followed by data)."""
        mark = self._get_mark_if_any()
        data = self._get_data('blob')
        return commands.BlobCommand(mark, data)

    def _parse_commit(self, ref):
        """Parse a commit command.

        :param ref: the text following 'commit ' on the command line
        """
        mark = self._get_mark_if_any()
        author = self._get_user_info('commit', 'author', False)
        committer = self._get_user_info('commit', 'committer')
        message = self._get_data('commit', 'message')
        parents = []
        from_ = self._get_from()
        if from_ is not None:
            parents.append(from_)
            # merge lines are only looked for after an explicit 'from'
            merge = self._get_merge()
            while merge is not None:
                parents.append(merge)
                merge = self._get_merge()
        # The file commands are read lazily by whoever consumes the commit
        return commands.CommitCommand(ref, mark, author, committer, message,
            parents, self.iter_file_commands)

    def _parse_file_modify(self, info):
        """Parse a filemodify command within a commit.

        :param info: a string in the format "mode dataref path"
          (where dataref might be the hard-coded literal 'inline').
        """
        params = info.split(' ', 2)
        path = self._path(params[2])
        is_executable, is_symlink = self._mode(params[0])
        if is_symlink:
            kind = commands.SYMLINK_KIND
        else:
            kind = commands.FILE_KIND
        if params[1] == 'inline':
            # The content follows immediately as a data section
            dataref = None
            data = self._get_data('filemodify')
        else:
            dataref = params[1]
            data = None
        return commands.FileModifyCommand(path, kind, is_executable, dataref,
            data)

    def _parse_reset(self, ref):
        """Parse a reset command (the 'from' section is optional)."""
        from_ = self._get_from()
        return commands.ResetCommand(ref, from_)

    def _parse_tag(self, name):
        """Parse a tag command (from, tagger and message are required)."""
        from_ = self._get_from('tag')
        tagger = self._get_user_info('tag', 'tagger')
        message = self._get_data('tag', 'message')
        return commands.TagCommand(name, from_, tagger, message)

    def _get_mark_if_any(self):
        """Parse a mark section.

        :return: the text after 'mark :' or None if the next line is not
          a mark (the line is then pushed back) or the stream has ended.
        """
        line = self.next_line()
        if line is None:
            # EOF: nothing to inspect and nothing to push back
            return None
        if line.startswith('mark :'):
            return line[len('mark :'):]
        self.push_line(line)
        return None

    def _get_from(self, required_for=None):
        """Parse a from section.

        :param required_for: name of the command that requires the
          section; when given, a missing section aborts with
          MissingSection instead of returning None
        :return: the text after 'from ' or None if absent
        """
        line = self.next_line()
        if line is not None and line.startswith('from '):
            return line[len('from '):]
        if required_for:
            self.abort(errors.MissingSection, required_for, 'from')
        # An optional section may legitimately be cut short by EOF
        if line is not None:
            self.push_line(line)
        return None

    def _get_merge(self):
        """Parse a merge section.

        :return: the text after 'merge ' or None if the next line is not
          a merge (the line is then pushed back) or the stream has ended.
        """
        line = self.next_line()
        if line is None:
            return None
        if line.startswith('merge '):
            return line[len('merge '):]
        self.push_line(line)
        return None

    def _get_user_info(self, cmd, section, required=True):
        """Parse a user section.

        :param cmd: name of the enclosing command, used in error reports
        :param section: the section keyword, e.g. 'author' or 'committer'
        :param required: if True, a missing section aborts with
          MissingSection; otherwise None is returned
        :return: the tuple produced by _who_when, or None
        """
        prefix = section + ' '
        line = self.next_line()
        if line is not None and line.startswith(prefix):
            return self._who_when(line[len(prefix):], cmd, section)
        if required:
            self.abort(errors.MissingSection, cmd, section)
        if line is not None:
            self.push_line(line)
        return None

    def _get_data(self, required_for, section='data'):
        """Parse a data section.

        Both the delimited ('data <<delim') and the exact byte count
        ('data N') forms are recognised.

        :param required_for: name of the enclosing command, used in
          error reports
        :param section: the name to report if the section is missing
        :return: the content of the data section
        """
        line = self.next_line()
        # EOF counts as a missing section rather than a crash
        if line is None or not line.startswith('data '):
            self.abort(errors.MissingSection, required_for, section)
        rest = line[len('data '):]
        if rest.startswith('<<'):
            return self.read_until(rest[2:])
        size = int(rest)
        return self.read_bytes(size)

    def _who_when(self, s, cmd, section):
        """Parse who and when information from a string.

        The date format is detected from the first date seen and the
        parser chosen then is reused for every later date.

        :return: a tuple of (name,email,timestamp,timezone)
        """
        match = _WHO_AND_WHEN_RE.search(s)
        if not match:
            self.abort(errors.BadFormat, cmd, section, s)
        datestr = match.group(3)
        if self.date_parser is None:
            # auto-detect the date format
            if len(datestr.split(' ')) == 2:
                format = 'raw'
            elif datestr == 'now':
                format = 'now'
            else:
                format = 'rfc2822'
            self.date_parser = dates.DATE_PARSERS_BY_NAME[format]
        when = self.date_parser(datestr)
        return (match.group(1), match.group(2), when[0], when[1])

    def _path(self, s):
        """Parse a path."""
        # TODO: handle quoted paths
        return s

    def _path_pair(self, s):
        """Parse two paths separated by a space."""
        # TODO: handle quoted paths
        return tuple(s.split(' ', 1))

    def _mode(self, s):
        """Parse a file mode into executable and symlink flags.

        :return (is_executable, is_symlink)
        """
        # Note: Output from git-fast-export slightly different to spec
        if s in ('644', '100644', '0100644'):
            return False, False
        elif s in ('755', '100755', '0100755'):
            return True, False
        elif s in ('120000', '0120000'):
            return False, True
        else:
            self.abort(errors.BadFormat, 'filemodify', 'mode', s)


0.64.1 by Ian Clatworthy 1st cut: gfi parser + --info processing method	1	# Copyright (C) 2008 Canonical Ltd
	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
	15	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	16
	17	"""Parser of import data into command objects.
	18
	19	In order to reuse existing front-ends, the stream format is a subset of
	20	the one used by git-fast-import (as of the 1.5.4 release of git at least).
	21	The grammar is:
	22
	23	stream ::= cmd*;
	24
	25	cmd ::= new_blob
	26	\| new_commit
	27	\| new_tag
	28	\| reset_branch
	29	\| checkpoint
	30	\| progress
	31	;
	32
	33	new_blob ::= 'blob' lf
	34	mark?
	35	file_content;
	36	file_content ::= data;
	37
	38	new_commit ::= 'commit' sp ref_str lf
	39	mark?
	40	('author' sp name '<' email '>' when lf)?
	41	'committer' sp name '<' email '>' when lf
	42	commit_msg
	43	('from' sp (ref_str \| hexsha1 \| sha1exp_str \| idnum) lf)?
	44	('merge' sp (ref_str \| hexsha1 \| sha1exp_str \| idnum) lf)*
	45	file_change*
	46	lf?;
	47	commit_msg ::= data;
	48
	49	file_change ::= file_clr
	50	\| file_del
	51	\| file_rnm
	52	\| file_cpy
	53	\| file_obm
	54	\| file_inm;
	55	file_clr ::= 'deleteall' lf;
	56	file_del ::= 'D' sp path_str lf;
	57	file_rnm ::= 'R' sp path_str sp path_str lf;
	58	file_cpy ::= 'C' sp path_str sp path_str lf;
	59	file_obm ::= 'M' sp mode sp (hexsha1 \| idnum) sp path_str lf;
	60	file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
	61	data;
	62
	63	new_tag ::= 'tag' sp tag_str lf
	64	'from' sp (ref_str \| hexsha1 \| sha1exp_str \| idnum) lf
65	'tagger' sp name '<' email '>' when lf
66	tag_msg;
67	tag_msg ::= data;
68
69	reset_branch ::= 'reset' sp ref_str lf
70	('from' sp (ref_str \| hexsha1 \| sha1exp_str \| idnum) lf)?
71	lf?;
72
73	checkpoint ::= 'checkpoint' lf
74	lf?;
75
76	progress ::= 'progress' sp not_lf* lf
77	lf?;
78
79	# note: the first idnum in a stream should be 1 and subsequent
80	# idnums should not have gaps between values as this will cause
81	# the stream parser to reserve space for the gapped values. An
82	# idnum can be updated in the future to a new object by issuing
83	# a new mark directive with the old idnum.
84	#
85	mark ::= 'mark' sp idnum lf;
86	data ::= (delimited_data \| exact_data)
87	lf?;
88
89	# note: delim may be any string but must not contain lf.
90	# data_line may contain any data but must not be exactly
91	# delim.
92	delimited_data ::= 'data' sp '<<' delim lf
93	(data_line lf)*
94	delim lf;
95
96	# note: declen indicates the length of binary_data in bytes.
97	# declen does not include the lf preceeding the binary data.
98	#
99	exact_data ::= 'data' sp declen lf
100	binary_data;
101
102	# note: quoted strings are C-style quoting supporting \c for
103	# common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn
104	# is the signed byte value in octal. Note that the only
105	# characters which must actually be escaped to protect the
106	# stream formatting is: \, " and LF. Otherwise these values
107	# are UTF8.
108	#
109	ref_str ::= ref;
110	sha1exp_str ::= sha1exp;
111	tag_str ::= tag;
112	path_str ::= path \| '"' quoted(path) '"' ;
113	mode ::= '100644' \| '644'
114	\| '100755' \| '755'
115	\| '120000'
116	;
117
118	declen ::= # unsigned 32 bit value, ascii base10 notation;
119	bigint ::= # unsigned integer value, ascii base10 notation;
120	binary_data ::= # file content, not interpreted;
121
122	when ::= raw_when \| rfc2822_when;
123	raw_when ::= ts sp tz;
124	rfc2822_when ::= # Valid RFC 2822 date and time;
125
126	sp ::= # ASCII space character;
127	lf ::= # ASCII newline (LF) character;
128
129	# note: a colon (':') must precede the numerical value assigned to
130	# an idnum. This is to distinguish it from a ref or tag name as
131	# GIT does not permit ':' in ref or tag strings.
132	#
133	idnum ::= ':' bigint;
134	path ::= # GIT style file path, e.g. "a/b/c";
135	ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
136	tag ::= # GIT tag name, e.g. "FIREFOX_1_5";
137	sha1exp ::= # Any valid GIT SHA1 expression;
138	hexsha1 ::= # SHA1 in hexadecimal format;
139
140	# note: name and email are UTF8 strings, however name must not
141	# contain '<' or lf and email must not contain any of the
142	# following: '<', '>', lf.
143	#
144	name ::= # valid GIT author/committer name;
145	email ::= # valid GIT author/committer email;
146	ts ::= # time since the epoch in seconds, ascii base10 notation;
147	tz ::= # GIT style timezone;
148
149	# note: comments may appear anywhere in the input, except
150	# within a data command. Any form of the data command
151	# always escapes the related input from comment processing.
152	#
153	# In case it is not clear, the '#' that starts the comment
154	# must be the first character on that the line (an lf have
155	# preceeded it).
156	#
157	comment ::= '#' not_lf* lf;
158	not_lf ::= # Any byte that is not ASCII newline (LF);
159	"""
160
161
162	import re
163	import sys
164
165	import commands
166	import dates
167	import errors
168
169
170	## Stream parsing ##
171
172	class LineBasedParser(object):
173
174	def __init__(self, input):
175	"""A Parser that keeps track of line numbers.
176
177	:param input: the file-like object to read from
178	"""
179	self.input = input
180	self.lineno = 0
181	# Lines pushed back onto the input stream
182	self._buffer = []
183
184	def abort(self, exception, *args):
185	"""Raise an exception providing line number information."""
186	raise exception(self.lineno, *args)
187
188	def readline(self):
189	"""Get the next line including the newline or '' on EOF."""
190	self.lineno += 1
191	if self._buffer:
192	return self._buffer.pop()
193	else:
194	return self.input.readline()
195
196	def next_line(self):
197	"""Get the next line without the newline or None on EOF."""
198	line = self.readline()
199	if line:
200	return line[:-1]
201	else:
202	return None
203
204	def push_line(self, line):
205	"""Push line back onto the line buffer.
206
207	:param line: the line with no trailing newline
208	"""
209	self.lineno -= 1
210	self._buffer.append(line + "\n")
211
212	def read_bytes(self, count):
213	"""Read a given number of bytes from the input stream.
214
215	Throws MissingBytes if the bytes are not found.
216
217	Note: This method does not read from the line buffer.
0.64.6 by Ian Clatworthy generic processing method working for one revision in one branch	218
	219	:return: a string
0.64.1 by Ian Clatworthy 1st cut: gfi parser + --info processing method	220	"""
	221	lines = []
	222	left = count
0.64.6 by Ian Clatworthy generic processing method working for one revision in one branch	223	found = 0
0.64.1 by Ian Clatworthy 1st cut: gfi parser + --info processing method	224	while left > 0:
	225	line = self.input.readline(left)
	226	if line:
0.64.6 by Ian Clatworthy generic processing method working for one revision in one branch	227	line_len = len(line)
	228	left -= line_len
	229	found += line_len
0.64.1 by Ian Clatworthy 1st cut: gfi parser + --info processing method	230	lines.append(line)
	231	else:
	232	left = 0
	233	if found != count:
	234	self.abort(errors.MissingBytes, count, found)
0.64.6 by Ian Clatworthy generic processing method working for one revision in one branch	235	return ''.join(lines)
0.64.1 by Ian Clatworthy 1st cut: gfi parser + --info processing method	236
	237	def read_until(self, terminator):
	238	"""Read the input stream until the terminator is found.
	239
	240	Throws MissingTerminator if the terminator is not found.
	241
	242	Note: This method does not read from the line buffer.
	243
	244	:return: the bytes read up to but excluding the terminator.
	245	"""
	246	raise NotImplementedError(self.read_until)
	247
	248
0.64.3 by Ian Clatworthy tweak parser for better git-fast-export compatibility	249	# Regular expression used for parsing. (Note: The spec states that the name
	250	# part should be non-empty but git-fast-export doesn't always do that so
	251	# the first bit is \w*, not \w+.)
	252	_WHO_AND_WHEN_RE = re.compile(r'(\w*) <(.+)> (.+)')
0.64.1 by Ian Clatworthy 1st cut: gfi parser + --info processing method	253
	254
	255	class ImportParser(LineBasedParser):
	256
	257	def __init__(self, input, verbose=False, output=sys.stdout):
	258	"""A Parser of import commands.
	259
	260	:param input: the file-like object to read from
	261	:param verbose: display extra information of not
	262	:param output: the file-like object to write messages to (YAGNI?)
	263	"""
	264	LineBasedParser.__init__(self, input)
	265	self.verbose = verbose
	266	self.output = output
	267	# We auto-detect the date format when a date is first encountered
	268	self.date_parser = None
	269
	270	def iter_commands(self):
	271	"""Iterator returning ImportCommand objects."""
	272	while True:
	273	line = self.next_line()
	274	if line is None:
	275	break
	276	elif len(line) == 0 or line.startswith('#'):
	277	continue
	278	# Search for commands in order of likelihood
	279	elif line.startswith('commit '):
	280	yield self._parse_commit(line[len('commit '):])
	281	elif line.startswith('blob'):
	282	yield self._parse_blob()
	283	elif line.startswith('progress '):
	284	yield commands.ProgressCommand(line[len('progress '):])
	285	elif line.startswith('reset '):
	286	yield self._parse_reset(line[len('reset '):])
	287	elif line.startswith('tag '):
	288	yield self._parse_tag(line[len('tag '):])
	289	elif line.startswith('checkpoint'):
	290	yield commands.CheckpointCommand()
	291	else:
	292	self.abort(errors.InvalidCommand, line)
	293
	294	def iter_file_commands(self):
	295	"""Iterator returning FileCommand objects.
	296
	297	If an invalid file command is found, the line is silently
	298	pushed back and iteration ends.
	299	"""
	300	while True:
	301	line = self.next_line()
	302	if line is None:
	303	break
	304	elif len(line) == 0 or line.startswith('#'):
	305	continue
	306	# Search for file commands in order of likelihood
	307	elif line.startswith('M '):
	308	yield self._parse_file_modify(line[2:])
	309	elif line.startswith('D '):
	310	path = self._path(line[2:])
	311	yield commands.FileDeleteCommand(path)
	312	elif line.startswith('R '):
	313	old, new = self._path_pair(line[2:])
	314	yield commands.FileRenameCommand(old, new)
	315	elif line.startswith('C '):
	316	src, dest = self._path_pair(line[2:])
317	yield commands.FileRenameCommand(src, dest)
318	elif line.startswith('deleteall'):
319	yield commands.FileDeleteAllCommand()
320	else:
321	self.push_line(line)
322	break
323
324	def _parse_blob(self):
325	"""Parse a blob command."""
326	mark = self._get_mark_if_any()
327	data = self._get_data('blob')
328	return commands.BlobCommand(mark, data)
329
330	def _parse_commit(self, ref):
331	"""Parse a commit command."""
332	mark = self._get_mark_if_any()
333	author = self._get_user_info('commit', 'author', False)
334	committer = self._get_user_info('commit', 'committer')
335	message = self._get_data('commit', 'message')
336	from_ = self._get_from()
337	if from_ is not None:
338	parents = [from_]
339	while True:
340	merge = self._get_merge()
341	if merge is not None:
342	parents.append(merge)
343	else:
344	break
345	else:
346	parents = []
347	return commands.CommitCommand(ref, mark, author, committer, message,
348	parents, self.iter_file_commands)
349
350	def _parse_file_modify(self, info):
351	"""Parse a filemodify command within a commit.
352
353	:param info: a string in the format "mode dataref path"
354	(where dataref might be the hard-coded literal 'inline').
355	"""
356	params = info.split(' ', 2)
357	path = self._path(params[2])
358	is_executable, is_symlink = self._mode(params[0])
359	if is_symlink:
360	kind = commands.SYMLINK_KIND
361	else:
362	kind = commands.FILE_KIND
363	if params[1] == 'inline':
364	dataref = None
365	data = self._get_data('filemodify')
366	else:
367	dataref = params[1]
368	data = None
369	return commands.FileModifyCommand(path, kind, is_executable, dataref,
370	data)
371
372	def _parse_reset(self, ref):
373	"""Parse a reset command."""
374	from_ = self._get_from()
375	return commands.ResetCommand(ref, from_)
376
377	def _parse_tag(self, name):
378	"""Parse a tag command."""
379	from_ = self._get_from('tag')
380	tagger = self._get_user_info('tag', 'tagger')
381	message = self._get_data('tag', 'message')
382	return commands.TagCommand(name, from_, tagger, message)
383
384	def _get_mark_if_any(self):
385	"""Parse a mark section."""
386	line = self.next_line()
387	if line.startswith('mark :'):
388	return line[len('mark :'):]
389	else:
390	self.push_line(line)
391	return None
392
393	def _get_from(self, required_for=None):
394	"""Parse a from section."""
395	line = self.next_line()
396	if line.startswith('from '):
397	return line[len('from '):]
398	elif required_for:
399	self.abort(errors.MissingSection, required_for, 'from')
400	else:
401	self.push_line(line)
402	return None
403
404	def _get_merge(self):
405	"""Parse a merge section."""
406	line = self.next_line()
407	if line.startswith('merge '):
408	return line[len('merge '):]
409	else:
410	self.push_line(line)
411	return None
412
413	def _get_user_info(self, cmd, section, required=True):
414	"""Parse a user section."""
415	line = self.next_line()
416	if line.startswith(section + ' '):
417	return self._who_when(line[len(section + ' '):], cmd, section)
418	elif required:
419	self.abort(errors.MissingSection, cmd, section)
420	else:
421	self.push_line(line)
422	return None
423
424	def _get_data(self, required_for, section='data'):
425	"""Parse a data section."""
426	line = self.next_line()
427	if line.startswith('data '):
428	rest = line[len('data '):]
429	if rest.startswith('<<'):
430	return self.read_until(rest[2:])
431	else:
432	size = int(rest)
433	return self.read_bytes(size)
434	else:
435	self.abort(errors.MissingSection, required_for, section)
436
437	def _who_when(self, s, cmd, section):
438	"""Parse who and when information from a string.
439
0.64.3 by Ian Clatworthy tweak parser for better git-fast-export compatibility	440	:return: a tuple of (name,email,timestamp,timezone)
0.64.1 by Ian Clatworthy 1st cut: gfi parser + --info processing method	441	"""
	442	match = _WHO_AND_WHEN_RE.search(s)
	443	if match:
	444	datestr = match.group(3)
	445	if self.date_parser is None:
	446	# auto-detect the date format
0.64.3 by Ian Clatworthy tweak parser for better git-fast-export compatibility	447	if len(datestr.split(' ')) == 2:
0.64.1 by Ian Clatworthy 1st cut: gfi parser + --info processing method	448	format = 'raw'
	449	elif datestr == 'now':
	450	format = 'now'
	451	else:
	452	format = 'rfc2822'
	453	self.date_parser = dates.DATE_PARSERS_BY_NAME[format]
	454	when = self.date_parser(datestr)
0.64.3 by Ian Clatworthy tweak parser for better git-fast-export compatibility	455	return (match.group(1),match.group(2),when[0],when[1])
0.64.1 by Ian Clatworthy 1st cut: gfi parser + --info processing method	456	else:
	457	self.abort(errors.BadFormat, cmd, section, s)
	458
	459	def _path(self, s):
	460	"""Parse a path."""
	461	# TODO: handle quoted paths
	462	return s
	463
	464	def _path_pair(self, s):
	465	"""Parse two paths separated by a space."""
	466	# TODO: handle quoted paths
	467	return tuple(s.split(' ', 1))
	468
	469	def _mode(self, s):
	470	"""Parse a file mode into executable and symlink flags.
	471
	472	:return (is_executable, is_symlink)
	473	"""
	474	# Note: Output from git-fast-export slightly different to spec
	475	if s in ['644', '100644', '0100644']:
	476	return False, False
	477	elif s in ['755', '100755', '0100755']:
	478	return True, False
0.64.3 by Ian Clatworthy tweak parser for better git-fast-export compatibility	479	elif s in ['120000', '0120000']:
0.64.1 by Ian Clatworthy 1st cut: gfi parser + --info processing method	480	return False, True
	481	else:
	482	self.abort(errors.BadFormat, 'filemodify', 'mode', s)
	483