/brz/remove-bazaar : revision 0.64.246

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to parser.py

Committer: Ian Clatworthy
Date: 2009-10-09 05:42:53 UTC
mto: (0.123.10 trunk) (6622.6.1 bundle) (6628.1.3 integration-fastimport) (7194.1.1 fastexport-empty-parents)
mto: This revision was merged to the branch mainline in revision 6631.
Revision ID: ian.clatworthy@canonical.com-20091009054253-kmsj7k9w49u58pla

fix date parsing bug found while importing samba

files added:
.bzrignore

COPYING.txt

NEWS

README.txt

__init__.py

branch_mapper.py

branch_updater.py

bzr_commit_handler.py

bzr_exporter.py

cache_manager.py

commands.py

dates.py

doc/notes.txt

errors.py

exporters

exporters/Makefile

exporters/__init__.py

exporters/bzr-fast-export.LICENSE

exporters/bzr-fast-export.README

exporters/darcs

exporters/darcs/.gitignore

exporters/darcs/Makefile

exporters/darcs/NEWS

exporters/darcs/README

exporters/darcs/TODO

exporters/darcs/asciidoc.conf

exporters/darcs/d2x

exporters/darcs/d2x.txt

exporters/darcs/darcs-fast-export

exporters/darcs/darcs-fast-export.txt

exporters/darcs/darcs-fast-import

exporters/darcs/darcs-fast-import.txt

exporters/darcs/git-darcs

exporters/darcs/git-darcs.txt

exporters/darcs/t

exporters/darcs/t/Makefile

exporters/darcs/t/bench-results

exporters/darcs/t/bench-results/Makefile

exporters/darcs/t/bench-results/bench-results.gnu

exporters/darcs/t/bench-results/bench-results.py

exporters/darcs/t/bench-tailor.sh

exporters/darcs/t/bench.sh

exporters/darcs/t/data

exporters/darcs/t/data/hungarian.gif

exporters/darcs/t/lib.sh

exporters/darcs/t/test

exporters/darcs/t/test-bzr.sh

exporters/darcs/t/test-git-d2x.sh

exporters/darcs/t/test-git-incremental.sh

exporters/darcs/t/test-git-progress.sh

exporters/darcs/t/test-git.sh

exporters/darcs/t/test-hg-d2x.sh

exporters/darcs/t/test-hg.sh

exporters/darcs/t/test/_darcs

exporters/darcs/t/test/_darcs/patches

exporters/darcs/t/test/_darcs/prefs

exporters/darcs/t/test/_darcs/pristine

exporters/darcs/t/test2

exporters/darcs/t/test2-bzr-d2x.sh

exporters/darcs/t/test2-bzr-incremental.sh

exporters/darcs/t/test2-git-funny-tagname.sh

exporters/darcs/t/test2-git-incremental-specworkdir.sh

exporters/darcs/t/test2-git-incremental.sh

exporters/darcs/t/test2-git.sh

exporters/darcs/t/test2/_darcs

exporters/darcs/t/test2/_darcs/inventories

exporters/darcs/t/test2/_darcs/patches

exporters/darcs/t/test2/_darcs/prefs

exporters/darcs/t/test2/_darcs/pristine.hashed

exporters/darcs/t/testimport-bzr-x2d.sh

exporters/darcs/t/testimport-bzr.sh

exporters/darcs/t/testimport-copy.sh

exporters/darcs/t/testimport-darcs.sh

exporters/darcs/t/testimport-deleteall.sh

exporters/darcs/t/testimport-git-incremental.sh

exporters/darcs/t/testimport-git-twoway-gd.sh

exporters/darcs/t/testimport-git-twoway.sh

exporters/darcs/t/testimport-git-x2d.sh

exporters/darcs/t/testimport-git.sh

exporters/darcs/t/testimport-hg-x2d.sh

exporters/darcs/t/testimport-hg.sh

exporters/darcs/t/testimport-rename.sh

exporters/darcs/x2d

exporters/darcs/x2d.txt

exporters/hg-fast-export.README

exporters/hg-fast-export.py

exporters/hg-fast-export.sh

exporters/hg2git.py

exporters/svn-archive.c

exporters/svn-fast-export.c

exporters/svn-fast-export.py

helpers.py

idmapfile.py

marks_file.py

parser.py

processor.py

processors

processors/__init__.py

processors/filter_processor.py

processors/generic_processor.py

processors/info_processor.py

processors/query_processor.py

revision_store.py

setup.py

tests

tests/__init__.py

tests/test_branch_mapper.py

tests/test_commands.py

tests/test_errors.py

tests/test_filter_processor.py

tests/test_generic_processor.py

tests/test_head_tracking.py

tests/test_helpers.py

tests/test_parser.py

files removed:
.bzrignore

.rsyncexclude

BRANCH.TODO

HACKING

INSTALL

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

generate_docs.py

notes

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

Show diffs side-by-side

added added

removed removed

parser.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Parser of import data into command objects.

In order to reuse existing front-ends, the stream format is a subset of

the one used by git-fast-import (as of the 1.5.4 release of git at least).

The grammar is:

stream ::= cmd*;

cmd ::= new_blob

| new_commit

| new_tag

| reset_branch

| checkpoint

| progress

;

new_blob ::= 'blob' lf

mark?

file_content;

file_content ::= data;

new_commit ::= 'commit' sp ref_str lf

mark?

('author' sp name '<' email '>' when lf)?

'committer' sp name '<' email '>' when lf

commit_msg

('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?

('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)*

file_change*

lf?;

commit_msg ::= data;

file_change ::= file_clr

| file_del

| file_rnm

| file_cpy

| file_obm

| file_inm;

file_clr ::= 'deleteall' lf;

file_del ::= 'D' sp path_str lf;

file_rnm ::= 'R' sp path_str sp path_str lf;

file_cpy ::= 'C' sp path_str sp path_str lf;

file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;

file_inm ::= 'M' sp mode sp 'inline' sp path_str lf

data;

new_tag ::= 'tag' sp tag_str lf

'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf

'tagger' sp name '<' email '>' when lf

tag_msg;

tag_msg ::= data;

reset_branch ::= 'reset' sp ref_str lf

('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?

lf?;

checkpoint ::= 'checkpoint' lf

lf?;

progress ::= 'progress' sp not_lf* lf

lf?;

# note: the first idnum in a stream should be 1 and subsequent

# idnums should not have gaps between values as this will cause

# the stream parser to reserve space for the gapped values. An

# idnum can be updated in the future to a new object by issuing

# a new mark directive with the old idnum.

mark ::= 'mark' sp idnum lf;

data ::= (delimited_data | exact_data)

lf?;

# note: delim may be any string but must not contain lf.

# data_line may contain any data but must not be exactly

# delim. The lf after the final data_line is included in

# the data.

delimited_data ::= 'data' sp '<<' delim lf

(data_line lf)*

delim lf;

# note: declen indicates the length of binary_data in bytes.

# declen does not include the lf preceding the binary data.

100

exact_data ::= 'data' sp declen lf

101

binary_data;

102

103

# note: quoted strings are C-style quoting supporting \c for

104

# common escapes of 'c' (e.g. \n, \t, \\, \") or \nnn where nnn

105

# is the signed byte value in octal. Note that the only

106

# characters which must actually be escaped to protect the

107

# stream formatting is: \, " and LF. Otherwise these values

108

# are UTF8.

109

110

ref_str ::= ref;

111

sha1exp_str ::= sha1exp;

112

tag_str ::= tag;

113

path_str ::= path | '"' quoted(path) '"' ;

114

mode ::= '100644' | '644'

115

| '100755' | '755'

116

| '120000'

117

;

118

119

declen ::= # unsigned 32 bit value, ascii base10 notation;

120

bigint ::= # unsigned integer value, ascii base10 notation;

121

binary_data ::= # file content, not interpreted;

122

123

when ::= raw_when | rfc2822_when;

124

raw_when ::= ts sp tz;

125

rfc2822_when ::= # Valid RFC 2822 date and time;

126

127

sp ::= # ASCII space character;

128

lf ::= # ASCII newline (LF) character;

129

130

# note: a colon (':') must precede the numerical value assigned to

131

# an idnum. This is to distinguish it from a ref or tag name as

132

# GIT does not permit ':' in ref or tag strings.

133

134

idnum ::= ':' bigint;

135

path ::= # GIT style file path, e.g. "a/b/c";

136

ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";

137

tag ::= # GIT tag name, e.g. "FIREFOX_1_5";

138

sha1exp ::= # Any valid GIT SHA1 expression;

139

hexsha1 ::= # SHA1 in hexadecimal format;

140

141

# note: name and email are UTF8 strings, however name must not

142

# contain '<' or lf and email must not contain any of the

143

# following: '<', '>', lf.

144

145

name ::= # valid GIT author/committer name;

146

email ::= # valid GIT author/committer email;

147

ts ::= # time since the epoch in seconds, ascii base10 notation;

148

tz ::= # GIT style timezone;

149

150

# note: comments may appear anywhere in the input, except

151

# within a data command. Any form of the data command

152

# always escapes the related input from comment processing.

153

154

# In case it is not clear, the '#' that starts the comment

155

# must be the first character on that line (an lf must have

156

# preceded it).

157

158

comment ::= '#' not_lf* lf;

159

not_lf ::= # Any byte that is not ASCII newline (LF);

160

"""

161

162

163

import re

164

import sys

165

166

import commands

167

import dates

168

import errors

169

170

171

## Stream parsing ##

172

173

class LineBasedParser(object):
    """A reader over a file-like object that tracks the current line number.

    Lines can be pushed back with push_line() and are then returned by
    readline()/next_line() before any further input is consumed.
    read_bytes() and read_until() read straight from the underlying input
    and bypass the push-back buffer.
    """

    def __init__(self, input):
        """A Parser that keeps track of line numbers.

        :param input: the file-like object to read from
        """
        self.input = input
        self.lineno = 0
        # Lines pushed back onto the input stream
        self._buffer = []

    def abort(self, exception, *args):
        """Raise an exception providing line number information.

        :param exception: the exception class to raise; it is called with
            the current line number followed by ``args``
        """
        raise exception(self.lineno, *args)

    def readline(self):
        """Get the next line including the newline or '' on EOF.

        Pushed-back lines are returned first (most recently pushed first).
        The line counter is advanced on every call, including at EOF.
        """
        self.lineno += 1
        if self._buffer:
            return self._buffer.pop()
        return self.input.readline()

    def next_line(self):
        """Get the next line without the newline or None on EOF."""
        line = self.readline()
        if not line:
            return None
        # Only strip a newline that is really there: the final line of a
        # stream may be unterminated and must not lose its last character.
        if line.endswith("\n"):
            return line[:-1]
        return line

    def push_line(self, line):
        """Push line back onto the line buffer.

        :param line: the line with no trailing newline
        """
        self.lineno -= 1
        self._buffer.append(line + "\n")

    def read_bytes(self, count):
        """Read a given number of bytes from the input stream.

        Throws MissingBytes if the bytes are not found.

        Note: This method does not read from the line buffer.

        :return: a string
        """
        result = self.input.read(count)
        found = len(result)
        # Keep the line counter in step with any newlines in the payload.
        self.lineno += result.count("\n")
        if found != count:
            self.abort(errors.MissingBytes, count, found)
        return result

    def read_until(self, terminator):
        """Read the input stream until the terminator is found.

        Throws MissingTerminator if the terminator is not found.

        Note: This method does not read from the line buffer.

        :param terminator: the delimiter line, without its trailing newline
        :return: the bytes read up to but excluding the terminator.
        """
        lines = []
        term = terminator + '\n'
        while True:
            line = self.input.readline()
            if not line:
                # EOF: without this check the loop would spin forever,
                # appending empty strings.
                # NOTE(review): assumes errors.MissingTerminator takes
                # (lineno, terminator) - confirm against errors.py.
                self.abort(errors.MissingTerminator, terminator)
            # Count every consumed line (terminator included) so that
            # later error messages report the right position.
            self.lineno += 1
            if line == term:
                break
            lines.append(line)
        return ''.join(lines)

248

249

250

# Regular expression used for parsing. (Note: The spec states that the name

251

# part should be non-empty but git-fast-export doesn't always do that so

252

# the first bit is [^<]*, not [^<]+.) Also git-fast-import code says the

253

# space before the email is optional.

254

# Groups: (1) name, possibly empty and possibly ending in a space,
# (2) email between the angle brackets, (3) the date text after "> ".
_WHO_AND_WHEN_RE = re.compile(r'([^<]*)<(.*)> (.+)')
# Same as above but without a date part: (1) name, (2) email.
_WHO_RE = re.compile(r'([^<]*)<(.*)>')

256

257

258

class ImportParser(LineBasedParser):

259

260

def __init__(self, input, verbose=False, output=sys.stdout):
    """Create a parser of import commands.

    :param input: the file-like object to read from
    :param verbose: display extra information or not
    :param output: the file-like object to write messages to (YAGNI?)
    """
    LineBasedParser.__init__(self, input)
    # The date format is auto-detected when a date is first encountered,
    # so no date parser is selected up front.
    self.date_parser = None
    self.output = output
    self.verbose = verbose

272

273

def warning(self, msg):
    """Write a warning, prefixed with the current line number, to stderr.

    :param msg: the text of the warning
    """
    text = "warning line %d: %s\n" % (self.lineno, msg)
    sys.stderr.write(text)

275

276

def iter_commands(self):
    """Iterator returning ImportCommand objects.

    Reads lines until the input is exhausted. Blank lines and lines
    starting with '#' are skipped. Each recognised command line is handed
    to the matching parser and the resulting command is yielded. Any
    other line is reported through abort() with errors.InvalidCommand.
    """
    while True:
        line = self.next_line()
        if line is None:
            break
        elif len(line) == 0 or line.startswith('#'):
            continue
        # Search for commands in order of likelihood
        elif line.startswith('commit '):
            yield self._parse_commit(line[len('commit '):])
        elif line.startswith('blob'):
            yield self._parse_blob()
        elif line.startswith('progress '):
            yield commands.ProgressCommand(line[len('progress '):])
        elif line.startswith('reset '):
            yield self._parse_reset(line[len('reset '):])
        elif line.startswith('tag '):
            yield self._parse_tag(line[len('tag '):])
        elif line.startswith('checkpoint'):
            yield commands.CheckpointCommand()
        elif line.startswith('feature '):
            # The separating space is part of the match because the
            # argument is sliced off after 'feature '. Matching the bare
            # word would silently drop the first character of e.g.
            # 'featurefoo'.
            yield self._parse_feature(line[len('feature '):])
        else:
            self.abort(errors.InvalidCommand, line)

301

302

def iter_file_commands(self):
    """Iterator returning FileCommand objects.

    Blank lines and lines starting with '#' are skipped. The first line
    that is not a file command is pushed back onto the line buffer and
    iteration ends, as it does when the input is exhausted.
    """
    # next_line() returns None at EOF, which ends the loop.
    for line in iter(self.next_line, None):
        if not line or line.startswith('#'):
            continue
        prefix, args = line[:2], line[2:]
        # Search for file commands in order of likelihood
        if prefix == 'M ':
            yield self._parse_file_modify(args)
        elif prefix == 'D ':
            yield commands.FileDeleteCommand(self._path(args))
        elif prefix == 'R ':
            old, new = self._path_pair(args)
            yield commands.FileRenameCommand(old, new)
        elif prefix == 'C ':
            src, dest = self._path_pair(args)
            yield commands.FileCopyCommand(src, dest)
        elif line.startswith('deleteall'):
            yield commands.FileDeleteAllCommand()
        else:
            self.push_line(line)
            break

331

332

def _parse_blob(self):
    """Parse a blob command: an optional mark followed by a data section.

    :return: a commands.BlobCommand built from the mark, the data and the
        line number recorded on entry
    """
    start_line = self.lineno
    mark = self._get_mark_if_any()
    content = self._get_data('blob')
    return commands.BlobCommand(mark, content, start_line)

338

339

def _parse_commit(self, ref):
    """Parse a commit command.

    Sections are read in stream order: mark, author(s), committer,
    message, from, merges, properties. The file commands are not read
    here; the bound iter_file_commands method is handed to the command
    object instead.

    :param ref: the text that followed 'commit ' on the command line
    :return: a commands.CommitCommand
    """
    start_line = self.lineno
    mark = self._get_mark_if_any()
    author = self._get_user_info('commit', 'author', False)
    # Any further author lines; the helper returns None (and pushes the
    # line back) as soon as the next line is not an author section.
    more_authors = list(
        iter(lambda: self._get_user_info('commit', 'author', False), None))
    committer = self._get_user_info('commit', 'committer')
    message = self._get_data('commit', 'message')
    try:
        message = message.decode('utf_8')
    except UnicodeDecodeError:
        self.warning(
            "commit message not in utf8 - replacing unknown characters")
        message = message.decode('utf_8', 'replace')
    from_ = self._get_from()
    merges = []
    for merge in iter(self._get_merge, None):
        # while the spec suggests it's illegal, git-fast-export
        # outputs multiple merges on the one line, e.g.
        # merge :x :y :z
        merges.extend(merge.split(" "))
    # Each property is a (name, value) pair; a repeated name keeps the
    # value seen last.
    properties = dict(iter(self._get_property, None))
    return commands.CommitCommand(ref, mark, author, committer, message,
        from_, merges, self.iter_file_commands, lineno=start_line,
        more_authors=more_authors, properties=properties)

382

383

def _parse_feature(self, info):
    """Parse a feature command.

    :param info: the text after 'feature ', either 'name' or 'name=value'
    :return: a commands.FeatureCommand; the value is None when there is
        no '=' in info, otherwise it is the text after the first '='
        passed through _path()
    """
    name, separator, raw_value = info.partition("=")
    value = self._path(raw_value) if separator else None
    return commands.FeatureCommand(name, value, lineno=self.lineno)

392

393

def _parse_file_modify(self, info):
    """Parse a filemodify command within a commit.

    :param info: a string in the format "mode dataref path"
      (where dataref might be the hard-coded literal 'inline').
    :return: a commands.FileModifyCommand. For an inline dataref the
      dataref is None and the data is read from the following data
      section; otherwise the data is None.
    """
    params = info.split(' ', 2)
    if len(params) != 3:
        # Report a truncated line as a format error with its line number
        # rather than letting an IndexError escape.
        self.abort(errors.BadFormat, 'filemodify', 'mode dataref path',
            info)
    path = self._path(params[2])
    is_executable, kind = self._mode(params[0])
    if params[1] == 'inline':
        dataref = None
        data = self._get_data('filemodify')
    else:
        dataref = params[1]
        data = None
    return commands.FileModifyCommand(path, kind, is_executable, dataref,
        data)

410

411

def _parse_reset(self, ref):
    """Parse a reset command.

    :param ref: the text that followed 'reset ' on the command line
    :return: a commands.ResetCommand for ref and the optional from section
    """
    origin = self._get_from()
    return commands.ResetCommand(ref, origin)

415

416

def _parse_tag(self, name):
    """Parse a tag command.

    :param name: the text that followed 'tag ' on the command line
    :return: a commands.TagCommand built from the name, the required from
        section, the tagger (a tagger without a date is accepted) and
        the message
    """
    from_ = self._get_from('tag')
    tagger = self._get_user_info('tag', 'tagger', accept_just_who=True)
    message = self._get_data('tag', 'message')
    try:
        message = message.decode('utf_8')
    except UnicodeDecodeError:
        # Same policy as for commit messages: warn and keep going rather
        # than abort the whole import on one badly encoded message.
        self.warning(
            "tag message not in utf8 - replacing unknown characters")
        message = message.decode('utf_8', 'replace')
    return commands.TagCommand(name, from_, tagger, message)

422

423

def _get_mark_if_any(self):
    """Parse a mark section.

    :return: the text after 'mark :' if the next line is a mark section.
        Otherwise None, with the line pushed back for the next reader;
        nothing is pushed back when the input is exhausted.
    """
    line = self.next_line()
    if line is None:
        # EOF: there is no line to inspect or to push back.
        return None
    if line.startswith('mark :'):
        return line[len('mark :'):]
    self.push_line(line)
    return None

431

432

def _get_from(self, required_for=None):
    """Parse a from section.

    :param required_for: name of the command that requires this section,
        or None if the section is optional
    :return: the text after 'from ', or None when the input is exhausted
        or when an optional section is absent (the line is pushed back)
    """
    line = self.next_line()
    if line is None:
        return None
    keyword = 'from '
    if line.startswith(keyword):
        return line[len(keyword):]
    if required_for:
        self.abort(errors.MissingSection, required_for, 'from')
    else:
        self.push_line(line)
    return None

444

445

def _get_merge(self):
    """Parse a merge section.

    :return: the text after 'merge ', or None when the input is exhausted
        or the next line is not a merge section (the line is pushed back)
    """
    line = self.next_line()
    if line is None:
        return None
    keyword = 'merge '
    if line.startswith(keyword):
        return line[len(keyword):]
    self.push_line(line)
    return None

455

456

def _get_property(self):
    """Parse a property section.

    :return: the result of _name_value() on the text after 'property ',
        or None when the input is exhausted or the next line is not a
        property section (the line is pushed back)
    """
    line = self.next_line()
    if line is None:
        return None
    keyword = 'property '
    if line.startswith(keyword):
        return self._name_value(line[len(keyword):])
    self.push_line(line)
    return None

466

467

def _get_user_info(self, cmd, section, required=True,
    accept_just_who=False):
    """Parse a user section.

    :param cmd: name of the command being parsed (used in error reports)
    :param section: the section keyword, e.g. 'author' or 'committer'
    :param required: if True, a missing section is reported through
        abort() with errors.MissingSection
    :param accept_just_who: passed through to _who_when()
    :return: the result of _who_when() for a matching line. Otherwise
        None, with the line pushed back unless the input is exhausted.
    """
    line = self.next_line()
    # next_line() returns None at EOF, so guard before calling a string
    # method on it.
    if line is not None and line.startswith(section + ' '):
        return self._who_when(line[len(section + ' '):], cmd, section,
            accept_just_who=accept_just_who)
    elif required:
        self.abort(errors.MissingSection, cmd, section)
    elif line is not None:
        self.push_line(line)
    return None

479

480

def _get_data(self, required_for, section='data'):
    """Parse a data section.

    Handles both forms of the data command: 'data <<delim' (content is
    read up to the delimiter line) and 'data <count>' (exactly count
    bytes are read, followed by an optional LF).

    :param required_for: name of the command being parsed (used in error
        reports)
    :param section: name of the section for error reports
    :return: the content of the data section
    """
    line = self.next_line()
    # next_line() returns None at EOF; treat that like any other missing
    # data section instead of failing on None.startswith.
    if line is None or not line.startswith('data '):
        self.abort(errors.MissingSection, required_for, section)
        return None
    rest = line[len('data '):]
    if rest.startswith('<<'):
        return self.read_until(rest[2:])
    payload = self.read_bytes(int(rest))
    # optional LF after data. Read directly from the input because
    # read_bytes() bypassed the line buffer as well.
    trailer = self.input.readline()
    if trailer == "\n":
        self.lineno += 1
    elif trailer:
        # Not the optional LF: hand the line back for the next reader.
        # push_line() decrements lineno, so count the line first. Only
        # strip a newline that is really there (the last line of the
        # stream may be unterminated).
        self.lineno += 1
        if trailer.endswith("\n"):
            trailer = trailer[:-1]
        self.push_line(trailer)
    # An empty trailer means EOF: nothing to push back.
    return payload

498

499

def _who_when(self, s, cmd, section, accept_just_who=False):
    """Parse who and when information from a string.

    The date format is detected from the first date seen and the chosen
    parser is kept in self.date_parser for all later dates.

    :param s: the text after the section keyword
    :param cmd: name of the command being parsed (used in error reports)
    :param section: name of the section (used in warnings and errors)
    :param accept_just_who: if True, accept a string without a date and
        use the 'now' date parser for the missing time
    :return: a tuple of (name,email,timestamp,timezone). name may be
        the empty string if only an email address was given.
    """
    def decode_utf8(raw, what):
        # The spec says names are *typically* utf8 encoded but that
        # isn't enforced by git-fast-export (at least). Some datasets
        # also have email addresses with non-utf8 bytes (see bug
        # 338186), so sanitize rather than fail.
        try:
            return raw.decode('utf_8')
        except UnicodeDecodeError:
            self.warning("%s %s not in utf8 - replacing unknown characters"
                % (section, what))
            return raw.decode('utf_8', 'replace')

    match = _WHO_AND_WHEN_RE.search(s)
    if match:
        datestr = match.group(3).lstrip()
        if self.date_parser is None:
            # auto-detect the date format
            if len(datestr.split(' ')) == 2:
                date_format = 'raw'
            elif datestr == 'now':
                date_format = 'now'
            else:
                date_format = 'rfc2822'
            self.date_parser = dates.DATE_PARSERS_BY_NAME[date_format]
        try:
            when = self.date_parser(datestr, self.lineno)
        except ValueError:
            # Report through the parser's warning channel (stderr, with
            # the line number) rather than printing to stdout.
            self.warning("failed to parse datestr '%s'" % (datestr,))
            raise
    else:
        match = _WHO_RE.search(s)
        if accept_just_who and match:
            # HACK around missing time
            # TODO: output a warning here
            when = dates.DATE_PARSERS_BY_NAME['now']('now')
        else:
            self.abort(errors.BadFormat, cmd, section, s)
    name = match.group(1)
    # The space separating the name from '<' is optional. Drop it when
    # present, then decode in every case so the name has the same type
    # whether or not the space was there.
    if name.endswith(" "):
        name = name[:-1]
    name = decode_utf8(name, 'name')
    email = decode_utf8(match.group(2), 'email')
    return (name, email, when[0], when[1])

552

553

def _name_value(self, s):
    """Parse a (name,value) tuple from 'name value-length value'.

    When the declared length exceeds what is on the line, the value
    continues past the newline that ended the line and the remainder is
    read from the input stream.

    :return: (name, None) when s holds only a name, otherwise the name
        and the utf8-decoded value
    """
    parts = s.split(' ', 2)
    name = parts[0]
    if len(parts) == 1:
        return (name, None)
    declared_size = int(parts[1])
    value = parts[2]
    missing = declared_size - len(value)
    if missing >= 1:
        # The first missing character is the newline stripped from the
        # line; anything beyond that is still unread in the stream.
        value += "\n"
        if missing > 1:
            value += self.read_bytes(missing - 1)
    return (name, value.decode('utf8'))

570

571

def _path(self, s):
    """Parse a path.

    A path starting with a double quote must also end with one (otherwise
    abort() is called with errors.BadFormat) and is returned with the
    quotes removed and C-style escapes expanded. Any other path is
    utf8-decoded, or returned unchanged if it is not valid utf8.
    """
    if s.startswith('"'):
        if s[-1] == '"':
            return _unquote_c_string(s[1:-1])
        self.abort(errors.BadFormat, '?', '?', s)
    try:
        decoded = s.decode('utf_8')
    except UnicodeDecodeError:
        # The spec recommends utf8 encoding but that isn't enforced
        return s
    return decoded

583

584

def _path_pair(self, s):
    """Parse two paths separated by a space.

    Either path may be wrapped in double quotes. A second path with a
    quote at only one end, or input that cannot be split in two, is
    reported through abort() with errors.BadFormat.

    :return: a list of the two paths with C-style escapes expanded
    """
    # TODO: handle a space in the first path
    if s.startswith('"'):
        # Quoted first path: it ends at the closing quote followed by
        # the separating space.
        parts = s[1:].split('" ', 1)
    else:
        parts = s.split(' ', 1)
    if len(parts) != 2:
        self.abort(errors.BadFormat, '?', '?', s)
    else:
        second = parts[1]
        opens = second.startswith('"')
        closes = second.endswith('"')
        if opens and closes:
            parts[1] = second[1:-1]
        elif opens or closes:
            self.abort(errors.BadFormat, '?', '?', s)
    return [_unquote_c_string(part) for part in parts]

598

599

def _mode(self, s):
    """Parse a file mode into executable and kind.

    An unrecognised mode is reported through abort() with
    errors.BadFormat.

    :return (is_executable, kind)
    """
    # Note: Output from git-fast-export slightly different to spec
    if s in ('644', '100644', '0100644'):
        return False, commands.FILE_KIND
    if s in ('755', '100755', '0100755'):
        return True, commands.FILE_KIND
    if s in ('040000', '0040000'):
        return False, commands.DIRECTORY_KIND
    if s in ('120000', '0120000'):
        return False, commands.SYMLINK_KIND
    if s in ('160000', '0160000'):
        return False, commands.TREE_REFERENCE_KIND
    self.abort(errors.BadFormat, 'filemodify', 'mode', s)

617

618

619

def _unquote_c_string(s):

620

"""replace C-style escape sequences (\n, \", etc.) with real chars."""

621

# HACK: Python strings are close enough

622

return s.decode('string_escape', 'replace')

Older »