/brz/remove-bazaar : revision 0.76.2

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to parser.py

Committer: Ian Clatworthy
Date: 2009-02-17 23:37:24 UTC
mto: (0.64.114 trunk)
mto: This revision was merged to the branch mainline in revision 6631.
Revision ID: ian.clatworthy@canonical.com-20090217233724-y6q12cyoyln6vkh6

code & tests for file copying

files added:
.bzrignore

COPYING.txt

NEWS

README.txt

__init__.py

commands.py

dates.py

doc/notes.txt

errors.py

exporters

exporters/Makefile

exporters/bzr-fast-export

exporters/bzr-fast-export.LICENSE

exporters/bzr-fast-export.README

exporters/hg-fast-export.README

exporters/hg-fast-export.py

exporters/hg-fast-export.sh

exporters/hg2git.py

exporters/svn-archive.c

exporters/svn-fast-export.c

exporters/svn-fast-export.py

helpers.py

idmapfile.py

parser.py

processor.py

processors

processors/__init__.py

processors/generic_processor.py

processors/info_processor.py

processors/query_processor.py

revisionloader.py

setup.py

tests

tests/__init__.py

tests/test_errors.py

tests/test_generic_processor.py

tests/test_parser.py

files removed:
.bzrignore

.rsyncexclude

BRANCH.TODO

HACKING

INSTALL

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_basis_inventory.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

generate_docs.py

notes

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

tutorial.txt

Show diffs side-by-side

added added

removed removed

parser.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Parser of import data into command objects.

In order to reuse existing front-ends, the stream format is a subset of

the one used by git-fast-import (as of the 1.5.4 release of git at least).

The grammar is:

stream ::= cmd*;

cmd ::= new_blob

| new_commit

| new_tag

| reset_branch

| checkpoint

| progress

;

new_blob ::= 'blob' lf

mark?

file_content;

file_content ::= data;

new_commit ::= 'commit' sp ref_str lf

mark?

('author' sp name '<' email '>' when lf)?

'committer' sp name '<' email '>' when lf

commit_msg

('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?

('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)*

file_change*

lf?;

commit_msg ::= data;

file_change ::= file_clr

| file_del

| file_rnm

| file_cpy

| file_obm

| file_inm;

file_clr ::= 'deleteall' lf;

file_del ::= 'D' sp path_str lf;

file_rnm ::= 'R' sp path_str sp path_str lf;

file_cpy ::= 'C' sp path_str sp path_str lf;

file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;

file_inm ::= 'M' sp mode sp 'inline' sp path_str lf

data;

new_tag ::= 'tag' sp tag_str lf

'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf

'tagger' sp name '<' email '>' when lf

tag_msg;

tag_msg ::= data;

reset_branch ::= 'reset' sp ref_str lf

('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?

lf?;

checkpoint ::= 'checkpoint' lf

lf?;

progress ::= 'progress' sp not_lf* lf

lf?;

# note: the first idnum in a stream should be 1 and subsequent

# idnums should not have gaps between values as this will cause

# the stream parser to reserve space for the gapped values. An

# idnum can be updated in the future to a new object by issuing

# a new mark directive with the old idnum.

mark ::= 'mark' sp idnum lf;

data ::= (delimited_data | exact_data)

lf?;

# note: delim may be any string but must not contain lf.

# data_line may contain any data but must not be exactly

# delim.

delimited_data ::= 'data' sp '<<' delim lf

(data_line lf)*

delim lf;

# note: declen indicates the length of binary_data in bytes.

# declen does not include the lf preceding the binary data.

exact_data ::= 'data' sp declen lf

100

binary_data;

101

102

# note: quoted strings are C-style quoting supporting \c for

103

# common escapes of 'c' (e.g. \n, \t, \\, \") or \nnn where nnn

104

# is the signed byte value in octal. Note that the only

105

# characters which must actually be escaped to protect the

106

# stream formatting is: \, " and LF. Otherwise these values

107

# are UTF8.

108

109

ref_str ::= ref;

110

sha1exp_str ::= sha1exp;

111

tag_str ::= tag;

112

path_str ::= path | '"' quoted(path) '"' ;

113

mode ::= '100644' | '644'

114

| '100755' | '755'

115

| '120000'

116

;

117

118

declen ::= # unsigned 32 bit value, ascii base10 notation;

119

bigint ::= # unsigned integer value, ascii base10 notation;

120

binary_data ::= # file content, not interpreted;

121

122

when ::= raw_when | rfc2822_when;

123

raw_when ::= ts sp tz;

124

rfc2822_when ::= # Valid RFC 2822 date and time;

125

126

sp ::= # ASCII space character;

127

lf ::= # ASCII newline (LF) character;

128

129

# note: a colon (':') must precede the numerical value assigned to

130

# an idnum. This is to distinguish it from a ref or tag name as

131

# GIT does not permit ':' in ref or tag strings.

132

133

idnum ::= ':' bigint;

134

path ::= # GIT style file path, e.g. "a/b/c";

135

ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";

136

tag ::= # GIT tag name, e.g. "FIREFOX_1_5";

137

sha1exp ::= # Any valid GIT SHA1 expression;

138

hexsha1 ::= # SHA1 in hexadecimal format;

139

140

# note: name and email are UTF8 strings, however name must not

141

# contain '<' or lf and email must not contain any of the

142

# following: '<', '>', lf.

143

144

name ::= # valid GIT author/committer name;

145

email ::= # valid GIT author/committer email;

146

ts ::= # time since the epoch in seconds, ascii base10 notation;

147

tz ::= # GIT style timezone;

148

149

# note: comments may appear anywhere in the input, except

150

# within a data command. Any form of the data command

151

# always escapes the related input from comment processing.

152

153

# In case it is not clear, the '#' that starts the comment

154

# must be the first character on that line (an lf must have

155

# preceded it).

156

157

comment ::= '#' not_lf* lf;

158

not_lf ::= # Any byte that is not ASCII newline (LF);

159

"""

160

161

162

import re

163

import sys

164

165

import commands

166

import dates

167

import errors

168

169

170

## Stream parsing ##

171

172

class LineBasedParser(object):
    """Base class for parsers that consume a stream one line at a time.

    The parser tracks the current line number so that errors can report
    where they happened, and lets callers push lines back so that they can
    peek at the next line without consuming it.
    """

    def __init__(self, input):
        """A Parser that keeps track of line numbers.

        :param input: the file-like object to read from
        """
        self.input = input
        self.lineno = 0
        # Lines pushed back onto the input stream (most recent last)
        self._buffer = []

    def abort(self, exception, *args):
        """Raise an exception providing line number information.

        :param exception: the exception class to raise; it is called with
          the current line number followed by ``args``
        """
        raise exception(self.lineno, *args)

    def readline(self):
        """Get the next line including the newline or '' on EOF.

        Pushed-back lines are returned before anything new is read from
        the input stream.
        """
        self.lineno += 1
        if self._buffer:
            return self._buffer.pop()
        return self.input.readline()

    def next_line(self):
        """Get the next line without the newline or None on EOF."""
        line = self.readline()
        if not line:
            return None
        # Only strip a newline that is really there: the last line of a
        # stream may legitimately be unterminated and must not lose its
        # final character.
        if line.endswith('\n'):
            return line[:-1]
        return line

    def push_line(self, line):
        """Push line back onto the line buffer.

        :param line: the line with no trailing newline
        """
        self.lineno -= 1
        self._buffer.append(line + "\n")

    def read_bytes(self, count):
        """Read a given number of bytes from the input stream.

        Throws MissingBytes if the bytes are not found.

        Note: This method does not read from the line buffer.

        :param count: the number of bytes to read
        :return: a string
        """
        chunks = []
        found = 0
        while found < count:
            # Reading line by line (capped at what is still needed) lets us
            # keep the line number in step with the data consumed.
            chunk = self.input.readline(count - found)
            if not chunk:
                # EOF before the requested amount was available
                break
            found += len(chunk)
            chunks.append(chunk)
            if chunk.endswith('\n'):
                self.lineno += 1
        if found != count:
            self.abort(errors.MissingBytes, count, found)
        return ''.join(chunks)

    def read_until(self, terminator):
        """Read the input stream until the terminator is found.

        Throws MissingTerminator if the terminator is not found.

        Note: This method does not read from the line buffer.

        :return: the bytes read up to but excluding the terminator.
        """
        # Delimited data is not supported yet.
        raise NotImplementedError(self.read_until)

249

250

251

# Regular expression used for parsing. (Note: The spec states that the name

252

# part should be non-empty but git-fast-export doesn't always do that so

253

# the first bit is \w*, not \w+.) Also git-fast-import code says the

254

# space before the email is optional.

255

_WHO_AND_WHEN_RE = re.compile(r'([^<]*)<(.*)> (.+)')

256

_WHO_RE = re.compile(r'([^<]*)<(.*)>')

257

258

259

class ImportParser(LineBasedParser):
    """Parser that turns a fast-import stream into command objects."""

    def __init__(self, input, verbose=False, output=sys.stdout):
        """A Parser of import commands.

        :param input: the file-like object to read from
        :param verbose: display extra information or not
        :param output: the file-like object to write messages to (YAGNI?)
        """
        LineBasedParser.__init__(self, input)
        self.verbose = verbose
        self.output = output
        # We auto-detect the date format when a date is first encountered
        self.date_parser = None

    def iter_commands(self):
        """Iterator returning ImportCommand objects.

        Blank lines and comment lines are skipped. An unrecognised line
        aborts with InvalidCommand.
        """
        while True:
            line = self.next_line()
            if line is None:
                break
            elif len(line) == 0 or line.startswith('#'):
                continue
            # Search for commands in order of likelihood
            elif line.startswith('commit '):
                yield self._parse_commit(line[len('commit '):])
            elif line.startswith('blob'):
                yield self._parse_blob()
            elif line.startswith('progress '):
                yield commands.ProgressCommand(line[len('progress '):])
            elif line.startswith('reset '):
                yield self._parse_reset(line[len('reset '):])
            elif line.startswith('tag '):
                yield self._parse_tag(line[len('tag '):])
            elif line.startswith('checkpoint'):
                yield commands.CheckpointCommand()
            else:
                self.abort(errors.InvalidCommand, line)

    def iter_file_commands(self):
        """Iterator returning FileCommand objects.

        If an invalid file command is found, the line is silently
        pushed back and iteration ends.
        """
        while True:
            line = self.next_line()
            if line is None:
                break
            elif len(line) == 0 or line.startswith('#'):
                continue
            # Search for file commands in order of likelihood
            elif line.startswith('M '):
                yield self._parse_file_modify(line[2:])
            elif line.startswith('D '):
                path = self._path(line[2:])
                yield commands.FileDeleteCommand(path)
            elif line.startswith('R '):
                old, new = self._path_pair(line[2:])
                yield commands.FileRenameCommand(old, new)
            elif line.startswith('C '):
                src, dest = self._path_pair(line[2:])
                yield commands.FileCopyCommand(src, dest)
            elif line.startswith('deleteall'):
                yield commands.FileDeleteAllCommand()
            else:
                # Not a file command: leave it for the command iterator
                self.push_line(line)
                break

    def _parse_blob(self):
        """Parse a blob command."""
        lineno = self.lineno
        mark = self._get_mark_if_any()
        data = self._get_data('blob')
        return commands.BlobCommand(mark, data, lineno)

    def _parse_commit(self, ref):
        """Parse a commit command.

        :param ref: the ref given on the 'commit' line
        """
        lineno = self.lineno
        mark = self._get_mark_if_any()
        author = self._get_user_info('commit', 'author', False)
        committer = self._get_user_info('commit', 'committer')
        message = self._get_data('commit', 'message')
        try:
            message = message.decode('utf_8')
        except UnicodeDecodeError:
            # TODO: output a warning here about a broken front-end
            pass
        from_ = self._get_from()
        merges = []
        while True:
            merge = self._get_merge()
            if merge is None:
                break
            # while the spec suggests it's illegal, git-fast-export
            # outputs multiple merges on the one line, e.g.
            # merge :x :y :z
            merges.extend(merge.split(" "))
        # The file command iterator is handed over uncalled so that the
        # consumer decides when the file commands are read.
        return commands.CommitCommand(ref, mark, author, committer, message,
            from_, merges, self.iter_file_commands, lineno)

    def _parse_file_modify(self, info):
        """Parse a filemodify command within a commit.

        :param info: a string in the format "mode dataref path"
          (where dataref might be the hard-coded literal 'inline').
        """
        params = info.split(' ', 2)
        path = self._path(params[2])
        is_executable, is_symlink = self._mode(params[0])
        if is_symlink:
            kind = commands.SYMLINK_KIND
        else:
            kind = commands.FILE_KIND
        if params[1] == 'inline':
            # The content follows immediately as a data section
            dataref = None
            data = self._get_data('filemodify')
        else:
            dataref = params[1]
            data = None
        return commands.FileModifyCommand(path, kind, is_executable, dataref,
            data)

    def _parse_reset(self, ref):
        """Parse a reset command."""
        from_ = self._get_from()
        return commands.ResetCommand(ref, from_)

    def _parse_tag(self, name):
        """Parse a tag command."""
        from_ = self._get_from('tag')
        tagger = self._get_user_info('tag', 'tagger', accept_just_who=True)
        message = self._get_data('tag', 'message')
        try:
            message = message.decode('utf_8')
        except UnicodeDecodeError:
            # Same leniency as for commit messages: a broken front-end
            # should not stop the import.
            pass
        return commands.TagCommand(name, from_, tagger, message)

    def _get_mark_if_any(self):
        """Parse a mark section.

        :return: the mark id (without the leading ':') or None if the next
          line is not a mark section
        """
        line = self.next_line()
        if line is None:
            # EOF: nothing to parse and nothing to push back
            return None
        if line.startswith('mark :'):
            return line[len('mark :'):]
        self.push_line(line)
        return None

    def _get_from(self, required_for=None):
        """Parse a from section.

        :param required_for: if set, the name of the command that requires
          this section; MissingSection is raised when it is absent
        :return: the text after 'from ' or None if there is no from section
        """
        line = self.next_line()
        if line is not None and line.startswith('from '):
            return line[len('from '):]
        if required_for:
            self.abort(errors.MissingSection, required_for, 'from')
        if line is not None:
            self.push_line(line)
        return None

    def _get_merge(self):
        """Parse a merge section.

        :return: the text after 'merge ' or None if there is no merge section
        """
        line = self.next_line()
        if line is None:
            # EOF: nothing to parse and nothing to push back
            return None
        if line.startswith('merge '):
            return line[len('merge '):]
        self.push_line(line)
        return None

    def _get_user_info(self, cmd, section, required=True,
        accept_just_who=False):
        """Parse a user section.

        :param cmd: the name of the command being parsed (for errors)
        :param section: the section keyword, e.g. 'author' or 'committer'
        :param required: if True, MissingSection is raised when absent
        :param accept_just_who: if True, a missing date is tolerated
        :return: the tuple produced by _who_when() or None if the section
          is absent and not required
        """
        line = self.next_line()
        prefix = section + ' '
        if line is not None and line.startswith(prefix):
            return self._who_when(line[len(prefix):], cmd, section,
                accept_just_who=accept_just_who)
        if required:
            self.abort(errors.MissingSection, cmd, section)
        if line is not None:
            self.push_line(line)
        return None

    def _get_data(self, required_for, section='data'):
        """Parse a data section.

        :param required_for: the name of the command needing the data
        :param section: the section name to report if the data is missing
        :return: the data read
        """
        line = self.next_line()
        if line is None or not line.startswith('data '):
            self.abort(errors.MissingSection, required_for, section)
        rest = line[len('data '):]
        if rest.startswith('<<'):
            return self.read_until(rest[2:])
        size = int(rest)
        read_bytes = self.read_bytes(size)
        # optional LF after data.
        trailer = self.input.readline()
        if trailer:
            self.lineno += 1
            if trailer != "\n":
                # Not the optional LF after all: hand the line back.
                # push_line() re-adds the newline, so strip it only if
                # the line actually had one.
                self.push_line(trailer.rstrip("\n"))
        return read_bytes

    def _who_when(self, s, cmd, section, accept_just_who=False):
        """Parse who and when information from a string.

        :param s: the text following the section keyword
        :param cmd: the name of the command being parsed (for errors)
        :param section: the section being parsed (for errors)
        :param accept_just_who: if True, tolerate a missing date by
          substituting the current time
        :return: a tuple of (name,email,timestamp,timezone). name may be
            the empty string if only an email address was given.
        """
        match = _WHO_AND_WHEN_RE.search(s)
        if match:
            datestr = match.group(3)
            if self.date_parser is None:
                # auto-detect the date format
                if len(datestr.split(' ')) == 2:
                    date_format = 'raw'
                elif datestr == 'now':
                    date_format = 'now'
                else:
                    date_format = 'rfc2822'
                self.date_parser = dates.DATE_PARSERS_BY_NAME[date_format]
            when = self.date_parser(datestr)
        else:
            match = _WHO_RE.search(s)
            if accept_just_who and match:
                # HACK around missing time
                # TODO: output a warning here
                when = dates.DATE_PARSERS_BY_NAME['now']('now')
            else:
                self.abort(errors.BadFormat, cmd, section, s)
        name = match.group(1)
        if len(name) > 0:
            if name[-1] == " ":
                try:
                    name = name[:-1].decode('utf_8')
                except UnicodeDecodeError:
                    # The spec says names are *typically* utf8 encoded
                    # but that isn't enforced by git-fast-export (at least)
                    name = name[:-1]
        return (name, match.group(2), when[0], when[1])

    def _path(self, s):
        """Parse a path.

        :param s: the path as it appears in the stream, possibly quoted
        :return: the unquoted path
        """
        if s.startswith('"'):
            if len(s) < 2 or not s.endswith('"'):
                # The command and section are not known at this level
                self.abort(errors.BadFormat, '?', '?', s)
            else:
                return _unquote_c_string(s[1:-1])
        try:
            return s.decode('utf_8')
        except UnicodeDecodeError:
            # The spec recommends utf8 encoding but that isn't enforced
            return s

    def _path_pair(self, s):
        """Parse two paths separated by a space."""
        # TODO: handle a space in the first path
        parts = s.split(' ', 1)
        return map(_unquote_c_string, parts)

    def _mode(self, s):
        """Parse a file mode into executable and symlink flags.

        :return (is_executable, is_symlink)
        """
        # Note: Output from git-fast-export slightly different to spec
        if s in ['644', '100644', '0100644']:
            return False, False
        elif s in ['755', '100755', '0100755']:
            return True, False
        elif s in ['120000', '0120000']:
            return False, True
        else:
            self.abort(errors.BadFormat, 'filemodify', 'mode', s)

528

529

530

def _unquote_c_string(s):

531

"""replace C-style escape sequences (\n, \", etc.) with real chars."""

532

# HACK: Python strings are close enough

533

return s.decode('string_escape', 'replace')

Older »