/brz/remove-bazaar : revision 1711.2.19

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/patiencediff.py

Committer: John Arbash Meinel
Date: 2006-05-28 22:00:33 UTC
mto: (1753.1.3 add) (1711.7.2 win32) (1946.2.6 reduce-knit-churn)
mto: This revision was merged to the branch mainline in revision 1733.
Revision ID: john@arbash-meinel.com-20060528220033-c5baec1556ac120a

switch patience-test to use PatienceSequenceMatcher.

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

HACKING

INSTALL

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/README.1st

doc/configuration.txt

doc/plugins.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

generate_docs.py

patience-test.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

files removed:
.bzrignore

COPYING

INSTALL

Makefile

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

commands.py

converter.py

dir.py

errors.py

fetch.py

foreign

foreign/.bzrignore

foreign/TODO

foreign/__init__.py

foreign/test_versionedfiles.py

foreign/upgrade.py

foreign/versionedfiles.py

mapping.py

notes

notes/roundtripping.txt

remote.py

repository.py

revspec.py

server.py

setup.py

shamap.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_dir.py

tests/test_fetch.py

tests/test_ids.py

tests/test_repository.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/patiencediff.py

#!/usr/bin/env python

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

from bisect import bisect

from copy import copy

import difflib

import os

import sys

import time

from bzrlib.trace import mutter

__all__ = ['PatienceSequenceMatcher', 'unified_diff', 'unified_diff_files']

def unique_lcs(a, b):
    """Find the longest common subsequence of lines unique to both inputs.

    :param a: An indexable object (such as string or list of strings)
    :param b: Another indexable object (such as string or list of strings)
    :return: A list of tuples, one for each line which is matched.
            [(line_in_a, line_in_b), ...]
            The pairs are increasing in both positions.

    This only matches lines which are unique on both sides.
    This helps prevent common lines from over influencing match
    results.
    The longest common subsequence is found with the Patience Sorting
    algorithm:
    http://en.wikipedia.org/wiki/Patience_sorting
    """
    # index[line] = position of line in a, unless the line occurs more
    # than once in a, in which case it is set to None
    index = {}
    for apos, line in enumerate(a):
        if line in index:
            index[line] = None
        else:
            index[line] = apos

    # btoa[i] = position in a of line i of b, unless that line does not
    # occur exactly once in both, in which case it stays/becomes None
    btoa = [None] * len(b)
    # index2[line] = position in b where a candidate line was first seen
    index2 = {}
    for bpos, line in enumerate(b):
        apos = index.get(line)
        if apos is None:
            # absent from a, duplicated in a, or already disqualified
            continue
        if line in index2:
            # unset the previous mapping, which we now know to
            # be invalid because the line isn't unique in b
            btoa[index2[line]] = None
            # removing it from index makes later repeats skip quickly
            del index[line]
        else:
            index2[line] = bpos
            btoa[bpos] = apos

    # this is the Patience sorting algorithm
    # see http://en.wikipedia.org/wiki/Patience_sorting
    # stacks[k]  = a-position on top of pile k (kept sorted ascending)
    # lasts[k]   = b-position of the entry on top of pile k
    # backpointers[bpos] = b-position of the predecessor in the chain
    backpointers = [None] * len(b)
    stacks = []
    lasts = []
    k = 0
    for bpos, apos in enumerate(btoa):
        if apos is None:
            continue
        # as an optimization, check if the next line comes at the end,
        # because it usually does
        if stacks and stacks[-1] < apos:
            k = len(stacks)
        # as an optimization, check if the next line comes right after
        # the previous line, because usually it does
        elif stacks and stacks[k] < apos and (k == len(stacks) - 1 or
                                              stacks[k+1] > apos):
            k += 1
        else:
            k = bisect(stacks, apos)
        if k > 0:
            backpointers[bpos] = lasts[k-1]
        if k < len(stacks):
            stacks[k] = apos
            lasts[k] = bpos
        else:
            stacks.append(apos)
            lasts.append(bpos)

    if not lasts:
        return []

    # walk the chain backwards from the top of the last pile
    result = []
    k = lasts[-1]
    while k is not None:
        result.append((btoa[k], k))
        k = backpointers[k]
    result.reverse()
    return result

108

109

110

def recurse_matches(a, b, ahi, bhi, answer, maxrecursion):
    """Collect matching (line_in_a, line_in_b) pairs into answer.

    The region examined starts just after the last pair already in
    answer (or at 0, 0 when answer is empty) and stops before ahi / bhi.

    :param a: A sequence
    :param b: Another sequence
    :param ahi: The maximum length of a to check, typically len(a)
    :param bhi: The maximum length of b to check, typically len(b)
    :param answer: The return array. Will be filled with tuples
                   indicating [(line_in_a, line_in_b)]
    :param maxrecursion: The remaining recursion budget.  Once it drops
                         below zero the call logs a message and returns
                         without adding anything.
    :return: None, the return value is in the parameter answer, which
             should be a list
    """
    if maxrecursion < 0:
        mutter('max recursion depth reached')
        # not expected in normal use; the limit guards against DOS attacks
        return

    matches_before = len(answer)
    if answer:
        prev_a, prev_b = answer[-1]
        alo = prev_a + 1
        blo = prev_b + 1
    else:
        alo = blo = 0
    if alo == ahi or blo == bhi:
        # nothing left to compare on at least one side
        return

    prev_a = prev_b = -1
    for rel_a, rel_b in unique_lcs(a[alo:ahi], b[blo:bhi]):
        # unique_lcs saw slices, so shift back to absolute positions
        apos = rel_a + alo
        bpos = rel_b + blo
        # Runs of adjacent matches are the common case; only recurse
        # when there is a gap before this pair
        adjacent = (apos == prev_a + 1 and bpos == prev_b + 1)
        if not adjacent:
            recurse_matches(a, b, apos, bpos, answer, maxrecursion - 1)
        prev_a, prev_b = apos, bpos
        answer.append((apos, bpos))

    if len(answer) > matches_before:
        # something matched: look between the last match and the end
        recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)
    elif a[alo] == b[blo]:
        # no unique matches, but the region starts with equal lines
        while alo < ahi and blo < bhi and a[alo] == b[blo]:
            answer.append((alo, blo))
            alo += 1
            blo += 1
        recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)
    elif a[ahi - 1] == b[bhi - 1]:
        # no unique matches, but the region ends with equal lines
        tail_a = ahi - 1
        tail_b = bhi - 1
        while (tail_a > alo and tail_b > blo
               and a[tail_a - 1] == b[tail_b - 1]):
            tail_a -= 1
            tail_b -= 1
        # match whatever lies before the common tail first, so that
        # answer stays in increasing order
        recurse_matches(a, b, tail_a, tail_b, answer, maxrecursion - 1)
        while tail_a < ahi:
            answer.append((tail_a, tail_b))
            tail_a += 1
            tail_b += 1

170

171

172

class PatienceSequenceMatcher(difflib.SequenceMatcher):
    """Compare a pair of sequences using longest common subset."""

    def __init__(self, isjunk=None, a='', b=''):
        """Create the matcher; a non-None isjunk is rejected.

        :raises NotImplementedError: if isjunk is not None
        """
        if isjunk is not None:
            raise NotImplementedError('Currently we do not support'
                                      ' isjunk for sequence matching')
        difflib.SequenceMatcher.__init__(self, isjunk, a, b)

    def _check_with_diff(self, alo, ahi, blo, bhi, answer):
        """Run plain difflib over the inside of an unmatched section.

        The slice handed to difflib excludes one position at each end
        (self.a[alo+1:ahi-1], likewise for b), and nothing is done
        unless both ranges span more than two positions.

        :param alo: The last line that actually matched
        :param ahi: The next line that actually matches
        :param blo: Same as alo, only for the 'b' set
        :param bhi: Same as ahi
        :param answer: An array which will have the new ranges appended to it
        :return: None
        """
        # WORKAROUND
        # recurse_matches by design does not match non-unique lines
        # unless they touch matching unique lines, so the regular diff
        # algorithm is rerun when a large enough chunk is left over.
        # recurse_matches already looked at the direct neighbors, so
        # this is only worthwhile when there is room between them.
        if ahi - alo <= 2 or bhi - blo <= 2:
            return
        a_offset = alo + 1
        b_offset = blo + 1
        fallback = difflib.SequenceMatcher(None,
                                           self.a[a_offset:ahi-1],
                                           self.b[b_offset:bhi-1])
        # difflib always adds a final zero-length match; leave it out
        for blk in fallback.get_matching_blocks()[:-1]:
            answer.append((blk[0] + a_offset, blk[1] + b_offset, blk[2]))

    def get_matching_blocks(self):
        """Return list of triples describing matching subsequences.

        Each triple is of the form (i, j, n), and means that
        a[i:i+n] == b[j:j+n].  The triples are monotonically increasing in
        i and in j.

        The last triple is a dummy, (len(a), len(b), 0), and is the only
        triple with n==0.

        >>> s = PatienceSequenceMatcher(None, "abxcd", "abcd")
        >>> s.get_matching_blocks()
        [(0, 0, 2), (3, 2, 2), (5, 4, 0)]
        """
        # jam 20060525 This is the python 2.4.1 difflib get_matching_blocks
        # implementation which uses __helper. 2.4.3 got rid of helper for
        # doing it inline with a queue.
        # We should consider doing the same for recurse_matches
        cached = self.matching_blocks
        if cached is not None:
            return cached
        blocks = []
        self.matching_blocks = blocks
        len_a, len_b = len(self.a), len(self.b)
        self._find_matching_blocks(0, len_a, 0, len_b, blocks)
        blocks.append((len_a, len_b, 0))
        return blocks

    def _find_matching_blocks(self, alo, ahi, blo, bhi, answer):
        """Append (a_start, b_start, length) blocks for the given ranges.

        Line pairs from recurse_matches are merged into runs, and every
        gap (before the first run, between runs, after the last run) is
        offered to _check_with_diff.
        """
        sub_a = self.a[alo:ahi]
        sub_b = self.b[blo:bhi]
        pairs = []
        recurse_matches(sub_a, sub_b, len(sub_a), len(sub_b), pairs, 10)
        # pairs now holds individual "line A matches line B" entries,
        # with positions relative to the slices taken above

        run_a = run_b = None
        run_len = 0
        for i_a, i_b in pairs:
            extends_run = (run_a is not None
                           and i_a == run_a + run_len
                           and i_b == run_b + run_len)
            if extends_run:
                run_len += 1
                continue
            # a new block starts here
            if run_a is None:
                # check from the start of the range up to the first match
                self._check_with_diff(alo-1, i_a+alo, blo-1, i_b+blo,
                                      answer)
            else:
                answer.append((run_a+alo, run_b+blo, run_len))
                self._check_with_diff(run_a+alo+run_len, i_a+alo,
                                      run_b+blo+run_len, i_b+blo,
                                      answer)
            run_a = i_a
            run_b = i_b
            run_len = 1

        if pairs:
            # flush the final run, then look at whatever follows it
            answer.append((run_a+alo, run_b+blo, run_len))
            self._check_with_diff(run_a+alo+run_len, ahi+1,
                                  run_b+blo+run_len, bhi+1,
                                  answer)
        else:
            # Nothing matched, so we need to send the complete text
            self._check_with_diff(alo-1, ahi+1, blo-1, bhi+1, answer)

        # For consistency sake, make sure all matches are only increasing
        if __debug__:
            next_a = -1
            next_b = -1
            for blk_a, blk_b, blk_len in answer:
                assert blk_a >= next_a, 'Non increasing matches for a'
                assert blk_b >= next_b, 'Not increasing matches for b'
                next_a = blk_a + blk_len
                next_b = blk_b + blk_len

292

293

294

# This is a version of unified_diff which only adds a factory parameter

295

# so that you can override the default SequenceMatcher

296

# this has been submitted as a patch to python

297

def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
                 tofiledate='', n=3, lineterm='\n',
                 sequencematcher=None):
    r"""
    Compare two sequences of lines; generate the delta as a unified diff.

    Unified diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with ---, +++, or @@) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to "" so that the output will be uniformly newline free.

    The unidiff format normally has a header for filenames and modification
    times.  Any or all of these may be specified using strings for
    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
    times are normally expressed in the format returned by time.ctime().

    'sequencematcher' is a factory called as sequencematcher(None, a, b);
    when it is None, difflib.SequenceMatcher is used.

    Example:

    >>> for line in unified_diff('one two three four'.split(),
    ...             'zero one tree four'.split(), 'Original', 'Current',
    ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
    ...             lineterm=''):
    ...     print(line)
    --- Original Sat Jan 26 23:30:50 1991
    +++ Current Fri Jun 06 10:20:52 2003
    @@ -1,4 +1,4 @@
    +zero
     one
    -two
    -three
    +tree
     four
    """
    matcher_factory = sequencematcher
    if matcher_factory is None:
        matcher_factory = difflib.SequenceMatcher

    # The file header is emitted lazily, so identical inputs yield nothing
    header_pending = True
    for group in matcher_factory(None, a, b).get_grouped_opcodes(n):
        if header_pending:
            yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
            yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
            header_pending = False

        # The hunk covers from the start of the first opcode to the end
        # of the last one, on each side
        first_op = group[0]
        last_op = group[-1]
        a_start, a_end = first_op[1], last_op[2]
        b_start, b_end = first_op[3], last_op[4]
        yield "@@ -%d,%d +%d,%d @@%s" % (a_start+1, a_end-a_start,
                                        b_start+1, b_end-b_start, lineterm)

        for tag, a_lo, a_hi, b_lo, b_hi in group:
            if tag == 'equal':
                for line in a[a_lo:a_hi]:
                    yield ' ' + line
            else:
                # removed lines always precede added lines within an opcode
                if tag in ('replace', 'delete'):
                    for line in a[a_lo:a_hi]:
                        yield '-' + line
                if tag in ('replace', 'insert'):
                    for line in b[b_lo:b_hi]:
                        yield '+' + line

360

361

362

def unified_diff_files(a, b, sequencematcher=None):
    """Generate the diff for two files.

    :param a: Path of the original file, or '-' to read from stdin
    :param b: Path of the modified file, or '-' to read from stdin
    :param sequencematcher: Passed through to unified_diff
    :return: An empty list when a and b are the same name, otherwise the
        result of unified_diff over the lines of both files, with the
        two names used as fromfile and tofile.

    Both inputs are read completely before returning, and any file
    opened here is closed again; sys.stdin is left open.
    """
    # Should this actually be an error?
    if a == b:
        return []

    contents = []
    for path in (a, b):
        if path == '-':
            contents.append(sys.stdin.readlines())
            continue
        source = open(path, 'rb')
        try:
            contents.append(source.readlines())
        finally:
            # do not leak the handle, even if reading fails
            source.close()
    lines_a, lines_b = contents

    # TODO: Include fromfiledate and tofiledate
    return unified_diff(lines_a, lines_b,
                        fromfile=a, tofile=b,
                        sequencematcher=sequencematcher)

386

387

388

def main(args):
    """Command-line entry point: write a unified diff of two files to stdout.

    :param args: The command-line arguments, without the program name
    :return: -1 when the number of file names is not exactly two,
        otherwise None once the diff has been written
    """
    import optparse
    parser = optparse.OptionParser(usage='%prog [options] file_a file_b'
                                         '\nFiles can be "-" to read from stdin')
    parser.add_option('--patience', dest='matcher', action='store_const',
                      const='patience', default='patience',
                      help='Use the patience difference algorithm')
    parser.add_option('--difflib', dest='matcher', action='store_const',
                      const='difflib', default='patience',
                      help='Use python\'s difflib algorithm')

    (opts, args) = parser.parse_args(args)

    if len(args) != 2:
        # written through sys.stdout like the rest of the output, instead
        # of the Python-2-only print statement; the bytes are the same
        sys.stdout.write('You must supply 2 filenames to diff' + '\n')
        return -1

    # map the value stored by the options above to a matcher class
    algorithms = {'patience': PatienceSequenceMatcher,
                  'difflib': difflib.SequenceMatcher}
    matcher = algorithms[opts.matcher]

    for line in unified_diff_files(args[0], args[1], sequencematcher=matcher):
        sys.stdout.write(line)

408

409

# When run as a script, hand the command-line arguments (minus the
# program name) to main() and use its return value as the exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)

Older »