/brz/remove-bazaar : revision 1711.2.11

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/patiencediff.py

Committer: John Arbash Meinel
Date: 2006-05-25 14:25:24 UTC
mto: (1753.1.3 add) (1711.7.2 win32) (1946.2.6 reduce-knit-churn)
mto: This revision was merged to the branch mainline in revision 1733.
Revision ID: john@arbash-meinel.com-20060525142524-12593e5c91c5c6e7

Rename patiencediff.SequenceMatcher => PatienceSequenceMatcher and knit.SequenceMatcher => KnitSequenceMatcher

files added:
.bzrignore

.rsyncexclude

BRANCH.TODO

HACKING

INSTALL

Makefile

NEWS

NEWS.developers

README

TODO

build-api

bzrlib

bzrlib/__init__.py

bzrlib/add.py

bzrlib/annotate.py

bzrlib/atomicfile.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_status.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/errors.py

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/identitymap.py

bzrlib/info.py

bzrlib/inter.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/lsprof.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/patiencediff.py

bzrlib/plugin.py

bzrlib/plugins

bzrlib/plugins/__init__.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/revisionspec.py

bzrlib/rio.py

bzrlib/shellcomplete.py

bzrlib/sign_my_commits.py

bzrlib/status.py

bzrlib/store

bzrlib/store/__init__.py

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/store/versioned/__init__.py

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/TestUtil.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_parent.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_hashcache.py

bzrlib/tests/test_http.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge3.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revision.py

bzrlib/tests/test_revisionnamespaces.py

bzrlib/tests/test_revprops.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_add.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_store.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_weave.py

bzrlib/tests/test_whitebox.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/test_xml.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textinv.py

bzrlib/textmerge.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/upgrade.py

bzrlib/util

bzrlib/util/__init__.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/util/configobj/validate.py

bzrlib/util/effbot

bzrlib/util/effbot/__init__.py

bzrlib/util/effbot/org

bzrlib/util/effbot/org/__init__.py

bzrlib/util/effbot/org/gzip_consumer.py

bzrlib/util/effbot/org/http_client.py

bzrlib/util/effbot/org/http_manager.py

bzrlib/util/elementtree

bzrlib/util/elementtree/ElementTree.py

bzrlib/util/elementtree/__init__.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/weave_commands.py

bzrlib/weavefile.py

bzrlib/win32console.py

bzrlib/workingtree.py

bzrlib/xml4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

contrib

contrib/add-bzr-to-baz

contrib/bash

contrib/bash/bzr

contrib/bash/bzr.simple

contrib/create_bzr_rollup.py

contrib/emacs

contrib/emacs/bzr-mode.el

contrib/fortune

contrib/newinventory.py

contrib/pwclient.full

contrib/pwk

contrib/upload-bzr.dev

contrib/zsh

contrib/zsh/_bzr

doc/README.1st

doc/configuration.txt

doc/plugins.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/tutorial.txt

doc/using_aliases.txt

generate_docs.py

patience-test.py

setup.py

tools

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/convertfile.py

tools/convertinv.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_man.py

tools/doc_generate/autodoc_rstx.py

tools/history2revfiles.py

tools/http_client.py

tools/riodemo.py

tools/trace-revisions

tools/weavebench.py

tools/weavemerge.sh

files removed:
.bzrignore

COPYING

HACKING

INSTALL

Makefile

NEWS

README

TODO

__init__.py

branch.py

bzr-receive-pack

bzr-upload-pack

cache.py

commands.py

commit.py

config.py

dir.py

errors.py

fetch.py

help.py

hg.py

info.py

inventory.py

mapping.py

notes

notes/git-serve.txt

notes/mapping.txt

notes/roundtripping.txt

object_store.py

push.py

refs.py

remote.py

repository.py

revspec.py

roundtrip.py

send.py

server.py

setup.py

tests

tests/__init__.py

tests/test_blackbox.py

tests/test_branch.py

tests/test_builder.py

tests/test_cache.py

tests/test_dir.py

tests/test_fetch.py

tests/test_mapping.py

tests/test_object_store.py

tests/test_push.py

tests/test_refs.py

tests/test_remote.py

tests/test_repository.py

tests/test_revspec.py

tests/test_roundtrip.py

tests/test_transportgit.py

transportgit.py

tree.py

versionedfiles.py

workingtree.py

Show diffs side-by-side

added added

removed removed

bzrlib/patiencediff.py

#!/usr/bin/env python

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

from bisect import bisect

from copy import copy

import difflib

import os

import sys

import time

__all__ = ['PatienceSequenceMatcher', 'unified_diff', 'unified_diff_files']

def unique_lcs(a, b):
    """Find the longest common subset for unique lines.

    :param a: An indexable object (such as string or list of strings)
    :param b: Another indexable object (such as string or list of strings)
    :return: A list of tuples, one for each line which is matched.
            [(line_in_a, line_in_b), ...]

    This only matches lines which are unique on both sides.
    This helps prevent common lines from over influencing match
    results.
    The longest common subset uses the Patience Sorting algorithm:
    http://en.wikipedia.org/wiki/Patience_sorting
    """
    # set index[line in a] = position of line in a, unless the line
    # is a duplicate within a, in which case it's set to None
    index = {}
    for i, line in enumerate(a):
        if line in index:
            index[line] = None
        else:
            index[line] = i

    # make btoa[i] = position of line i in a, unless
    # that line doesn't occur exactly once in both,
    # in which case it's set to None
    btoa = [None] * len(b)
    index2 = {}
    for pos, line in enumerate(b):
        a_index = index.get(line)
        if a_index is not None:
            if line in index2:
                # unset the previous mapping, which we now know to
                # be invalid because the line isn't unique
                btoa[index2[line]] = None
                # dropping the entry makes any further occurrence of
                # this line in b fall through index.get() as None
                del index[line]
            else:
                index2[line] = pos
                btoa[pos] = a_index

    # this is the Patience sorting algorithm
    # see http://en.wikipedia.org/wiki/Patience_sorting
    # stacks[k] holds the a-position on top of pile k, lasts[k] the
    # b-position that put it there, and backpointers[bpos] the b-position
    # on top of the previous pile at the time bpos was placed.
    backpointers = [None] * len(b)
    stacks = []
    lasts = []
    k = 0
    for bpos, apos in enumerate(btoa):
        if apos is None:
            continue
        # as an optimization, check if the next line comes at the end,
        # because it usually does
        if stacks and stacks[-1] < apos:
            k = len(stacks)
        # as an optimization, check if the next line comes right after
        # the previous line, because usually it does
        elif stacks and stacks[k] < apos and (k == len(stacks) - 1 or
                                               stacks[k+1] > apos):
            k += 1
        else:
            k = bisect(stacks, apos)
        if k > 0:
            backpointers[bpos] = lasts[k-1]
        if k < len(stacks):
            stacks[k] = apos
            lasts[k] = bpos
        else:
            stacks.append(apos)
            lasts.append(bpos)

    if not lasts:
        return []

    # walk the backpointer chain from the top of the last pile, then
    # reverse so the pairs come out in increasing order
    result = []
    k = lasts[-1]
    while k is not None:
        result.append((btoa[k], k))
        k = backpointers[k]
    result.reverse()
    return result

106

107

108

def recurse_matches(a, b, ahi, bhi, answer, maxrecursion):
    """Find all of the matching text in the lines of a and b.

    :param a: A sequence
    :param b: Another sequence
    :param ahi: The maximum length of a to check, typically len(a)
    :param bhi: The maximum length of b to check, typically len(b)
    :param answer: The return array. Will be filled with tuples
                   of the form (line_in_a, line_in_b). The search starts
                   just after the last pair already in it, or at (0, 0)
                   when it is empty.
    :param maxrecursion: The maximum depth to recurse.
                         Must be a positive integer.
    :return: None, the return value is in the parameter answer, which
             should be a list
    """
    if maxrecursion < 0:
        # this will never happen normally, this check is to prevent DOS attacks
        return
    oldlength = len(answer)
    if oldlength == 0:
        alo, blo = 0, 0
    else:
        alo, blo = answer[-1]
        alo += 1
        blo += 1
    # '>=' rather than '==' so that an answer list whose last pair already
    # lies at or beyond the bounds cannot make us index past them.
    if alo >= ahi or blo >= bhi:
        return
    for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):
        # recurse between lines which are unique in each file and match
        apos += alo
        bpos += blo
        recurse_matches(a, b, apos, bpos, answer, maxrecursion - 1)
        answer.append((apos, bpos))
    if len(answer) > oldlength:
        # find matches between the last match and the end
        recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)
    elif a[alo] == b[blo]:
        # find matching lines at the very beginning
        while alo < ahi and blo < bhi and a[alo] == b[blo]:
            answer.append((alo, blo))
            alo += 1
            blo += 1
        recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)
    elif a[ahi - 1] == b[bhi - 1]:
        # find matching lines at the very end
        nahi = ahi - 1
        nbhi = bhi - 1
        while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]:
            nahi -= 1
            nbhi -= 1
        # match whatever precedes the common tail first, so that answer
        # stays in increasing order when the tail pairs are appended
        recurse_matches(a, b, nahi, nbhi, answer, maxrecursion - 1)
        for i in range(ahi - nahi):
            answer.append((nahi + i, nbhi + i))

162

163

164

class PatienceSequenceMatcher(difflib.SequenceMatcher):
    """Compare a pair of sequences using longest common subset."""

    def __init__(self, isjunk=None, a='', b=''):
        """Create a matcher for sequences a and b.

        :param isjunk: Must be None; junk filtering is not supported.
        :param a: The first sequence
        :param b: The second sequence
        :raises NotImplementedError: if isjunk is not None
        """
        if isjunk is not None:
            raise NotImplementedError('Currently we do not support'
                                      ' isjunk for sequence matching')
        difflib.SequenceMatcher.__init__(self, isjunk, a, b)

    def _check_with_diff(self, alo, ahi, blo, bhi, answer):
        """Use the original diff algorithm on an unmatched section.

        This will check to make sure the range is worth checking,
        before doing any work: nothing happens unless both
        ahi - alo and bhi - blo are greater than 2.

        The section handed to difflib is self.a[alo+1:ahi-1] against
        self.b[blo+1:bhi-1], so the bounds are exclusive on both sides.

        :param alo: Index in 'a' just before the first line to examine
        :param ahi: Index in 'a' one past the slice end + 1; the last
                    line examined is ahi - 2
        :param blo: Same as alo, only for the 'b' set
        :param bhi: Same as ahi, only for the 'b' set
        :param answer: An array which will have the new ranges appended
                       to it, as (pos_in_a, pos_in_b, length) triples
                       expressed in self.a / self.b coordinates
        :return: None
        """
        # WORKAROUND
        # recurse_matches has an implementation design
        # which does not match non-unique lines
        # if they do not touch matching unique lines,
        # so we rerun the regular diff algorithm
        # if we find a large enough chunk.

        # recurse_matches already looked at the direct
        # neighbors, so we only need to run if there is
        # enough space to do so
        if ahi - alo > 2 and bhi - blo > 2:
            a = self.a[alo+1:ahi-1]
            b = self.b[blo+1:bhi-1]
            m = difflib.SequenceMatcher(None, a, b)
            new_blocks = m.get_matching_blocks()
            # difflib always adds a final match
            new_blocks.pop()
            for blk in new_blocks:
                # shift from slice-relative back to absolute positions
                answer.append((blk[0]+alo+1,
                               blk[1]+blo+1,
                               blk[2]))

    def get_matching_blocks(self):
        """Return list of triples describing matching subsequences.

        Each triple is of the form (i, j, n), and means that
        a[i:i+n] == b[j:j+n].  The triples are monotonically increasing in
        i and in j.

        The last triple is a dummy, (len(a), len(b), 0), and is the only
        triple with n==0.

        >>> s = PatienceSequenceMatcher(None, "abxcd", "abcd")
        >>> s.get_matching_blocks()
        [(0, 0, 2), (3, 2, 2), (5, 4, 0)]
        """
        # jam 20060525 This is the python 2.4.1 difflib get_matching_blocks
        # implementation which uses __helper. 2.4.3 got rid of helper for
        # doing it inline with a queue.
        # We should consider doing the same for recurse_matches

        # the result is cached on the instance after the first call
        if self.matching_blocks is not None:
            return self.matching_blocks
        self.matching_blocks = []
        la, lb = len(self.a), len(self.b)
        self.__helper(0, la, 0, lb, self.matching_blocks)
        self.matching_blocks.append( (la, lb, 0) )
        return self.matching_blocks

    def __helper(self, alo, ahi, blo, bhi, answer):
        """Append the match triples for a[alo:ahi] vs b[blo:bhi] to answer.

        recurse_matches produces individual (line_in_a, line_in_b) pairs
        relative to the slices; consecutive pairs are merged here into
        (i, j, n) blocks in absolute coordinates, and each gap between
        blocks is offered to _check_with_diff.

        :return: None
        """
        matches = []
        a = self.a[alo:ahi]
        b = self.b[blo:bhi]
        recurse_matches(a, b, len(a), len(b), matches, 10)
        # Matches now has individual line pairs of
        # line A matches line B, at the given offsets

        start_a = start_b = None
        length = 0
        for i_a, i_b in matches:
            if (start_a is not None
                and (i_a == start_a + length)
                and (i_b == start_b + length)):
                length += 1
            else:
                # New block
                if start_a is None:
                    # We need to check from 0,0 until the current match
                    self._check_with_diff(alo-1, i_a+alo, blo-1, i_b+blo,
                                          answer)
                else:
                    answer.append((start_a+alo, start_b+blo, length))
                    self._check_with_diff(start_a+alo+length, i_a+alo,
                                          start_b+blo+length, i_b+blo,
                                          answer)

                start_a = i_a
                start_b = i_b
                length = 1

        if length != 0:
            # flush the final block, then look at what follows it
            answer.append((start_a+alo, start_b+blo, length))
            self._check_with_diff(start_a+alo+length, ahi+1,
                                  start_b+blo+length, bhi+1,
                                  answer)
        if not matches:
            # Nothing matched, so we need to send the complete text
            self._check_with_diff(alo-1, ahi+1, blo-1, bhi+1, answer)

        # For consistency sake, make sure all matches are only increasing
        if __debug__:
            next_a = -1
            next_b = -1
            for pos_a, pos_b, match_len in answer:
                assert pos_a >= next_a, 'Non increasing matches for a'
                assert pos_b >= next_b, 'Not increasing matches for b'
                next_a = pos_a + match_len
                next_b = pos_b + match_len

284

285

286

# This is a version of unified_diff which only adds a factory parameter
# so that you can override the default SequenceMatcher.
# This has been submitted as a patch to Python.

289

def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
                 tofiledate='', n=3, lineterm='\n',
                 sequencematcher=None):
    r"""
    Compare two sequences of lines; generate the delta as a unified diff.

    Unified diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with ---, +++, or @@) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to "" so that the output will be uniformly newline free.

    The unidiff format normally has a header for filenames and modification
    times.  Any or all of these may be specified using strings for
    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
    times are normally expressed in the format returned by time.ctime().

    'sequencematcher' is a factory called as sequencematcher(None, a, b);
    the object it returns must provide get_grouped_opcodes(n).  When it is
    None, difflib.SequenceMatcher is used.

    Example:

    >>> for line in unified_diff('one two three four'.split(),
    ...             'zero one tree four'.split(), 'Original', 'Current',
    ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
    ...             lineterm=''):
    ...     print(line)
    --- Original Sat Jan 26 23:30:50 1991
    +++ Current Fri Jun 06 10:20:52 2003
    @@ -1,4 +1,4 @@
    +zero
     one
    -two
    -three
    +tree
     four
    """
    if sequencematcher is None:
        sequencematcher = difflib.SequenceMatcher

    # the file header is emitted lazily, so identical inputs yield nothing
    started = False
    for group in sequencematcher(None, a, b).get_grouped_opcodes(n):
        if not started:
            yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
            yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
            started = True
        # a hunk spans from the start of its first opcode to the end of
        # its last one
        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
        yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in a[i1:i2]:
                    yield ' ' + line
                continue
            # 'replace' emits both the removed and the added lines
            if tag in ('replace', 'delete'):
                for line in a[i1:i2]:
                    yield '-' + line
            if tag in ('replace', 'insert'):
                for line in b[j1:j2]:
                    yield '+' + line

352

353

354

def unified_diff_files(a, b, sequencematcher=None):
    """Generate the diff for two files.

    :param a: Path of the first file, or '-' to read it from stdin
    :param b: Path of the second file, or '-' to read it from stdin
    :param sequencematcher: Passed through to unified_diff
    :return: An empty list when a == b, otherwise the iterable returned
             by unified_diff for the two files' lines
    """
    # Should this actually be an error?
    if a == b:
        return []

    # Files we opened ourselves; sys.stdin is never added, so it is
    # never closed here.
    opened = []
    try:
        if a == '-':
            file_a = sys.stdin
        else:
            file_a = open(a, 'rb')
            opened.append(file_a)

        if b == '-':
            file_b = sys.stdin
        else:
            file_b = open(b, 'rb')
            opened.append(file_b)

        # Read everything up front so the handles can be released before
        # the diff is consumed.
        lines_a = file_a.readlines()
        lines_b = file_b.readlines()
    finally:
        for handle in opened:
            handle.close()

    # TODO: Include fromfiledate and tofiledate
    return unified_diff(lines_a, lines_b,
                        fromfile=a, tofile=b,
                        sequencematcher=sequencematcher)

378

379

380

def main(args):
    """Command line entry point: print a unified diff of two files.

    :param args: The command line arguments, without the program name
    :return: -1 when exactly two filenames were not supplied; otherwise
             None, after the diff has been written to sys.stdout
    """
    import optparse
    p = optparse.OptionParser(usage='%prog [options] file_a file_b'
                                    '\nFiles can be "-" to read from stdin')
    p.add_option('--patience', dest='matcher', action='store_const', const='patience',
                 default='patience', help='Use the patience difference algorithm')
    p.add_option('--difflib', dest='matcher', action='store_const', const='difflib',
                 default='patience', help='Use python\'s difflib algorithm')

    (opts, args) = p.parse_args(args)

    if len(args) != 2:
        # a single parenthesised argument prints identically whether
        # print is a statement or a function
        print('You must supply 2 filenames to diff')
        return -1

    # both options store into opts.matcher, so the last one given wins
    algorithms = {'patience': PatienceSequenceMatcher,
                  'difflib': difflib.SequenceMatcher}
    matcher = algorithms[opts.matcher]

    for line in unified_diff_files(args[0], args[1], sequencematcher=matcher):
        sys.stdout.write(line)

400

401

# Run as a script: diff the two files named on the command line and use
# main()'s return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))

Older »