2
Copyright (C) 2007, 2010 Canonical Ltd
4
This program is free software; you can redistribute it and/or modify
5
it under the terms of the GNU General Public License as published by
6
the Free Software Foundation; either version 2 of the License, or
7
(at your option) any later version.
9
This program is distributed in the hope that it will be useful,
10
but WITHOUT ANY WARRANTY; without even the implied warranty of
11
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
GNU General Public License for more details.
14
You should have received a copy of the GNU General Public License
15
along with this program; if not, write to the Free Software
16
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
Function equate_lines based on bdiff.c from Mercurial.
19
Copyright (C) 2005, 2006 Matt Mackall <mpm@selenic.com>
21
Functions unique_lcs/recurse_matches based on _patiencediff_py.py.
22
Copyright (C) 2005 Bram Cohen, Copyright (C) 2005, 2006 Canonical Ltd
30
#include "python-compat.h"
34
# define inline __inline__
35
#elif defined(_MSC_VER)
36
# define inline __inline
42
#define MIN(a, b) (((a) > (b)) ? (b) : (a))
43
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
57
/* values from this array need to correspont to the order of the enum above */
58
static char *opcode_names[] = {
67
long hash; /* hash code of the string/object */
68
Py_ssize_t next; /* next line from the same equivalence class */
69
Py_ssize_t equiv; /* equivalence class */
75
Py_ssize_t a_head; /* first item in `a` from this equivalence class */
77
Py_ssize_t b_head; /* first item in `b` from this equivalence class */
85
Py_ssize_t last_a_pos;
86
Py_ssize_t last_b_pos;
91
struct matching_line {
92
Py_ssize_t a; /* index of the line in `a` */
93
Py_ssize_t b; /* index of the line in `b` */
97
struct matching_block {
98
Py_ssize_t a; /* index of the first line in `a` */
99
Py_ssize_t b; /* index of the first line in `b` */
100
Py_ssize_t len; /* length of the block */
104
struct matching_blocks {
105
struct matching_block *matches;
125
struct hashtable hashtable;
126
Py_ssize_t *backpointers;
127
} PatienceSequenceMatcher;
130
static inline Py_ssize_t
131
bisect_left(Py_ssize_t *list, Py_ssize_t item, Py_ssize_t lo, Py_ssize_t hi)
134
Py_ssize_t mid = lo / 2 + hi / 2 + (lo % 2 + hi % 2) / 2;
135
if (list[mid] < item)
145
compare_lines(struct line *a, struct line *b)
147
return ((a->hash != b->hash)
148
|| PyObject_RichCompareBool(a->data, b->data, Py_EQ) == 0);
153
find_equivalence_class(struct bucket *hashtable, Py_ssize_t hsize,
154
struct line *lines, struct line *ref_lines,
158
for (j = lines[i].hash & hsize; hashtable[j].b_head != SENTINEL; j = (j + 1) & hsize) {
159
if (!compare_lines(lines + i, ref_lines + hashtable[j].b_head)) {
168
equate_lines(struct hashtable *result,
169
struct line *lines_a, struct line *lines_b,
170
Py_ssize_t asize, Py_ssize_t bsize)
172
Py_ssize_t i, j, hsize;
173
struct bucket *hashtable;
175
/* check for overflow, we need the table to be at least bsize+1 */
176
if (bsize == PY_SSIZE_T_MAX) {
177
PyErr_SetNone(PyExc_OverflowError);
181
/* build a hash table of the next highest power of 2 */
183
while (hsize < bsize + 1)
187
hashtable = (struct bucket *)malloc(sizeof(struct bucket) * hsize);
188
if (hashtable == NULL) {
193
/* initialise the hashtable */
194
for (i = 0; i < hsize; i++) {
195
hashtable[i].a_count = 0;
196
hashtable[i].b_count = 0;
197
hashtable[i].a_head = SENTINEL;
198
hashtable[i].b_head = SENTINEL;
202
/* add lines from lines_b to the hash table chains. iterating
203
backwards so the matching lines are sorted to the linked list
204
by the line number (because we are adding new lines to the
206
for (i = bsize - 1; i >= 0; i--) {
207
/* find the first hashtable entry, which is either empty or contains
208
the same line as lines_b[i] */
209
j = find_equivalence_class(hashtable, hsize, lines_b, lines_b, i);
211
/* set the equivalence class */
212
lines_b[i].equiv = j;
214
/* add to the head of the equivalence class */
215
lines_b[i].next = hashtable[j].b_head;
216
hashtable[j].b_head = i;
217
hashtable[j].b_count++;
220
/* match items from lines_a to their equivalence class in lines_b.
221
again, iterating backwards for the right order of the linked lists */
222
for (i = asize - 1; i >= 0; i--) {
223
/* find the first hash entry, which is either empty or contains
224
the same line as lines_a[i] */
225
j = find_equivalence_class(hashtable, hsize, lines_a, lines_b, i);
227
/* set the equivalence class, even if we are not interested in this
228
line, because the values are not pre-filled */
229
lines_a[i].equiv = j;
231
/* we are not interested in lines which are not also in lines_b */
232
if (hashtable[j].b_head == SENTINEL)
235
/* add to the head of the equivalence class */
236
lines_a[i].next = hashtable[j].a_head;
237
hashtable[j].a_head = i;
238
hashtable[j].a_count++;
241
result->last_a_pos = -1;
242
result->last_b_pos = -1;
243
result->size = hsize + 1;
244
result->table = hashtable;
251
/* Finds longest common subsequence of unique lines in a[alo:ahi] and
253
Parameter backpointers must have allocated memory for at least
254
4 * (bhi - blo) ints. */
256
unique_lcs(struct matching_line *answer,
257
struct hashtable *hashtable, Py_ssize_t *backpointers,
258
struct line *lines_a, struct line *lines_b,
259
Py_ssize_t alo, Py_ssize_t blo, Py_ssize_t ahi, Py_ssize_t bhi)
261
Py_ssize_t i, k, equiv, apos, bpos, norm_apos, norm_bpos, bsize, stacksize;
262
Py_ssize_t *stacks, *lasts, *btoa;
268
h = hashtable->table;
270
/* "unpack" the allocated memory */
271
stacks = backpointers + bsize;
272
lasts = stacks + bsize;
273
btoa = lasts + bsize;
275
/* initialise the backpointers */
276
for (i = 0; i < bsize; i++)
277
backpointers[i] = SENTINEL;
279
if (hashtable->last_a_pos == -1 || hashtable->last_a_pos > alo)
280
for (i = 0; i < hashtable->size; i++)
281
h[i].a_pos = h[i].a_head;
282
hashtable->last_a_pos = alo;
284
if (hashtable->last_b_pos == -1 || hashtable->last_b_pos > blo)
285
for (i = 0; i < hashtable->size; i++)
286
h[i].b_pos = h[i].b_head;
287
hashtable->last_b_pos = blo;
289
for (bpos = blo; bpos < bhi; bpos++) {
290
equiv = lines_b[bpos].equiv;
292
/* no lines in a or b */
293
if (h[equiv].a_count == 0 || h[equiv].b_count == 0)
296
/* find an unique line in lines_a that matches lines_b[bpos]
297
if we find more than one line within the range alo:ahi,
298
jump to the next line from lines_b immediately */
300
/* loop through all lines in the linked list */
301
for (i = h[equiv].a_pos; i != SENTINEL; i = lines_a[i].next) {
302
/* the index is lower than alo, continue to the next line */
307
/* the index is higher than ahi, stop searching */
310
/* if the line is within our range, check if it's a duplicate */
311
if (apos != SENTINEL)
313
/* save index to the line */
316
/* this line has no equivalent in lines_a[alo:ahi] */
317
if (apos == SENTINEL)
320
/* check for duplicates of this line in lines_b[blo:bhi] */
321
/* loop through all lines in the linked list */
322
for (i = h[equiv].b_pos; i != SENTINEL; i = lines_b[i].next) {
323
/* the index is lower than blo, continue to the next line */
328
/* the index is higher than bhi, stop searching */
331
/* if this isn't the line with started with and it's within
332
our range, it's a duplicate */
337
/* use normalised indexes ([0,ahi-alo) instead of [alo,ahi))
338
for the patience sorting algorithm */
339
norm_bpos = bpos - blo;
340
norm_apos = apos - alo;
341
btoa[norm_bpos] = norm_apos;
344
Ok, how does this work...
346
We have a list of matching lines from two lists, a and b. These
347
matches are stored in variable `btoa`. As we are iterating over this
348
table by bpos, the lines from b already form an increasing sequence.
349
We need to "sort" also the lines from a using the patience sorting
350
algorithm, ignoring the lines which would need to be swapped.
352
http://en.wikipedia.org/wiki/Patience_sorting
354
For each pair of lines, we need to place the line from a on either
355
an existing pile that has higher value on the top or create a new
356
pile. Variable `stacks` represents the tops of these piles and in
357
variable `lasts` we store the lines from b, that correspond to the
358
lines from a in `stacks`.
360
Whenever we place a new line on top of a pile, we store a
361
backpointer to the line (b) from top of the previous pile. This means
362
that after the loop, variable `backpointers` will contain an index
363
to the previous matching lines that forms an increasing sequence
364
(over both indexes a and b) with the current matching lines. If
365
either index a or b of the previous matching lines would be higher
366
than indexes of the current one or if the indexes of the current
367
one are 0, it will contain SENTINEL.
369
To construct the LCS, we will just need to follow these backpointers
370
from the top of the last pile and stop when we reach SENTINEL.
373
/* as an optimization, check if the next line comes at the end,
374
because it usually does */
375
if (stacksize && stacks[stacksize - 1] < norm_apos)
377
/* as an optimization, check if the next line comes right after
378
the previous line, because usually it does */
379
else if (stacksize && (stacks[k] < norm_apos) &&
380
(k == stacksize - 1 || stacks[k + 1] > norm_apos))
383
k = bisect_left(stacks, norm_apos, 0, stacksize);
386
backpointers[norm_bpos] = lasts[k - 1];
389
stacks[k] = norm_apos;
390
lasts[k] = norm_bpos;
393
stacks[stacksize] = norm_apos;
394
lasts[stacksize] = norm_bpos;
406
/* backtrace the structures to find the LCS */
408
k = lasts[stacksize - 1];
409
while (k != SENTINEL) {
410
answer[i].a = btoa[k];
419
/* Adds a new line to the list of matching blocks, either extending the
420
current block or adding a new one. */
422
add_matching_line(struct matching_blocks *answer, Py_ssize_t a, Py_ssize_t b)
424
Py_ssize_t last_index = answer->count - 1;
425
if ((last_index >= 0) &&
426
(a == answer->matches[last_index].a +
427
answer->matches[last_index].len) &&
428
(b == answer->matches[last_index].b +
429
answer->matches[last_index].len)) {
430
/* enlarge the last block */
431
answer->matches[last_index].len++;
434
/* create a new block */
436
answer->matches[last_index].a = a;
437
answer->matches[last_index].b = b;
438
answer->matches[last_index].len = 1;
445
recurse_matches(struct matching_blocks *answer, struct hashtable *hashtable,
446
Py_ssize_t *backpointers, struct line *a, struct line *b,
447
Py_ssize_t alo, Py_ssize_t blo, Py_ssize_t ahi, Py_ssize_t bhi,
451
Py_ssize_t new, last_a_pos, last_b_pos, lcs_size, nahi, nbhi, i, apos, bpos;
452
struct matching_line *lcs;
454
if (maxrecursion < 0)
457
if (alo == ahi || blo == bhi)
461
last_a_pos = alo - 1;
462
last_b_pos = blo - 1;
464
lcs = (struct matching_line *)malloc(sizeof(struct matching_line) * (bhi - blo));
468
lcs_size = unique_lcs(lcs, hashtable, backpointers, a, b, alo, blo, ahi, bhi);
470
/* recurse between lines which are unique in each file and match */
471
for (i = lcs_size - 1; i >= 0; i--) {
472
apos = alo + lcs[i].a;
473
bpos = blo + lcs[i].b;
474
if (last_a_pos + 1 != apos || last_b_pos + 1 != bpos) {
475
res = recurse_matches(answer, hashtable,
477
last_a_pos + 1, last_b_pos + 1,
478
apos, bpos, maxrecursion - 1);
484
add_matching_line(answer, apos, bpos);
491
/* find matches between the last match and the end */
493
res = recurse_matches(answer, hashtable,
495
last_a_pos + 1, last_b_pos + 1,
496
ahi, bhi, maxrecursion - 1);
502
/* find matching lines at the very beginning */
503
else if (a[alo].equiv == b[blo].equiv) {
504
while (alo < ahi && blo < bhi && a[alo].equiv == b[blo].equiv)
505
add_matching_line(answer, alo++, blo++);
506
res = recurse_matches(answer, hashtable,
508
alo, blo, ahi, bhi, maxrecursion - 1);
513
/* find matching lines at the very end */
514
else if (a[ahi - 1].equiv == b[bhi - 1].equiv) {
517
while (nahi > alo && nbhi > blo && a[nahi - 1].equiv == b[nbhi - 1].equiv) {
521
res = recurse_matches(answer, hashtable,
523
last_a_pos + 1, last_b_pos + 1,
524
nahi, nbhi, maxrecursion - 1);
527
for (i = 0; i < ahi - nahi; i++)
528
add_matching_line(answer, nahi + i, nbhi + i);
540
delete_lines(struct line *lines, Py_ssize_t size)
542
struct line *line = lines;
544
Py_XDECREF(line->data);
552
load_lines(PyObject *orig, struct line **lines)
556
PyObject *seq, *item;
558
seq = PySequence_Fast(orig, "sequence expected");
563
size = PySequence_Fast_GET_SIZE(seq);
569
/* Allocate a memory block for line data, initialized to 0 */
570
line = *lines = (struct line *)calloc(size, sizeof(struct line));
577
for (i = 0; i < size; i++) {
578
item = PySequence_Fast_GET_ITEM(seq, i);
581
line->hash = PyObject_Hash(item);
582
if (line->hash == (-1)) {
583
/* Propogate the hash exception */
587
line->next = SENTINEL;
594
/* Error -- cleanup unused object references */
595
delete_lines(*lines, i);
603
py_unique_lcs(PyObject *self, PyObject *args)
605
PyObject *aseq, *bseq, *res, *item;
606
Py_ssize_t asize, bsize, i, nmatches, *backpointers = NULL;
607
struct line *a = NULL, *b = NULL;
608
struct matching_line *matches = NULL;
609
struct hashtable hashtable;
611
if (!PyArg_ParseTuple(args, "OO", &aseq, &bseq))
614
hashtable.table = NULL;
616
asize = load_lines(aseq, &a);
617
bsize = load_lines(bseq, &b);
618
if (asize == -1 || bsize == -1)
621
if (!equate_lines(&hashtable, a, b, asize, bsize))
625
matches = (struct matching_line *)malloc(sizeof(struct matching_line) * bsize);
629
backpointers = (Py_ssize_t *)malloc(sizeof(Py_ssize_t) * bsize * 4);
630
if (backpointers == NULL)
634
nmatches = unique_lcs(matches, &hashtable, backpointers, a, b, 0, 0, asize, bsize);
636
res = PyList_New(nmatches);
637
for (i = 0; i < nmatches; i++) {
638
item = Py_BuildValue("nn", matches[nmatches - i - 1].a, matches[nmatches - i - 1].b);
641
if (PyList_SetItem(res, i, item) != 0)
647
free(hashtable.table);
648
delete_lines(b, bsize);
649
delete_lines(a, asize);
655
free(hashtable.table);
656
delete_lines(b, bsize);
657
delete_lines(a, asize);
663
py_recurse_matches(PyObject *self, PyObject *args)
665
PyObject *aseq, *bseq, *item, *answer;
666
int maxrecursion, res;
667
Py_ssize_t i, j, asize, bsize, alo, blo, ahi, bhi;
668
Py_ssize_t *backpointers = NULL;
669
struct line *a = NULL, *b = NULL;
670
struct hashtable hashtable;
671
struct matching_blocks matches;
673
if (!PyArg_ParseTuple(args, "OOnnnnOi", &aseq, &bseq, &alo, &blo,
674
&ahi, &bhi, &answer, &maxrecursion))
677
hashtable.table = NULL;
678
matches.matches = NULL;
680
asize = load_lines(aseq, &a);
681
bsize = load_lines(bseq, &b);
682
if (asize == -1 || bsize == -1)
685
if (!equate_lines(&hashtable, a, b, asize, bsize))
691
matches.matches = (struct matching_block *)malloc(sizeof(struct matching_block) * bsize);
692
if (matches.matches == NULL)
695
backpointers = (Py_ssize_t *)malloc(sizeof(Py_ssize_t) * bsize * 4);
696
if (backpointers == NULL)
699
matches.matches = NULL;
703
res = recurse_matches(&matches, &hashtable, backpointers,
704
a, b, alo, blo, ahi, bhi, maxrecursion);
708
for (i = 0; i < matches.count; i++) {
709
for (j = 0; j < matches.matches[i].len; j++) {
710
item = Py_BuildValue("nn", matches.matches[i].a + j,
711
matches.matches[i].b + j);
714
if (PyList_Append(answer, item) != 0)
720
free(matches.matches);
721
free(hashtable.table);
722
delete_lines(b, bsize);
723
delete_lines(a, asize);
728
free(matches.matches);
729
free(hashtable.table);
730
delete_lines(b, bsize);
731
delete_lines(a, asize);
737
PatienceSequenceMatcher_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
739
PyObject *junk, *a, *b;
740
PatienceSequenceMatcher *self;
742
self = (PatienceSequenceMatcher *)type->tp_alloc(type, 0);
745
if (!PyArg_ParseTuple(args, "OOO", &junk, &a, &b)) {
750
self->asize = load_lines(a, &(self->a));
751
self->bsize = load_lines(b, &(self->b));
753
if (self->asize == -1 || self->bsize == -1) {
758
if (!equate_lines(&self->hashtable, self->a, self->b, self->asize, self->bsize)) {
763
if (self->bsize > 0) {
764
self->backpointers = (Py_ssize_t *)malloc(sizeof(Py_ssize_t) * self->bsize * 4);
765
if (self->backpointers == NULL) {
771
self->backpointers = NULL;
776
return (PyObject *)self;
781
PatienceSequenceMatcher_dealloc(PatienceSequenceMatcher* self)
783
free(self->backpointers);
784
free(self->hashtable.table);
785
delete_lines(self->b, self->bsize);
786
delete_lines(self->a, self->asize);
787
((PyObject *)self)->ob_type->tp_free((PyObject *)self);
791
static char PatienceSequenceMatcher_get_matching_blocks_doc[] =
792
"Return list of triples describing matching subsequences.\n"
794
"Each triple is of the form (i, j, n), and means that\n"
795
"a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in\n"
798
"The last triple is a dummy, (len(a), len(b), 0), and is the only\n"
799
"triple with n==0.\n"
801
">>> s = PatienceSequenceMatcher(None, \"abxcd\", \"abcd\")\n"
802
">>> s.get_matching_blocks()\n"
803
"[(0, 0, 2), (3, 2, 2), (5, 4, 0)]\n";
806
PatienceSequenceMatcher_get_matching_blocks(PatienceSequenceMatcher* self)
808
PyObject *answer, *item;
811
struct matching_blocks matches;
814
if (self->bsize > 0) {
815
matches.matches = (struct matching_block *)
816
malloc(sizeof(struct matching_block) * self->bsize);
817
if (matches.matches == NULL)
818
return PyErr_NoMemory();
820
matches.matches = NULL;
822
res = recurse_matches(&matches, &self->hashtable, self->backpointers,
823
self->a, self->b, 0, 0,
824
self->asize, self->bsize, 10);
826
free(matches.matches);
827
return PyErr_NoMemory();
830
answer = PyList_New(matches.count + 1);
831
if (answer == NULL) {
832
free(matches.matches);
836
for (i = 0; i < matches.count; i++) {
837
item = Py_BuildValue("nnn", matches.matches[i].a,
838
matches.matches[i].b, matches.matches[i].len);
841
if (PyList_SetItem(answer, i, item) != 0)
844
item = Py_BuildValue("nnn", self->asize, self->bsize, 0);
847
if (PyList_SetItem(answer, i, item) != 0)
850
free(matches.matches);
854
free(matches.matches);
860
static char PatienceSequenceMatcher_get_opcodes_doc[] =
861
"Return list of 5-tuples describing how to turn a into b.\n"
863
"Each tuple is of the form (tag, i1, i2, j1, j2). The first tuple\n"
864
"has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the\n"
865
"tuple preceding it, and likewise for j1 == the previous j2.\n"
867
"The tags are strings, with these meanings:\n"
869
"'replace': a[i1:i2] should be replaced by b[j1:j2]\n"
870
"'delete': a[i1:i2] should be deleted.\n"
871
" Note that j1==j2 in this case.\n"
872
"'insert': b[j1:j2] should be inserted at a[i1:i1].\n"
873
" Note that i1==i2 in this case.\n"
874
"'equal': a[i1:i2] == b[j1:j2]\n"
876
">>> a = \"qabxcd\"\n"
877
">>> b = \"abycdf\"\n"
878
">>> s = PatienceSequenceMatcher(None, a, b)\n"
879
">>> for tag, i1, i2, j1, j2 in s.get_opcodes():\n"
880
"... print (\"%7s a[%d:%d] (%s) b[%d:%d] (%s)\" %\n"
881
"... (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2]))\n"
882
" delete a[0:1] (q) b[0:0] ()\n"
883
" equal a[1:3] (ab) b[0:2] (ab)\n"
884
"replace a[3:4] (x) b[2:3] (y)\n"
885
" equal a[4:6] (cd) b[3:5] (cd)\n"
886
" insert a[6:6] () b[5:6] (f)\n";
889
PatienceSequenceMatcher_get_opcodes(PatienceSequenceMatcher* self)
891
PyObject *answer, *item;
892
Py_ssize_t i, j, k, ai, bj;
894
struct matching_blocks matches;
897
matches.matches = (struct matching_block *)malloc(sizeof(struct matching_block) * (self->bsize + 1));
898
if (matches.matches == NULL)
899
return PyErr_NoMemory();
901
res = recurse_matches(&matches, &self->hashtable, self->backpointers,
902
self->a, self->b, 0, 0,
903
self->asize, self->bsize, 10);
905
free(matches.matches);
906
return PyErr_NoMemory();
909
matches.matches[matches.count].a = self->asize;
910
matches.matches[matches.count].b = self->bsize;
911
matches.matches[matches.count].len = 0;
914
answer = PyList_New(0);
915
if (answer == NULL) {
916
free(matches.matches);
921
for (k = 0; k < matches.count; k++) {
922
ai = matches.matches[k].a;
923
bj = matches.matches[k].b;
926
if (i < ai && j < bj)
934
item = Py_BuildValue("snnnn", opcode_names[tag], i, ai, j, bj);
937
if (PyList_Append(answer, item) != 0)
941
i = ai + matches.matches[k].len;
942
j = bj + matches.matches[k].len;
944
if (matches.matches[k].len > 0) {
945
item = Py_BuildValue("snnnn", opcode_names[OP_EQUAL], ai, i, bj, j);
948
if (PyList_Append(answer, item) != 0)
953
free(matches.matches);
957
free(matches.matches);
963
static char PatienceSequenceMatcher_get_grouped_opcodes_doc[] =
964
"Isolate change clusters by eliminating ranges with no changes.\n"
966
"Return a list of groups with up to n lines of context.\n"
967
"Each group is in the same format as returned by get_opcodes().\n"
969
">>> from pprint import pprint\n"
970
">>> a = map(str, range(1,40))\n"
972
">>> b[8:8] = ['i'] # Make an insertion\n"
973
">>> b[20] += 'x' # Make a replacement\n"
974
">>> b[23:28] = [] # Make a deletion\n"
975
">>> b[30] += 'y' # Make another replacement\n"
976
">>> pprint(PatienceSequenceMatcher(None,a,b).get_grouped_opcodes())\n"
977
"[[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],\n"
978
" [('equal', 16, 19, 17, 20),\n"
979
" ('replace', 19, 20, 20, 21),\n"
980
" ('equal', 20, 22, 21, 23),\n"
981
" ('delete', 22, 27, 23, 23),\n"
982
" ('equal', 27, 30, 23, 26)],\n"
983
" [('equal', 31, 34, 27, 30),\n"
984
" ('replace', 34, 35, 30, 31),\n"
985
" ('equal', 35, 38, 31, 34)]]\n";
988
PatienceSequenceMatcher_get_grouped_opcodes(PatienceSequenceMatcher* self,
991
PyObject *answer, *group, *item;
992
Py_ssize_t i, j, k, ai, bj, size, ncodes, tag;
993
Py_ssize_t i1, i2, j1, j2;
995
struct matching_blocks matches;
996
struct opcode *codes;
998
if (!PyArg_ParseTuple(args, "|i", &n))
1002
matches.matches = (struct matching_block *)malloc(sizeof(struct matching_block) * (self->bsize + 1));
1003
if (matches.matches == NULL)
1004
return PyErr_NoMemory();
1006
res = recurse_matches(&matches, &self->hashtable, self->backpointers,
1007
self->a, self->b, 0, 0,
1008
self->asize, self->bsize, 10);
1010
free(matches.matches);
1011
return PyErr_NoMemory();
1014
matches.matches[matches.count].a = self->asize;
1015
matches.matches[matches.count].b = self->bsize;
1016
matches.matches[matches.count].len = 0;
1020
codes = (struct opcode *)malloc(sizeof(struct opcode) * matches.count * 2);
1021
if (codes == NULL) {
1022
free(matches.matches);
1023
return PyErr_NoMemory();
1027
for (k = 0; k < matches.count; k++) {
1028
ai = matches.matches[k].a;
1029
bj = matches.matches[k].b;
1032
if (i < ai && j < bj)
1040
codes[ncodes].tag = tag;
1041
codes[ncodes].i1 = i;
1042
codes[ncodes].i2 = ai;
1043
codes[ncodes].j1 = j;
1044
codes[ncodes].j2 = bj;
1048
i = ai + matches.matches[k].len;
1049
j = bj + matches.matches[k].len;
1051
if (matches.matches[k].len > 0) {
1052
codes[ncodes].tag = OP_EQUAL;
1053
codes[ncodes].i1 = ai;
1054
codes[ncodes].i2 = i;
1055
codes[ncodes].j1 = bj;
1056
codes[ncodes].j2 = j;
1062
codes[ncodes].tag = OP_EQUAL;
1063
codes[ncodes].i1 = 0;
1064
codes[ncodes].i2 = 1;
1065
codes[ncodes].j1 = 0;
1066
codes[ncodes].j2 = 1;
1070
/* fixup leading and trailing groups if they show no changes. */
1071
if (codes[0].tag == OP_EQUAL) {
1072
codes[0].i1 = MAX(codes[0].i1, codes[0].i2 - n);
1073
codes[0].j1 = MAX(codes[0].j1, codes[0].j2 - n);
1075
if (codes[ncodes - 1].tag == OP_EQUAL) {
1076
codes[ncodes - 1].i2 = MIN(codes[ncodes - 1].i2,
1077
codes[ncodes - 1].i1 + n);
1078
codes[ncodes - 1].j2 = MIN(codes[ncodes - 1].j2,
1079
codes[ncodes - 1].j1 + n);
1084
answer = PyList_New(0);
1088
group = PyList_New(0);
1094
for (i = 0; i < ncodes; i++) {
1100
/* end the current group and start a new one whenever
1101
there is a large range with no changes. */
1102
if (tag == OP_EQUAL && i2 - i1 > nn) {
1103
item = Py_BuildValue("snnnn", opcode_names[tag],
1104
i1, MIN(i2, i1 + n), j1, MIN(j2, j1 + n));
1107
if (PyList_Append(group, item) != 0)
1109
if (PyList_Append(answer, group) != 0)
1111
group = PyList_New(0);
1114
i1 = MAX(i1, i2 - n);
1115
j1 = MAX(j1, j2 - n);
1117
item = Py_BuildValue("snnnn", opcode_names[tag], i1, i2, j1 ,j2);
1120
if (PyList_Append(group, item) != 0)
1123
size = PyList_Size(group);
1124
if (size > 0 && !(size == 1 && tag == OP_EQUAL)) {
1125
if (PyList_Append(answer, group) != 0)
1132
free(matches.matches);
1137
free(matches.matches);
1144
static PyMethodDef PatienceSequenceMatcher_methods[] = {
1145
{"get_matching_blocks",
1146
(PyCFunction)PatienceSequenceMatcher_get_matching_blocks,
1148
PatienceSequenceMatcher_get_matching_blocks_doc},
1150
(PyCFunction)PatienceSequenceMatcher_get_opcodes,
1152
PatienceSequenceMatcher_get_opcodes_doc},
1153
{"get_grouped_opcodes",
1154
(PyCFunction)PatienceSequenceMatcher_get_grouped_opcodes,
1156
PatienceSequenceMatcher_get_grouped_opcodes_doc},
1161
static char PatienceSequenceMatcher_doc[] =
1162
"C implementation of PatienceSequenceMatcher";
1165
static PyTypeObject PatienceSequenceMatcherType = {
1166
PyObject_HEAD_INIT(NULL)
1167
.tp_name = "PatienceSequenceMatcher",
1168
.tp_basicsize = sizeof(PatienceSequenceMatcher),
1169
.tp_dealloc = (destructor)PatienceSequenceMatcher_dealloc,
1170
.tp_flags = Py_TPFLAGS_DEFAULT,
1171
.tp_doc = PatienceSequenceMatcher_doc,
1172
.tp_methods = PatienceSequenceMatcher_methods,
1173
.tp_new = PatienceSequenceMatcher_new,
1177
static PyMethodDef cpatiencediff_methods[] = {
1178
{"unique_lcs_c", py_unique_lcs, METH_VARARGS},
1179
{"recurse_matches_c", py_recurse_matches, METH_VARARGS},
1183
PYMOD_INIT_FUNC(_patiencediff_c)
1187
if (PyType_Ready(&PatienceSequenceMatcherType) < 0)
1190
PYMOD_CREATE(m, "_patiencediff_c",
1191
"C implementation of PatienceSequenceMatcher",
1192
cpatiencediff_methods);
1196
Py_INCREF(&PatienceSequenceMatcherType);
1197
PyModule_AddObject(m, "PatienceSequenceMatcher_c",
1198
(PyObject *)&PatienceSequenceMatcherType);
1199
return PYMOD_SUCCESS(m);