# Copyright (C) 2005, 2006 by Canonical Ltd
# Written by Martin Pool.
# Modified by Johan Rydberg <jrydberg@gnu.org>
# Modified by Robert Collins <robert.collins@canonical.com>
# Modified by Aaron Bentley <aaron.bentley@utoronto.ca>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only
updates.

Knit file layout:
lifeless: the data file is made up of "delta records".  each delta record has
a delta header that contains; (1) a version id, (2) the size of the delta
(in lines), and (3) the digest of the -expanded data- (ie, the delta applied
to the parent).  the delta also ends with an end-marker; simply "end VERSION"

delta can be line or full contents.
... the 8's there are the index number of the annotation.
version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e
59,59,3
8
8         if ie.executable:
8             e.set('executable', 'yes')
130,130,2
8         if elt.get('executable') == 'yes':
8             ie.executable = True
end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad


what's in an index:
09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents
09:33 < jrydberg> lifeless: the parents are currently dictionary compressed
09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)
09:33 < lifeless> right
09:33 < jrydberg> lifeless: the position and size is the range in the data file


so the index sequence is the dictionary compressed sequence number used
in the deltas to provide line annotation

"""

# TODOS:
# 10:16 < lifeless> make partial index writes safe
# 10:16 < lifeless> implement 'knit.check()' like weave.check()
# 10:17 < lifeless> record known ghosts so we can detect when they are filled
#                   in rather than the current 'reweave always' approach.
# move sha1 out of the content so that join is faster at verifying parents
# record content length ?


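# Illustrative sketch (not part of the original module): the docstring above
# describes a delta record as a "version <id> <length-in-lines> <sha1>"
# header, a body of "start,end,count" hunks each followed by annotated lines,
# and an "end <id>" marker.  The helper below only restates that description
# as code; its name and return shape are hypothetical and nothing else in
# this module uses it.
def _example_parse_delta_header(header_line):
    """Split 'version <id> <length> <sha1>' into its three fields (sketch)."""
    marker, version_id, length, digest = header_line.split(' ')
    assert marker == 'version'
    return version_id, int(length), digest

# Example, using the record quoted in the docstring above:
#   _example_parse_delta_header(
#       'version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad'
#       ' 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e')
#   -> ('robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad', 7,
#       'c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e')
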
from copy import copy
from cStringIO import StringIO
import difflib
from itertools import izip, chain
import operator
import os
import sys
import warnings

import bzrlib
from bzrlib import (
    cache_utf8,
    errors,
    )
from bzrlib.errors import FileExists, NoSuchFile, KnitError, \
        InvalidRevisionId, KnitCorrupt, KnitHeaderError, \
        RevisionNotPresent, RevisionAlreadyPresent
from bzrlib.tuned_gzip import GzipFile
from bzrlib.trace import mutter
from bzrlib.osutils import contains_whitespace, contains_linebreaks, \
     sha_strings
from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed
from bzrlib.tsort import topo_sort
import bzrlib.weave
from bzrlib.versionedfile import VersionedFile, InterVersionedFile


# TODO: Split out code specific to this format into an associated object.

# TODO: Can we put in some kind of value to check that the index and data
# files belong together?

# TODO: accommodate binaries, perhaps by storing a byte count

# TODO: function to check whole file

# TODO: atomically append data, then measure backwards from the cursor
# position after writing to work out where it was located.  we may need to
# bypass python file buffering.

DATA_SUFFIX = '.knit'
INDEX_SUFFIX = '.kndx'


class KnitContent(object):
    """Content of a knit version to which deltas can be applied."""

    def __init__(self, lines):
        self._lines = lines

    def annotate_iter(self):
        """Yield tuples of (origin, text) for each content line."""
        for origin, text in self._lines:
            yield origin, text

    def annotate(self):
        """Return a list of (origin, text) tuples."""
        return list(self.annotate_iter())

    def line_delta_iter(self, new_lines):
        """Generate line-based delta from this content to new_lines."""
        new_texts = [text for origin, text in new_lines._lines]
        old_texts = [text for origin, text in self._lines]
        s = KnitSequenceMatcher(None, old_texts, new_texts)
        for op in s.get_opcodes():
            if op[0] == 'equal':
                continue
            #     ofrom   oto   length        data
            yield (op[1], op[2], op[4]-op[3], new_lines._lines[op[3]:op[4]])

    def line_delta(self, new_lines):
        return list(self.line_delta_iter(new_lines))

    def text(self):
        return [text for origin, text in self._lines]

    def copy(self):
        return KnitContent(self._lines[:])

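# Illustrative sketch (not part of the original module): KnitContent holds a
# list of (origin, text) pairs, and line_delta_iter() turns every non-equal
# opcode into a hunk (start, end, new_line_count, new_annotated_lines).  The
# helper below demonstrates the same hunk construction using
# difflib.SequenceMatcher as a stand-in for KnitSequenceMatcher (a matcher
# variant used by this module but defined outside this excerpt).  The
# revision ids and lines are made up, and the function is never called.
def _example_knit_content_delta():
    old = KnitContent([('rev-a', 'one\n'), ('rev-a', 'two\n')])
    new = KnitContent([('rev-b', 'one\n'), ('rev-b', 'two changed\n')])
    s = difflib.SequenceMatcher(None, old.text(), new.text())
    hunks = []
    for tag, i1, i2, j1, j2 in s.get_opcodes():
        if tag == 'equal':
            continue
        # same shape as KnitContent.line_delta_iter(): replace old lines
        # i1:i2 with the annotated new lines j1:j2.
        hunks.append((i1, i2, j2 - j1, new._lines[j1:j2]))
    # hunks == [(1, 2, 1, [('rev-b', 'two changed\n')])]
    return hunks
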

class _KnitFactory(object):
    """Base factory for creating content objects."""

    def make(self, lines, version):
        num_lines = len(lines)
        return KnitContent(zip([version] * num_lines, lines))


class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    annotated = True

    def parse_fulltext(self, content, version):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
        (revid, plaintext)
        """
        decode_utf8 = cache_utf8.decode
        lines = []
        for line in content:
            origin, text = line.split(' ', 1)
            lines.append((decode_utf8(origin), text))
        return KnitContent(lines)

    def parse_line_delta_iter(self, lines):
        for result_item in self.parse_line_delta[lines]:
            yield result_item

    def parse_line_delta(self, lines, version):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        1..count lines:
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])
        """
        decode_utf8 = cache_utf8.decode
        result = []
        lines = iter(lines)
        next = lines.next
        # walk through the lines parsing.
        for header in lines:
            start, end, count = [int(n) for n in header.split(',')]
            contents = []
            remaining = count
            while remaining:
                origin, text = next().split(' ', 1)
                remaining -= 1
                contents.append((decode_utf8(origin), text))
            result.append((start, end, count, contents))
        return result

    def lower_fulltext(self, content):
        """convert a fulltext content record into a serializable form.

        see parse_fulltext which this inverts.
        """
        encode_utf8 = cache_utf8.encode
        return ['%s %s' % (encode_utf8(o), t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """convert a delta into a serializable form.

        See parse_line_delta which this inverts.
        """
        encode_utf8 = cache_utf8.encode
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(encode_utf8(origin) + ' ' + text
                       for origin, text in lines)
        return out

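# Illustrative sketch (not part of the original module): the serialized form
# produced by KnitAnnotateFactory.lower_line_delta() is a 'start,end,count'
# header followed by count lines of 'revid text', which parse_line_delta()
# inverts.  The revision id and hunk below are made up, and the function is
# hypothetical -- nothing else in this module calls it.
def _example_annotated_delta_round_trip():
    factory = KnitAnnotateFactory()
    delta = [(59, 59, 2, [(u'rev-a', 'if ie.executable:\n'),
                          (u'rev-a', "    e.set('executable', 'yes')\n")])]
    stored = factory.lower_line_delta(delta)
    # stored == ['59,59,2\n',
    #            'rev-a if ie.executable:\n',
    #            "rev-a     e.set('executable', 'yes')\n"]
    assert factory.parse_line_delta(stored, u'rev-a') == delta
    return stored
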

class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain Content objects."""

    annotated = False

    def parse_fulltext(self, content, version):
        """This parses an unannotated fulltext.

        Note that this is not a noop - the internal representation
        has (versionid, line) - it's just a constant versionid.
        """
        return self.make(content, version)

    def parse_line_delta_iter(self, lines, version):
        while lines:
            header = lines.pop(0)
            start, end, c = [int(n) for n in header.split(',')]
            yield start, end, c, zip([version] * c, lines[:c])
            del lines[:c]

    def parse_line_delta(self, lines, version):
        return list(self.parse_line_delta_iter(lines, version))

    def lower_fulltext(self, content):
        return content.text()

    def lower_line_delta(self, delta):
        out = []
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend([text for origin, text in lines])
        return out


def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location."""
    k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory)
    k._data._open_file()


class KnitVersionedFile(VersionedFile):
    """Weave-like structure with faster random access.

    A knit stores a number of texts and a summary of the relationships
    between them.  Texts are identified by a string version-id.  Texts
    are normally stored and retrieved as a series of lines, but can
    also be passed as single strings.

    Lines are stored with the trailing newline (if any) included, to
    avoid special cases for files with no final newline.  Lines are
    composed of 8-bit characters, not unicode.  The combination of
    these approaches should mean any 'binary' file can be safely
    stored and retrieved.
    """

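    # Illustrative usage sketch (comments only, not part of the original
    # file).  A knit is normally opened against a transport and then used
    # through the VersionedFile API; the filename and revision ids below are
    # made up:
    #
    #   knit = KnitVersionedFile('example.txt', transport, create=True)
    #   knit.add_lines('rev-1', [], ['hello\n'])
    #   knit.add_lines('rev-2', ['rev-1'], ['hello\n', 'world\n'])
    #   knit.get_lines('rev-2')   # -> ['hello\n', 'world\n']
    #
    # add_lines() and friends live on the VersionedFile base class and call
    # into the _add* methods defined below.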
    def __init__(self, relpath, transport, file_mode=None, access_mode=None,
                 factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
                 create=False):
        """Construct a knit at location specified by relpath.

        :param create: If not True, only open an existing knit.
        """
        if deprecated_passed(basis_knit):
            warnings.warn("KnitVersionedFile.__(): The basis_knit parameter is"
                          " deprecated as of bzr 0.9.",
                          DeprecationWarning, stacklevel=2)
        if access_mode is None:
            access_mode = 'w'
        super(KnitVersionedFile, self).__init__(access_mode)
        assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
        self.transport = transport
        self.filename = relpath
        self.factory = factory or KnitAnnotateFactory()
        self.writable = (access_mode == 'w')
        self.delta = delta

        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode)
        self._data = _KnitData(transport, relpath + DATA_SUFFIX,
            access_mode, create=create and not len(self), file_mode=file_mode)

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__,
                           self.transport.abspath(self.filename))

    def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
        """See VersionedFile._add_delta()."""
        self._check_add(version_id, []) # should we check the lines ?
        self._check_versions_present(parents)
        present_parents = []
        ghosts = []
        parent_texts = {}
        for parent in parents:
            if not self.has_version(parent):
                ghosts.append(parent)
            else:
                present_parents.append(parent)

        if delta_parent is None:
            # reconstitute as full text.
            assert len(delta) == 1 or len(delta) == 0
            if len(delta):
                assert delta[0][0] == 0
                assert delta[0][1] == 0, delta[0][1]
            return super(KnitVersionedFile, self)._add_delta(version_id,
                                                             parents,
                                                             delta_parent,
                                                             sha1,
                                                             noeol,
                                                             delta)

        digest = sha1

        options = []
        if noeol:
            options.append('no-eol')

        if delta_parent is not None:
            # determine the current delta chain length.
            # To speed the extract of texts the delta chain is limited
            # to a fixed number of deltas.  This should minimize both
            # I/O and the time spent applying deltas.
            count = 0
            delta_parents = [delta_parent]
            while count < 25:
                parent = delta_parents[0]
                method = self._index.get_method(parent)
                if method == 'fulltext':
                    break
                delta_parents = self._index.get_parents(parent)
                count = count + 1
            if method == 'line-delta':
                # did not find a fulltext in the delta limit.
                # just do a normal insertion.
                return super(KnitVersionedFile, self)._add_delta(version_id,
                                                                 parents,
                                                                 delta_parent,
                                                                 sha1,
                                                                 noeol,
                                                                 delta)

        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta)

        where, size = self._data.add_record(version_id, digest, store_lines)
        self._index.add_version(version_id, options, where, size, parents)

    def _add_raw_records(self, records, data):
        """Add all the records 'records' with data pre-joined in 'data'.

        :param records: A list of tuples(version_id, options, parents, size).
        :param data: The data for the records. When it is written, the records
                     are adjusted to have pos pointing into data by the sum of
                     the preceding records sizes.
        """
        # write all the data
        pos = self._data.add_raw_record(data)
        offset = 0
        index_entries = []
        for (version_id, options, parents, size) in records:
            index_entries.append((version_id, options, pos+offset,
                                  size, parents))
            if self._data._do_cache:
                self._data._cache[version_id] = data[offset:offset+size]
            offset += size
        self._index.add_versions(index_entries)

    def enable_cache(self):
        """Start caching data for this knit"""
        self._data.enable_cache()

    def clear_cache(self):
        """Clear the data cache only."""
        self._data.clear_cache()

    def copy_to(self, name, transport):
        """See VersionedFile.copy_to()."""
        # copy the current index to a temp index to avoid racing with local
        # writes
        transport.put(name + INDEX_SUFFIX + '.tmp', self.transport.get(self._index._filename),)
        # copy the data file
        f = self._data._open_file()
        try:
            transport.put(name + DATA_SUFFIX, f)
        finally:
            f.close()
        # move the copied index into place
        transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

    def create_empty(self, name, transport, mode=None):
        return KnitVersionedFile(name, transport, factory=self.factory, delta=self.delta, create=True)

    def _fix_parents(self, version, new_parents):
        """Fix the parents list for version.

        This is done by appending a new version to the index
        with identical data except for the parents list.
        The parents list must be a superset of the current
        list.
        """
        current_values = self._index._cache[version]
        assert set(current_values[4]).difference(set(new_parents)) == set()
        self._index.add_version(version,
                                current_values[1],
                                current_values[2],
                                current_values[3],
                                new_parents)

    def get_delta(self, version_id):
        """Get a delta for constructing version from some other version."""
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)

        parents = self.get_parents(version_id)
        if len(parents):
            parent = parents[0]
        else:
            parent = None
        data_pos, data_size = self._index.get_position(version_id)
        data, sha1 = self._data.read_records(((version_id, data_pos, data_size),))[version_id]
        version_idx = self._index.lookup(version_id)
        noeol = 'no-eol' in self._index.get_options(version_id)
        if 'fulltext' == self._index.get_method(version_id):
            new_content = self.factory.parse_fulltext(data, version_idx)
            if parent is not None:
                reference_content = self._get_content(parent)
                old_texts = reference_content.text()
            else:
                old_texts = []
            new_texts = new_content.text()
            delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
            return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)
        else:
            delta = self.factory.parse_line_delta(data, version_idx)
            return parent, sha1, noeol, delta

    def get_graph_with_ghosts(self):
        """See VersionedFile.get_graph_with_ghosts()."""
        graph_items = self._index.get_graph()
        return dict(graph_items)

    def get_sha1(self, version_id):
        """See VersionedFile.get_sha1()."""
        record_map = self._get_record_map([version_id])
        method, content, digest, next = record_map[version_id]
        return digest

    @staticmethod
    def get_suffixes():
        """See VersionedFile.get_suffixes()."""
        return [DATA_SUFFIX, INDEX_SUFFIX]

    def has_ghost(self, version_id):
        """True if there is a ghost reference in the file to version_id."""
        # maybe we have it
        if self.has_version(version_id):
            return False
        # optimisable if needed by memoising the _ghosts set.
        items = self._index.get_graph()
        for node, parents in items:
            for parent in parents:
                if parent not in self._index._cache:
                    if parent == version_id:
                        return True
        return False

    def versions(self):
        """See VersionedFile.versions."""
        return self._index.get_versions()

    def has_version(self, version_id):
        """See VersionedFile.has_version."""
        return self._index.has_version(version_id)

    __contains__ = has_version

    def _merge_annotations(self, content, parents, parent_texts={},
                           delta=None, annotated=None):
        """Merge annotations for content.  This is done by comparing
        the annotations based on changes to the text.
        """
        if annotated:
            delta_seq = None
            for parent_id in parents:
                merge_content = self._get_content(parent_id, parent_texts)
                seq = KnitSequenceMatcher(None, merge_content.text(), content.text())
                if delta_seq is None:
                    # setup a delta seq to reuse.
                    delta_seq = seq
                for i, j, n in seq.get_matching_blocks():
                    if n == 0:
                        continue
                    # this appears to copy (origin, text) pairs across to the new
                    # content for any line that matches the last-checked parent.
                    # FIXME: save the sequence control data for delta compression
                    # against the most relevant parent rather than rediffing.
                    content._lines[j:j+n] = merge_content._lines[i:i+n]
        if delta:
            if not annotated:
                reference_content = self._get_content(parents[0], parent_texts)
                new_texts = content.text()
                old_texts = reference_content.text()
                delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
            return self._make_line_delta(delta_seq, content)

    def _make_line_delta(self, delta_seq, new_content):
        """Generate a line delta from delta_seq and new_content."""
        diff_hunks = []
        for op in delta_seq.get_opcodes():
            if op[0] == 'equal':
                continue
            diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))
        return diff_hunks

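    # Worked example (comments only, added for illustration; the opcode
    # values are made up): if delta_seq.get_opcodes() yields
    #   ('equal', 0, 10, 0, 10), ('replace', 10, 12, 10, 11),
    #   ('equal', 12, 20, 11, 19)
    # then _make_line_delta() skips the 'equal' runs and records the single
    # hunk (10, 12, 1, new_content._lines[10:11]): replace old lines 10-12
    # with that one annotated new line.  This is the same hunk shape the
    # factories above serialize as 'start,end,count' plus the new lines.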
    def _get_components_positions(self, version_ids):
        """Produce a map of position data for the components of versions.

        This data is intended to be used for retrieving the knit records.

        A dict of version_id to (method, data_pos, data_size, next) is
        returned.
        method is the way referenced data should be applied.
        data_pos is the position of the data in the knit.
        data_size is the size of the data in the knit.
        next is the build-parent of the version, or None for fulltexts.
        """
        component_data = {}
        for version_id in version_ids:
            cursor = version_id

            while cursor is not None and cursor not in component_data:
                method = self._index.get_method(cursor)
                if method == 'fulltext':
                    next = None
                else:
                    next = self.get_parents(cursor)[0]
                data_pos, data_size = self._index.get_position(cursor)
                component_data[cursor] = (method, data_pos, data_size, next)
                cursor = next
        return component_data

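    # Illustration (comments only, with made-up version ids and offsets): for
    # a version 'rev-3' stored as a line-delta on 'rev-2', itself a
    # line-delta on the fulltext 'rev-1', _get_components_positions(['rev-3'])
    # walks the build-parents and returns something shaped like:
    #   {'rev-3': ('line-delta', 1024, 80, 'rev-2'),
    #    'rev-2': ('line-delta', 512, 96, 'rev-1'),
    #    'rev-1': ('fulltext', 0, 400, None)}
    # _get_record_map() below reads those byte ranges and swaps the positions
    # for the parsed record contents and digests.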
| 
1596.2.32
by Robert Collins
 Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility.  | 
568  | 
def _get_content(self, version_id, parent_texts={}):  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
569  | 
"""Returns a content object that makes up the specified  | 
570  | 
        version."""
 | 
|
571  | 
if not self.has_version(version_id):  | 
|
572  | 
raise RevisionNotPresent(version_id, self.filename)  | 
|
573  | 
||
| 
1596.2.32
by Robert Collins
 Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility.  | 
574  | 
cached_version = parent_texts.get(version_id, None)  | 
575  | 
if cached_version is not None:  | 
|
576  | 
return cached_version  | 
|
577  | 
||
| 
1756.3.22
by Aaron Bentley
 Tweaks from review  | 
578  | 
text_map, contents_map = self._get_content_maps([version_id])  | 
579  | 
return contents_map[version_id]  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
580  | 
|
581  | 
def _check_versions_present(self, version_ids):  | 
|
582  | 
"""Check that all specified versions are present."""  | 
|
583  | 
version_ids = set(version_ids)  | 
|
584  | 
for r in list(version_ids):  | 
|
585  | 
if self._index.has_version(r):  | 
|
586  | 
version_ids.remove(r)  | 
|
587  | 
if version_ids:  | 
|
588  | 
raise RevisionNotPresent(list(version_ids)[0], self.filename)  | 
|
589  | 
||
| 
1596.2.32
by Robert Collins
 Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility.  | 
590  | 
def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):  | 
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
591  | 
"""See VersionedFile.add_lines_with_ghosts()."""  | 
592  | 
self._check_add(version_id, lines)  | 
|
| 
1596.2.32
by Robert Collins
 Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility.  | 
593  | 
return self._add(version_id, lines[:], parents, self.delta, parent_texts)  | 
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
594  | 
|
| 
1596.2.32
by Robert Collins
 Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility.  | 
595  | 
def _add_lines(self, version_id, parents, lines, parent_texts):  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
596  | 
"""See VersionedFile.add_lines."""  | 
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
597  | 
self._check_add(version_id, lines)  | 
598  | 
self._check_versions_present(parents)  | 
|
| 
1596.2.32
by Robert Collins
 Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility.  | 
599  | 
return self._add(version_id, lines[:], parents, self.delta, parent_texts)  | 
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
600  | 
|
601  | 
def _check_add(self, version_id, lines):  | 
|
602  | 
"""check that version_id and lines are safe to add."""  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
603  | 
assert self.writable, "knit is not opened for write"  | 
604  | 
        ### FIXME escape. RBC 20060228
 | 
|
605  | 
if contains_whitespace(version_id):  | 
|
| 
1668.5.1
by Olaf Conradi
 Fix bug in knits when raising InvalidRevisionId without the required  | 
606  | 
raise InvalidRevisionId(version_id, self.filename)  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
607  | 
if self.has_version(version_id):  | 
608  | 
raise RevisionAlreadyPresent(version_id, self.filename)  | 
|
| 
1666.1.6
by Robert Collins
 Make knit the default format.  | 
609  | 
self._check_lines_not_unicode(lines)  | 
610  | 
self._check_lines_are_lines(lines)  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
611  | 
|
| 
1596.2.32
by Robert Collins
 Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility.  | 
612  | 
def _add(self, version_id, lines, parents, delta, parent_texts):  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
613  | 
"""Add a set of lines on top of version specified by parents.  | 
614  | 
||
615  | 
        If delta is true, compress the text as a line-delta against
 | 
|
616  | 
        the first parent.
 | 
|
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
617  | 
|
618  | 
        Any versions not present will be converted into ghosts.
 | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
619  | 
        """
 | 
| 
1596.2.28
by Robert Collins
 more knit profile based tuning.  | 
620  | 
        #  461    0   6546.0390     43.9100   bzrlib.knit:489(_add)
 | 
621  | 
        # +400    0    889.4890    418.9790   +bzrlib.knit:192(lower_fulltext)
 | 
|
622  | 
        # +461    0   1364.8070    108.8030   +bzrlib.knit:996(add_record)
 | 
|
623  | 
        # +461    0    193.3940     41.5720   +bzrlib.knit:898(add_version)
 | 
|
624  | 
        # +461    0    134.0590     18.3810   +bzrlib.osutils:361(sha_strings)
 | 
|
625  | 
        # +461    0     36.3420     15.4540   +bzrlib.knit:146(make)
 | 
|
626  | 
        # +1383   0      8.0370      8.0370   +<len>
 | 
|
627  | 
        # +61     0     13.5770      7.9190   +bzrlib.knit:199(lower_line_delta)
 | 
|
628  | 
        # +61     0    963.3470      7.8740   +bzrlib.knit:427(_get_content)
 | 
|
629  | 
        # +61     0    973.9950      5.2950   +bzrlib.knit:136(line_delta)
 | 
|
630  | 
        # +61     0   1918.1800      5.2640   +bzrlib.knit:359(_merge_annotations)
 | 
|
631  | 
||
| 
1596.2.10
by Robert Collins
 Reviewer feedback on knit branches.  | 
632  | 
present_parents = []  | 
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
633  | 
ghosts = []  | 
| 
1596.2.32
by Robert Collins
 Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility.  | 
634  | 
if parent_texts is None:  | 
635  | 
parent_texts = {}  | 
|
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
636  | 
for parent in parents:  | 
637  | 
if not self.has_version(parent):  | 
|
638  | 
ghosts.append(parent)  | 
|
| 
1594.2.9
by Robert Collins
 Teach Knit repositories how to handle ghosts without corrupting at all.  | 
639  | 
else:  | 
| 
1596.2.10
by Robert Collins
 Reviewer feedback on knit branches.  | 
640  | 
present_parents.append(parent)  | 
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
641  | 
|
| 
1596.2.10
by Robert Collins
 Reviewer feedback on knit branches.  | 
642  | 
if delta and not len(present_parents):  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
643  | 
delta = False  | 
644  | 
||
645  | 
digest = sha_strings(lines)  | 
|
646  | 
options = []  | 
|
647  | 
if lines:  | 
|
648  | 
if lines[-1][-1] != '\n':  | 
|
649  | 
options.append('no-eol')  | 
|
650  | 
lines[-1] = lines[-1] + '\n'  | 
|
651  | 
||
| 
1596.2.10
by Robert Collins
 Reviewer feedback on knit branches.  | 
652  | 
if len(present_parents) and delta:  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
653  | 
            # To speed the extract of texts the delta chain is limited
 | 
654  | 
            # to a fixed number of deltas.  This should minimize both
 | 
|
655  | 
            # I/O and the time spend applying deltas.
 | 
|
656  | 
count = 0  | 
|
| 
1596.2.10
by Robert Collins
 Reviewer feedback on knit branches.  | 
657  | 
delta_parents = present_parents  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
658  | 
while count < 25:  | 
659  | 
parent = delta_parents[0]  | 
|
660  | 
method = self._index.get_method(parent)  | 
|
661  | 
if method == 'fulltext':  | 
|
662  | 
                    break
 | 
|
663  | 
delta_parents = self._index.get_parents(parent)  | 
|
664  | 
count = count + 1  | 
|
665  | 
if method == 'line-delta':  | 
|
666  | 
delta = False  | 
|
667  | 
||
| 
1596.2.32
by Robert Collins
 Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility.  | 
668  | 
lines = self.factory.make(lines, version_id)  | 
| 
1596.2.34
by Robert Collins
 Optimise knit add to only diff once per parent, not once per parent + once for the delta generation.  | 
669  | 
if delta or (self.factory.annotated and len(present_parents) > 0):  | 
| 
1596.2.32
by Robert Collins
 Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility.  | 
670  | 
            # Merge annotations from parent texts if so is needed.
 | 
| 
1596.2.34
by Robert Collins
 Optimise knit add to only diff once per parent, not once per parent + once for the delta generation.  | 
671  | 
delta_hunks = self._merge_annotations(lines, present_parents, parent_texts,  | 
672  | 
delta, self.factory.annotated)  | 
|
| 
1596.2.32
by Robert Collins
 Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility.  | 
673  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
674  | 
if delta:  | 
675  | 
options.append('line-delta')  | 
|
676  | 
store_lines = self.factory.lower_line_delta(delta_hunks)  | 
|
677  | 
else:  | 
|
678  | 
options.append('fulltext')  | 
|
679  | 
store_lines = self.factory.lower_fulltext(lines)  | 
|
680  | 
||
681  | 
where, size = self._data.add_record(version_id, digest, store_lines)  | 
|
682  | 
self._index.add_version(version_id, options, where, size, parents)  | 
|
| 
1596.2.32
by Robert Collins
 Reduce re-extraction of texts during weave to knit joins by providing a memoisation facility.  | 
683  | 
return lines  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
684  | 
|
| 
1563.2.19
by Robert Collins
 stub out a check for knits.  | 
685  | 
def check(self, progress_bar=None):  | 
686  | 
"""See VersionedFile.check()."""  | 
|
687  | 
||
| 
1594.2.24
by Robert Collins
 Make use of the transaction finalisation warning support to implement in-knit caching.  | 
688  | 
def _clone_text(self, new_version_id, old_version_id, parents):  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
689  | 
"""See VersionedFile.clone_text()."""  | 
| 
1756.2.8
by Aaron Bentley
 Implement get_line_list, cleanups  | 
690  | 
        # FIXME RBC 20060228 make fast by only inserting an index with null 
 | 
691  | 
        # delta.
 | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
692  | 
self.add_lines(new_version_id, parents, self.get_lines(old_version_id))  | 
693  | 
||
694  | 
def get_lines(self, version_id):  | 
|
695  | 
"""See VersionedFile.get_lines()."""  | 
|
| 
1756.2.8
by Aaron Bentley
 Implement get_line_list, cleanups  | 
696  | 
return self.get_line_list([version_id])[0]  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
697  | 
|
| 
1756.3.12
by Aaron Bentley
 Stuff all text-building data in record_map  | 
698  | 
def _get_record_map(self, version_ids):  | 
| 
1756.3.19
by Aaron Bentley
 Documentation and cleanups  | 
699  | 
"""Produce a dictionary of knit records.  | 
700  | 
        
 | 
|
701  | 
        The keys are version_ids, the values are tuples of (method, content,
 | 
|
702  | 
        digest, next).
 | 
|
703  | 
        method is the way the content should be applied.  
 | 
|
704  | 
        content is a KnitContent object.
 | 
|
705  | 
        digest is the SHA1 digest of this version id after all steps are done
 | 
|
706  | 
        next is the build-parent of the version, i.e. the leftmost ancestor.
 | 
|
707  | 
        If the method is fulltext, next will be None.
 | 
|
708  | 
        """
 | 
|
| 
1756.3.12
by Aaron Bentley
 Stuff all text-building data in record_map  | 
709  | 
position_map = self._get_components_positions(version_ids)  | 
| 
1756.3.22
by Aaron Bentley
 Tweaks from review  | 
710  | 
        # c = component_id, m = method, p = position, s = size, n = next
 | 
| 
1756.3.10
by Aaron Bentley
 Optimize selection and retrieval of records  | 
711  | 
records = [(c, p, s) for c, (m, p, s, n) in position_map.iteritems()]  | 
| 
1756.3.12
by Aaron Bentley
 Stuff all text-building data in record_map  | 
712  | 
record_map = {}  | 
| 
1863.1.5
by John Arbash Meinel
 Add a read_records_iter_unsorted, which can return records in any order.  | 
713  | 
for component_id, content, digest in \  | 
| 
1863.1.9
by John Arbash Meinel
 Switching to have 'read_records_iter' return in random order.  | 
714  | 
self._data.read_records_iter(records):  | 
| 
1756.3.12
by Aaron Bentley
 Stuff all text-building data in record_map  | 
715  | 
method, position, size, next = position_map[component_id]  | 
716  | 
record_map[component_id] = method, content, digest, next  | 
|
717  | 
||
| 
1756.3.10
by Aaron Bentley
 Optimize selection and retrieval of records  | 
718  | 
return record_map  | 
| 
1756.2.5
by Aaron Bentley
 Reduced read_records calls to 1  | 
719  | 
|
| 
1756.2.7
by Aaron Bentley
 Implement get_text in terms of get_texts  | 
720  | 
def get_text(self, version_id):  | 
721  | 
"""See VersionedFile.get_text"""  | 
|
722  | 
return self.get_texts([version_id])[0]  | 
|
723  | 
||
| 
1756.2.1
by Aaron Bentley
 Implement get_texts  | 
724  | 
def get_texts(self, version_ids):  | 
| 
1756.2.8
by Aaron Bentley
 Implement get_line_list, cleanups  | 
725  | 
return [''.join(l) for l in self.get_line_list(version_ids)]  | 
726  | 
||
727  | 
def get_line_list(self, version_ids):  | 
|
| 
1756.2.1
by Aaron Bentley
 Implement get_texts  | 
728  | 
"""Return the texts of listed versions as a list of strings."""  | 
| 
1756.3.13
by Aaron Bentley
 Refactor get_line_list into _get_content  | 
729  | 
text_map, content_map = self._get_content_maps(version_ids)  | 
730  | 
return [text_map[v] for v in version_ids]  | 
|
731  | 
||
732  | 
def _get_content_maps(self, version_ids):  | 
|
| 
1756.3.19
by Aaron Bentley
 Documentation and cleanups  | 
733  | 
"""Produce maps of text and KnitContents  | 
734  | 
        
 | 
|
735  | 
        :return: (text_map, content_map) where text_map contains the texts for
 | 
|
736  | 
        the requested versions and content_map contains the KnitContents.
 | 
|
| 
1756.3.22
by Aaron Bentley
 Tweaks from review  | 
737  | 
        Both dicts take version_ids as their keys.
 | 
| 
1756.3.19
by Aaron Bentley
 Documentation and cleanups  | 
738  | 
        """
 | 
| 
1756.3.10
by Aaron Bentley
 Optimize selection and retrieval of records  | 
739  | 
for version_id in version_ids:  | 
| 
1756.2.1
by Aaron Bentley
 Implement get_texts  | 
740  | 
if not self.has_version(version_id):  | 
741  | 
raise RevisionNotPresent(version_id, self.filename)  | 
|
| 
1756.3.12
by Aaron Bentley
 Stuff all text-building data in record_map  | 
742  | 
record_map = self._get_record_map(version_ids)  | 
| 
1756.2.5
by Aaron Bentley
 Reduced read_records calls to 1  | 
743  | 
|
| 
1756.2.8
by Aaron Bentley
 Implement get_line_list, cleanups  | 
744  | 
text_map = {}  | 
| 
1756.3.7
by Aaron Bentley
 Avoid re-parsing texts version components  | 
745  | 
content_map = {}  | 
| 
1756.3.14
by Aaron Bentley
 Handle the intermediate and final representations of no-final-eol texts  | 
746  | 
final_content = {}  | 
| 
1756.3.10
by Aaron Bentley
 Optimize selection and retrieval of records  | 
747  | 
for version_id in version_ids:  | 
748  | 
components = []  | 
|
749  | 
cursor = version_id  | 
|
750  | 
while cursor is not None:  | 
|
| 
1756.3.12
by Aaron Bentley
 Stuff all text-building data in record_map  | 
751  | 
method, data, digest, next = record_map[cursor]  | 
| 
1756.3.10
by Aaron Bentley
 Optimize selection and retrieval of records  | 
752  | 
components.append((cursor, method, data, digest))  | 
753  | 
if cursor in content_map:  | 
|
754  | 
                    break
 | 
|
755  | 
cursor = next  | 
|
756  | 
||
| 
1756.2.1
by Aaron Bentley
 Implement get_texts  | 
757  | 
content = None  | 
| 
1756.2.7
by Aaron Bentley
 Implement get_text in terms of get_texts  | 
758  | 
for component_id, method, data, digest in reversed(components):  | 
| 
1756.3.7
by Aaron Bentley
 Avoid re-parsing texts version components  | 
759  | 
if component_id in content_map:  | 
760  | 
content = content_map[component_id]  | 
|
| 
1756.3.8
by Aaron Bentley
 Avoid unused calls, use generators, sets instead of lists  | 
761  | 
else:  | 
762  | 
version_idx = self._index.lookup(component_id)  | 
|
763  | 
if method == 'fulltext':  | 
|
764  | 
assert content is None  | 
|
765  | 
content = self.factory.parse_fulltext(data, version_idx)  | 
|
766  | 
elif method == 'line-delta':  | 
|
767  | 
delta = self.factory.parse_line_delta(data[:],  | 
|
768  | 
version_idx)  | 
|
769  | 
content = content.copy()  | 
|
770  | 
content._lines = self._apply_delta(content._lines,  | 
|
771  | 
delta)  | 
|
| 
1756.3.14
by Aaron Bentley
 Handle the intermediate and final representations of no-final-eol texts  | 
772  | 
content_map[component_id] = content  | 
| 
1756.2.1
by Aaron Bentley
 Implement get_texts  | 
773  | 
|
774  | 
if 'no-eol' in self._index.get_options(version_id):  | 
|
| 
1756.3.14
by Aaron Bentley
 Handle the intermediate and final representations of no-final-eol texts  | 
775  | 
content = content.copy()  | 
| 
1756.2.1
by Aaron Bentley
 Implement get_texts  | 
776  | 
line = content._lines[-1][1].rstrip('\n')  | 
777  | 
content._lines[-1] = (content._lines[-1][0], line)  | 
|
| 
1756.3.14
by Aaron Bentley
 Handle the intermediate and final representations of no-final-eol texts  | 
778  | 
final_content[version_id] = content  | 
| 
1756.2.1
by Aaron Bentley
 Implement get_texts  | 
779  | 
|
780  | 
            # digest here is the digest from the last applied component.
 | 
|
| 
1756.3.6
by Aaron Bentley
 More multi-text extraction  | 
781  | 
text = content.text()  | 
782  | 
if sha_strings(text) != digest:  | 
|
| 
1756.2.8
by Aaron Bentley
 Implement get_line_list, cleanups  | 
783  | 
raise KnitCorrupt(self.filename,  | 
784  | 
'sha-1 does not match %s' % version_id)  | 
|
| 
1756.2.1
by Aaron Bentley
 Implement get_texts  | 
785  | 
|
| 
1756.3.6
by Aaron Bentley
 More multi-text extraction  | 
786  | 
text_map[version_id] = text  | 
| 
1756.3.14
by Aaron Bentley
 Handle the intermediate and final representations of no-final-eol texts  | 
787  | 
return text_map, final_content  | 
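The loop above walks each requested version back through its `next` pointers until it reaches a fulltext (or an already-built KnitContent), then replays the components oldest-first. A simplified, self-contained sketch of that walk, using a toy record map and a reduced (start, end, new_lines) delta form rather than real parsed knit deltas:

# hedged sketch, not bzrlib API: toy record_map and reduced delta format
record_map = {
    'rev-2': ('line-delta', [(1, 2, ['b2\n'])], 'rev-1'),
    'rev-1': ('fulltext', ['a\n', 'b\n'], None),
}

def build_lines(version_id):
    # walk back to the nearest fulltext, then replay components oldest-first
    components = []
    cursor = version_id
    while cursor is not None:
        method, data, next_version = record_map[cursor]
        components.append((method, data))
        cursor = next_version
    lines = None
    for method, data in reversed(components):
        if method == 'fulltext':
            lines = list(data)
        else:
            for start, end, new_lines in data:
                lines[start:end] = new_lines
    return lines

assert build_lines('rev-2') == ['a\n', 'b2\n']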
| 
1756.2.1
by Aaron Bentley
 Implement get_texts  | 
788  | 
|
| 
1594.2.6
by Robert Collins
 Introduce a api specifically for looking at lines in some versions of the inventory, for fileid_involved.  | 
789  | 
def iter_lines_added_or_present_in_versions(self, version_ids=None):  | 
790  | 
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""  | 
|
791  | 
if version_ids is None:  | 
|
792  | 
version_ids = self.versions()  | 
|
| 
1759.2.2
by Jelmer Vernooij
 Revert some of my spelling fixes and fix some typos after review by Aaron.  | 
793  | 
        # we don't care about inclusions, the caller cares.
 | 
| 
1594.2.6
by Robert Collins
 Introduce a api specifically for looking at lines in some versions of the inventory, for fileid_involved.  | 
794  | 
        # but we need to set up a list of records to visit.
 | 
795  | 
        # we need version_id, position, length
 | 
|
796  | 
version_id_records = []  | 
|
| 
1594.3.1
by Robert Collins
 Merge transaction finalisation and ensure iter_lines_added_or_present in knits does a old-to-new read in the knit.  | 
797  | 
requested_versions = list(version_ids)  | 
798  | 
        # filter for available versions
 | 
|
799  | 
for version_id in requested_versions:  | 
|
| 
1594.2.6
by Robert Collins
 Introduce a api specifically for looking at lines in some versions of the inventory, for fileid_involved.  | 
800  | 
if not self.has_version(version_id):  | 
801  | 
raise RevisionNotPresent(version_id, self.filename)  | 
|
| 
1594.3.1
by Robert Collins
 Merge transaction finalisation and ensure iter_lines_added_or_present in knits does a old-to-new read in the knit.  | 
802  | 
        # get an in-component-order queue:
 | 
803  | 
version_ids = []  | 
|
804  | 
for version_id in self.versions():  | 
|
805  | 
if version_id in requested_versions:  | 
|
806  | 
version_ids.append(version_id)  | 
|
807  | 
data_pos, length = self._index.get_position(version_id)  | 
|
808  | 
version_id_records.append((version_id, data_pos, length))  | 
|
809  | 
||
| 
1594.2.17
by Robert Collins
 Better readv coalescing, now with test, and progress during knit index reading.  | 
810  | 
pb = bzrlib.ui.ui_factory.nested_progress_bar()  | 
811  | 
count = 0  | 
|
812  | 
total = len(version_id_records)  | 
|
813  | 
try:  | 
|
| 
1594.2.19
by Robert Collins
 More coalescing tweaks, and knit feedback.  | 
814  | 
pb.update('Walking content.', count, total)  | 
| 
1594.2.17
by Robert Collins
 Better readv coalescing, now with test, and progress during knit index reading.  | 
815  | 
for version_id, data, sha_value in \  | 
| 
1863.1.9
by John Arbash Meinel
 Switching to have 'read_records_iter' return in random order.  | 
816  | 
self._data.read_records_iter(version_id_records):  | 
| 
1594.2.17
by Robert Collins
 Better readv coalescing, now with test, and progress during knit index reading.  | 
817  | 
pb.update('Walking content.', count, total)  | 
818  | 
method = self._index.get_method(version_id)  | 
|
819  | 
version_idx = self._index.lookup(version_id)  | 
|
820  | 
assert method in ('fulltext', 'line-delta')  | 
|
821  | 
if method == 'fulltext':  | 
|
822  | 
content = self.factory.parse_fulltext(data, version_idx)  | 
|
823  | 
for line in content.text():  | 
|
| 
1594.2.6
by Robert Collins
 Introduce a api specifically for looking at lines in some versions of the inventory, for fileid_involved.  | 
824  | 
yield line  | 
| 
1594.2.17
by Robert Collins
 Better readv coalescing, now with test, and progress during knit index reading.  | 
825  | 
else:  | 
826  | 
delta = self.factory.parse_line_delta(data, version_idx)  | 
|
827  | 
for start, end, count, lines in delta:  | 
|
828  | 
for origin, line in lines:  | 
|
829  | 
yield line  | 
|
830  | 
count += 1  | 
|
| 
1594.2.19
by Robert Collins
 More coalescing tweaks, and knit feedback.  | 
831  | 
pb.update('Walking content.', total, total)  | 
832  | 
pb.finished()  | 
|
| 
1594.2.17
by Robert Collins
 Better readv coalescing, now with test, and progress during knit index reading.  | 
833  | 
except:  | 
834  | 
pb.update('Walking content.', total, total)  | 
|
835  | 
pb.finished()  | 
|
836  | 
            raise
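The selection above deliberately visits the requested versions in the knit's own storage order, so the readv walks the data file old-to-new. A small stand-alone sketch of that filtering step (index_order and positions are stand-ins for self.versions() and self._index.get_position):

# hedged sketch of building the (version_id, pos, length) readv queue
index_order = ['rev-1', 'rev-2', 'rev-3']
positions = {'rev-1': (0, 120), 'rev-2': (120, 80), 'rev-3': (200, 95)}
requested = set(['rev-3', 'rev-1'])
version_id_records = [(v,) + positions[v] for v in index_order if v in requested]
assert version_id_records == [('rev-1', 0, 120), ('rev-3', 200, 95)]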
 | 
|
| 
1594.2.6
by Robert Collins
 Introduce a api specifically for looking at lines in some versions of the inventory, for fileid_involved.  | 
837  | 
|
| 
1563.2.18
by Robert Collins
 get knit repositories really using knits for text storage.  | 
838  | 
def num_versions(self):  | 
839  | 
"""See VersionedFile.num_versions()."""  | 
|
840  | 
return self._index.num_versions()  | 
|
841  | 
||
842  | 
__len__ = num_versions  | 
|
843  | 
||
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
844  | 
def annotate_iter(self, version_id):  | 
845  | 
"""See VersionedFile.annotate_iter."""  | 
|
846  | 
content = self._get_content(version_id)  | 
|
847  | 
for origin, text in content.annotate_iter():  | 
|
| 
1596.2.7
by Robert Collins
 Remove the requirement for reannotation in knit joins.  | 
848  | 
yield origin, text  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
849  | 
|
850  | 
def get_parents(self, version_id):  | 
|
851  | 
"""See VersionedFile.get_parents."""  | 
|
| 
1628.1.2
by Robert Collins
 More knit micro-optimisations.  | 
852  | 
        # perf notes:
 | 
853  | 
        # optimism counts!
 | 
|
854  | 
        # 52554 calls in 1264 872 internal down from 3674
 | 
|
855  | 
try:  | 
|
856  | 
return self._index.get_parents(version_id)  | 
|
857  | 
except KeyError:  | 
|
858  | 
raise RevisionNotPresent(version_id, self.filename)  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
859  | 
|
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
860  | 
def get_parents_with_ghosts(self, version_id):  | 
861  | 
"""See VersionedFile.get_parents."""  | 
|
| 
1628.1.2
by Robert Collins
 More knit micro-optimisations.  | 
862  | 
try:  | 
863  | 
return self._index.get_parents_with_ghosts(version_id)  | 
|
864  | 
except KeyError:  | 
|
865  | 
raise RevisionNotPresent(version_id, self.filename)  | 
|
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
866  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
867  | 
def get_ancestry(self, versions):  | 
868  | 
"""See VersionedFile.get_ancestry."""  | 
|
869  | 
if isinstance(versions, basestring):  | 
|
870  | 
versions = [versions]  | 
|
871  | 
if not versions:  | 
|
872  | 
return []  | 
|
873  | 
self._check_versions_present(versions)  | 
|
874  | 
return self._index.get_ancestry(versions)  | 
|
875  | 
||
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
876  | 
def get_ancestry_with_ghosts(self, versions):  | 
877  | 
"""See VersionedFile.get_ancestry_with_ghosts."""  | 
|
878  | 
if isinstance(versions, basestring):  | 
|
879  | 
versions = [versions]  | 
|
880  | 
if not versions:  | 
|
881  | 
return []  | 
|
882  | 
self._check_versions_present(versions)  | 
|
883  | 
return self._index.get_ancestry_with_ghosts(versions)  | 
|
884  | 
||
| 
1594.2.6
by Robert Collins
 Introduce a api specifically for looking at lines in some versions of the inventory, for fileid_involved.  | 
885  | 
    #@deprecated_method(zero_eight)
 | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
886  | 
def walk(self, version_ids):  | 
887  | 
"""See VersionedFile.walk."""  | 
|
888  | 
        # We take the short path here, and extract all relevant texts
 | 
|
889  | 
        # and put them in a weave and let that do all the work.  Far
 | 
|
890  | 
        # from optimal, but is much simpler.
 | 
|
| 
1563.2.6
by Robert Collins
 Start check tests for knits (pending), and remove dead code.  | 
891  | 
        # FIXME RB 20060228 this really is inefficient!
 | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
892  | 
from bzrlib.weave import Weave  | 
893  | 
||
894  | 
w = Weave(self.filename)  | 
|
895  | 
ancestry = self.get_ancestry(version_ids)  | 
|
896  | 
sorted_graph = topo_sort(self._index.get_graph())  | 
|
897  | 
version_list = [vid for vid in sorted_graph if vid in ancestry]  | 
|
898  | 
||
899  | 
for version_id in version_list:  | 
|
900  | 
lines = self.get_lines(version_id)  | 
|
901  | 
w.add_lines(version_id, self.get_parents(version_id), lines)  | 
|
902  | 
||
903  | 
for lineno, insert_id, dset, line in w.walk(version_ids):  | 
|
904  | 
yield lineno, insert_id, dset, line  | 
|
905  | 
||
| 
1664.2.3
by Aaron Bentley
 Add failing test case  | 
906  | 
def plan_merge(self, ver_a, ver_b):  | 
| 
1664.2.11
by Aaron Bentley
 Clarifications from merge review  | 
907  | 
"""See VersionedFile.plan_merge."""  | 
| 
1664.2.6
by Aaron Bentley
 Got plan-merge passing tests  | 
908  | 
ancestors_b = set(self.get_ancestry(ver_b))  | 
909  | 
def status_a(revision, text):  | 
|
910  | 
if revision in ancestors_b:  | 
|
911  | 
return 'killed-b', text  | 
|
912  | 
else:  | 
|
913  | 
return 'new-a', text  | 
|
914  | 
||
915  | 
ancestors_a = set(self.get_ancestry(ver_a))  | 
|
916  | 
def status_b(revision, text):  | 
|
917  | 
if revision in ancestors_a:  | 
|
918  | 
return 'killed-a', text  | 
|
919  | 
else:  | 
|
920  | 
return 'new-b', text  | 
|
921  | 
||
| 
1664.2.4
by Aaron Bentley
 Identify unchanged lines correctly  | 
922  | 
annotated_a = self.annotate(ver_a)  | 
923  | 
annotated_b = self.annotate(ver_b)  | 
|
| 
1664.2.11
by Aaron Bentley
 Clarifications from merge review  | 
924  | 
plain_a = [t for (a, t) in annotated_a]  | 
925  | 
plain_b = [t for (a, t) in annotated_b]  | 
|
| 
1711.2.11
by John Arbash Meinel
 Rename patiencediff.SequenceMatcher => PatienceSequenceMatcher and knit.SequenceMatcher => KnitSequenceMatcher  | 
926  | 
blocks = KnitSequenceMatcher(None, plain_a, plain_b).get_matching_blocks()  | 
| 
1664.2.4
by Aaron Bentley
 Identify unchanged lines correctly  | 
927  | 
a_cur = 0  | 
928  | 
b_cur = 0  | 
|
929  | 
for ai, bi, l in blocks:  | 
|
| 
1664.2.13
by Aaron Bentley
 Knit plan_merge uses slices instead of xenumerate  | 
930  | 
            # process all mismatched sections
 | 
931  | 
            # (last mismatched section is handled because blocks always
 | 
|
932  | 
            # include a 0-length last block)
 | 
|
933  | 
for revision, text in annotated_a[a_cur:ai]:  | 
|
| 
1664.2.6
by Aaron Bentley
 Got plan-merge passing tests  | 
934  | 
yield status_a(revision, text)  | 
| 
1664.2.13
by Aaron Bentley
 Knit plan_merge uses slices instead of xenumerate  | 
935  | 
for revision, text in annotated_b[b_cur:bi]:  | 
| 
1664.2.6
by Aaron Bentley
 Got plan-merge passing tests  | 
936  | 
yield status_b(revision, text)  | 
| 
1664.2.13
by Aaron Bentley
 Knit plan_merge uses slices instead of xenumerate  | 
937  | 
|
| 
1664.2.11
by Aaron Bentley
 Clarifications from merge review  | 
938  | 
            # and now the matched section
 | 
| 
1664.2.13
by Aaron Bentley
 Knit plan_merge uses slices instead of xenumerate  | 
939  | 
a_cur = ai + l  | 
940  | 
b_cur = bi + l  | 
|
941  | 
for text_a, text_b in zip(plain_a[ai:a_cur], plain_b[bi:b_cur]):  | 
|
| 
1664.2.4
by Aaron Bentley
 Identify unchanged lines correctly  | 
942  | 
assert text_a == text_b  | 
943  | 
yield "unchanged", text_a  | 
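A reduced sketch of the block walk above, substituting difflib.SequenceMatcher for KnitSequenceMatcher and hard-coding the 'new-a'/'new-b' statuses instead of consulting the ancestry sets; it only illustrates how mismatched and matched regions are interleaved into a merge plan:

# hedged sketch of the matching-blocks walk, not the full plan_merge logic
from difflib import SequenceMatcher

plain_a = ['common\n', 'only in a\n', 'tail\n']
plain_b = ['common\n', 'only in b\n', 'tail\n']
plan = []
a_cur = b_cur = 0
for ai, bi, l in SequenceMatcher(None, plain_a, plain_b).get_matching_blocks():
    for text in plain_a[a_cur:ai]:
        plan.append(('new-a', text))      # real code consults ancestors_b
    for text in plain_b[b_cur:bi]:
        plan.append(('new-b', text))      # real code consults ancestors_a
    a_cur, b_cur = ai + l, bi + l
    for text in plain_a[ai:a_cur]:
        plan.append(('unchanged', text))
assert plan == [('unchanged', 'common\n'), ('new-a', 'only in a\n'),
                ('new-b', 'only in b\n'), ('unchanged', 'tail\n')]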
|
944  | 
||
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
945  | 
|
946  | 
class _KnitComponentFile(object):  | 
|
947  | 
"""One of the files used to implement a knit database"""  | 
|
948  | 
||
| 
1666.1.6
by Robert Collins
 Make knit the default format.  | 
949  | 
def __init__(self, transport, filename, mode, file_mode=None):  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
950  | 
self._transport = transport  | 
951  | 
self._filename = filename  | 
|
952  | 
self._mode = mode  | 
|
| 
1666.1.6
by Robert Collins
 Make knit the default format.  | 
953  | 
self._file_mode = file_mode  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
954  | 
|
955  | 
def write_header(self):  | 
|
| 
1666.1.6
by Robert Collins
 Make knit the default format.  | 
956  | 
if self._transport.append(self._filename, StringIO(self.HEADER),  | 
957  | 
mode=self._file_mode):  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
958  | 
raise KnitCorrupt(self._filename, 'misaligned after writing header')  | 
959  | 
||
960  | 
def check_header(self, fp):  | 
|
| 
1641.1.2
by Robert Collins
 Change knit index files to be robust in the presence of partial writes.  | 
961  | 
line = fp.readline()  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
962  | 
if line != self.HEADER:  | 
963  | 
raise KnitHeaderError(badline=line)  | 
|
964  | 
||
965  | 
def commit(self):  | 
|
966  | 
"""Commit is a nop."""  | 
|
967  | 
||
968  | 
def __repr__(self):  | 
|
969  | 
return '%s(%s)' % (self.__class__.__name__, self._filename)  | 
|
970  | 
||
971  | 
||
972  | 
class _KnitIndex(_KnitComponentFile):  | 
|
973  | 
"""Manages knit index file.  | 
|
974  | 
||
975  | 
    The index is already kept in memory and read on startup, to enable
 | 
|
976  | 
    fast lookups of revision information.  The cursor of the index
 | 
|
977  | 
    file is always pointing to the end, making it easy to append
 | 
|
978  | 
    entries.
 | 
|
979  | 
||
980  | 
    _cache is a cache for fast mapping from version id to an Index
 | 
|
981  | 
    object.
 | 
|
982  | 
||
983  | 
    _history is a cache for fast mapping from indexes to version ids.
 | 
|
984  | 
||
985  | 
    The index data format is dictionary compressed when it comes to
 | 
|
986  | 
    parent references; an index entry may only have parents with a
 | 
|
987  | 
    lower index number.  As a result, the index is topologically sorted.
 | 
|
| 
1563.2.11
by Robert Collins
 Consolidate reweave and join as we have no separate usage, make reweave tests apply to all versionedfile implementations and deprecate the old reweave apis.  | 
988  | 
|
989  | 
    Duplicate entries may be written to the index for a single version id
 | 
|
990  | 
    if this is done then the latter one completely replaces the former:
 | 
|
991  | 
    this allows updates to correct version and parent information. 
 | 
|
992  | 
    Note that the two entries may share the delta, and that successive
 | 
|
993  | 
    annotations and references MUST point to the first entry.
 | 
|
| 
1641.1.2
by Robert Collins
 Change knit index files to be robust in the presence of partial writes.  | 
994  | 
|
995  | 
    The index file on disc contains a header, followed by one line per knit
 | 
|
996  | 
    record. The same revision can be present in an index file more than once.
 | 
|
| 
1759.2.1
by Jelmer Vernooij
 Fix some types (found using aspell).  | 
997  | 
    The first occurrence gets assigned a sequence number starting from 0. 
 | 
| 
1641.1.2
by Robert Collins
 Change knit index files to be robust in the presence of partial writes.  | 
998  | 
    
 | 
999  | 
    The format of a single line is
 | 
|
1000  | 
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
 | 
|
1001  | 
    REVISION_ID is a utf8-encoded revision id
 | 
|
1002  | 
    FLAGS is a comma separated list of flags about the record. Values include 
 | 
|
1003  | 
        no-eol, line-delta, fulltext.
 | 
|
1004  | 
    BYTE_OFFSET is the ascii representation of the byte offset in the data file
 | 
|
1005  | 
        that the compressed data starts at.
 | 
|
1006  | 
    LENGTH is the ascii representation of the length of the record in the data file.
 | 
|
1007  | 
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
 | 
|
1008  | 
        REVISION_ID.
 | 
|
1009  | 
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
 | 
|
1010  | 
        revision id already in the knit that is a parent of REVISION_ID.
 | 
|
1011  | 
    The ' :' marker is the end of record marker.
 | 
|
1012  | 
    
 | 
|
1013  | 
    partial writes:
 | 
|
1014  | 
    when a write to the index file is interrupted, it will result in a line that
 | 
|
1015  | 
    does not end in ' :'. If the ' :' is not present at the end of a line, or at
 | 
|
1016  | 
    the end of the file, then the record that is missing it will be ignored by
 | 
|
1017  | 
    the parser.
 | 
|
1018  | 
||
| 
1759.2.1
by Jelmer Vernooij
 Fix some types (found using aspell).  | 
1019  | 
    When writing new records to the index file, the data is preceded by '\n'
 | 
| 
1641.1.2
by Robert Collins
 Change knit index files to be robust in the presence of partial writes.  | 
1020  | 
    to ensure that records always start on new lines even if the last write was
 | 
1021  | 
    interrupted. As a result it is normal for the last line in the index to be
 | 
|
1022  | 
    missing a trailing newline. One can be added with no harmful effects.
 | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1023  | 
    """
 | 
1024  | 
||
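A small sketch of parsing one index line of the format documented above; `history` stands in for the sequence numbers assigned to earlier records, and the trailing ' :' check is the same partial-write guard used in __init__ below:

# hedged sketch of decoding one index line (names are illustrative)
history = ['rev-0']                       # earlier records, by sequence number
line = 'rev-1 fulltext,no-eol 0 120 .ghost-rev 0 :\n'
rec = line.split()
assert len(rec) >= 5 and rec[-1] == ':'   # otherwise: short write, skip it
version_id, options = rec[0], rec[1].split(',')
pos, size = int(rec[2]), int(rec[3])
parents = []
for value in rec[4:-1]:
    if value[0] == '.':
        parents.append(value[1:])         # uncompressed (possibly ghost) id
    else:
        parents.append(history[int(value)])
assert (version_id, options, pos, size) == \
    ('rev-1', ['fulltext', 'no-eol'], 0, 120)
assert parents == ['ghost-rev', 'rev-0']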
| 
1666.1.6
by Robert Collins
 Make knit the default format.  | 
1025  | 
HEADER = "# bzr knit index 8\n"  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1026  | 
|
| 
1596.2.18
by Robert Collins
 More microopimisations on index reading, now down to 16000 records/seconds.  | 
1027  | 
    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
 | 
1028  | 
    # __slots__ = ['_cache', '_history', '_transport', '_filename']
 | 
|
1029  | 
||
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1030  | 
def _cache_version(self, version_id, options, pos, size, parents):  | 
| 
1596.2.18
by Robert Collins
 More microopimisations on index reading, now down to 16000 records/seconds.  | 
1031  | 
"""Cache a version record in the history array and index cache.  | 
1032  | 
        
 | 
|
1033  | 
        This is inlined into __init__ for performance. KEEP IN SYNC.
 | 
|
1034  | 
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
 | 
|
1035  | 
         indexes).
 | 
|
1036  | 
        """
 | 
|
| 
1596.2.14
by Robert Collins
 Make knit parsing non quadratic?  | 
1037  | 
        # only want the _history index to reference the 1st index entry
 | 
1038  | 
        # for version_id
 | 
|
| 
1596.2.18
by Robert Collins
 More microopimisations on index reading, now down to 16000 records/seconds.  | 
1039  | 
if version_id not in self._cache:  | 
| 
1628.1.1
by Robert Collins
 Cache the index number of versions in the knit index's self._cache so that  | 
1040  | 
index = len(self._history)  | 
| 
1596.2.14
by Robert Collins
 Make knit parsing non quadratic?  | 
1041  | 
self._history.append(version_id)  | 
| 
1628.1.1
by Robert Collins
 Cache the index number of versions in the knit index's self._cache so that  | 
1042  | 
else:  | 
1043  | 
index = self._cache[version_id][5]  | 
|
1044  | 
self._cache[version_id] = (version_id,  | 
|
1045  | 
options,  | 
|
1046  | 
pos,  | 
|
1047  | 
size,  | 
|
1048  | 
parents,  | 
|
1049  | 
index)  | 
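A compact sketch of the duplicate handling performed above: a later entry for the same version id overwrites the cached tuple but keeps the original position in the history list (cache and history are stand-ins for self._cache and self._history):

# hedged sketch of the 6-tuple cache and its duplicate-entry behaviour
cache, history = {}, []

def cache_version(version_id, options, pos, size, parents):
    if version_id not in cache:
        index = len(history)
        history.append(version_id)
    else:
        index = cache[version_id][5]      # keep the first sequence number
    cache[version_id] = (version_id, options, pos, size, parents, index)

cache_version('rev-1', ['fulltext'], 0, 120, [])
cache_version('rev-1', ['line-delta'], 120, 80, [])   # corrected entry
assert history == ['rev-1'] and cache['rev-1'][5] == 0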
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1050  | 
|
| 
1666.1.6
by Robert Collins
 Make knit the default format.  | 
1051  | 
def __init__(self, transport, filename, mode, create=False, file_mode=None):  | 
1052  | 
_KnitComponentFile.__init__(self, transport, filename, mode, file_mode)  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1053  | 
self._cache = {}  | 
| 
1563.2.11
by Robert Collins
 Consolidate reweave and join as we have no separate usage, make reweave tests apply to all versionedfile implementations and deprecate the old reweave apis.  | 
1054  | 
        # position in _history is the 'official' index for a revision
 | 
1055  | 
        # but the values may have come from a newer entry.
 | 
|
| 
1759.2.1
by Jelmer Vernooij
 Fix some types (found using aspell).  | 
1056  | 
        # so - wc -l of a knit index is != the number of unique names
 | 
| 
1773.4.1
by Martin Pool
 Add pyflakes makefile target; fix many warnings  | 
1057  | 
        # in the knit.
 | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1058  | 
self._history = []  | 
| 
1594.2.17
by Robert Collins
 Better readv coalescing, now with test, and progress during knit index reading.  | 
1059  | 
pb = bzrlib.ui.ui_factory.nested_progress_bar()  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1060  | 
try:  | 
| 
1594.2.17
by Robert Collins
 Better readv coalescing, now with test, and progress during knit index reading.  | 
1061  | 
count = 0  | 
1062  | 
total = 1  | 
|
1063  | 
try:  | 
|
1064  | 
pb.update('read knit index', count, total)  | 
|
1065  | 
fp = self._transport.get(self._filename)  | 
|
| 
1711.7.25
by John Arbash Meinel
 try/finally to close files, _KnitData was keeping a handle to a file it never used again, and using transport.rename() when it wanted transport.move()  | 
1066  | 
try:  | 
1067  | 
self.check_header(fp)  | 
|
1068  | 
                    # readlines reads the whole file at once:
 | 
|
1069  | 
                    # bad for transports like http, good for local disk
 | 
|
1070  | 
                    # we save 60 ms doing this one change (
 | 
|
1071  | 
                    # from calling readline each time to calling
 | 
|
1072  | 
                    # readlines once.
 | 
|
1073  | 
                    # probably what we want for nice behaviour on
 | 
|
1074  | 
                    # http is an incremental readlines that yields, or
 | 
|
1075  | 
                    # a check for local vs non local indexes,
 | 
|
1076  | 
for l in fp.readlines():  | 
|
1077  | 
rec = l.split()  | 
|
1078  | 
if len(rec) < 5 or rec[-1] != ':':  | 
|
1079  | 
                            # corrupt line.
 | 
|
1080  | 
                            # FIXME: in the future we should determine if its a
 | 
|
1081  | 
                            # short write - and ignore it 
 | 
|
1082  | 
                            # or a different failure, and raise. RBC 20060407
 | 
|
1083  | 
                            continue
 | 
|
1084  | 
count += 1  | 
|
1085  | 
total += 1  | 
|
1086  | 
                        #pb.update('read knit index', count, total)
 | 
|
1087  | 
                        # See self._parse_parents
 | 
|
1088  | 
parents = []  | 
|
1089  | 
for value in rec[4:-1]:  | 
|
1090  | 
if '.' == value[0]:  | 
|
1091  | 
                                # uncompressed reference
 | 
|
1092  | 
parents.append(value[1:])  | 
|
1093  | 
else:  | 
|
1094  | 
                                # this is 15/4000ms faster than isinstance,
 | 
|
1095  | 
                                # (in lsprof)
 | 
|
1096  | 
                                # this function is called thousands of times a 
 | 
|
1097  | 
                                # second so small variations add up.
 | 
|
1098  | 
assert value.__class__ is str  | 
|
1099  | 
parents.append(self._history[int(value)])  | 
|
1100  | 
                        # end self._parse_parents
 | 
|
1101  | 
                        # self._cache_version(rec[0], 
 | 
|
1102  | 
                        #                     rec[1].split(','),
 | 
|
1103  | 
                        #                     int(rec[2]),
 | 
|
1104  | 
                        #                     int(rec[3]),
 | 
|
1105  | 
                        #                     parents)
 | 
|
1106  | 
                        # --- self._cache_version
 | 
|
1107  | 
                        # only want the _history index to reference the 1st 
 | 
|
1108  | 
                        # index entry for version_id
 | 
|
1109  | 
version_id = rec[0]  | 
|
1110  | 
if version_id not in self._cache:  | 
|
1111  | 
index = len(self._history)  | 
|
1112  | 
self._history.append(version_id)  | 
|
| 
1596.2.18
by Robert Collins
 More microopimisations on index reading, now down to 16000 records/seconds.  | 
1113  | 
else:  | 
| 
1711.7.25
by John Arbash Meinel
 try/finally to close files, _KnitData was keeping a handle to a file it never used again, and using transport.rename() when it wanted transport.move()  | 
1114  | 
index = self._cache[version_id][5]  | 
1115  | 
self._cache[version_id] = (version_id,  | 
|
1116  | 
rec[1].split(','),  | 
|
1117  | 
int(rec[2]),  | 
|
1118  | 
int(rec[3]),  | 
|
1119  | 
parents,  | 
|
1120  | 
index)  | 
|
1121  | 
                        # --- self._cache_version 
 | 
|
1122  | 
finally:  | 
|
1123  | 
fp.close()  | 
|
| 
1594.2.17
by Robert Collins
 Better readv coalescing, now with test, and progress during knit index reading.  | 
1124  | 
except NoSuchFile, e:  | 
1125  | 
if mode != 'w' or not create:  | 
|
1126  | 
                    raise
 | 
|
1127  | 
self.write_header()  | 
|
1128  | 
finally:  | 
|
1129  | 
pb.update('read knit index', total, total)  | 
|
1130  | 
pb.finished()  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1131  | 
|
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
1132  | 
def _parse_parents(self, compressed_parents):  | 
1133  | 
"""convert a list of string parent values into version ids.  | 
|
1134  | 
||
1135  | 
        ints are looked up in the index.
 | 
|
1136  | 
        .FOO values are ghosts and converted into FOO.
 | 
|
| 
1596.2.18
by Robert Collins
 More microopimisations on index reading, now down to 16000 records/seconds.  | 
1137  | 
|
1138  | 
        NOTE: the function is retained here for clarity, and for possible
 | 
|
1139  | 
              use in partial index reads. However bulk processing now has
 | 
|
1140  | 
              it inlined in __init__ for inner-loop optimisation.
 | 
|
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
1141  | 
        """
 | 
1142  | 
result = []  | 
|
1143  | 
for value in compressed_parents:  | 
|
| 
1596.2.15
by Robert Collins
 Microprofiling of knit parsing.  | 
1144  | 
if value[-1] == '.':  | 
| 
1596.2.18
by Robert Collins
 More microopimisations on index reading, now down to 16000 records/seconds.  | 
1145  | 
                # uncompressed reference
 | 
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
1146  | 
result.append(value[1:])  | 
1147  | 
else:  | 
|
| 
1596.2.15
by Robert Collins
 Microprofiling of knit parsing.  | 
1148  | 
                # this is 15/4000ms faster than isinstance,
 | 
1149  | 
                # this function is called thousands of times a 
 | 
|
1150  | 
                # second so small variations add up.
 | 
|
1151  | 
assert value.__class__ is str  | 
|
| 
1596.2.11
by Robert Collins
 Remove utf8 debugging code  | 
1152  | 
result.append(self._history[int(value)])  | 
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
1153  | 
return result  | 
1154  | 
||
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1155  | 
def get_graph(self):  | 
1156  | 
graph = []  | 
|
1157  | 
for version_id, index in self._cache.iteritems():  | 
|
1158  | 
graph.append((version_id, index[4]))  | 
|
1159  | 
return graph  | 
|
1160  | 
||
1161  | 
def get_ancestry(self, versions):  | 
|
1162  | 
"""See VersionedFile.get_ancestry."""  | 
|
| 
1563.2.35
by Robert Collins
 cleanup deprecation warnings and finish conversion so the inventory is knit based too.  | 
1163  | 
        # get a graph of all the mentioned versions:
 | 
1164  | 
graph = {}  | 
|
1165  | 
pending = set(versions)  | 
|
1166  | 
while len(pending):  | 
|
1167  | 
version = pending.pop()  | 
|
1168  | 
parents = self._cache[version][4]  | 
|
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
1169  | 
            # got the parents ok
 | 
1170  | 
            # trim ghosts
 | 
|
1171  | 
parents = [parent for parent in parents if parent in self._cache]  | 
|
| 
1563.2.35
by Robert Collins
 cleanup deprecation warnings and finish conversion so the inventory is knit based too.  | 
1172  | 
for parent in parents:  | 
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
1173  | 
                # if not completed and not a ghost
 | 
| 
1563.2.35
by Robert Collins
 cleanup deprecation warnings and finish conversion so the inventory is knit based too.  | 
1174  | 
if parent not in graph:  | 
1175  | 
pending.add(parent)  | 
|
1176  | 
graph[version] = parents  | 
|
1177  | 
return topo_sort(graph.items())  | 
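A self-contained sketch of the ghost trimming above: parents that have no entry of their own in the index cache are dropped before the graph is handed to topo_sort (cache_parents stands in for the parents column of self._cache; the topo_sort call itself is omitted):

# hedged sketch of building the ancestry graph with ghosts trimmed
cache_parents = {'rev-2': ['rev-1', 'ghost-rev'], 'rev-1': []}
graph = {}
pending = set(['rev-2'])
while pending:
    version = pending.pop()
    parents = [p for p in cache_parents[version] if p in cache_parents]
    for parent in parents:
        if parent not in graph:
            pending.add(parent)
    graph[version] = parents
assert graph == {'rev-2': ['rev-1'], 'rev-1': []}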
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1178  | 
|
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
1179  | 
def get_ancestry_with_ghosts(self, versions):  | 
1180  | 
"""See VersionedFile.get_ancestry_with_ghosts."""  | 
|
1181  | 
        # get a graph of all the mentioned versions:
 | 
|
1182  | 
graph = {}  | 
|
1183  | 
pending = set(versions)  | 
|
1184  | 
while len(pending):  | 
|
1185  | 
version = pending.pop()  | 
|
1186  | 
try:  | 
|
1187  | 
parents = self._cache[version][4]  | 
|
1188  | 
except KeyError:  | 
|
1189  | 
                # ghost, fake it
 | 
|
1190  | 
graph[version] = []  | 
|
1191  | 
                pass
 | 
|
1192  | 
else:  | 
|
1193  | 
                # got the parents ok
 | 
|
1194  | 
for parent in parents:  | 
|
1195  | 
if parent not in graph:  | 
|
1196  | 
pending.add(parent)  | 
|
1197  | 
graph[version] = parents  | 
|
1198  | 
return topo_sort(graph.items())  | 
|
1199  | 
||
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1200  | 
def num_versions(self):  | 
1201  | 
return len(self._history)  | 
|
1202  | 
||
1203  | 
__len__ = num_versions  | 
|
1204  | 
||
1205  | 
def get_versions(self):  | 
|
1206  | 
return self._history  | 
|
1207  | 
||
1208  | 
def idx_to_name(self, idx):  | 
|
1209  | 
return self._history[idx]  | 
|
1210  | 
||
1211  | 
def lookup(self, version_id):  | 
|
1212  | 
assert version_id in self._cache  | 
|
| 
1628.1.1
by Robert Collins
 Cache the index number of versions in the knit index's self._cache so that  | 
1213  | 
return self._cache[version_id][5]  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1214  | 
|
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
1215  | 
def _version_list_to_index(self, versions):  | 
| 
1911.2.3
by John Arbash Meinel
 Moving everything into a new location so that we can cache more than just revision ids  | 
1216  | 
encode_utf8 = cache_utf8.encode  | 
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
1217  | 
result_list = []  | 
1218  | 
for version in versions:  | 
|
1219  | 
if version in self._cache:  | 
|
| 
1628.1.1
by Robert Collins
 Cache the index number of versions in the knit index's self._cache so that  | 
1220  | 
                # -- inlined lookup() --
 | 
1221  | 
result_list.append(str(self._cache[version][5]))  | 
|
1222  | 
                # -- end lookup () --
 | 
|
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
1223  | 
else:  | 
| 
1911.2.1
by John Arbash Meinel
 Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate  | 
1224  | 
result_list.append('.' + encode_utf8(version))  | 
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
1225  | 
return ' '.join(result_list)  | 
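This is the write-side counterpart of the parent parsing shown earlier: parents already in the index become their sequence number, and anything else (a ghost) is written as '.' plus the version id. A small sketch with an illustrative cache_index mapping (the real code utf-8 encodes the id via cache_utf8.encode):

# hedged sketch of parent compression for an index line
cache_index = {'rev-0': 0, 'rev-1': 1}    # version id -> sequence number

def compress_parents(parents):
    out = []
    for parent in parents:
        if parent in cache_index:
            out.append(str(cache_index[parent]))
        else:
            out.append('.' + parent)      # ghost: dotted, uncompressed id
    return ' '.join(out)

assert compress_parents(['rev-1', 'ghost-rev']) == '1 .ghost-rev'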
1226  | 
||
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1227  | 
def add_version(self, version_id, options, pos, size, parents):  | 
1228  | 
"""Add a version record to the index."""  | 
|
| 
1692.4.1
by Robert Collins
 Multiple merges:  | 
1229  | 
self.add_versions(((version_id, options, pos, size, parents),))  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1230  | 
|
| 
1692.2.1
by Robert Collins
 Fix knit based push to only perform 2 appends to the target, rather that 2*new-versions.  | 
1231  | 
def add_versions(self, versions):  | 
1232  | 
"""Add multiple versions to the index.  | 
|
1233  | 
        
 | 
|
1234  | 
        :param versions: a list of tuples:
 | 
|
1235  | 
                         (version_id, options, pos, size, parents).
 | 
|
1236  | 
        """
 | 
|
1237  | 
lines = []  | 
|
| 
1911.2.3
by John Arbash Meinel
 Moving everything into a new location so that we can cache more than just revision ids  | 
1238  | 
encode_utf8 = cache_utf8.encode  | 
| 
1692.2.1
by Robert Collins
 Fix knit based push to only perform 2 appends to the target, rather that 2*new-versions.  | 
1239  | 
for version_id, options, pos, size, parents in versions:  | 
| 
1911.2.1
by John Arbash Meinel
 Cache encode/decode operations, saves memory and time. Especially when committing a new kernel tree with 7.7M new lines to annotate  | 
1240  | 
line = "\n%s %s %s %s %s :" % (encode_utf8(version_id),  | 
| 
1692.2.1
by Robert Collins
 Fix knit based push to only perform 2 appends to the target, rather that 2*new-versions.  | 
1241  | 
','.join(options),  | 
1242  | 
pos,  | 
|
1243  | 
size,  | 
|
1244  | 
self._version_list_to_index(parents))  | 
|
| 
1692.4.1
by Robert Collins
 Multiple merges:  | 
1245  | 
assert isinstance(line, str), \  | 
1246  | 
'content must be utf-8 encoded: %r' % (line,)  | 
|
| 
1692.2.1
by Robert Collins
 Fix knit based push to only perform 2 appends to the target, rather that 2*new-versions.  | 
1247  | 
lines.append(line)  | 
1248  | 
self._transport.append(self._filename, StringIO(''.join(lines)))  | 
|
1249  | 
        # cache after writing, so that a failed write leads to missing cache
 | 
|
1250  | 
        # entries not extra ones. XXX TODO: RBC 20060502 in the event of a 
 | 
|
1251  | 
        # failure, reload the index or flush it or some such, to prevent
 | 
|
1252  | 
        # writing records that did complete twice.
 | 
|
1253  | 
for version_id, options, pos, size, parents in versions:  | 
|
1254  | 
self._cache_version(version_id, options, pos, size, parents)  | 
|
1255  | 
||
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1256  | 
def has_version(self, version_id):  | 
1257  | 
"""True if the version is in the index."""  | 
|
1258  | 
return version_id in self._cache  | 
|
1259  | 
||
1260  | 
def get_position(self, version_id):  | 
|
1261  | 
"""Return data position and size of specified version."""  | 
|
1262  | 
return (self._cache[version_id][2], \  | 
|
1263  | 
self._cache[version_id][3])  | 
|
1264  | 
||
1265  | 
def get_method(self, version_id):  | 
|
1266  | 
"""Return compression method of specified version."""  | 
|
1267  | 
options = self._cache[version_id][1]  | 
|
1268  | 
if 'fulltext' in options:  | 
|
1269  | 
return 'fulltext'  | 
|
1270  | 
else:  | 
|
1271  | 
assert 'line-delta' in options  | 
|
1272  | 
return 'line-delta'  | 
|
1273  | 
||
1274  | 
def get_options(self, version_id):  | 
|
1275  | 
return self._cache[version_id][1]  | 
|
1276  | 
||
1277  | 
def get_parents(self, version_id):  | 
|
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
1278  | 
"""Return parents of specified version ignoring ghosts."""  | 
1279  | 
return [parent for parent in self._cache[version_id][4]  | 
|
1280  | 
if parent in self._cache]  | 
|
1281  | 
||
1282  | 
def get_parents_with_ghosts(self, version_id):  | 
|
| 
1759.2.1
by Jelmer Vernooij
 Fix some types (found using aspell).  | 
1283  | 
"""Return parents of specified version with ghosts."""  | 
| 
1594.2.8
by Robert Collins
 add ghost aware apis to knits.  | 
1284  | 
return self._cache[version_id][4]  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1285  | 
|
1286  | 
def check_versions_present(self, version_ids):  | 
|
1287  | 
"""Check that all specified versions are present."""  | 
|
1288  | 
version_ids = set(version_ids)  | 
|
1289  | 
for version_id in list(version_ids):  | 
|
1290  | 
if version_id in self._cache:  | 
|
1291  | 
version_ids.remove(version_id)  | 
|
1292  | 
if version_ids:  | 
|
1293  | 
raise RevisionNotPresent(list(version_ids)[0], self.filename)  | 
|
1294  | 
||
1295  | 
||
1296  | 
class _KnitData(_KnitComponentFile):  | 
|
1297  | 
"""Contents of the knit data file"""  | 
|
1298  | 
||
| 
1666.1.6
by Robert Collins
 Make knit the default format.  | 
1299  | 
HEADER = "# bzr knit data 8\n"  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1300  | 
|
| 
1666.1.6
by Robert Collins
 Make knit the default format.  | 
1301  | 
def __init__(self, transport, filename, mode, create=False, file_mode=None):  | 
| 
1563.2.5
by Robert Collins
 Remove unused transaction references from knit.py and the versionedfile interface.  | 
1302  | 
_KnitComponentFile.__init__(self, transport, filename, mode)  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1303  | 
self._checked = False  | 
| 
1863.1.8
by John Arbash Meinel
 Removing disk-backed-cache  | 
1304  | 
        # TODO: jam 20060713 conceptually, this could spill to disk
 | 
1305  | 
        #       if the cached size gets larger than a certain amount
 | 
|
1306  | 
        #       but it complicates the model a bit, so for now just use
 | 
|
1307  | 
        #       a simple dictionary
 | 
|
| 
1863.1.1
by John Arbash Meinel
 Allow Versioned files to do caching if explicitly asked, and implement for Knit  | 
1308  | 
self._cache = {}  | 
1309  | 
self._do_cache = False  | 
|
| 
1563.2.35
by Robert Collins
 cleanup deprecation warnings and finish conversion so the inventory is knit based too.  | 
1310  | 
if create:  | 
| 
1666.1.6
by Robert Collins
 Make knit the default format.  | 
1311  | 
self._transport.put(self._filename, StringIO(''), mode=file_mode)  | 
| 
1594.2.24
by Robert Collins
 Make use of the transaction finalisation warning support to implement in-knit caching.  | 
1312  | 
|
| 
1863.1.1
by John Arbash Meinel
 Allow Versioned files to do caching if explicitly asked, and implement for Knit  | 
1313  | 
def enable_cache(self):  | 
1314  | 
"""Enable caching of reads."""  | 
|
| 
1863.1.8
by John Arbash Meinel
 Removing disk-backed-cache  | 
1315  | 
self._do_cache = True  | 
| 
1863.1.1
by John Arbash Meinel
 Allow Versioned files to do caching if explicitly asked, and implement for Knit  | 
1316  | 
|
| 
1594.2.24
by Robert Collins
 Make use of the transaction finalisation warning support to implement in-knit caching.  | 
1317  | 
def clear_cache(self):  | 
1318  | 
"""Clear the record cache."""  | 
|
| 
1863.1.1
by John Arbash Meinel
 Allow Versioned files to do caching if explicitly asked, and implement for Knit  | 
1319  | 
self._do_cache = False  | 
1320  | 
self._cache = {}  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1321  | 
|
1322  | 
def _open_file(self):  | 
|
| 
1711.7.25
by John Arbash Meinel
 try/finally to close files, _KnitData was keeping a handle to a file it never used again, and using transport.rename() when it wanted transport.move()  | 
1323  | 
try:  | 
1324  | 
return self._transport.get(self._filename)  | 
|
1325  | 
except NoSuchFile:  | 
|
1326  | 
            pass
 | 
|
1327  | 
return None  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1328  | 
|
| 
1596.2.8
by Robert Collins
 Join knits with the original gzipped data avoiding recompression.  | 
1329  | 
def _record_to_data(self, version_id, digest, lines):  | 
1330  | 
"""Convert version_id, digest, lines into a raw data block.  | 
|
1331  | 
        
 | 
|
1332  | 
        :return: (len, a StringIO instance with the raw data ready to read.)
 | 
|
1333  | 
        """
 | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1334  | 
sio = StringIO()  | 
1335  | 
data_file = GzipFile(None, mode='wb', fileobj=sio)  | 
|
| 
1908.4.10
by John Arbash Meinel
 Small cleanups  | 
1336  | 
|
| 
1911.2.3
by John Arbash Meinel
 Moving everything into a new location so that we can cache more than just revision ids  | 
1337  | 
version_id_utf8 = cache_utf8.encode(version_id)  | 
| 
1908.4.5
by John Arbash Meinel
 Some small tweaks to knit and tuned_gzip to shave off another couple seconds  | 
1338  | 
data_file.writelines(chain(  | 
| 
1908.4.3
by John Arbash Meinel
 Shave another second off of _record_to_data time, by optimizing single write versus multiple writes  | 
1339  | 
["version %s %d %s\n" % (version_id_utf8,  | 
| 
1596.2.28
by Robert Collins
 more knit profile based tuning.  | 
1340  | 
len(lines),  | 
1341  | 
digest)],  | 
|
1342  | 
lines,  | 
|
| 
1908.4.5
by John Arbash Meinel
 Some small tweaks to knit and tuned_gzip to shave off another couple seconds  | 
1343  | 
["end %s\n" % version_id_utf8]))  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1344  | 
data_file.close()  | 
| 
1596.2.8
by Robert Collins
 Join knits with the original gzipped data avoiding recompression.  | 
1345  | 
length = sio.tell()  | 
| 
1596.2.28
by Robert Collins
 more knit profile based tuning.  | 
1346  | 
|
| 
1596.2.8
by Robert Collins
 Join knits with the original gzipped data avoiding recompression.  | 
1347  | 
sio.seek(0)  | 
1348  | 
return length, sio  | 
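A hedged round-trip sketch of the data-record layout produced above and consumed by _parse_record further down: a gzipped block holding a 'version' header line, the text lines, and an 'end' marker. The digest is a placeholder string here; the real code stores the sha-1 of the joined lines (sha_strings). Python 2, to match the module:

from cStringIO import StringIO
from gzip import GzipFile

lines = ['hello\n', 'world\n']
digest = 'not-a-real-sha1'                # real code: sha_strings(lines)
sio = StringIO()
gz = GzipFile(None, mode='wb', fileobj=sio)
gz.writelines(['version rev-1 %d %s\n' % (len(lines), digest)]
              + lines + ['end rev-1\n'])
gz.close()
raw = sio.getvalue()                      # the bytes appended to the .knit file

# reading it back, in the spirit of _parse_record
df = GzipFile(mode='rb', fileobj=StringIO(raw))
rec = df.readline().split()
assert rec[:3] == ['version', 'rev-1', '2'] and rec[3] == digest
contents = df.readlines()
assert contents.pop() == 'end rev-1\n'
assert contents == lines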
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1349  | 
|
| 
1596.2.8
by Robert Collins
 Join knits with the original gzipped data avoiding recompression.  | 
1350  | 
def add_raw_record(self, raw_data):  | 
| 
1692.4.1
by Robert Collins
 Multiple merges:  | 
1351  | 
"""Append a prepared record to the data file.  | 
1352  | 
        
 | 
|
1353  | 
        :return: the offset in the data file at which raw_data was written.
 | 
|
1354  | 
        """
 | 
|
| 
1596.2.9
by Robert Collins
 Utf8 safety in knit indexes.  | 
1355  | 
assert isinstance(raw_data, str), 'data must be plain bytes'  | 
| 
1692.2.1
by Robert Collins
 Fix knit based push to only perform 2 appends to the target, rather that 2*new-versions.  | 
1356  | 
return self._transport.append(self._filename, StringIO(raw_data))  | 
| 
1596.2.8
by Robert Collins
 Join knits with the original gzipped data avoiding recompression.  | 
1357  | 
|
1358  | 
def add_record(self, version_id, digest, lines):  | 
|
1359  | 
"""Write new text record to disk. Returns the position in the  | 
|
1360  | 
        file where it was written."""
 | 
|
1361  | 
size, sio = self._record_to_data(version_id, digest, lines)  | 
|
1362  | 
        # write to disk
 | 
|
| 
1594.2.24
by Robert Collins
 Make use of the transaction finalisation warning support to implement in-knit caching.  | 
1363  | 
start_pos = self._transport.append(self._filename, sio)  | 
| 
1863.1.1
by John Arbash Meinel
 Allow Versioned files to do caching if explicitly asked, and implement for Knit  | 
1364  | 
if self._do_cache:  | 
1365  | 
self._cache[version_id] = sio.getvalue()  | 
|
| 
1596.2.8
by Robert Collins
 Join knits with the original gzipped data avoiding recompression.  | 
1366  | 
return start_pos, size  | 
1367  | 
||
1368  | 
def _parse_record_header(self, version_id, raw_data):  | 
|
1369  | 
"""Parse a record header for consistency.  | 
|
1370  | 
||
1371  | 
        :return: the header and the decompressor stream.
 | 
|
1372  | 
                 as (stream, header_record)
 | 
|
1373  | 
        """
 | 
|
1374  | 
df = GzipFile(mode='rb', fileobj=StringIO(raw_data))  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1375  | 
rec = df.readline().split()  | 
1376  | 
if len(rec) != 4:  | 
|
| 
1596.2.8
by Robert Collins
 Join knits with the original gzipped data avoiding recompression.  | 
1377  | 
raise KnitCorrupt(self._filename, 'unexpected number of elements in record header')  | 
| 
1911.2.3
by John Arbash Meinel
 Moving everything into a new location so that we can cache more than just revision ids  | 
1378  | 
if cache_utf8.decode(rec[1]) != version_id:  | 
| 
1594.3.3
by Robert Collins
 Bugfix error message output in knit error raising.  | 
1379  | 
raise KnitCorrupt(self._filename,  | 
| 
1594.3.4
by Robert Collins
 Change urllib ranges implementation to be one coalesced range per http request.  | 
1380  | 
'unexpected version, wanted %r, got %r' % (  | 
1381  | 
version_id, rec[1]))  | 
|
| 
1596.2.8
by Robert Collins
 Join knits with the original gzipped data avoiding recompression.  | 
1382  | 
return df, rec  | 
1383  | 
||
1384  | 
def _parse_record(self, version_id, data):  | 
|
| 
1628.1.2
by Robert Collins
 More knit micro-optimisations.  | 
1385  | 
        # profiling notes:
 | 
1386  | 
        # 4168 calls in 2880 217 internal
 | 
|
1387  | 
        # 4168 calls to _parse_record_header in 2121
 | 
|
1388  | 
        # 4168 calls to readlines in 330
 | 
|
| 
1596.2.8
by Robert Collins
 Join knits with the original gzipped data avoiding recompression.  | 
1389  | 
df, rec = self._parse_record_header(version_id, data)  | 
| 
1628.1.2
by Robert Collins
 More knit micro-optimisations.  | 
1390  | 
record_contents = df.readlines()  | 
1391  | 
l = record_contents.pop()  | 
|
1392  | 
assert len(record_contents) == int(rec[2])  | 
|
| 
1911.2.3
by John Arbash Meinel
 Moving everything into a new location so that we can cache more than just revision ids  | 
1393  | 
if l != 'end %s\n' % cache_utf8.encode(version_id):  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1394  | 
raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'  | 
1395  | 
% (l, version_id))  | 
|
| 
1596.2.8
by Robert Collins
 Join knits with the original gzipped data avoiding recompression.  | 
1396  | 
df.close()  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1397  | 
return record_contents, rec[3]  | 
1398  | 
||
| 
1596.2.8
by Robert Collins
 Join knits with the original gzipped data avoiding recompression.  | 
1399  | 
def read_records_iter_raw(self, records):  | 
1400  | 
"""Read text records from data file and yield raw data.  | 
|
1401  | 
||
1402  | 
        This unpacks enough of the text record to validate the id is
 | 
|
1403  | 
        as expected, but that's all.
 | 
|
1404  | 
        """
 | 
|
1405  | 
        # setup an iterator of the external records:
 | 
|
1406  | 
        # uses readv so nice and fast we hope.
 | 
|
| 
1756.3.23
by Aaron Bentley
 Remove knit caches  | 
1407  | 
if len(records):  | 
| 
1596.2.8
by Robert Collins
 Join knits with the original gzipped data avoiding recompression.  | 
1408  | 
            # grab the disk data needed.
 | 
| 
1863.1.1
by John Arbash Meinel
 Allow Versioned files to do caching if explicitly asked, and implement for Knit  | 
1409  | 
if self._cache:  | 
1410  | 
                # Don't check _cache if it is empty
 | 
|
1411  | 
needed_offsets = [(pos, size) for version_id, pos, size  | 
|
1412  | 
in records  | 
|
1413  | 
if version_id not in self._cache]  | 
|
1414  | 
else:  | 
|
1415  | 
needed_offsets = [(pos, size) for version_id, pos, size  | 
|
1416  | 
in records]  | 
|
1417  | 
||
1418  | 
raw_records = self._transport.readv(self._filename, needed_offsets)  | 
|
1419  | 
||
| 
1596.2.8
by Robert Collins
 Join knits with the original gzipped data avoiding recompression.  | 
1420  | 
|
1421  | 
for version_id, pos, size in records:  | 
|
| 
1863.1.1
by John Arbash Meinel
 Allow Versioned files to do caching if explicitly asked, and implement for Knit  | 
1422  | 
if version_id in self._cache:  | 
1423  | 
                # This data has already been validated
 | 
|
1424  | 
data = self._cache[version_id]  | 
|
1425  | 
else:  | 
|
1426  | 
pos, data = raw_records.next()  | 
|
1427  | 
if self._do_cache:  | 
|
1428  | 
self._cache[version_id] = data  | 
|
1429  | 
||
1430  | 
                # validate the header
 | 
|
1431  | 
df, rec = self._parse_record_header(version_id, data)  | 
|
1432  | 
df.close()  | 
|
| 
1756.3.23
by Aaron Bentley
 Remove knit caches  | 
1433  | 
yield version_id, data  | 
| 
1596.2.8
by Robert Collins
 Join knits with the original gzipped data avoiding recompression.  | 
1434  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1435  | 
def read_records_iter(self, records):  | 
1436  | 
"""Read text records from data file and yield result.  | 
|
1437  | 
||
| 
1863.1.5
by John Arbash Meinel
 Add a read_records_iter_unsorted, which can return records in any order.  | 
1438  | 
        The result will be returned in whatever order is the fastest to read,
 | 
1439  | 
        not the order requested. Also, multiple requests for the same
 | 
|
1440  | 
        record will only yield 1 response.
 | 
|
1441  | 
        :param records: A list of (version_id, pos, len) entries
 | 
|
1442  | 
        :return: Yields (version_id, contents, digest) in the order
 | 
|
1443  | 
                 read, not the order requested
 | 
|
1444  | 
        """
 | 
|
1445  | 
if not records:  | 
|
1446  | 
            return
 | 
|
1447  | 
||
| 
1863.1.1
by John Arbash Meinel
 Allow Versioned files to do caching if explicitly asked, and implement for Knit  | 
1448  | 
if self._cache:  | 
| 
1863.1.5
by John Arbash Meinel
 Add a read_records_iter_unsorted, which can return records in any order.  | 
1449  | 
            # Skip records we have already seen
 | 
1450  | 
yielded_records = set()  | 
|
| 
1863.1.1
by John Arbash Meinel
 Allow Versioned files to do caching if explicitly asked, and implement for Knit  | 
1451  | 
needed_records = set()  | 
1452  | 
for record in records:  | 
|
1453  | 
if record[0] in self._cache:  | 
|
| 
1863.1.5
by John Arbash Meinel
 Add a read_records_iter_unsorted, which can return records in any order.  | 
1454  | 
if record[0] in yielded_records:  | 
1455  | 
                        continue
 | 
|
1456  | 
yielded_records.add(record[0])  | 
|
1457  | 
data = self._cache[record[0]]  | 
|
1458  | 
content, digest = self._parse_record(record[0], data)  | 
|
1459  | 
yield (record[0], content, digest)  | 
|
| 
1863.1.1
by John Arbash Meinel
 Allow Versioned files to do caching if explicitly asked, and implement for Knit  | 
1460  | 
else:  | 
1461  | 
needed_records.add(record)  | 
|
1462  | 
needed_records = sorted(needed_records, key=operator.itemgetter(1))  | 
|
1463  | 
else:  | 
|
1464  | 
needed_records = sorted(set(records), key=operator.itemgetter(1))  | 
|
| 
1756.3.23
by Aaron Bentley
 Remove knit caches  | 
1465  | 
|
| 
1863.1.5
by John Arbash Meinel
 Add a read_records_iter_unsorted, which can return records in any order.  | 
1466  | 
if not needed_records:  | 
1467  | 
            return
 | 
|
1468  | 
||
1469  | 
        # The transport optimizes the fetching as well 
 | 
|
1470  | 
        # (ie, reads contiguous ranges.)
 | 
|
1471  | 
readv_response = self._transport.readv(self._filename,  | 
|
1472  | 
[(pos, size) for version_id, pos, size in needed_records])  | 
|
1473  | 
||
1474  | 
for (version_id, pos, size), (pos, data) in \  | 
|
1475  | 
izip(iter(needed_records), readv_response):  | 
|
1476  | 
content, digest = self._parse_record(version_id, data)  | 
|
| 
1863.1.1
by John Arbash Meinel
 Allow Versioned files to do caching if explicitly asked, and implement for Knit  | 
1477  | 
if self._do_cache:  | 
| 
1863.1.5
by John Arbash Meinel
 Add a read_records_iter_unsorted, which can return records in any order.  | 
1478  | 
self._cache[version_id] = data  | 
| 
1756.3.23
by Aaron Bentley
 Remove knit caches  | 
1479  | 
yield version_id, content, digest  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1480  | 
|
1481  | 
def read_records(self, records):  | 
|
1482  | 
"""Read records into a dictionary."""  | 
|
1483  | 
components = {}  | 
|
| 
1863.1.5
by John Arbash Meinel
 Add a read_records_iter_unsorted, which can return records in any order.  | 
1484  | 
for record_id, content, digest in \  | 
| 
1863.1.9
by John Arbash Meinel
 Switching to have 'read_records_iter' return in random order.  | 
1485  | 
self.read_records_iter(records):  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1486  | 
components[record_id] = (content, digest)  | 
1487  | 
return components  | 
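The ordering contract documented in read_records_iter is what makes read_records return a plain dictionary: callers index by version id rather than relying on yield order. A small sketch of the deduplicate-and-sort-by-position step applied to unseen records (mirroring the operator.itemgetter(1) sort above):

# hedged sketch: duplicates collapse, and reads happen in file-position order
import operator

records = [('rev-2', 200, 95), ('rev-1', 0, 120), ('rev-2', 200, 95)]
needed_records = sorted(set(records), key=operator.itemgetter(1))
assert needed_records == [('rev-1', 0, 120), ('rev-2', 200, 95)]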
|
1488  | 
||
| 
1563.2.13
by Robert Collins
 InterVersionedFile implemented.  | 
1489  | 
|
1490  | 
class InterKnit(InterVersionedFile):  | 
|
1491  | 
"""Optimised code paths for knit to knit operations."""  | 
|
1492  | 
||
| 
1684.3.3
by Robert Collins
 Add a special cased weaves to knit converter.  | 
1493  | 
_matching_file_from_factory = KnitVersionedFile  | 
1494  | 
_matching_file_to_factory = KnitVersionedFile  | 
|
| 
1563.2.13
by Robert Collins
 InterVersionedFile implemented.  | 
1495  | 
|
1496  | 
    @staticmethod
 | 
|
1497  | 
def is_compatible(source, target):  | 
|
1498  | 
"""Be compatible with knits. """  | 
|
1499  | 
try:  | 
|
1500  | 
return (isinstance(source, KnitVersionedFile) and  | 
|
1501  | 
isinstance(target, KnitVersionedFile))  | 
|
1502  | 
except AttributeError:  | 
|
1503  | 
return False  | 
|
1504  | 
||
    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join."""
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                if n1 != n2:
                    # FIXME TEST that this check for introduced cycles works.
                    # The logic is: we have a cycle if, in our graph, we are
                    # an ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = self.source._index.get_options(version_id)
                parents = self.source._index.get_parents_with_ghosts(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must:
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in copy_set or
                            not self.source.has_version(parent))
                data_pos, data_size = self.source._index.get_position(version_id)
                copy_queue_records.append((version_id, data_pos, data_size))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # copy the data for the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            for (version_id, raw_data), \
                (version_id2, options, parents) in \
                izip(self.source._data.read_records_iter_raw(copy_queue_records),
                     copy_queue):
                assert version_id == version_id2, 'logic error, inconsistent results'
                count = count + 1
                pb.update("Joining knit", count, total)
                raw_records.append((version_id, options, parents, len(raw_data)))
                raw_datum.append(raw_data)
            self.target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                # write a combined record to our history, preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(InterKnit)
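The point of this optimiser is that record payloads cross in their original gzipped form: read_records_iter_raw() hands back the compressed bytes and a single _add_raw_records() call appends them all to the target, so nothing is decompressed or recompressed on the way. Below is a rough, self-contained sketch of that plan-then-bulk-copy shape; `source` and `target` and their methods are hypothetical stand-ins, not the real KnitVersionedFile internals.

# Illustrative sketch of the join's two phases (hypothetical objects).
def raw_copy_join(source, target, version_ids):
    # Phase 1: plan which records to move, without touching their bytes.
    copy_queue_records = []   # (version_id, pos, size) triples to read
    copy_queue = []           # matching (version_id, options, parents)
    for version_id in version_ids:
        pos, size = source.index_position(version_id)
        copy_queue_records.append((version_id, pos, size))
        copy_queue.append((version_id, source.options(version_id),
                           source.parents(version_id)))

    # Phase 2: stream the still-compressed record bytes across and append
    # them to the target in one go, so nothing is recompressed on the way.
    raw_records = []
    raw_datum = []
    for (version_id, raw_data), (_, options, parents) in zip(
            source.read_raw(copy_queue_records), copy_queue):
        raw_records.append((version_id, options, parents, len(raw_data)))
        raw_datum.append(raw_data)
    target.add_raw(raw_records, ''.join(raw_datum))
    return len(raw_records)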


class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weave sources and knit targets."""
        try:
            return (isinstance(source, bzrlib.weave.Weave) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join."""
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = bzrlib.ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # if all of n2's parents are in n1, then it's fine.
                if n2.difference(n1):
                    # FIXME TEST that this check for introduced cycles works.
                    # The logic is: we have a cycle if, in our graph, we are
                    # an ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                self.target.add_lines(
                    version_id, parents, self.source.get_lines(version_id))
                count = count + 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # write a combined record to our history, preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()


InterVersionedFile.register_optimiser(WeaveToKnit)
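Unlike InterKnit.join(), this converter cannot copy raw records because the storage formats differ, so it re-adds each version's full text with add_lines() in topological order, guaranteeing every parent exists before its children. The toy example below illustrates that ordering constraint with a generic depth-first topological sort; the graph and helper are illustrative, not bzrlib's topo_sort().

# Toy illustration of why the conversion walks versions in topological
# order: add_lines() requires every parent to exist before its child.
def toposort(graph):
    # graph maps version_id -> list of parent version_ids
    order, seen = [], set()
    def visit(node):
        if node in seen:
            return
        seen.add(node)
        for parent in graph[node]:
            visit(parent)
        order.append(node)
    for node in graph:
        visit(node)
    return order

graph = {'A': [], 'B': ['A'], 'C': ['A'], 'D': ['B', 'C']}
print(toposort(graph))   # e.g. ['A', 'B', 'C', 'D']: parents always first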


class KnitSequenceMatcher(difflib.SequenceMatcher):
    """Knit tuned sequence matcher.

    This is based on profiling of difflib which indicated some improvements
    for our usage pattern.
    """

    def find_longest_match(self, alo, ahi, blo, bhi):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi].

        If isjunk is not defined:

        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
            alo <= i <= i+k <= ahi
            blo <= j <= j+k <= bhi
        and for all (i',j',k') meeting those conditions,
            k >= k'
            i <= i'
            and if i == i', j <= j'

        In other words, of all maximal matching blocks, return one that
        starts earliest in a, and of all those maximal matching blocks that
        start earliest in a, return the one that starts earliest in b.

        >>> s = KnitSequenceMatcher(None, " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (0, 4, 5)

        If isjunk is defined, first the longest matching block is
        determined as above, but with the additional restriction that no
        junk element appears in the block.  Then that block is extended as
        far as possible by matching (only) junk elements on both sides.  So
        the resulting block never matches on junk except as identical junk
        happens to be adjacent to an "interesting" match.

        Here's the same example as before, but considering blanks to be
        junk.  That prevents " abcd" from matching the " abcd" at the tail
        end of the second sequence directly.  Instead only the "abcd" can
        match, and matches the leftmost "abcd" in the second sequence:

        >>> s = KnitSequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (1, 0, 4)

        If no blocks match, return (alo, blo, 0).

        >>> s = KnitSequenceMatcher(None, "ab", "c")
        >>> s.find_longest_match(0, 2, 0, 1)
        (0, 0, 0)
        """

        # CAUTION:  stripping common prefix or suffix would be incorrect.
        # E.g.,
        #    ab
        #    acab
        # Longest matching block is "ab", but if common prefix is
        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
        # strip, so ends up claiming that ab is changed to acab by
        # inserting "ca" in the middle.  That's minimal but unintuitive:
        # "it's obvious" that someone inserted "ac" at the front.
        # Windiff ends up at the same place as diff, but by pairing up
        # the unique 'b's and then matching the first two 'a's.

        a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk
        besti, bestj, bestsize = alo, blo, 0
        # find longest junk-free match
        # during an iteration of the loop, j2len[j] = length of longest
        # junk-free match ending with a[i-1] and b[j]
        j2len = {}
        # nothing = []
        b2jget = b2j.get
        for i in xrange(alo, ahi):
            # look at all instances of a[i] in b; note that because
            # b2j has no junk keys, the loop is skipped if a[i] is junk
            j2lenget = j2len.get
            newj2len = {}

            # changing b2j.get(a[i], nothing) to a try:KeyError pair produced
            # the following improvement
            #     704  0   4650.5320   2620.7410   bzrlib.knit:1336(find_longest_match)
            # +326674  0   1655.1210   1655.1210   +<method 'get' of 'dict' objects>
            #  +76519  0    374.6700    374.6700   +<method 'has_key' of 'dict' objects>
            # to
            #     704  0   3733.2820   2209.6520   bzrlib.knit:1336(find_longest_match)
            #  +211400 0   1147.3520   1147.3520   +<method 'get' of 'dict' objects>
            #  +76519  0    376.2780    376.2780   +<method 'has_key' of 'dict' objects>

            try:
                js = b2j[a[i]]
            except KeyError:
                pass
            else:
                for j in js:
                    # a[i] matches b[j]
                    if j >= blo:
                        if j >= bhi:
                            break
                        k = newj2len[j] = 1 + j2lenget(-1 + j, 0)
                        if k > bestsize:
                            besti, bestj, bestsize = 1 + i-k, 1 + j-k, k
            j2len = newj2len

        # Extend the best by non-junk elements on each end.  In particular,
        # "popular" non-junk elements aren't in b2j, which greatly speeds
        # the inner loop above, but also means "the best" match so far
        # doesn't contain any junk *or* popular non-junk elements.
        while besti > alo and bestj > blo and \
              not isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              not isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize += 1

        # Now that we have a wholly interesting match (albeit possibly
        # empty!), we may as well suck up the matching junk on each
        # side of it too.  Can't think of a good reason not to, and it
        # saves post-processing the (possibly considerable) expense of
        # figuring out what to do with it.  In the case of an empty
        # interesting match, this is clearly the right thing to do,
        # because no other kind of match is possible in the regions.
        while besti > alo and bestj > blo and \
              isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize = bestsize + 1

        return besti, bestj, bestsize
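The doctests above exercise find_longest_match() directly, but in normal use the matcher is driven through the standard difflib API it inherits (get_matching_blocks(), get_opcodes(), and so on). A small usage sketch, assuming bzrlib.knit is importable; apart from speed, the behaviour here is the same as difflib.SequenceMatcher.

# Usage sketch: diffing two line lists with the tuned matcher via the
# inherited difflib API (assumes bzrlib is on the path).
from bzrlib.knit import KnitSequenceMatcher

old_lines = ['a\n', 'b\n', 'c\n']
new_lines = ['a\n', 'x\n', 'c\n']

m = KnitSequenceMatcher(None, old_lines, new_lines)
opcodes = m.get_opcodes()
# expected: [('equal', 0, 1, 0, 1), ('replace', 1, 2, 1, 2), ('equal', 2, 3, 2, 3)]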