bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
1  | 
# Copyright (C) 2005, 2006 by Canonical Ltd
 | 
2  | 
# Written by Martin Pool.
 | 
|
3  | 
# Modified by Johan Rydberg <jrydberg@gnu.org>
 | 
|
4  | 
# Modified by Robert Collins <robert.collins@canonical.com>
 | 
|
5  | 
#
 | 
|
6  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
7  | 
# it under the terms of the GNU General Public License as published by
 | 
|
8  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
9  | 
# (at your option) any later version.
 | 
|
10  | 
#
 | 
|
11  | 
# This program is distributed in the hope that it will be useful,
 | 
|
12  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
13  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
14  | 
# GNU General Public License for more details.
 | 
|
15  | 
#
 | 
|
16  | 
# You should have received a copy of the GNU General Public License
 | 
|
17  | 
# along with this program; if not, write to the Free Software
 | 
|
18  | 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
|
19  | 
||
20  | 
"""Knit versionedfile implementation.
 | 
|
21  | 
||
22  | 
A knit is a versioned file implementation that supports efficient append only
 | 
|
23  | 
updates.
 | 
|
| 
1563.2.6
by Robert Collins
 Start check tests for knits (pending), and remove dead code.  | 
24  | 
|
25  | 
Knit file layout:
 | 
|
26  | 
lifeless: the data file is made up of "delta records".  each delta record has a delta header 
 | 
|
27  | 
that contains; (1) a version id, (2) the size of the delta (in lines), and (3)  the digest of 
 | 
|
28  | 
the -expanded data- (ie, the delta applied to the parent).  the delta also ends with a 
 | 
|
29  | 
end-marker; simply "end VERSION"
 | 
|
30  | 
||
31  | 
delta can be line or full contents.a
 | 
|
32  | 
... the 8's there are the index number of the annotation.
 | 
|
33  | 
version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e
 | 
|
34  | 
59,59,3
 | 
|
35  | 
8
 | 
|
36  | 
8         if ie.executable:
 | 
|
37  | 
8             e.set('executable', 'yes')
 | 
|
38  | 
130,130,2
 | 
|
39  | 
8         if elt.get('executable') == 'yes':
 | 
|
40  | 
8             ie.executable = True
 | 
|
41  | 
end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 
 | 
|
42  | 
||
43  | 
||
44  | 
whats in an index:
 | 
|
45  | 
09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents
 | 
|
46  | 
09:33 < jrydberg> lifeless: the parents are currently dictionary compressed
 | 
|
47  | 
09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)
 | 
|
48  | 
09:33 < lifeless> right
 | 
|
49  | 
09:33 < jrydberg> lifeless: the position and size is the range in the data file
 | 
|
50  | 
||
51  | 
||
52  | 
so the index sequence is the dictionary compressed sequence number used
 | 
|
53  | 
in the deltas to provide line annotation
 | 
|
54  | 
||
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
55  | 
"""
 | 
56  | 
||
| 
1563.2.6
by Robert Collins
 Start check tests for knits (pending), and remove dead code.  | 
57  | 
# TODOS:
 | 
58  | 
# 10:16 < lifeless> make partial index writes safe
 | 
|
59  | 
# 10:16 < lifeless> implement 'knit.check()' like weave.check()
 | 
|
60  | 
# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave 
 | 
|
61  | 
#                    always' approach.
 | 
|
62  | 
||
63  | 
||
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
64  | 
import difflib  | 
65  | 
from difflib import SequenceMatcher  | 
|
| 
1563.2.6
by Robert Collins
 Start check tests for knits (pending), and remove dead code.  | 
66  | 
from gzip import GzipFile  | 
67  | 
import os  | 
|
68  | 
from StringIO import StringIO  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
69  | 
|
70  | 
from bzrlib.errors import FileExists, NoSuchFile, KnitError, \  | 
|
71  | 
InvalidRevisionId, KnitCorrupt, KnitHeaderError, \  | 
|
72  | 
RevisionNotPresent, RevisionAlreadyPresent  | 
|
73  | 
from bzrlib.trace import mutter  | 
|
74  | 
from bzrlib.osutils import contains_whitespace, contains_linebreaks, \  | 
|
75  | 
     sha_strings
 | 
|
76  | 
from bzrlib.versionedfile import VersionedFile  | 
|
77  | 
from bzrlib.tsort import topo_sort  | 
|
78  | 
||
79  | 
||
80  | 
# TODO: Split out code specific to this format into an associated object.
 | 
|
81  | 
||
82  | 
# TODO: Can we put in some kind of value to check that the index and data
 | 
|
83  | 
# files belong together?
 | 
|
84  | 
||
85  | 
# TODO: accomodate binaries, perhaps by storing a byte count
 | 
|
86  | 
||
87  | 
# TODO: function to check whole file
 | 
|
88  | 
||
89  | 
# TODO: atomically append data, then measure backwards from the cursor
 | 
|
90  | 
# position after writing to work out where it was located.  we may need to
 | 
|
91  | 
# bypass python file buffering.
 | 
|
92  | 
||
93  | 
DATA_SUFFIX = '.knit'  | 
|
94  | 
INDEX_SUFFIX = '.kndx'  | 
|
95  | 
||
96  | 
||
97  | 
class KnitContent(object):  | 
|
98  | 
"""Content of a knit version to which deltas can be applied."""  | 
|
99  | 
||
100  | 
def __init__(self, lines):  | 
|
101  | 
self._lines = lines  | 
|
102  | 
||
103  | 
def annotate_iter(self):  | 
|
104  | 
"""Yield tuples of (origin, text) for each content line."""  | 
|
105  | 
for origin, text in self._lines:  | 
|
106  | 
yield origin, text  | 
|
107  | 
||
108  | 
def annotate(self):  | 
|
109  | 
"""Return a list of (origin, text) tuples."""  | 
|
110  | 
return list(self.annotate_iter())  | 
|
111  | 
||
112  | 
def apply_delta(self, delta):  | 
|
113  | 
"""Apply delta to this content."""  | 
|
114  | 
offset = 0  | 
|
115  | 
for start, end, count, lines in delta:  | 
|
116  | 
self._lines[offset+start:offset+end] = lines  | 
|
117  | 
offset = offset + (start - end) + count  | 
|
118  | 
||
119  | 
def line_delta_iter(self, new_lines):  | 
|
120  | 
"""Generate line-based delta from new_lines to this content."""  | 
|
121  | 
new_texts = [text for origin, text in new_lines._lines]  | 
|
122  | 
old_texts = [text for origin, text in self._lines]  | 
|
123  | 
s = difflib.SequenceMatcher(None, old_texts, new_texts)  | 
|
124  | 
for op in s.get_opcodes():  | 
|
125  | 
if op[0] == 'equal':  | 
|
126  | 
                continue
 | 
|
127  | 
yield (op[1], op[2], op[4]-op[3], new_lines._lines[op[3]:op[4]])  | 
|
128  | 
||
129  | 
def line_delta(self, new_lines):  | 
|
130  | 
return list(self.line_delta_iter(new_lines))  | 
|
131  | 
||
132  | 
def text(self):  | 
|
133  | 
return [text for origin, text in self._lines]  | 
|
134  | 
||
135  | 
||
136  | 
class _KnitFactory(object):  | 
|
137  | 
"""Base factory for creating content objects."""  | 
|
138  | 
||
139  | 
def make(self, lines, version):  | 
|
140  | 
num_lines = len(lines)  | 
|
141  | 
return KnitContent(zip([version] * num_lines, lines))  | 
|
142  | 
||
143  | 
||
144  | 
class KnitAnnotateFactory(_KnitFactory):  | 
|
145  | 
"""Factory for creating annotated Content objects."""  | 
|
146  | 
||
147  | 
annotated = True  | 
|
148  | 
||
149  | 
def parse_fulltext(self, content, version):  | 
|
150  | 
lines = []  | 
|
151  | 
for line in content:  | 
|
152  | 
origin, text = line.split(' ', 1)  | 
|
153  | 
lines.append((int(origin), text))  | 
|
154  | 
return KnitContent(lines)  | 
|
155  | 
||
156  | 
def parse_line_delta_iter(self, lines):  | 
|
157  | 
while lines:  | 
|
158  | 
header = lines.pop(0)  | 
|
159  | 
start, end, c = [int(n) for n in header.split(',')]  | 
|
160  | 
contents = []  | 
|
161  | 
for i in range(c):  | 
|
162  | 
origin, text = lines.pop(0).split(' ', 1)  | 
|
163  | 
contents.append((int(origin), text))  | 
|
164  | 
yield start, end, c, contents  | 
|
165  | 
||
166  | 
def parse_line_delta(self, lines, version):  | 
|
167  | 
return list(self.parse_line_delta_iter(lines))  | 
|
168  | 
||
169  | 
def lower_fulltext(self, content):  | 
|
170  | 
return ['%d %s' % (o, t) for o, t in content._lines]  | 
|
171  | 
||
172  | 
def lower_line_delta(self, delta):  | 
|
173  | 
out = []  | 
|
174  | 
for start, end, c, lines in delta:  | 
|
175  | 
out.append('%d,%d,%d\n' % (start, end, c))  | 
|
176  | 
for origin, text in lines:  | 
|
177  | 
out.append('%d %s' % (origin, text))  | 
|
178  | 
return out  | 
|
179  | 
||
180  | 
||
181  | 
class KnitPlainFactory(_KnitFactory):  | 
|
182  | 
"""Factory for creating plain Content objects."""  | 
|
183  | 
||
184  | 
annotated = False  | 
|
185  | 
||
186  | 
def parse_fulltext(self, content, version):  | 
|
187  | 
return self.make(content, version)  | 
|
188  | 
||
189  | 
def parse_line_delta_iter(self, lines, version):  | 
|
190  | 
while lines:  | 
|
191  | 
header = lines.pop(0)  | 
|
192  | 
start, end, c = [int(n) for n in header.split(',')]  | 
|
193  | 
yield start, end, c, zip([version] * c, lines[:c])  | 
|
194  | 
del lines[:c]  | 
|
195  | 
||
196  | 
def parse_line_delta(self, lines, version):  | 
|
197  | 
return list(self.parse_line_delta_iter(lines, version))  | 
|
198  | 
||
199  | 
def lower_fulltext(self, content):  | 
|
200  | 
return content.text()  | 
|
201  | 
||
202  | 
def lower_line_delta(self, delta):  | 
|
203  | 
out = []  | 
|
204  | 
for start, end, c, lines in delta:  | 
|
205  | 
out.append('%d,%d,%d\n' % (start, end, c))  | 
|
206  | 
out.extend([text for origin, text in lines])  | 
|
207  | 
return out  | 
|
208  | 
||
209  | 
||
210  | 
def make_empty_knit(transport, relpath):  | 
|
211  | 
"""Construct a empty knit at the specified location."""  | 
|
| 
1563.2.5
by Robert Collins
 Remove unused transaction references from knit.py and the versionedfile interface.  | 
212  | 
k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory)  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
213  | 
k._data._open_file()  | 
214  | 
||
215  | 
||
216  | 
class KnitVersionedFile(VersionedFile):  | 
|
217  | 
"""Weave-like structure with faster random access.  | 
|
218  | 
||
219  | 
    A knit stores a number of texts and a summary of the relationships
 | 
|
220  | 
    between them.  Texts are identified by a string version-id.  Texts
 | 
|
221  | 
    are normally stored and retrieved as a series of lines, but can
 | 
|
222  | 
    also be passed as single strings.
 | 
|
223  | 
||
224  | 
    Lines are stored with the trailing newline (if any) included, to
 | 
|
225  | 
    avoid special cases for files with no final newline.  Lines are
 | 
|
226  | 
    composed of 8-bit characters, not unicode.  The combination of
 | 
|
227  | 
    these approaches should mean any 'binary' file can be safely
 | 
|
228  | 
    stored and retrieved.
 | 
|
229  | 
    """
 | 
|
230  | 
||
| 
1563.2.5
by Robert Collins
 Remove unused transaction references from knit.py and the versionedfile interface.  | 
231  | 
def __init__(self, transport, relpath, mode, factory,  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
232  | 
basis_knit=None, delta=True):  | 
| 
1563.2.5
by Robert Collins
 Remove unused transaction references from knit.py and the versionedfile interface.  | 
233  | 
"""Construct a knit at location specified by relpath."""  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
234  | 
assert mode in ('r', 'w'), "invalid mode specified"  | 
235  | 
assert not basis_knit or isinstance(basis_knit, KnitVersionedFile), \  | 
|
236  | 
type(basis_knit)  | 
|
237  | 
||
238  | 
self.transport = transport  | 
|
239  | 
self.filename = relpath  | 
|
240  | 
self.basis_knit = basis_knit  | 
|
241  | 
self.factory = factory  | 
|
242  | 
self.writable = (mode == 'w')  | 
|
243  | 
self.delta = delta  | 
|
244  | 
||
245  | 
self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,  | 
|
| 
1563.2.5
by Robert Collins
 Remove unused transaction references from knit.py and the versionedfile interface.  | 
246  | 
mode)  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
247  | 
self._data = _KnitData(transport, relpath + DATA_SUFFIX,  | 
| 
1563.2.5
by Robert Collins
 Remove unused transaction references from knit.py and the versionedfile interface.  | 
248  | 
mode)  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
249  | 
|
250  | 
def versions(self):  | 
|
251  | 
"""See VersionedFile.versions."""  | 
|
252  | 
return self._index.get_versions()  | 
|
253  | 
||
254  | 
def has_version(self, version_id):  | 
|
255  | 
"""See VersionedFile.has_version."""  | 
|
256  | 
return self._index.has_version(version_id)  | 
|
257  | 
||
258  | 
__contains__ = has_version  | 
|
259  | 
||
260  | 
def _merge_annotations(self, content, parents):  | 
|
261  | 
"""Merge annotations for content. This is done by comparing  | 
|
262  | 
        the annotations based on changed to the text."""
 | 
|
263  | 
for parent_id in parents:  | 
|
264  | 
merge_content = self._get_content(parent_id)  | 
|
265  | 
seq = SequenceMatcher(None, merge_content.text(), content.text())  | 
|
266  | 
for i, j, n in seq.get_matching_blocks():  | 
|
267  | 
if n == 0:  | 
|
268  | 
                    continue
 | 
|
269  | 
content._lines[j:j+n] = merge_content._lines[i:i+n]  | 
|
270  | 
||
271  | 
def _get_components(self, version_id):  | 
|
272  | 
"""Return a list of (version_id, method, data) tuples that  | 
|
273  | 
        makes up version specified by version_id of the knit.
 | 
|
274  | 
||
275  | 
        The components should be applied in the order of the returned
 | 
|
276  | 
        list.
 | 
|
277  | 
||
278  | 
        The basis knit will be used to the largest extent possible
 | 
|
279  | 
        since it is assumed that accesses to it is faster.
 | 
|
280  | 
        """
 | 
|
281  | 
        # needed_revisions holds a list of (method, version_id) of
 | 
|
282  | 
        # versions that is needed to be fetched to construct the final
 | 
|
283  | 
        # version of the file.
 | 
|
284  | 
        #
 | 
|
285  | 
        # basis_revisions is a list of versions that needs to be
 | 
|
286  | 
        # fetched but exists in the basis knit.
 | 
|
287  | 
||
288  | 
basis = self.basis_knit  | 
|
289  | 
needed_versions = []  | 
|
290  | 
basis_versions = []  | 
|
291  | 
cursor = version_id  | 
|
292  | 
||
293  | 
while 1:  | 
|
294  | 
picked_knit = self  | 
|
295  | 
if basis and basis._index.has_version(cursor):  | 
|
296  | 
picked_knit = basis  | 
|
297  | 
basis_versions.append(cursor)  | 
|
298  | 
method = picked_knit._index.get_method(cursor)  | 
|
299  | 
needed_versions.append((method, cursor))  | 
|
300  | 
if method == 'fulltext':  | 
|
301  | 
                break
 | 
|
302  | 
cursor = picked_knit.get_parents(cursor)[0]  | 
|
303  | 
||
304  | 
components = {}  | 
|
305  | 
if basis_versions:  | 
|
306  | 
records = []  | 
|
307  | 
for comp_id in basis_versions:  | 
|
308  | 
data_pos, data_size = basis._index.get_data_position(comp_id)  | 
|
309  | 
records.append((piece_id, data_pos, data_size))  | 
|
310  | 
components.update(basis._data.read_records(records))  | 
|
311  | 
||
312  | 
records = []  | 
|
313  | 
for comp_id in [vid for method, vid in needed_versions  | 
|
314  | 
if vid not in basis_versions]:  | 
|
315  | 
data_pos, data_size = self._index.get_position(comp_id)  | 
|
316  | 
records.append((comp_id, data_pos, data_size))  | 
|
317  | 
components.update(self._data.read_records(records))  | 
|
318  | 
||
319  | 
        # get_data_records returns a mapping with the version id as
 | 
|
320  | 
        # index and the value as data.  The order the components need
 | 
|
321  | 
        # to be applied is held by needed_versions (reversed).
 | 
|
322  | 
out = []  | 
|
323  | 
for method, comp_id in reversed(needed_versions):  | 
|
324  | 
out.append((comp_id, method, components[comp_id]))  | 
|
325  | 
||
326  | 
return out  | 
|
327  | 
||
328  | 
def _get_content(self, version_id):  | 
|
329  | 
"""Returns a content object that makes up the specified  | 
|
330  | 
        version."""
 | 
|
331  | 
if not self.has_version(version_id):  | 
|
332  | 
raise RevisionNotPresent(version_id, self.filename)  | 
|
333  | 
||
334  | 
if self.basis_knit and version_id in self.basis_knit:  | 
|
335  | 
return self.basis_knit._get_content(version_id)  | 
|
336  | 
||
337  | 
content = None  | 
|
338  | 
components = self._get_components(version_id)  | 
|
339  | 
for component_id, method, (data, digest) in components:  | 
|
340  | 
version_idx = self._index.lookup(component_id)  | 
|
341  | 
if method == 'fulltext':  | 
|
342  | 
assert content is None  | 
|
343  | 
content = self.factory.parse_fulltext(data, version_idx)  | 
|
344  | 
elif method == 'line-delta':  | 
|
345  | 
delta = self.factory.parse_line_delta(data, version_idx)  | 
|
346  | 
content.apply_delta(delta)  | 
|
347  | 
||
348  | 
if 'no-eol' in self._index.get_options(version_id):  | 
|
349  | 
line = content._lines[-1][1].rstrip('\n')  | 
|
350  | 
content._lines[-1] = (content._lines[-1][0], line)  | 
|
351  | 
||
352  | 
if sha_strings(content.text()) != digest:  | 
|
353  | 
raise KnitCorrupt(self.filename, 'sha-1 does not match')  | 
|
354  | 
||
355  | 
return content  | 
|
356  | 
||
357  | 
def _check_versions_present(self, version_ids):  | 
|
358  | 
"""Check that all specified versions are present."""  | 
|
359  | 
version_ids = set(version_ids)  | 
|
360  | 
for r in list(version_ids):  | 
|
361  | 
if self._index.has_version(r):  | 
|
362  | 
version_ids.remove(r)  | 
|
363  | 
if version_ids:  | 
|
364  | 
raise RevisionNotPresent(list(version_ids)[0], self.filename)  | 
|
365  | 
||
366  | 
def add_lines(self, version_id, parents, lines):  | 
|
367  | 
"""See VersionedFile.add_lines."""  | 
|
368  | 
assert self.writable, "knit is not opened for write"  | 
|
369  | 
        ### FIXME escape. RBC 20060228
 | 
|
370  | 
if contains_whitespace(version_id):  | 
|
371  | 
raise InvalidRevisionId(version_id)  | 
|
372  | 
if self.has_version(version_id):  | 
|
373  | 
raise RevisionAlreadyPresent(version_id, self.filename)  | 
|
374  | 
||
375  | 
if True or __debug__:  | 
|
376  | 
for l in lines:  | 
|
377  | 
assert '\n' not in l[:-1]  | 
|
378  | 
||
379  | 
self._check_versions_present(parents)  | 
|
380  | 
return self._add(version_id, lines[:], parents, self.delta)  | 
|
381  | 
||
382  | 
def _add(self, version_id, lines, parents, delta):  | 
|
383  | 
"""Add a set of lines on top of version specified by parents.  | 
|
384  | 
||
385  | 
        If delta is true, compress the text as a line-delta against
 | 
|
386  | 
        the first parent.
 | 
|
387  | 
        """
 | 
|
388  | 
if delta and not parents:  | 
|
389  | 
delta = False  | 
|
390  | 
||
391  | 
digest = sha_strings(lines)  | 
|
392  | 
options = []  | 
|
393  | 
if lines:  | 
|
394  | 
if lines[-1][-1] != '\n':  | 
|
395  | 
options.append('no-eol')  | 
|
396  | 
lines[-1] = lines[-1] + '\n'  | 
|
397  | 
||
398  | 
lines = self.factory.make(lines, len(self._index))  | 
|
399  | 
if self.factory.annotated and len(parents) > 0:  | 
|
400  | 
            # Merge annotations from parent texts if so is needed.
 | 
|
401  | 
self._merge_annotations(lines, parents)  | 
|
402  | 
||
403  | 
if parents and delta:  | 
|
404  | 
            # To speed the extract of texts the delta chain is limited
 | 
|
405  | 
            # to a fixed number of deltas.  This should minimize both
 | 
|
406  | 
            # I/O and the time spend applying deltas.
 | 
|
407  | 
count = 0  | 
|
408  | 
delta_parents = parents  | 
|
409  | 
while count < 25:  | 
|
410  | 
parent = delta_parents[0]  | 
|
411  | 
method = self._index.get_method(parent)  | 
|
412  | 
if method == 'fulltext':  | 
|
413  | 
                    break
 | 
|
414  | 
delta_parents = self._index.get_parents(parent)  | 
|
415  | 
count = count + 1  | 
|
416  | 
if method == 'line-delta':  | 
|
417  | 
delta = False  | 
|
418  | 
||
419  | 
if delta:  | 
|
420  | 
options.append('line-delta')  | 
|
421  | 
content = self._get_content(parents[0])  | 
|
422  | 
delta_hunks = content.line_delta(lines)  | 
|
423  | 
store_lines = self.factory.lower_line_delta(delta_hunks)  | 
|
424  | 
else:  | 
|
425  | 
options.append('fulltext')  | 
|
426  | 
store_lines = self.factory.lower_fulltext(lines)  | 
|
427  | 
||
428  | 
where, size = self._data.add_record(version_id, digest, store_lines)  | 
|
429  | 
self._index.add_version(version_id, options, where, size, parents)  | 
|
430  | 
||
| 
1563.2.5
by Robert Collins
 Remove unused transaction references from knit.py and the versionedfile interface.  | 
431  | 
def clone_text(self, new_version_id, old_version_id, parents):  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
432  | 
"""See VersionedFile.clone_text()."""  | 
433  | 
        # FIXME RBC 20060228 make fast by only inserting an index with null delta.
 | 
|
434  | 
self.add_lines(new_version_id, parents, self.get_lines(old_version_id))  | 
|
435  | 
||
436  | 
def get_lines(self, version_id):  | 
|
437  | 
"""See VersionedFile.get_lines()."""  | 
|
438  | 
return self._get_content(version_id).text()  | 
|
439  | 
||
440  | 
def annotate_iter(self, version_id):  | 
|
441  | 
"""See VersionedFile.annotate_iter."""  | 
|
442  | 
content = self._get_content(version_id)  | 
|
443  | 
for origin, text in content.annotate_iter():  | 
|
444  | 
yield self._index.idx_to_name(origin), text  | 
|
445  | 
||
446  | 
def get_parents(self, version_id):  | 
|
447  | 
"""See VersionedFile.get_parents."""  | 
|
448  | 
self._check_versions_present([version_id])  | 
|
449  | 
return list(self._index.get_parents(version_id))  | 
|
450  | 
||
451  | 
def get_ancestry(self, versions):  | 
|
452  | 
"""See VersionedFile.get_ancestry."""  | 
|
453  | 
if isinstance(versions, basestring):  | 
|
454  | 
versions = [versions]  | 
|
455  | 
if not versions:  | 
|
456  | 
return []  | 
|
457  | 
self._check_versions_present(versions)  | 
|
458  | 
return self._index.get_ancestry(versions)  | 
|
459  | 
||
460  | 
def _reannotate_line_delta(self, other, lines, new_version_id,  | 
|
461  | 
new_version_idx):  | 
|
462  | 
"""Re-annotate line-delta and return new delta."""  | 
|
463  | 
new_delta = []  | 
|
464  | 
for start, end, count, contents \  | 
|
465  | 
in self.factory.parse_line_delta_iter(lines):  | 
|
466  | 
new_lines = []  | 
|
467  | 
for origin, line in contents:  | 
|
468  | 
old_version_id = other._index.idx_to_name(origin)  | 
|
469  | 
if old_version_id == new_version_id:  | 
|
470  | 
idx = new_version_idx  | 
|
471  | 
else:  | 
|
472  | 
idx = self._index.lookup(old_version_id)  | 
|
473  | 
new_lines.append((idx, line))  | 
|
474  | 
new_delta.append((start, end, count, new_lines))  | 
|
475  | 
||
476  | 
return self.factory.lower_line_delta(new_delta)  | 
|
477  | 
||
478  | 
def _reannotate_fulltext(self, other, lines, new_version_id,  | 
|
479  | 
new_version_idx):  | 
|
480  | 
"""Re-annotate fulltext and return new version."""  | 
|
481  | 
content = self.factory.parse_fulltext(lines, new_version_idx)  | 
|
482  | 
new_lines = []  | 
|
483  | 
for origin, line in content.annotate_iter():  | 
|
484  | 
old_version_id = other._index.idx_to_name(origin)  | 
|
485  | 
if old_version_id == new_version_id:  | 
|
486  | 
idx = new_version_idx  | 
|
487  | 
else:  | 
|
488  | 
idx = self._index.lookup(old_version_id)  | 
|
489  | 
new_lines.append((idx, line))  | 
|
490  | 
||
491  | 
return self.factory.lower_fulltext(KnitContent(new_lines))  | 
|
492  | 
||
493  | 
def join(self, other, pb=None, msg=None, version_ids=None):  | 
|
494  | 
"""See VersionedFile.join."""  | 
|
495  | 
assert isinstance(other, KnitVersionedFile)  | 
|
496  | 
||
497  | 
if version_ids is None:  | 
|
498  | 
version_ids = other.versions()  | 
|
499  | 
if not version_ids:  | 
|
500  | 
return 0  | 
|
501  | 
||
502  | 
if pb is None:  | 
|
503  | 
from bzrlib.progress import DummyProgress  | 
|
504  | 
pb = DummyProgress()  | 
|
505  | 
||
506  | 
version_ids = list(version_ids)  | 
|
507  | 
if None in version_ids:  | 
|
508  | 
version_ids.remove(None)  | 
|
509  | 
||
510  | 
other_ancestry = set(other.get_ancestry(version_ids))  | 
|
511  | 
needed_versions = other_ancestry - set(self._index.get_versions())  | 
|
512  | 
if not needed_versions:  | 
|
513  | 
return 0  | 
|
514  | 
full_list = topo_sort(other._index.get_graph())  | 
|
515  | 
||
516  | 
version_list = [i for i in full_list if (not self.has_version(i)  | 
|
517  | 
and i in needed_versions)]  | 
|
518  | 
||
519  | 
records = []  | 
|
520  | 
for version_id in version_list:  | 
|
521  | 
data_pos, data_size = other._index.get_position(version_id)  | 
|
522  | 
records.append((version_id, data_pos, data_size))  | 
|
523  | 
||
524  | 
count = 0  | 
|
525  | 
for version_id, lines, digest \  | 
|
526  | 
in other._data.read_records_iter(records):  | 
|
527  | 
options = other._index.get_options(version_id)  | 
|
528  | 
parents = other._index.get_parents(version_id)  | 
|
529  | 
||
530  | 
for parent in parents:  | 
|
531  | 
assert self.has_version(parent)  | 
|
532  | 
||
533  | 
if self.factory.annotated:  | 
|
534  | 
                # FIXME jrydberg: it should be possible to skip
 | 
|
535  | 
                # re-annotating components if we know that we are
 | 
|
536  | 
                # going to pull all revisions in the same order.
 | 
|
537  | 
new_version_id = version_id  | 
|
538  | 
new_version_idx = self._index.num_versions()  | 
|
539  | 
if 'fulltext' in options:  | 
|
540  | 
lines = self._reannotate_fulltext(other, lines,  | 
|
541  | 
new_version_id, new_version_idx)  | 
|
542  | 
elif 'line-delta' in options:  | 
|
543  | 
lines = self._reannotate_line_delta(other, lines,  | 
|
544  | 
new_version_id, new_version_idx)  | 
|
545  | 
||
546  | 
count = count + 1  | 
|
547  | 
pb.update(self.filename, count, len(version_list))  | 
|
548  | 
||
549  | 
pos, size = self._data.add_record(version_id, digest, lines)  | 
|
550  | 
self._index.add_version(version_id, options, pos, size, parents)  | 
|
551  | 
||
552  | 
pb.clear()  | 
|
553  | 
return count  | 
|
554  | 
||
555  | 
def walk(self, version_ids):  | 
|
556  | 
"""See VersionedFile.walk."""  | 
|
557  | 
        # We take the short path here, and extract all relevant texts
 | 
|
558  | 
        # and put them in a weave and let that do all the work.  Far
 | 
|
559  | 
        # from optimal, but is much simpler.
 | 
|
| 
1563.2.6
by Robert Collins
 Start check tests for knits (pending), and remove dead code.  | 
560  | 
        # FIXME RB 20060228 this really is inefficient!
 | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
561  | 
from bzrlib.weave import Weave  | 
562  | 
||
563  | 
w = Weave(self.filename)  | 
|
564  | 
ancestry = self.get_ancestry(version_ids)  | 
|
565  | 
sorted_graph = topo_sort(self._index.get_graph())  | 
|
566  | 
version_list = [vid for vid in sorted_graph if vid in ancestry]  | 
|
567  | 
||
568  | 
for version_id in version_list:  | 
|
569  | 
lines = self.get_lines(version_id)  | 
|
570  | 
w.add_lines(version_id, self.get_parents(version_id), lines)  | 
|
571  | 
||
572  | 
for lineno, insert_id, dset, line in w.walk(version_ids):  | 
|
573  | 
yield lineno, insert_id, dset, line  | 
|
574  | 
||
575  | 
||
576  | 
class _KnitComponentFile(object):  | 
|
577  | 
"""One of the files used to implement a knit database"""  | 
|
578  | 
||
| 
1563.2.5
by Robert Collins
 Remove unused transaction references from knit.py and the versionedfile interface.  | 
579  | 
def __init__(self, transport, filename, mode):  | 
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
580  | 
self._transport = transport  | 
581  | 
self._filename = filename  | 
|
582  | 
self._mode = mode  | 
|
583  | 
||
584  | 
def write_header(self):  | 
|
585  | 
old_len = self._transport.append(self._filename, self.HEADER)  | 
|
586  | 
if old_len != 0:  | 
|
587  | 
raise KnitCorrupt(self._filename, 'misaligned after writing header')  | 
|
588  | 
||
589  | 
def check_header(self, fp):  | 
|
590  | 
line = fp.read(len(self.HEADER))  | 
|
591  | 
if line != self.HEADER:  | 
|
592  | 
raise KnitHeaderError(badline=line)  | 
|
593  | 
||
594  | 
def commit(self):  | 
|
595  | 
"""Commit is a nop."""  | 
|
596  | 
||
597  | 
def __repr__(self):  | 
|
598  | 
return '%s(%s)' % (self.__class__.__name__, self._filename)  | 
|
599  | 
||
600  | 
||
601  | 
class _KnitIndex(_KnitComponentFile):  | 
|
602  | 
"""Manages knit index file.  | 
|
603  | 
||
604  | 
    The index is already kept in memory and read on startup, to enable
 | 
|
605  | 
    fast lookups of revision information.  The cursor of the index
 | 
|
606  | 
    file is always pointing to the end, making it easy to append
 | 
|
607  | 
    entries.
 | 
|
608  | 
||
609  | 
    _cache is a cache for fast mapping from version id to a Index
 | 
|
610  | 
    object.
 | 
|
611  | 
||
612  | 
    _history is a cache for fast mapping from indexes to version ids.
 | 
|
613  | 
||
614  | 
    The index data format is dictionary compressed when it comes to
 | 
|
615  | 
    parent references; a index entry may only have parents that with a
 | 
|
616  | 
    lover index number.  As a result, the index is topological sorted.
 | 
|
617  | 
    """
 | 
|
618  | 
||
619  | 
HEADER = "# bzr knit index 7\n"  | 
|
620  | 
||
621  | 
def _cache_version(self, version_id, options, pos, size, parents):  | 
|
622  | 
val = (version_id, options, pos, size, parents)  | 
|
623  | 
self._cache[version_id] = val  | 
|
624  | 
self._history.append(version_id)  | 
|
625  | 
||
626  | 
def _iter_index(self, fp):  | 
|
627  | 
lines = fp.read()  | 
|
628  | 
for l in lines.splitlines(False):  | 
|
629  | 
yield l.split()  | 
|
630  | 
||
| 
1563.2.5
by Robert Collins
 Remove unused transaction references from knit.py and the versionedfile interface.  | 
631  | 
def __init__(self, transport, filename, mode):  | 
632  | 
_KnitComponentFile.__init__(self, transport, filename, mode)  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
633  | 
self._cache = {}  | 
634  | 
self._history = []  | 
|
635  | 
try:  | 
|
636  | 
fp = self._transport.get(self._filename)  | 
|
637  | 
self.check_header(fp)  | 
|
638  | 
for rec in self._iter_index(fp):  | 
|
639  | 
self._cache_version(rec[0], rec[1].split(','), int(rec[2]), int(rec[3]),  | 
|
640  | 
[self._history[int(i)] for i in rec[4:]])  | 
|
641  | 
except NoSuchFile, e:  | 
|
642  | 
if mode != 'w':  | 
|
643  | 
raise e  | 
|
644  | 
self.write_header()  | 
|
645  | 
||
646  | 
def get_graph(self):  | 
|
647  | 
graph = []  | 
|
648  | 
for version_id, index in self._cache.iteritems():  | 
|
649  | 
graph.append((version_id, index[4]))  | 
|
650  | 
return graph  | 
|
651  | 
||
652  | 
def get_ancestry(self, versions):  | 
|
653  | 
"""See VersionedFile.get_ancestry."""  | 
|
654  | 
version_idxs = []  | 
|
655  | 
for version_id in versions:  | 
|
656  | 
version_idxs.append(self._history.index(version_id))  | 
|
657  | 
i = set(versions)  | 
|
658  | 
for v in xrange(max(version_idxs), 0, -1):  | 
|
659  | 
if self._history[v] in i:  | 
|
660  | 
                # include all its parents
 | 
|
661  | 
i.update(self._cache[self._history[v]][4])  | 
|
662  | 
return list(i)  | 
|
663  | 
||
664  | 
def num_versions(self):  | 
|
665  | 
return len(self._history)  | 
|
666  | 
||
667  | 
__len__ = num_versions  | 
|
668  | 
||
669  | 
def get_versions(self):  | 
|
670  | 
return self._history  | 
|
671  | 
||
672  | 
def idx_to_name(self, idx):  | 
|
673  | 
return self._history[idx]  | 
|
674  | 
||
675  | 
def lookup(self, version_id):  | 
|
676  | 
assert version_id in self._cache  | 
|
677  | 
return self._history.index(version_id)  | 
|
678  | 
||
679  | 
def add_version(self, version_id, options, pos, size, parents):  | 
|
680  | 
"""Add a version record to the index."""  | 
|
681  | 
self._cache_version(version_id, options, pos, size, parents)  | 
|
682  | 
||
683  | 
content = "%s %s %s %s %s\n" % (version_id,  | 
|
684  | 
','.join(options),  | 
|
685  | 
pos,  | 
|
686  | 
size,  | 
|
687  | 
' '.join([str(self.lookup(vid)) for  | 
|
688  | 
vid in parents]))  | 
|
689  | 
self._transport.append(self._filename, content)  | 
|
690  | 
||
691  | 
def has_version(self, version_id):  | 
|
692  | 
"""True if the version is in the index."""  | 
|
693  | 
return self._cache.has_key(version_id)  | 
|
694  | 
||
695  | 
def get_position(self, version_id):  | 
|
696  | 
"""Return data position and size of specified version."""  | 
|
697  | 
return (self._cache[version_id][2], \  | 
|
698  | 
self._cache[version_id][3])  | 
|
699  | 
||
700  | 
def get_method(self, version_id):  | 
|
701  | 
"""Return compression method of specified version."""  | 
|
702  | 
options = self._cache[version_id][1]  | 
|
703  | 
if 'fulltext' in options:  | 
|
704  | 
return 'fulltext'  | 
|
705  | 
else:  | 
|
706  | 
assert 'line-delta' in options  | 
|
707  | 
return 'line-delta'  | 
|
708  | 
||
709  | 
def get_options(self, version_id):  | 
|
710  | 
return self._cache[version_id][1]  | 
|
711  | 
||
712  | 
def get_parents(self, version_id):  | 
|
713  | 
"""Return parents of specified version."""  | 
|
714  | 
return self._cache[version_id][4]  | 
|
715  | 
||
716  | 
def check_versions_present(self, version_ids):  | 
|
717  | 
"""Check that all specified versions are present."""  | 
|
718  | 
version_ids = set(version_ids)  | 
|
719  | 
for version_id in list(version_ids):  | 
|
720  | 
if version_id in self._cache:  | 
|
721  | 
version_ids.remove(version_id)  | 
|
722  | 
if version_ids:  | 
|
723  | 
raise RevisionNotPresent(list(version_ids)[0], self.filename)  | 
|
724  | 
||
725  | 
||
726  | 
class _KnitData(_KnitComponentFile):  | 
|
727  | 
"""Contents of the knit data file"""  | 
|
728  | 
||
729  | 
HEADER = "# bzr knit data 7\n"  | 
|
730  | 
||
| 
1563.2.5
by Robert Collins
 Remove unused transaction references from knit.py and the versionedfile interface.  | 
731  | 
def __init__(self, transport, filename, mode):  | 
732  | 
_KnitComponentFile.__init__(self, transport, filename, mode)  | 
|
| 
1563.2.4
by Robert Collins
 First cut at including the knit implementation of versioned_file.  | 
733  | 
self._file = None  | 
734  | 
self._checked = False  | 
|
735  | 
||
736  | 
def _open_file(self):  | 
|
737  | 
if self._file is None:  | 
|
738  | 
try:  | 
|
739  | 
self._file = self._transport.get(self._filename)  | 
|
740  | 
except NoSuchFile:  | 
|
741  | 
                pass
 | 
|
742  | 
return self._file  | 
|
743  | 
||
744  | 
def add_record(self, version_id, digest, lines):  | 
|
745  | 
"""Write new text record to disk. Returns the position in the  | 
|
746  | 
        file where it was written."""
 | 
|
747  | 
sio = StringIO()  | 
|
748  | 
data_file = GzipFile(None, mode='wb', fileobj=sio)  | 
|
749  | 
print >>data_file, "version %s %d %s" % (version_id, len(lines), digest)  | 
|
750  | 
data_file.writelines(lines)  | 
|
751  | 
print >>data_file, "end %s\n" % version_id  | 
|
752  | 
data_file.close()  | 
|
753  | 
||
754  | 
content = sio.getvalue()  | 
|
755  | 
start_pos = self._transport.append(self._filename, content)  | 
|
756  | 
return start_pos, len(content)  | 
|
757  | 
||
758  | 
def _parse_record(self, version_id, data):  | 
|
759  | 
df = GzipFile(mode='rb', fileobj=StringIO(data))  | 
|
760  | 
rec = df.readline().split()  | 
|
761  | 
if len(rec) != 4:  | 
|
762  | 
raise KnitCorrupt(self._filename, 'unexpected number of records')  | 
|
763  | 
if rec[1] != version_id:  | 
|
764  | 
raise KnitCorrupt(self.file.name,  | 
|
765  | 
'unexpected version, wanted %r' % version_id)  | 
|
766  | 
lines = int(rec[2])  | 
|
767  | 
record_contents = self._read_record_contents(df, lines)  | 
|
768  | 
l = df.readline()  | 
|
769  | 
if l != 'end %s\n' % version_id:  | 
|
770  | 
raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'  | 
|
771  | 
% (l, version_id))  | 
|
772  | 
return record_contents, rec[3]  | 
|
773  | 
||
774  | 
def _read_record_contents(self, df, record_lines):  | 
|
775  | 
"""Read and return n lines from datafile."""  | 
|
776  | 
r = []  | 
|
777  | 
for i in range(record_lines):  | 
|
778  | 
r.append(df.readline())  | 
|
779  | 
return r  | 
|
780  | 
||
781  | 
def read_records_iter(self, records):  | 
|
782  | 
"""Read text records from data file and yield result.  | 
|
783  | 
||
784  | 
        Each passed record is a tuple of (version_id, pos, len) and
 | 
|
785  | 
        will be read in the given order.  Yields (version_id,
 | 
|
786  | 
        contents, digest).
 | 
|
787  | 
        """
 | 
|
788  | 
||
789  | 
class ContinuousRange:  | 
|
790  | 
def __init__(self, rec_id, pos, size):  | 
|
791  | 
self.start_pos = pos  | 
|
792  | 
self.end_pos = pos + size  | 
|
793  | 
self.versions = [(rec_id, pos, size)]  | 
|
794  | 
||
795  | 
def add(self, rec_id, pos, size):  | 
|
796  | 
if self.end_pos != pos:  | 
|
797  | 
return False  | 
|
798  | 
self.end_pos = pos + size  | 
|
799  | 
self.versions.append((rec_id, pos, size))  | 
|
800  | 
return True  | 
|
801  | 
||
802  | 
def split(self, fp):  | 
|
803  | 
for rec_id, pos, size in self.versions:  | 
|
804  | 
yield rec_id, fp.read(size)  | 
|
805  | 
||
806  | 
fp = self._open_file()  | 
|
807  | 
||
808  | 
        # Loop through all records and try to collect as large
 | 
|
809  | 
        # continuous region as possible to read.
 | 
|
810  | 
while records:  | 
|
811  | 
record_id, pos, size = records.pop(0)  | 
|
812  | 
continuous_range = ContinuousRange(record_id, pos, size)  | 
|
813  | 
while records:  | 
|
814  | 
record_id, pos, size = records[0]  | 
|
815  | 
if continuous_range.add(record_id, pos, size):  | 
|
816  | 
del records[0]  | 
|
817  | 
else:  | 
|
818  | 
                    break
 | 
|
819  | 
fp.seek(continuous_range.start_pos, 0)  | 
|
820  | 
for record_id, data in continuous_range.split(fp):  | 
|
821  | 
content, digest = self._parse_record(record_id, data)  | 
|
822  | 
yield record_id, content, digest  | 
|
823  | 
||
824  | 
self._file = None  | 
|
825  | 
||
826  | 
def read_records(self, records):  | 
|
827  | 
"""Read records into a dictionary."""  | 
|
828  | 
components = {}  | 
|
829  | 
for record_id, content, digest in self.read_records_iter(records):  | 
|
830  | 
components[record_id] = (content, digest)  | 
|
831  | 
return components  | 
|
832  |