# Copyright (C) 2005, 2006, 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Knit versionedfile implementation.

A knit is a versioned file implementation that supports efficient append only
lifeless: the data file is made up of "delta records".  each delta record has a delta header
that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of
the -expanded data- (ie, the delta applied to the parent).  the delta also ends with an
end-marker; simply "end VERSION"

delta can be line or full contents.
... the 8's there are the index number of the annotation.
version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e
8 e.set('executable', 'yes')
8 if elt.get('executable') == 'yes':
8 ie.executable = True
end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad
09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents
09:33 < jrydberg> lifeless: the parents are currently dictionary compressed
09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)
09:33 < lifeless> right
09:33 < jrydberg> lifeless: the position and size is the range in the data file

so the index sequence is the dictionary compressed sequence number used
in the deltas to provide line annotation
"""

# 10:16 < lifeless> make partial index writes safe
# 10:16 < lifeless> implement 'knit.check()' like weave.check()
# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave'
# move sha1 out of the content so that join is faster at verifying parents
# record content length ?
from cStringIO import StringIO
from itertools import izip, chain

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
from bzrlib.errors import (
    RevisionAlreadyPresent,
from bzrlib.tuned_gzip import GzipFile
from bzrlib.osutils import (
from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed
from bzrlib.tsort import topo_sort
from bzrlib.versionedfile import VersionedFile, InterVersionedFile

# TODO: Split out code specific to this format into an associated object.

# TODO: Can we put in some kind of value to check that the index and data
#       files belong together?

# TODO: accommodate binaries, perhaps by storing a byte count

# TODO: function to check whole file

# TODO: atomically append data, then measure backwards from the cursor
#       position after writing to work out where it was located.  we may need
#       to bypass python file buffering.

DATA_SUFFIX = '.knit'
INDEX_SUFFIX = '.kndx'

class KnitContent(object):
    """Content of a knit version to which deltas can be applied."""

    def __init__(self, lines):

    def annotate_iter(self):
        """Yield tuples of (origin, text) for each content line."""
        return iter(self._lines)

        """Return a list of (origin, text) tuples."""
        return list(self.annotate_iter())

    def line_delta_iter(self, new_lines):
        """Generate line-based delta from this content to new_lines."""
        new_texts = new_lines.text()
        old_texts = self.text()
        s = KnitSequenceMatcher(None, old_texts, new_texts)
        for tag, i1, i2, j1, j2 in s.get_opcodes():
            # ofrom, oto, length, data
            yield i1, i2, j2 - j1, new_lines._lines[j1:j2]

    def line_delta(self, new_lines):
        return list(self.line_delta_iter(new_lines))

        return [text for origin, text in self._lines]

        return KnitContent(self._lines[:])

    def get_line_delta_blocks(knit_delta, source, target):
        """Extract SequenceMatcher.get_matching_blocks() from a knit delta"""
        target_len = len(target)
        for s_begin, s_end, t_len, new_text in knit_delta:
            true_n = s_begin - s_pos
                # knit deltas do not provide reliable info about whether the
                # last line of a file matches, due to eol handling.
                if source[s_pos + n - 1] != target[t_pos + n - 1]:
                    yield s_pos, t_pos, n
            t_pos += t_len + true_n
        n = target_len - t_pos
            if source[s_pos + n - 1] != target[t_pos + n - 1]:
                yield s_pos, t_pos, n
        yield s_pos + (target_len - t_pos), target_len, 0

class _KnitFactory(object):
    """Base factory for creating content objects."""

    def make(self, lines, version_id):
        num_lines = len(lines)
        return KnitContent(zip([version_id] * num_lines, lines))


class KnitAnnotateFactory(_KnitFactory):
    """Factory for creating annotated Content objects."""

    def parse_fulltext(self, content, version_id):
        """Convert fulltext to internal representation

        fulltext content is of the format
        revid(utf8) plaintext\n
        internal representation is of the format:
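        (revid, plaintext) tuples.  As an illustration (a made-up revision
        id, not from any real repository), the fulltext line

            'rev-1 hello world\n'

        becomes the internal tuple

            ('rev-1', 'hello world\n')
        """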
        # TODO: jam 20070209 The tests expect this to be returned as tuples,
        #       but the code itself doesn't really depend on that.
        #       Figure out a way to not require the overhead of turning the
        #       list back into tuples.
        lines = [tuple(line.split(' ', 1)) for line in content]
        return KnitContent(lines)

    def parse_line_delta_iter(self, lines):
        return iter(self.parse_line_delta(lines))

    def parse_line_delta(self, lines, version_id):
        """Convert a line based delta into internal representation.

        line delta is in the form of:
        intstart intend intcount
        revid(utf8) newline\n
        internal representation is
        (start, end, count, [1..count tuples (revid, newline)])
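        As an illustration (hypothetical revision ids and text), the
        serialised hunk

            1,2,2
            rev-a new first line\n
            rev-a new second line\n

        parses to

            (1, 2, 2, [('rev-a', 'new first line\n'),
                       ('rev-a', 'new second line\n')])
        """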
        def cache_and_return(line):
            origin, text = line.split(' ', 1)
            return cache.setdefault(origin, origin), text

        # walk through the lines parsing.
            start, end, count = [int(n) for n in header.split(',')]
            contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
            result.append((start, end, count, contents))

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""
        return (line.split(' ', 1)[1] for line in lines)

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        header = header.split(',')
        count = int(header[2])
        for i in xrange(count):
            origin, text = next().split(' ', 1)

    def lower_fulltext(self, content):
        """Convert a fulltext content record into a serializable form.

        See parse_fulltext, which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        return ['%s %s' % (o, t) for o, t in content._lines]

    def lower_line_delta(self, delta):
        """Convert a delta into a serializable form.

        See parse_line_delta, which this inverts.
        """
        # TODO: jam 20070209 We only do the caching thing to make sure that
        #       the origin is a valid utf-8 line, eventually we could remove it
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend(origin + ' ' + text
                       for origin, text in lines)

class KnitPlainFactory(_KnitFactory):
    """Factory for creating plain Content objects."""

    def parse_fulltext(self, content, version_id):
        """This parses an unannotated fulltext.

        Note that this is not a noop - the internal representation
        has (versionid, line) - it's just a constant versionid.
        """
        return self.make(content, version_id)

    def parse_line_delta_iter(self, lines, version_id):
        num_lines = len(lines)
        while cur < num_lines:
            start, end, c = [int(n) for n in header.split(',')]
            yield start, end, c, zip([version_id] * c, lines[cur:cur+c])

    def parse_line_delta(self, lines, version_id):
        return list(self.parse_line_delta_iter(lines, version_id))

    def get_fulltext_content(self, lines):
        """Extract just the content lines from a fulltext."""

    def get_linedelta_content(self, lines):
        """Extract just the content from a line delta.

        This doesn't return all of the extra information stored in a delta.
        Only the actual content lines.
        """
        header = header.split(',')
        count = int(header[2])
        for i in xrange(count):

    def lower_fulltext(self, content):
        return content.text()

    def lower_line_delta(self, delta):
        for start, end, c, lines in delta:
            out.append('%d,%d,%d\n' % (start, end, c))
            out.extend([text for origin, text in lines])


def make_empty_knit(transport, relpath):
    """Construct an empty knit at the specified location."""
    k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory)

class KnitVersionedFile(VersionedFile):
    """Weave-like structure with faster random access.

    A knit stores a number of texts and a summary of the relationships
    between them.  Texts are identified by a string version-id.  Texts
    are normally stored and retrieved as a series of lines, but can
    also be passed as single strings.

    Lines are stored with the trailing newline (if any) included, to
    avoid special cases for files with no final newline.  Lines are
    composed of 8-bit characters, not unicode.  The combination of
    these approaches should mean any 'binary' file can be safely
    stored and retrieved.
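    A minimal usage sketch (illustrative only; assumes ``t`` is a writable
    bzrlib transport):

        knit = KnitVersionedFile('example', t, create=True)
        knit.add_lines('rev-1', [], ['hello\n'])
        knit.get_lines('rev-1')     # -> ['hello\n']
    """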

    def __init__(self, relpath, transport, file_mode=None, access_mode=None,
                 factory=None, basis_knit=DEPRECATED_PARAMETER, delta=True,
                 create=False, create_parent_dir=False, delay_create=False,
                 dir_mode=None, index=None, access_method=None):
        """Construct a knit at location specified by relpath.

        :param create: If not True, only open an existing knit.
        :param create_parent_dir: If True, create the parent directory if
            creating the file fails. (This is used for stores with
            hash-prefixes that may not exist yet.)
        :param delay_create: The calling code is aware that the knit won't
            actually be created until the first data is stored.
        :param index: An index to use for the knit.
        """
        if deprecated_passed(basis_knit):
            warnings.warn("KnitVersionedFile.__init__(): The basis_knit parameter is"
                          " deprecated as of bzr 0.9.",
                          DeprecationWarning, stacklevel=2)
        if access_mode is None:
        super(KnitVersionedFile, self).__init__(access_mode)
        assert access_mode in ('r', 'w'), "invalid mode specified %r" % access_mode
        self.transport = transport
        self.filename = relpath
        self.factory = factory or KnitAnnotateFactory()
        self.writable = (access_mode == 'w')

        self._max_delta_chain = 200

        self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
            access_mode, create=create, file_mode=file_mode,
            create_parent_dir=create_parent_dir, delay_create=delay_create,
        if access_method is None:
            _access = _KnitAccess(transport, relpath + DATA_SUFFIX, file_mode, dir_mode,
                ((create and not len(self)) and delay_create), create_parent_dir)
            _access = access_method
        if create and not len(self) and not delay_create:
        self._data = _KnitData(_access)

        return '%s(%s)' % (self.__class__.__name__,
                           self.transport.abspath(self.filename))

    def _check_should_delta(self, first_parents):
        """Iterate back through the parent listing, looking for a fulltext.

        This is used when we want to decide whether to add a delta or a new
        fulltext.  It searches for _max_delta_chain parents.  When it finds a
        fulltext parent, it sees if the total size of the deltas leading up to
        it is large enough to indicate that we want a new full text anyway.

        Return True if we should create a new delta, False if we should use a
        """
        delta_parents = first_parents
        for count in xrange(self._max_delta_chain):
            parent = delta_parents[0]
            method = self._index.get_method(parent)
            index, pos, size = self._index.get_position(parent)
            if method == 'fulltext':
            delta_parents = self._index.get_parents(parent)
            # We couldn't find a fulltext, so we must create a new one
        return fulltext_size > delta_size

    def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
        """See VersionedFile._add_delta()."""
        self._check_add(version_id, [])  # should we check the lines ?
        self._check_versions_present(parents)
        for parent in parents:
            if not self.has_version(parent):
                ghosts.append(parent)
                present_parents.append(parent)

        if delta_parent is None:
            # reconstitute as full text.
            assert len(delta) == 1 or len(delta) == 0
            assert delta[0][0] == 0
            assert delta[0][1] == 0, delta[0][1]
            return super(KnitVersionedFile, self)._add_delta(version_id,
            options.append('no-eol')

        if delta_parent is not None:
            # determine the current delta chain length.
            # To speed the extract of texts the delta chain is limited
            # to a fixed number of deltas.  This should minimize both
            # I/O and the time spent applying deltas.
            # The window was changed to a maximum of 200 deltas, but also added
            # was a check that the total compressed size of the deltas is
            # smaller than the compressed size of the fulltext.
            if not self._check_should_delta([delta_parent]):
                # We don't want a delta here, just do a normal insertion.
                return super(KnitVersionedFile, self)._add_delta(version_id,
        options.append('line-delta')
        store_lines = self.factory.lower_line_delta(delta)

        access_memo = self._data.add_record(version_id, digest, store_lines)
        self._index.add_version(version_id, options, access_memo, parents)

    def _add_raw_records(self, records, data):
        """Add all the records 'records' with data pre-joined in 'data'.

        :param records: A list of tuples(version_id, options, parents, size).
        :param data: The data for the records. When it is written, the records
                     are adjusted to have pos pointing into data by the sum of
                     the preceding records sizes.
        """
        raw_record_sizes = [record[3] for record in records]
        positions = self._data.add_raw_records(raw_record_sizes, data)
        for (version_id, options, parents, size), access_memo in zip(
            index_entries.append((version_id, options, access_memo, parents))
            if self._data._do_cache:
                self._data._cache[version_id] = data[offset:offset+size]
        self._index.add_versions(index_entries)

    def enable_cache(self):
        """Start caching data for this knit"""
        self._data.enable_cache()

    def clear_cache(self):
        """Clear the data cache only."""
        self._data.clear_cache()

    def copy_to(self, name, transport):
        """See VersionedFile.copy_to()."""
        # copy the current index to a temp index to avoid racing with local
        transport.put_file_non_atomic(name + INDEX_SUFFIX + '.tmp',
                self.transport.get(self._index._filename))
        f = self._data._open_file()
        transport.put_file(name + DATA_SUFFIX, f)
        # move the copied index into place
        transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)

    def create_empty(self, name, transport, mode=None):
        return KnitVersionedFile(name, transport, factory=self.factory,
                                 delta=self.delta, create=True)

    def _fix_parents(self, version_id, new_parents):
        """Fix the parents list for version.

        This is done by appending a new version to the index
        with identical data except for the parents list.
        The parents list must be a superset of the current
        """
        current_values = self._index._cache[version_id]
        assert set(current_values[4]).difference(set(new_parents)) == set()
        self._index.add_version(version_id,
                                (None, current_values[2], current_values[3]),

    def _extract_blocks(self, version_id, source, target):
        if self._index.get_method(version_id) != 'line-delta':
        parent, sha1, noeol, delta = self.get_delta(version_id)
        return KnitContent.get_line_delta_blocks(delta, source, target)

    def get_delta(self, version_id):
        """Get a delta for constructing version from some other version."""
        version_id = osutils.safe_revision_id(version_id)
        self.check_not_reserved_id(version_id)
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)
        parents = self.get_parents(version_id)
        index_memo = self._index.get_position(version_id)
        data, sha1 = self._data.read_records(((version_id, index_memo),))[version_id]
        noeol = 'no-eol' in self._index.get_options(version_id)
        if 'fulltext' == self._index.get_method(version_id):
            new_content = self.factory.parse_fulltext(data, version_id)
            if parent is not None:
                reference_content = self._get_content(parent)
                old_texts = reference_content.text()
            new_texts = new_content.text()
            delta_seq = KnitSequenceMatcher(None, old_texts, new_texts)
            return parent, sha1, noeol, self._make_line_delta(delta_seq, new_content)
            delta = self.factory.parse_line_delta(data, version_id)
            return parent, sha1, noeol, delta

    def get_graph_with_ghosts(self):
        """See VersionedFile.get_graph_with_ghosts()."""
        graph_items = self._index.get_graph()
        return dict(graph_items)

    def get_sha1(self, version_id):
        return self.get_sha1s([version_id])[0]

    def get_sha1s(self, version_ids):
        """See VersionedFile.get_sha1()."""
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
        record_map = self._get_record_map(version_ids)
        # record entry 2 is the 'digest'.
        return [record_map[v][2] for v in version_ids]

        """See VersionedFile.get_suffixes()."""
        return [DATA_SUFFIX, INDEX_SUFFIX]

    def has_ghost(self, version_id):
        """True if there is a ghost reference in the file to version_id."""
        version_id = osutils.safe_revision_id(version_id)
        if self.has_version(version_id):
        # optimisable if needed by memoising the _ghosts set.
        items = self._index.get_graph()
        for node, parents in items:
            for parent in parents:
                if parent not in self._index._cache:
                    if parent == version_id:

        """See VersionedFile.versions."""
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(2, "versions scales with size of history")
        return self._index.get_versions()

    def has_version(self, version_id):
        """See VersionedFile.has_version."""
        if 'evil' in debug.debug_flags:
            trace.mutter_callsite(2, "has_version is a LBYL scenario")
        version_id = osutils.safe_revision_id(version_id)
        return self._index.has_version(version_id)

    __contains__ = has_version

    def _merge_annotations(self, content, parents, parent_texts={},
                           delta=None, annotated=None,
                           left_matching_blocks=None):
        """Merge annotations for content.  This is done by comparing
        the annotations based on changes to the text.
        """
        if left_matching_blocks is not None:
            delta_seq = diff._PrematchedMatcher(left_matching_blocks)
        for parent_id in parents:
            merge_content = self._get_content(parent_id, parent_texts)
            if (parent_id == parents[0] and delta_seq is not None):
                seq = patiencediff.PatienceSequenceMatcher(
                    None, merge_content.text(), content.text())
            for i, j, n in seq.get_matching_blocks():
                # this appears to copy (origin, text) pairs across to the
                # new content for any line that matches the last-checked
                content._lines[j:j+n] = merge_content._lines[i:i+n]
        if delta_seq is None:
            reference_content = self._get_content(parents[0], parent_texts)
            new_texts = content.text()
            old_texts = reference_content.text()
            delta_seq = patiencediff.PatienceSequenceMatcher(
                None, old_texts, new_texts)
        return self._make_line_delta(delta_seq, content)

    def _make_line_delta(self, delta_seq, new_content):
        """Generate a line delta from delta_seq and new_content."""
        for op in delta_seq.get_opcodes():
            diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))

    def _get_components_positions(self, version_ids):
        """Produce a map of position data for the components of versions.

        This data is intended to be used for retrieving the knit records.

        A dict of version_id to (method, data_pos, data_size, next) is
        method is the way referenced data should be applied.
        data_pos is the position of the data in the knit.
        data_size is the size of the data in the knit.
        next is the build-parent of the version, or None for fulltexts.
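        For example (illustrative positions and sizes only), a delta built
        on a fulltext might produce:

            {'rev-2': ('line-delta', (None, 130, 60), 'rev-1'),
             'rev-1': ('fulltext', (None, 0, 130), None)}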
        """
        for version_id in version_ids:
            while cursor is not None and cursor not in component_data:
                method = self._index.get_method(cursor)
                if method == 'fulltext':
                    next = self.get_parents(cursor)[0]
                index_memo = self._index.get_position(cursor)
                component_data[cursor] = (method, index_memo, next)
        return component_data

    def _get_content(self, version_id, parent_texts={}):
        """Returns a content object that makes up the specified
        version."""
        if not self.has_version(version_id):
            raise RevisionNotPresent(version_id, self.filename)

        cached_version = parent_texts.get(version_id, None)
        if cached_version is not None:
            return cached_version

        text_map, contents_map = self._get_content_maps([version_id])
        return contents_map[version_id]

    def _check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        self._index.check_versions_present(version_ids)

    def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):
        """See VersionedFile.add_lines_with_ghosts()."""
        self._check_add(version_id, lines)
        return self._add(version_id, lines[:], parents, self.delta, parent_texts)

    def _add_lines(self, version_id, parents, lines, parent_texts,
                   left_matching_blocks=None):
        """See VersionedFile.add_lines."""
        self._check_add(version_id, lines)
        self._check_versions_present(parents)
        return self._add(version_id, lines[:], parents, self.delta,
                         parent_texts, left_matching_blocks)

    def _check_add(self, version_id, lines):
        """Check that version_id and lines are safe to add."""
        assert self.writable, "knit is not opened for write"
        ### FIXME escape. RBC 20060228
        if contains_whitespace(version_id):
            raise InvalidRevisionId(version_id, self.filename)
        self.check_not_reserved_id(version_id)
        if self.has_version(version_id):
            raise RevisionAlreadyPresent(version_id, self.filename)
        self._check_lines_not_unicode(lines)
        self._check_lines_are_lines(lines)

    def _add(self, version_id, lines, parents, delta, parent_texts,
             left_matching_blocks=None):
        """Add a set of lines on top of version specified by parents.

        If delta is true, compress the text as a line-delta against
        Any versions not present will be converted into ghosts.
        """
        # 461 0 6546.0390 43.9100 bzrlib.knit:489(_add)
        # +400 0 889.4890 418.9790 +bzrlib.knit:192(lower_fulltext)
        # +461 0 1364.8070 108.8030 +bzrlib.knit:996(add_record)
        # +461 0 193.3940 41.5720 +bzrlib.knit:898(add_version)
        # +461 0 134.0590 18.3810 +bzrlib.osutils:361(sha_strings)
        # +461 0 36.3420 15.4540 +bzrlib.knit:146(make)
        # +1383 0 8.0370 8.0370 +<len>
        # +61 0 13.5770 7.9190 +bzrlib.knit:199(lower_line_delta)
        # +61 0 963.3470 7.8740 +bzrlib.knit:427(_get_content)
        # +61 0 973.9950 5.2950 +bzrlib.knit:136(line_delta)
        # +61 0 1918.1800 5.2640 +bzrlib.knit:359(_merge_annotations)

        if parent_texts is None:
        for parent in parents:
            if not self.has_version(parent):
                ghosts.append(parent)
                present_parents.append(parent)

        if delta and not len(present_parents):

        digest = sha_strings(lines)
        if lines[-1][-1] != '\n':
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'

        if len(present_parents) and delta:
            # To speed the extract of texts the delta chain is limited
            # to a fixed number of deltas.  This should minimize both
            # I/O and the time spent applying deltas.
            delta = self._check_should_delta(present_parents)

        assert isinstance(version_id, str)
        lines = self.factory.make(lines, version_id)
        if delta or (self.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed.
            delta_hunks = self._merge_annotations(lines, present_parents,
                parent_texts, delta, self.factory.annotated,
                left_matching_blocks)

            options.append('line-delta')
            store_lines = self.factory.lower_line_delta(delta_hunks)
            options.append('fulltext')
            store_lines = self.factory.lower_fulltext(lines)

        access_memo = self._data.add_record(version_id, digest, store_lines)
        self._index.add_version(version_id, options, access_memo, parents)

    def check(self, progress_bar=None):
        """See VersionedFile.check()."""

    def _clone_text(self, new_version_id, old_version_id, parents):
        """See VersionedFile.clone_text()."""
        # FIXME RBC 20060228 make fast by only inserting an index with null
        self.add_lines(new_version_id, parents, self.get_lines(old_version_id))

    def get_lines(self, version_id):
        """See VersionedFile.get_lines()."""
        return self.get_line_list([version_id])[0]

    def _get_record_map(self, version_ids):
        """Produce a dictionary of knit records.

        The keys are version_ids, the values are tuples of (method, content,
        method is the way the content should be applied.
        content is a KnitContent object.
        digest is the SHA1 digest of this version id after all steps are done
        next is the build-parent of the version, i.e. the leftmost ancestor.
        If the method is fulltext, next will be None.
        """
        position_map = self._get_components_positions(version_ids)
        # c = component_id, m = method, i_m = index_memo, n = next
        records = [(c, i_m) for c, (m, i_m, n) in position_map.iteritems()]
        for component_id, content, digest in \
                self._data.read_records_iter(records):
            method, index_memo, next = position_map[component_id]
            record_map[component_id] = method, content, digest, next

    def get_text(self, version_id):
        """See VersionedFile.get_text"""
        return self.get_texts([version_id])[0]

    def get_texts(self, version_ids):
        return [''.join(l) for l in self.get_line_list(version_ids)]

    def get_line_list(self, version_ids):
        """Return the texts of listed versions as a list of strings."""
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
        for version_id in version_ids:
            self.check_not_reserved_id(version_id)
        text_map, content_map = self._get_content_maps(version_ids)
        return [text_map[v] for v in version_ids]

    _get_lf_split_line_list = get_line_list

    def _get_content_maps(self, version_ids):
        """Produce maps of text and KnitContents

        :return: (text_map, content_map) where text_map contains the texts for
            the requested versions and content_map contains the KnitContents.
            Both dicts take version_ids as their keys.
        """
        for version_id in version_ids:
            if not self.has_version(version_id):
                raise RevisionNotPresent(version_id, self.filename)
        record_map = self._get_record_map(version_ids)

        for version_id in version_ids:
            while cursor is not None:
                method, data, digest, next = record_map[cursor]
                components.append((cursor, method, data, digest))
                if cursor in content_map:

            for component_id, method, data, digest in reversed(components):
                if component_id in content_map:
                    content = content_map[component_id]
                    if method == 'fulltext':
                        assert content is None
                        content = self.factory.parse_fulltext(data, version_id)
                    elif method == 'line-delta':
                        delta = self.factory.parse_line_delta(data, version_id)
                        content = content.copy()
                        content._lines = self._apply_delta(content._lines,
                    content_map[component_id] = content

            if 'no-eol' in self._index.get_options(version_id):
                content = content.copy()
                line = content._lines[-1][1].rstrip('\n')
                content._lines[-1] = (content._lines[-1][0], line)
            final_content[version_id] = content

            # digest here is the digest from the last applied component.
            text = content.text()
            if sha_strings(text) != digest:
                raise KnitCorrupt(self.filename,
                                  'sha-1 does not match %s' % version_id)

            text_map[version_id] = text
        return text_map, final_content

    def iter_lines_added_or_present_in_versions(self, version_ids=None,
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        if version_ids is None:
            version_ids = self.versions()
        version_ids = [osutils.safe_revision_id(v) for v in version_ids]
            pb = progress.DummyProgress()
        # we don't care about inclusions, the caller cares.
        # but we need to setup a list of records to visit.
        # we need version_id, position, length
        version_id_records = []
        requested_versions = set(version_ids)
        # filter for available versions
        for version_id in requested_versions:
            if not self.has_version(version_id):
                raise RevisionNotPresent(version_id, self.filename)
        # get an in-component-order queue:
        for version_id in self.versions():
            if version_id in requested_versions:
                index_memo = self._index.get_position(version_id)
                version_id_records.append((version_id, index_memo))

        total = len(version_id_records)
        for version_idx, (version_id, data, sha_value) in \
                enumerate(self._data.read_records_iter(version_id_records)):
            pb.update('Walking content.', version_idx, total)
            method = self._index.get_method(version_id)
            assert method in ('fulltext', 'line-delta')
            if method == 'fulltext':
                line_iterator = self.factory.get_fulltext_content(data)
                line_iterator = self.factory.get_linedelta_content(data)
            for line in line_iterator:

        pb.update('Walking content.', total, total)

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
        version_ids = [osutils.safe_revision_id(version_id) for
                       version_id in version_ids]
        return self._index.iter_parents(version_ids)

    def num_versions(self):
        """See VersionedFile.num_versions()."""
        return self._index.num_versions()

    __len__ = num_versions

    def annotate_iter(self, version_id):
        """See VersionedFile.annotate_iter."""
        version_id = osutils.safe_revision_id(version_id)
        content = self._get_content(version_id)
        for origin, text in content.annotate_iter():

    def get_parents(self, version_id):
        """See VersionedFile.get_parents."""
        # 52554 calls in 1264 872 internal down from 3674
        version_id = osutils.safe_revision_id(version_id)
            return self._index.get_parents(version_id)
            raise RevisionNotPresent(version_id, self.filename)

    def get_parents_with_ghosts(self, version_id):
        """See VersionedFile.get_parents_with_ghosts."""
        version_id = osutils.safe_revision_id(version_id)
            return self._index.get_parents_with_ghosts(version_id)
            raise RevisionNotPresent(version_id, self.filename)

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        if isinstance(versions, basestring):
            versions = [versions]
        versions = [osutils.safe_revision_id(v) for v in versions]
        return self._index.get_ancestry(versions, topo_sorted)

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        if isinstance(versions, basestring):
            versions = [versions]
        versions = [osutils.safe_revision_id(v) for v in versions]
        return self._index.get_ancestry_with_ghosts(versions)

    def plan_merge(self, ver_a, ver_b):
        """See VersionedFile.plan_merge."""
        ver_a = osutils.safe_revision_id(ver_a)
        ver_b = osutils.safe_revision_id(ver_b)
        ancestors_b = set(self.get_ancestry(ver_b, topo_sorted=False))
        ancestors_a = set(self.get_ancestry(ver_a, topo_sorted=False))
        annotated_a = self.annotate(ver_a)
        annotated_b = self.annotate(ver_b)
        return merge._plan_annotate_merge(annotated_a, annotated_b,
                                          ancestors_a, ancestors_b)

class _KnitComponentFile(object):
    """One of the files used to implement a knit database"""

    def __init__(self, transport, filename, mode, file_mode=None,
                 create_parent_dir=False, dir_mode=None):
        self._transport = transport
        self._filename = filename
        self._file_mode = file_mode
        self._dir_mode = dir_mode
        self._create_parent_dir = create_parent_dir
        self._need_to_create = False

    def _full_path(self):
        """Return the full path to this file."""
        return self._transport.base + self._filename

    def check_header(self, fp):
        line = fp.readline()
        # An empty file can actually be treated as though the file doesn't
            raise errors.NoSuchFile(self._full_path())
        if line != self.HEADER:
            raise KnitHeaderError(badline=line,
                                  filename=self._transport.abspath(self._filename))

        return '%s(%s)' % (self.__class__.__name__, self._filename)

class _KnitIndex(_KnitComponentFile):
    """Manages knit index file.

    The index is already kept in memory and read on startup, to enable
    fast lookups of revision information.  The cursor of the index
    file is always pointing to the end, making it easy to append
    _cache is a cache for fast mapping from version id to an Index
    _history is a cache for fast mapping from indexes to version ids.

    The index data format is dictionary compressed when it comes to
    parent references; an index entry may only have parents with a
    lower index number.  As a result, the index is topologically sorted.

    Duplicate entries may be written to the index for a single version id;
    if this is done then the latter one completely replaces the former:
    this allows updates to correct version and parent information.
    Note that the two entries may share the delta, and that successive
    annotations and references MUST point to the first entry.

    The index file on disc contains a header, followed by one line per knit
    record.  The same revision can be present in an index file more than once.
    The first occurrence gets assigned a sequence number starting from 0.

    The format of a single line is
    REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
    REVISION_ID is a utf8-encoded revision id
    FLAGS is a comma-separated list of flags about the record. Values include
        no-eol, line-delta, fulltext.
    BYTE_OFFSET is the ascii representation of the byte offset in the data file
        that the compressed data starts at.
    LENGTH is the ascii representation of the length of the data file.
    PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
    PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
        revision id already in the knit that is a parent of REVISION_ID.
    The ' :' marker is the end of record marker.

    When a write is interrupted to the index file, it will result in a line
    that does not end in ' :'.  If the ' :' is not present at the end of a line,
    or at the end of the file, then the record that is missing it will be
    ignored by the parser.

    When writing new records to the index file, the data is preceded by '\n'
    to ensure that records always start on new lines even if the last write was
    interrupted.  As a result it's normal for the last line in the index to be
    missing a trailing newline.  One can be added with no harmful effects.
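    For example, a tiny index might read (made-up records; offsets and
    lengths are illustrative only):

        # bzr knit index 8
        rev-1 fulltext 0 125  :
        rev-2 line-delta 125 60 0 :
        rev-3 line-delta,no-eol 185 40 1 .ghost-rev :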
    """

    HEADER = "# bzr knit index 8\n"

    # speed of knit parsing went from 280 ms to 280 ms with slots addition.
    # __slots__ = ['_cache', '_history', '_transport', '_filename']

    def _cache_version(self, version_id, options, pos, size, parents):
        """Cache a version record in the history array and index cache.

        This is inlined into _load_data for performance. KEEP IN SYNC.
        (It saves 60ms, 25% of the __init__ overhead on local 4000 record
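        indexes).

        Cache entries are 6-tuples keyed by version_id:

            (version_id, options, pos, size, parents, sequence_number)

        where sequence_number is the position in self._history.  (This field
        layout is inferred from the accessors below, e.g. get_position,
        get_options and get_parents.)
        """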
        # only want the _history index to reference the 1st index entry
        if version_id not in self._cache:
            index = len(self._history)
            self._history.append(version_id)
            index = self._cache[version_id][5]
        self._cache[version_id] = (version_id,

    def __init__(self, transport, filename, mode, create=False, file_mode=None,
                 create_parent_dir=False, delay_create=False, dir_mode=None):
        _KnitComponentFile.__init__(self, transport, filename, mode,
                                    file_mode=file_mode,
                                    create_parent_dir=create_parent_dir,
        # position in _history is the 'official' index for a revision
        # but the values may have come from a newer entry.
        # so - wc -l of a knit index is != the number of unique names
        fp = self._transport.get(self._filename)
        # _load_data may raise NoSuchFile if the target knit is
        _load_data(self, fp)
        if mode != 'w' or not create:
        self._need_to_create = True
        self._transport.put_bytes_non_atomic(
            self._filename, self.HEADER, mode=self._file_mode)

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        # get a graph of all the mentioned versions:
        pending = set(versions)
            version = pending.pop()
            parents = [p for p in cache[version][4] if p in cache]
                raise RevisionNotPresent(version, self._filename)
            # if not completed and not a ghost
            pending.update([p for p in parents if p not in graph])
            graph[version] = parents
        return topo_sort(graph.items())
1214
"""See VersionedFile.get_ancestry_with_ghosts."""
1215
# get a graph of all the mentioned versions:
1216
self.check_versions_present(versions)
1219
pending = set(versions)
1221
version = pending.pop()
1223
parents = cache[version][4]
1229
pending.update([p for p in parents if p not in graph])
1230
graph[version] = parents
1231
return topo_sort(graph.items())
1233
def iter_parents(self, version_ids):
1234
"""Iterate through the parents for many version ids.
1236
:param version_ids: An iterable yielding version_ids.
1237
:return: An iterator that yields (version_id, parents). Requested
1238
version_ids not present in the versioned file are simply skipped.
1239
The order is undefined, allowing for different optimisations in
1240
the underlying implementation.
1242
for version_id in version_ids:
1244
yield version_id, tuple(self.get_parents(version_id))
1248
def num_versions(self):
1249
return len(self._history)
1251
__len__ = num_versions
1253
def get_versions(self):
1254
"""Get all the versions in the file. not topologically sorted."""
1255
return self._history
1257
def _version_list_to_index(self, versions):
1260
for version in versions:
1261
if version in cache:
1262
# -- inlined lookup() --
1263
result_list.append(str(cache[version][5]))
1264
# -- end lookup () --
1266
result_list.append('.' + version)
1267
return ' '.join(result_list)

    def add_version(self, version_id, options, index_memo, parents):
        """Add a version record to the index."""
        self.add_versions(((version_id, options, index_memo, parents),))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        :param versions: a list of tuples:
            (version_id, options, pos, size, parents).
        """
        orig_history = self._history[:]
        orig_cache = self._cache.copy()

        for version_id, options, (index, pos, size), parents in versions:
            line = "\n%s %s %s %s %s :" % (version_id,
                                           self._version_list_to_index(parents))
            assert isinstance(line, str), \
                'content must be utf-8 encoded: %r' % (line,)
            self._cache_version(version_id, options, pos, size, parents)
        if not self._need_to_create:
            self._transport.append_bytes(self._filename, ''.join(lines))
            sio.write(self.HEADER)
            sio.writelines(lines)
            self._transport.put_file_non_atomic(self._filename, sio,
                create_parent_dir=self._create_parent_dir,
                mode=self._file_mode,
                dir_mode=self._dir_mode)
            self._need_to_create = False
            # If any problems happen, restore the original values and re-raise
            self._history = orig_history
            self._cache = orig_cache

    def has_version(self, version_id):
        """True if the version is in the index."""
        return version_id in self._cache

    def get_position(self, version_id):
        """Return details needed to access the version.

        .kndx indices do not support split-out data, so return None for the
        :return: a tuple (None, data position, size) to hand to the access
            logic to get the record.
        """
        entry = self._cache[version_id]
        return None, entry[2], entry[3]

    def get_method(self, version_id):
        """Return compression method of specified version."""
        options = self._cache[version_id][1]
        if 'fulltext' in options:
        if 'line-delta' not in options:
            raise errors.KnitIndexUnknownMethod(self._full_path(), options)

    def get_options(self, version_id):
        """Return a string representation of the options."""
        return self._cache[version_id][1]

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        return [parent for parent in self._cache[version_id][4]
                if parent in self._cache]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        return self._cache[version_id][4]

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        for version_id in version_ids:
            if version_id not in cache:
                raise RevisionNotPresent(version_id, self._filename)

class KnitGraphIndex(object):
    """A knit index that builds on GraphIndex."""

    def __init__(self, graph_index, deltas=False, parents=True, add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param deltas: Allow delta-compressed records.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        :param parents: If True, record knit parents, if not do not record
        """
        self._graph_index = graph_index
        self._deltas = deltas
        self._add_callback = add_callback
        self._parents = parents
        if deltas and not parents:
            raise KnitCorrupt(self, "Cannot do delta compression without "

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        :param keys: An iterable of index keys, - 1-tuples.
        """
            for node in self._graph_index.iter_entries(keys):
                found_keys.add(node[1])
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
            missing_keys = keys.difference(found_keys)
                raise RevisionNotPresent(missing_keys.pop(), self)

    def _present_keys(self, version_ids):
            node[1] for node in self._get_entries(version_ids)])

    def _parentless_ancestry(self, versions):
        """Honour the get_ancestry API for parentless knit indices."""
        wanted_keys = self._version_ids_to_keys(versions)
        present_keys = self._present_keys(wanted_keys)
        missing = set(wanted_keys).difference(present_keys)
            raise RevisionNotPresent(missing.pop(), self)
        return list(self._keys_to_version_ids(present_keys))

    def get_ancestry(self, versions, topo_sorted=True):
        """See VersionedFile.get_ancestry."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be an index core feature?
        # get a graph of all the mentioned versions:
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            for (index, key, value, node_refs) in new_nodes:
                # don't ask for ghosties - otherwise
                # we can end up looping with pending
                # being entirely ghosted.
                graph[key] = [parent for parent in node_refs[0]
                              if parent not in ghosts]
                for parent in graph[key]:
                    # don't examine known nodes again
            ghosts.update(this_iteration.difference(found))
        if versions.difference(graph):
            raise RevisionNotPresent(versions.difference(graph).pop(), self)
            result_keys = topo_sort(graph.items())
            result_keys = graph.iterkeys()
        return [key[0] for key in result_keys]

    def get_ancestry_with_ghosts(self, versions):
        """See VersionedFile.get_ancestry_with_ghosts."""
        if not self._parents:
            return self._parentless_ancestry(versions)
        # XXX: This will do len(history) index calls - perhaps
        # it should be altered to be an index core feature?
        # get a graph of all the mentioned versions:
        versions = self._version_ids_to_keys(versions)
        pending = set(versions)
            # get all pending nodes
            this_iteration = pending
            new_nodes = self._get_entries(this_iteration)
            for (index, key, value, node_refs) in new_nodes:
                graph[key] = node_refs[0]
                for parent in graph[key]:
                    # don't examine known nodes again
            missing_versions = this_iteration.difference(graph)
            missing_needed = versions.intersection(missing_versions)
                raise RevisionNotPresent(missing_needed.pop(), self)
            for missing_version in missing_versions:
                # add a key, no parents
                graph[missing_version] = []
                pending.discard(missing_version)  # don't look for it
        result_keys = topo_sort(graph.items())
        return [key[0] for key in result_keys]

    def get_graph(self):
        """Return a list of the node:parents lists from this knit index."""
        if not self._parents:
            return [(key, ()) for key in self.get_versions()]
        for index, key, value, refs in self._graph_index.iter_all_entries():
            result.append((key[0], tuple([ref[0] for ref in refs[0]])))

    def iter_parents(self, version_ids):
        """Iterate through the parents for many version ids.

        :param version_ids: An iterable yielding version_ids.
        :return: An iterator that yields (version_id, parents). Requested
            version_ids not present in the versioned file are simply skipped.
            The order is undefined, allowing for different optimisations in
            the underlying implementation.
        """
            all_nodes = set(self._get_entries(self._version_ids_to_keys(version_ids)))
            present_parents = set()
            for node in all_nodes:
                all_parents.update(node[3][0])
                # any node we are querying must be present
                present_parents.add(node[1])
            unknown_parents = all_parents.difference(present_parents)
            present_parents.update(self._present_keys(unknown_parents))
            for node in all_nodes:
                for parent in node[3][0]:
                    if parent in present_parents:
                        parents.append(parent[0])
                yield node[1][0], tuple(parents)
            for node in self._get_entries(self._version_ids_to_keys(version_ids)):
                yield node[1][0], ()

    def num_versions(self):
        return len(list(self._graph_index.iter_all_entries()))

    __len__ = num_versions

    def get_versions(self):
        """Get all the versions in the file. Not topologically sorted."""
        return [node[1][0] for node in self._graph_index.iter_all_entries()]

    def has_version(self, version_id):
        """True if the version is in the index."""
        return len(self._present_keys(self._version_ids_to_keys([version_id]))) == 1

    def _keys_to_version_ids(self, keys):
        return tuple(key[0] for key in keys)

    def get_position(self, version_id):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        """
        node = self._get_node(version_id)
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

    def get_method(self, version_id):
        """Return compression method of specified version."""
        if not self._deltas:
        return self._parent_compression(self._get_node(version_id)[3][1])

    def _parent_compression(self, reference_list):
        # use the second reference list to decide if this is delta'd or not.
        if len(reference_list):

    def _get_node(self, version_id):
        return list(self._get_entries(self._version_ids_to_keys([version_id])))[0]

    def get_options(self, version_id):
        """Return a string representation of the options."""
        node = self._get_node(version_id)
        if not self._deltas:
            options = ['fulltext']
            options = [self._parent_compression(node[3][1])]
        if node[2][0] == 'N':
            options.append('no-eol')

    def get_parents(self, version_id):
        """Return parents of specified version ignoring ghosts."""
        parents = list(self.iter_parents([version_id]))
            raise errors.RevisionNotPresent(version_id, self)
        return parents[0][1]

    def get_parents_with_ghosts(self, version_id):
        """Return parents of specified version with ghosts."""
        nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),
                     check_present=True))
        if not self._parents:
        return self._keys_to_version_ids(nodes[0][3][0])

    def check_versions_present(self, version_ids):
        """Check that all specified versions are present."""
        keys = self._version_ids_to_keys(version_ids)
        present = self._present_keys(keys)
        missing = keys.difference(present)
            raise RevisionNotPresent(missing.pop(), self)

    def add_version(self, version_id, options, access_memo, parents):
        """Add a version record to the index."""
        return self.add_versions(((version_id, options, access_memo, parents),))

    def add_versions(self, versions):
        """Add multiple versions to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex; instead it prepares data for insertion by
        the caller, checks that it is safe to insert, then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param versions: a list of tuples:
            (version_id, options, pos, size, parents).
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        for (version_id, options, access_memo, parents) in versions:
            index, pos, size = access_memo
            key = (version_id, )
            parents = tuple((parent, ) for parent in parents)
            if 'no-eol' in options:
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
                if 'line-delta' in options:
                    node_refs = (parents, (parents[0],))
                    node_refs = (parents, ())
                node_refs = (parents, )
                raise KnitCorrupt(self, "attempt to add node with parents "
                                  "in parentless index.")
            keys[key] = (value, node_refs)
        present_nodes = self._get_entries(keys)
        for (index, key, value, node_refs) in present_nodes:
            if (value, node_refs) != keys[key]:
                raise KnitCorrupt(self, "inconsistent details in add_versions"
                                  ": %s %s" % ((value, node_refs), keys[key]))
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value, node_refs))
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value))
        self._add_callback(result)

    def _version_ids_to_keys(self, version_ids):
        return set((version_id, ) for version_id in version_ids)

class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
                 _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        """
        self._transport = transport
        self._filename = filename
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to wherever the access method is storing data.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is always None
            for the .knit access method.
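            As an illustration, on an initially empty file,
            add_raw_records([10, 20], thirty_bytes_of_data) would return
            [(None, 0, 10), (None, 10, 20)] (hypothetical values;
            ``thirty_bytes_of_data`` stands for any 30-byte string).
        """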
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        if not self._need_to_create:
            base = self._transport.append_bytes(self._filename, raw_data)
            self._transport.put_bytes_non_atomic(self._filename, raw_data,
                                                 create_parent_dir=self._create_parent_dir,
                                                 mode=self._file_mode,
                                                 dir_mode=self._dir_mode)
            self._need_to_create = False
            result.append((None, base, size))

        """IFF this data access has its own storage area, initialise it.
        """
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        :return: None or a file handle.
        """
            return self._transport.get(self._filename)

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for the records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]
        for pos, data in self._transport.readv(self._filename, read_vector):

class _PackAccess(object):
    """Access to knit records via a collection of packs."""

    def __init__(self, index_to_packs, writer=None):
        """Create a _PackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param writer: A tuple (pack.ContainerWriter, write_index) which
            contains the pack to write, and the index that reads from it will
        """
            self.container_writer = writer[0]
            self.write_index = writer[1]
            self.container_writer = None
            self.write_index = None
        self.indices = index_to_packs

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is the
            write_index object supplied to the PackAccess object.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
            p_offset, p_length = self.container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            result.append((self.write_index, p_offset, p_length))
1800
"""Pack based knits do not get individually created."""
1802

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for some records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group into same-index requests
        request_lists = []
        current_index = None
        for (index, offset, length) in memos_for_retrieval:
            if current_index == index:
                current_list.append((offset, length))
            else:
                if current_index is not None:
                    request_lists.append((current_index, current_list))
                current_index = index
                current_list = [(offset, length)]
        # handle the last entry
        if current_index is not None:
            request_lists.append((current_index, current_list))
        for index, offsets in request_lists:
            transport, path = self.indices[index]
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)
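    # Illustrative sketch, not part of the original module: the grouping
    # pass above batches consecutive memos that share an index, so each
    # pack is read with a single readv, e.g.
    #
    #   [(idx1, 0, 10), (idx1, 10, 20), (idx2, 0, 5)]
    #   # -> request_lists == [(idx1, [(0, 10), (10, 20)]),
    #   #                      (idx2, [(0, 5)])]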

    def open_file(self):
        """Pack based knits have no single file."""
        return None

    def set_writer(self, writer, index, (transport, packname)):
        """Set a writer to use for adding data."""
        self.indices[index] = (transport, packname)
        self.container_writer = writer
        self.write_index = index
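    # Illustrative sketch, not part of the original module: set_writer
    # retargets subsequent add_raw_records calls at a new pack. The names
    # below are assumed:
    #
    #   access.set_writer(new_writer, new_index, (transport, 'new.pack'))
    #   # records added from now on get memos of (new_index, pos, length)
    #   # and reads resolve them via self.indices[new_index]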


class _KnitData(object):
    """Manage extraction of data from a KnitAccess, caching and decompressing.

    The KnitData class provides the logic for parsing and using knit records,
    making use of an access method for the low level read and write operations.
    """

    def __init__(self, access):
        """Create a KnitData object.

        :param access: The access method to use. Access methods such as
            _KnitAccess manage the insertion of raw records and the subsequent
            retrieval of the same.
        """
        self._access = access
        self._checked = False
        # TODO: jam 20060713 conceptually, this could spill to disk
        #       if the cached size gets larger than a certain amount
        #       but it complicates the model a bit, so for now just use
        #       a simple dictionary
        self._cache = {}
        self._do_cache = False

    def enable_cache(self):
        """Enable caching of reads."""
        self._do_cache = True

    def clear_cache(self):
        """Clear the record cache."""
        self._do_cache = False
        self._cache = {}

    def _open_file(self):
        return self._access.open_file()

    def _record_to_data(self, version_id, digest, lines):
        """Convert version_id, digest, lines into a raw data block.

        :return: (len, a StringIO instance with the raw data ready to read.)
        """
        sio = StringIO()
        data_file = GzipFile(None, mode='wb', fileobj=sio)

        assert isinstance(version_id, str)
        data_file.writelines(chain(
            ["version %s %d %s\n" % (version_id,
                                     len(lines),
                                     digest)],
            lines,
            ["end %s\n" % version_id]))
        data_file.close()
        length = sio.tell()

        sio.seek(0)
        return length, sio
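    # Illustrative sketch, not part of the original module: after
    # decompression, a record produced by _record_to_data looks like
    #
    #   version <version-id> <line-count> <digest>
    #   <line-count lines of content>
    #   end <version-id>
    #
    # which is exactly the shape _parse_record validates below.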

    def add_raw_records(self, sizes, raw_data):
        """Append a prepared record to the data file.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: a list of index data for the way the data was stored.
            See the access method add_raw_records documentation for more
            details.
        """
        return self._access.add_raw_records(sizes, raw_data)

    def add_record(self, version_id, digest, lines):
        """Write new text record to disk.

        Returns index data for retrieving it later, as per add_raw_records.
        """
        size, sio = self._record_to_data(version_id, digest, lines)
        result = self.add_raw_records([size], sio.getvalue())
        if self._do_cache:
            self._cache[version_id] = sio.getvalue()
        return result[0]
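    # Illustrative sketch, not part of the original module; 'digest' is
    # assumed to be the sha1 of the expanded text:
    #
    #   memo = data.add_record('v1', digest, ['line one\n', 'line two\n'])
    #   # memo is a single (index, pos, length) tuple, as returned by the
    #   # underlying access method's add_raw_records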

    def _parse_record_header(self, version_id, raw_data):
        """Parse a record header for consistency.

        :return: the header and the decompressor stream,
                 as (stream, header_record)
        """
        df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
        try:
            rec = self._check_header(version_id, df.readline())
        except Exception, e:
            raise KnitCorrupt(self._access,
                              "While reading {%s} got %s(%s)"
                              % (version_id, e.__class__.__name__, str(e)))
        return df, rec

    def _check_header(self, version_id, line):
        rec = line.split()
        if len(rec) != 4:
            raise KnitCorrupt(self._access,
                              'unexpected number of elements in record header')
        if rec[1] != version_id:
            raise KnitCorrupt(self._access,
                              'unexpected version, wanted %r, got %r'
                              % (version_id, rec[1]))
        return rec
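    # Illustrative sketch, not part of the original module: a well formed
    # header line splits into exactly four fields, e.g.
    #
    #   rec = data._check_header('v1', 'version v1 2 <digest>\n')
    #   # rec == ['version', 'v1', '2', '<digest>']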

    def _parse_record(self, version_id, data):
        # profiling notes:
        # 4168 calls in 2880 217 internal
        # 4168 calls to _parse_record_header in 2121
        # 4168 calls to readlines in 330
        df = GzipFile(mode='rb', fileobj=StringIO(data))

        try:
            record_contents = df.readlines()
        except Exception, e:
            raise KnitCorrupt(self._access,
                              "While reading {%s} got %s(%s)"
                              % (version_id, e.__class__.__name__, str(e)))
        header = record_contents.pop(0)
        rec = self._check_header(version_id, header)

        last_line = record_contents.pop()
        if len(record_contents) != int(rec[2]):
            raise KnitCorrupt(self._access,
                              'incorrect number of lines %s != %s'
                              ' for version {%s}'
                              % (len(record_contents), int(rec[2]),
                                 version_id))
        if last_line != 'end %s\n' % rec[1]:
            raise KnitCorrupt(self._access,
                              'unexpected version end line %r, wanted %r'
                              % (last_line, version_id))
        df.close()
        return record_contents, rec[3]
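    # Illustrative sketch, not part of the original module: _parse_record
    # is the inverse of _record_to_data, so a record round-trips. Assuming
    # 'data' is a _KnitData and 'digest' the sha1 of the lines:
    #
    #   size, sio = data._record_to_data('v1', digest, ['line one\n'])
    #   contents, rec_digest = data._parse_record('v1', sio.getvalue())
    #   # contents == ['line one\n'] and rec_digest == digest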

    def read_records_iter_raw(self, records):
        """Read text records from data file and yield raw data.

        This unpacks enough of the text record to validate the id is
        as expected but that's all.
        """
        # setup an iterator of the external records:
        # uses readv so nice and fast we hope.
        if len(records):
            # grab the disk data needed.
            if self._cache:
                # Don't check _cache if it is empty
                needed_offsets = [index_memo for version_id, index_memo
                                  in records
                                  if version_id not in self._cache]
            else:
                needed_offsets = [index_memo for version_id, index_memo
                                  in records]

            raw_records = self._access.get_raw_records(needed_offsets)

        for version_id, index_memo in records:
            if version_id in self._cache:
                # This data has already been validated
                data = self._cache[version_id]
            else:
                data = raw_records.next()
                if self._do_cache:
                    self._cache[version_id] = data

                # validate the header
                df, rec = self._parse_record_header(version_id, data)
                df.close()
            yield version_id, data
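    # Illustrative sketch, not part of the original module: the raw
    # variant yields compressed record bytes in request order, having
    # checked only the header, e.g.
    #
    #   for version_id, raw in data.read_records_iter_raw(records):
    #       # raw is still gzipped; suitable for copying between knits
    #       pass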

    def read_records_iter(self, records):
        """Read text records from data file and yield result.

        The result will be returned in whatever order is fastest to read,
        not the order requested. Also, multiple requests for the same
        record will only yield 1 response.

        :param records: A list of (version_id, pos, len) entries
        :return: Yields (version_id, contents, digest) in the order
                 read, not the order requested
        """
        if not records:
            return

        if self._cache:
            # Skip records we have already seen
            yielded_records = set()
            needed_records = set()
            for record in records:
                if record[0] in self._cache:
                    if record[0] in yielded_records:
                        continue
                    yielded_records.add(record[0])
                    data = self._cache[record[0]]
                    content, digest = self._parse_record(record[0], data)
                    yield (record[0], content, digest)
                else:
                    needed_records.add(record)
            needed_records = sorted(needed_records, key=operator.itemgetter(1))
        else:
            needed_records = sorted(set(records), key=operator.itemgetter(1))

        if not needed_records:
            return

        # The transport optimizes the fetching as well
        # (ie, reads continuous ranges.)
        raw_data = self._access.get_raw_records(
            [index_memo for version_id, index_memo in needed_records])

        for (version_id, index_memo), data in \
                izip(iter(needed_records), raw_data):
            content, digest = self._parse_record(version_id, data)
            if self._do_cache:
                self._cache[version_id] = data
            yield version_id, content, digest
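    # Illustrative sketch, not part of the original module: results come
    # back deduplicated and in read order, not request order, e.g.
    #
    #   requests = [('v2', memo2), ('v1', memo1), ('v1', memo1)]
    #   list(data.read_records_iter(requests))
    #   # -> one entry each for 'v1' and 'v2', sorted by file position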

    def read_records(self, records):
        """Read records into a dictionary."""
        components = {}
        for record_id, content, digest in \
                self.read_records_iter(records):
            components[record_id] = (content, digest)
        return components
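    # Illustrative sketch, not part of the original module:
    #
    #   components = data.read_records([('v1', memo1)])
    #   content, digest = components['v1']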


class InterKnit(InterVersionedFile):
    """Optimised code paths for knit to knit operations."""

    _matching_file_from_factory = KnitVersionedFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with knits."""
        try:
            return (isinstance(source, KnitVersionedFile) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join."""
        assert isinstance(self.source, KnitVersionedFile)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)
            if None in version_ids:
                version_ids.remove(None)
            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                if n1 != n2:
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # plan the join:
            copy_queue = []
            copy_queue_records = []
            copy_set = set()
            for version_id in version_list:
                options = self.source._index.get_options(version_id)
                parents = self.source._index.get_parents_with_ghosts(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must:
                    # * already have it or
                    # * have it scheduled already
                    # otherwise we don't care
                    assert (self.target.has_version(parent) or
                            parent in copy_set or
                            not self.source.has_version(parent))
                index_memo = self.source._index.get_position(version_id)
                copy_queue_records.append((version_id, index_memo))
                copy_queue.append((version_id, options, parents))
                copy_set.add(version_id)

            # data suck the join:
            count = 0
            total = len(version_list)
            raw_datum = []
            raw_records = []
            for (version_id, raw_data), \
                (version_id2, options, parents) in \
                izip(self.source._data.read_records_iter_raw(copy_queue_records),
                     copy_queue):
                assert version_id == version_id2, 'logic error, inconsistent results'
                count = count + 1
                pb.update("Joining knit", count, total)
                raw_records.append((version_id, options, parents, len(raw_data)))
                raw_datum.append(raw_data)
            self.target._add_raw_records(raw_records, ''.join(raw_datum))

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents_with_ghosts(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

InterVersionedFile.register_optimiser(InterKnit)
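# Illustrative sketch, not part of the original module: registration means
# the optimiser lookup picks InterKnit automatically for knit pairs. This
# assumes the usual InterObject lookup API:
#
#   inter = InterVersionedFile.get(source_knit, target_knit)
#   # isinstance(inter, InterKnit) when both files are knits
#   inter.join(version_ids=['rev-1'])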


class WeaveToKnit(InterVersionedFile):
    """Optimised code paths for weave to knit operations."""

    _matching_file_from_factory = bzrlib.weave.WeaveFile
    _matching_file_to_factory = KnitVersionedFile

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with weaves to knits."""
        try:
            return (isinstance(source, bzrlib.weave.Weave) and
                    isinstance(target, KnitVersionedFile))
        except AttributeError:
            return False

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """See InterVersionedFile.join."""
        assert isinstance(self.source, bzrlib.weave.Weave)
        assert isinstance(self.target, KnitVersionedFile)

        version_ids = self._get_source_version_ids(version_ids, ignore_missing)

        if not version_ids:
            return 0

        pb = ui.ui_factory.nested_progress_bar()
        try:
            version_ids = list(version_ids)

            self.source_ancestry = set(self.source.get_ancestry(version_ids))
            this_versions = set(self.target._index.get_versions())
            needed_versions = self.source_ancestry - this_versions
            cross_check_versions = self.source_ancestry.intersection(this_versions)
            mismatched_versions = set()
            for version in cross_check_versions:
                # scan to include needed parents.
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # if all of n2's parents are in n1, then it's fine.
                if n2.difference(n1):
                    # FIXME TEST this check for cycles being introduced works
                    # the logic is we have a cycle if in our graph we are an
                    # ancestor of any of the n2 revisions.
                    for parent in n2:
                        if parent in n1:
                            # safe
                            continue
                        else:
                            parent_ancestors = self.source.get_ancestry(parent)
                            if version in parent_ancestors:
                                raise errors.GraphCycleError([parent, version])
                    # ensure this parent will be available later.
                    new_parents = n2.difference(n1)
                    needed_versions.update(new_parents.difference(this_versions))
                    mismatched_versions.add(version)

            if not needed_versions and not mismatched_versions:
                return 0
            full_list = topo_sort(self.source.get_graph())

            version_list = [i for i in full_list if (not self.target.has_version(i)
                            and i in needed_versions)]

            # do the join:
            count = 0
            total = len(version_list)
            for version_id in version_list:
                pb.update("Converting to knit", count, total)
                parents = self.source.get_parents(version_id)
                # check that it will be a consistent copy:
                for parent in parents:
                    # if source has the parent, we must already have it
                    assert (self.target.has_version(parent))
                self.target.add_lines(
                    version_id, parents, self.source.get_lines(version_id))
                count = count + 1

            for version in mismatched_versions:
                # FIXME RBC 20060309 is this needed?
                n1 = set(self.target.get_parents_with_ghosts(version))
                n2 = set(self.source.get_parents(version))
                # write a combined record to our history preserving the current
                # parents as first in the list
                new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
                self.target.fix_parents(version, new_parents)
            return count
        finally:
            pb.finished()

InterVersionedFile.register_optimiser(WeaveToKnit)
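# Illustrative sketch, not part of the original module: the same lookup
# applied to a weave source selects WeaveToKnit, so an upgrade is just:
#
#   inter = InterVersionedFile.get(weave_file, knit_file)
#   inter.join()  # re-adds each version's lines in topological order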


class KnitSequenceMatcher(difflib.SequenceMatcher):
    """Knit tuned sequence matcher.

    This is based on profiling of difflib which indicated some improvements
    for our usage pattern.
    """

    def find_longest_match(self, alo, ahi, blo, bhi):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi].

        If isjunk is not defined:

        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
            alo <= i <= i+k <= ahi
            blo <= j <= j+k <= bhi
        and for all (i',j',k') meeting those conditions,
            k >= k'
            i <= i'
            and if i == i', j <= j'

        In other words, of all maximal matching blocks, return one that
        starts earliest in a, and of all those maximal matching blocks that
        start earliest in a, return the one that starts earliest in b.

        >>> s = SequenceMatcher(None, " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (0, 4, 5)

        If isjunk is defined, first the longest matching block is
        determined as above, but with the additional restriction that no
        junk element appears in the block.  Then that block is extended as
        far as possible by matching (only) junk elements on both sides.  So
        the resulting block never matches on junk except as identical junk
        happens to be adjacent to an "interesting" match.

        Here's the same example as before, but considering blanks to be
        junk.  That prevents " abcd" from matching the " abcd" at the tail
        end of the second sequence directly.  Instead only the "abcd" can
        match, and matches the leftmost "abcd" in the second sequence:

        >>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        (1, 0, 4)

        If no blocks match, return (alo, blo, 0).

        >>> s = SequenceMatcher(None, "ab", "c")
        >>> s.find_longest_match(0, 2, 0, 1)
        (0, 0, 0)
        """

        # CAUTION:  stripping common prefix or suffix would be incorrect.
        # E.g.,
        #    ab
        #    acab
        # Longest matching block is "ab", but if common prefix is
        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
        # strip, so ends up claiming that ab is changed to acab by
        # inserting "ca" in the middle.  That's minimal but unintuitive:
        # "it's obvious" that someone inserted "ac" at the front.
        # Windiff ends up at the same place as diff, but by pairing up
        # the unique 'b's and then matching the first two 'a's.

        a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.isbjunk
        besti, bestj, bestsize = alo, blo, 0
        # find longest junk-free match
        # during an iteration of the loop, j2len[j] = length of longest
        # junk-free match ending with a[i-1] and b[j]
        j2len = {}
        # nothing = []
        b2jget = b2j.get
        for i in xrange(alo, ahi):
            # look at all instances of a[i] in b; note that because
            # b2j has no junk keys, the loop is skipped if a[i] is junk
            j2lenget = j2len.get
            newj2len = {}

            # changing b2j.get(a[i], nothing) to a try:KeyError pair produced
            # the following improvement
            #     704  0   4650.5320   2620.7410   bzrlib.knit:1336(find_longest_match)
            # +326674  0   1655.1210   1655.1210   +<method 'get' of 'dict' objects>
            #  +76519  0    374.6700    374.6700   +<method 'has_key' of 'dict' objects>
            # to
            #     704  0   3733.2820   2209.6520   bzrlib.knit:1336(find_longest_match)
            # +211400  0   1147.3520   1147.3520   +<method 'get' of 'dict' objects>
            #  +76519  0    376.2780    376.2780   +<method 'has_key' of 'dict' objects>

            try:
                js = b2j[a[i]]
            except KeyError:
                pass
            else:
                for j in js:
                    # a[i] matches b[j]
                    if j >= blo:
                        if j >= bhi:
                            break
                        k = newj2len[j] = 1 + j2lenget(-1 + j, 0)
                        if k > bestsize:
                            besti, bestj, bestsize = 1 + i-k, 1 + j-k, k
            j2len = newj2len

        # Extend the best by non-junk elements on each end.  In particular,
        # "popular" non-junk elements aren't in b2j, which greatly speeds
        # the inner loop above, but also means "the best" match so far
        # doesn't contain any junk *or* popular non-junk elements.
        while besti > alo and bestj > blo and \
              not isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              not isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize = bestsize + 1

        # Now that we have a wholly interesting match (albeit possibly
        # empty!), we may as well suck up the matching junk on each
        # side of it too.  Can't think of a good reason not to, and it
        # saves post-processing the (possibly considerable) expense of
        # figuring out what to do with it.  In the case of an empty
        # interesting match, this is clearly the right thing to do,
        # because no other kind of match is possible in the regions.
        while besti > alo and bestj > blo and \
              isbjunk(b[bestj-1]) and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              isbjunk(b[bestj+bestsize]) and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize = bestsize + 1

        return besti, bestj, bestsize
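
    # Illustrative sketch, not part of the original module: the tuned
    # matcher is a drop-in difflib.SequenceMatcher replacement, e.g.
    #
    #   sm = KnitSequenceMatcher(None, ['a\n', 'b\n'], ['a\n', 'x\n', 'b\n'])
    #   sm.get_matching_blocks()  # inherited difflib API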


try:
    from bzrlib._knit_load_data_c import _load_data_c as _load_data
except ImportError:
    from bzrlib._knit_load_data_py import _load_data_py as _load_data