1
# Copyright (C) 2005, 2006 by Canonical Ltd
2
# Written by Martin Pool.
3
# Modified by Johan Rydberg <jrydberg@gnu.org>
4
# Modified by Robert Collins <robert.collins@canonical.com>
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
11
# This program is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
"""Knit versionedfile implementation.
22
A knit is a versioned file implementation that supports efficient append only
26
lifeless: the data file is made up of "delta records". each delta record has a delta header
27
that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of
28
the -expanded data- (ie, the delta applied to the parent). the delta also ends with a
29
end-marker; simply "end VERSION"
31
delta can be line or full contents.
32
... the 8's there are the index number of the annotation.
33
version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e
37
8 e.set('executable', 'yes')
39
8 if elt.get('executable') == 'yes':
40
8 ie.executable = True
41
end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad
45
09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents
46
09:33 < jrydberg> lifeless: the parents are currently dictionary compressed
47
09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)
48
09:33 < lifeless> right
49
09:33 < jrydberg> lifeless: the position and size is the range in the data file
52
so the index sequence is the dictionary compressed sequence number used
53
in the deltas to provide line annotation
58
# 10:16 < lifeless> make partial index writes safe
59
# 10:16 < lifeless> implement 'knit.check()' like weave.check()
60
# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave
62
# move sha1 out of the content so that join is faster at verifying parents
63
# record content length ?
66
from cStringIO import StringIO
68
from difflib import SequenceMatcher
69
from gzip import GzipFile
72
import bzrlib.errors as errors
73
from bzrlib.errors import FileExists, NoSuchFile, KnitError, \
74
InvalidRevisionId, KnitCorrupt, KnitHeaderError, \
75
RevisionNotPresent, RevisionAlreadyPresent
76
from bzrlib.trace import mutter
77
from bzrlib.osutils import contains_whitespace, contains_linebreaks, \
79
from bzrlib.versionedfile import VersionedFile
80
from bzrlib.tsort import topo_sort
83
# TODO: Split out code specific to this format into an associated object.
85
# TODO: Can we put in some kind of value to check that the index and data
86
# files belong together?
88
# TODO: accommodate binaries, perhaps by storing a byte count
90
# TODO: function to check whole file
92
# TODO: atomically append data, then measure backwards from the cursor
93
# position after writing to work out where it was located. we may need to
94
# bypass python file buffering.
97
INDEX_SUFFIX = '.kndx'
100
# convenience factories for testing or use:
101
def AnnotatedKnitFactory(name, transport, mode=None):
102
"""Create a knit with path name in transport transport."""
103
return KnitVersionedFile(transport,
106
KnitAnnotateFactory(),
110
class KnitContent(object):
111
"""Content of a knit version to which deltas can be applied."""
113
def __init__(self, lines):
116
def annotate_iter(self):
117
"""Yield tuples of (origin, text) for each content line."""
118
for origin, text in self._lines:
122
"""Return a list of (origin, text) tuples."""
123
return list(self.annotate_iter())
125
def apply_delta(self, delta):
126
"""Apply delta to this content."""
128
for start, end, count, lines in delta:
129
self._lines[offset+start:offset+end] = lines
130
offset = offset + (start - end) + count
132
def line_delta_iter(self, new_lines):
133
"""Generate line-based delta from new_lines to this content."""
134
new_texts = [text for origin, text in new_lines._lines]
135
old_texts = [text for origin, text in self._lines]
136
s = difflib.SequenceMatcher(None, old_texts, new_texts)
137
for op in s.get_opcodes():
140
yield (op[1], op[2], op[4]-op[3], new_lines._lines[op[3]:op[4]])
142
def line_delta(self, new_lines):
143
return list(self.line_delta_iter(new_lines))
146
return [text for origin, text in self._lines]
149
class _KnitFactory(object):
    """Base factory for creating content objects."""

    def make(self, lines, version):
        """Build a KnitContent attributing every line in lines to version."""
        return KnitContent([(version, line) for line in lines])
157
class KnitAnnotateFactory(_KnitFactory):
158
"""Factory for creating annotated Content objects."""
162
def parse_fulltext(self, content, version):
165
origin, text = line.split(' ', 1)
166
lines.append((int(origin), text))
167
return KnitContent(lines)
169
def parse_line_delta_iter(self, lines):
171
header = lines.pop(0)
172
start, end, c = [int(n) for n in header.split(',')]
175
origin, text = lines.pop(0).split(' ', 1)
176
contents.append((int(origin), text))
177
yield start, end, c, contents
179
def parse_line_delta(self, lines, version):
180
return list(self.parse_line_delta_iter(lines))
182
def lower_fulltext(self, content):
183
return ['%d %s' % (o, t) for o, t in content._lines]
185
def lower_line_delta(self, delta):
187
for start, end, c, lines in delta:
188
out.append('%d,%d,%d\n' % (start, end, c))
189
for origin, text in lines:
190
out.append('%d %s' % (origin, text))
194
class KnitPlainFactory(_KnitFactory):
195
"""Factory for creating plain Content objects."""
199
def parse_fulltext(self, content, version):
200
return self.make(content, version)
202
def parse_line_delta_iter(self, lines, version):
204
header = lines.pop(0)
205
start, end, c = [int(n) for n in header.split(',')]
206
yield start, end, c, zip([version] * c, lines[:c])
209
def parse_line_delta(self, lines, version):
210
return list(self.parse_line_delta_iter(lines, version))
212
def lower_fulltext(self, content):
213
return content.text()
215
def lower_line_delta(self, delta):
217
for start, end, c, lines in delta:
218
out.append('%d,%d,%d\n' % (start, end, c))
219
out.extend([text for origin, text in lines])
223
def make_empty_knit(transport, relpath):
224
"""Construct a empty knit at the specified location."""
225
k = KnitVersionedFile(transport, relpath, 'w', KnitPlainFactory)
229
class KnitVersionedFile(VersionedFile):
230
"""Weave-like structure with faster random access.
232
A knit stores a number of texts and a summary of the relationships
233
between them. Texts are identified by a string version-id. Texts
234
are normally stored and retrieved as a series of lines, but can
235
also be passed as single strings.
237
Lines are stored with the trailing newline (if any) included, to
238
avoid special cases for files with no final newline. Lines are
239
composed of 8-bit characters, not unicode. The combination of
240
these approaches should mean any 'binary' file can be safely
241
stored and retrieved.
244
def __init__(self, transport, relpath, mode, factory,
245
basis_knit=None, delta=True):
246
"""Construct a knit at location specified by relpath."""
247
assert mode in ('r', 'w'), "invalid mode specified"
248
assert not basis_knit or isinstance(basis_knit, KnitVersionedFile), \
251
self.transport = transport
252
self.filename = relpath
253
self.basis_knit = basis_knit
254
self.factory = factory
255
self.writable = (mode == 'w')
258
self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
260
self._data = _KnitData(transport, relpath + DATA_SUFFIX,
264
"""See VersionedFile.versions."""
265
return self._index.get_versions()
267
def has_version(self, version_id):
268
"""See VersionedFile.has_version."""
269
return self._index.has_version(version_id)
271
__contains__ = has_version
273
def _merge_annotations(self, content, parents):
274
"""Merge annotations for content. This is done by comparing
275
        the annotations based on changes to the text."""
276
for parent_id in parents:
277
merge_content = self._get_content(parent_id)
278
seq = SequenceMatcher(None, merge_content.text(), content.text())
279
for i, j, n in seq.get_matching_blocks():
282
content._lines[j:j+n] = merge_content._lines[i:i+n]
284
def _get_components(self, version_id):
285
"""Return a list of (version_id, method, data) tuples that
286
makes up version specified by version_id of the knit.
288
The components should be applied in the order of the returned
291
The basis knit will be used to the largest extent possible
292
since it is assumed that accesses to it is faster.
294
# needed_revisions holds a list of (method, version_id) of
295
# versions that is needed to be fetched to construct the final
296
# version of the file.
298
# basis_revisions is a list of versions that needs to be
299
# fetched but exists in the basis knit.
301
basis = self.basis_knit
308
if basis and basis._index.has_version(cursor):
310
basis_versions.append(cursor)
311
method = picked_knit._index.get_method(cursor)
312
needed_versions.append((method, cursor))
313
if method == 'fulltext':
315
cursor = picked_knit.get_parents(cursor)[0]
320
for comp_id in basis_versions:
321
data_pos, data_size = basis._index.get_data_position(comp_id)
322
records.append((piece_id, data_pos, data_size))
323
components.update(basis._data.read_records(records))
326
for comp_id in [vid for method, vid in needed_versions
327
if vid not in basis_versions]:
328
data_pos, data_size = self._index.get_position(comp_id)
329
records.append((comp_id, data_pos, data_size))
330
components.update(self._data.read_records(records))
332
# get_data_records returns a mapping with the version id as
333
# index and the value as data. The order the components need
334
# to be applied is held by needed_versions (reversed).
336
for method, comp_id in reversed(needed_versions):
337
out.append((comp_id, method, components[comp_id]))
341
def _get_content(self, version_id):
342
"""Returns a content object that makes up the specified
344
if not self.has_version(version_id):
345
raise RevisionNotPresent(version_id, self.filename)
347
if self.basis_knit and version_id in self.basis_knit:
348
return self.basis_knit._get_content(version_id)
351
components = self._get_components(version_id)
352
for component_id, method, (data, digest) in components:
353
version_idx = self._index.lookup(component_id)
354
if method == 'fulltext':
355
assert content is None
356
content = self.factory.parse_fulltext(data, version_idx)
357
elif method == 'line-delta':
358
delta = self.factory.parse_line_delta(data, version_idx)
359
content.apply_delta(delta)
361
if 'no-eol' in self._index.get_options(version_id):
362
line = content._lines[-1][1].rstrip('\n')
363
content._lines[-1] = (content._lines[-1][0], line)
365
if sha_strings(content.text()) != digest:
366
raise KnitCorrupt(self.filename, 'sha-1 does not match')
370
def _check_versions_present(self, version_ids):
371
"""Check that all specified versions are present."""
372
version_ids = set(version_ids)
373
for r in list(version_ids):
374
if self._index.has_version(r):
375
version_ids.remove(r)
377
raise RevisionNotPresent(list(version_ids)[0], self.filename)
379
def add_lines(self, version_id, parents, lines):
380
"""See VersionedFile.add_lines."""
381
assert self.writable, "knit is not opened for write"
382
### FIXME escape. RBC 20060228
383
if contains_whitespace(version_id):
384
raise InvalidRevisionId(version_id)
385
if self.has_version(version_id):
386
raise RevisionAlreadyPresent(version_id, self.filename)
388
if True or __debug__:
390
assert '\n' not in l[:-1]
392
self._check_versions_present(parents)
393
return self._add(version_id, lines[:], parents, self.delta)
395
def _add(self, version_id, lines, parents, delta):
396
"""Add a set of lines on top of version specified by parents.
398
If delta is true, compress the text as a line-delta against
401
if delta and not parents:
404
digest = sha_strings(lines)
407
if lines[-1][-1] != '\n':
408
options.append('no-eol')
409
lines[-1] = lines[-1] + '\n'
411
lines = self.factory.make(lines, len(self._index))
412
if self.factory.annotated and len(parents) > 0:
413
# Merge annotations from parent texts if so is needed.
414
self._merge_annotations(lines, parents)
416
if parents and delta:
417
# To speed the extract of texts the delta chain is limited
418
# to a fixed number of deltas. This should minimize both
419
# I/O and the time spend applying deltas.
421
delta_parents = parents
423
parent = delta_parents[0]
424
method = self._index.get_method(parent)
425
if method == 'fulltext':
427
delta_parents = self._index.get_parents(parent)
429
if method == 'line-delta':
433
options.append('line-delta')
434
content = self._get_content(parents[0])
435
delta_hunks = content.line_delta(lines)
436
store_lines = self.factory.lower_line_delta(delta_hunks)
438
options.append('fulltext')
439
store_lines = self.factory.lower_fulltext(lines)
441
where, size = self._data.add_record(version_id, digest, store_lines)
442
self._index.add_version(version_id, options, where, size, parents)
444
def clone_text(self, new_version_id, old_version_id, parents):
445
"""See VersionedFile.clone_text()."""
446
# FIXME RBC 20060228 make fast by only inserting an index with null delta.
447
self.add_lines(new_version_id, parents, self.get_lines(old_version_id))
449
def get_lines(self, version_id):
450
"""See VersionedFile.get_lines()."""
451
return self._get_content(version_id).text()
453
def annotate_iter(self, version_id):
454
"""See VersionedFile.annotate_iter."""
455
content = self._get_content(version_id)
456
for origin, text in content.annotate_iter():
457
yield self._index.idx_to_name(origin), text
459
def get_parents(self, version_id):
460
"""See VersionedFile.get_parents."""
461
self._check_versions_present([version_id])
462
return list(self._index.get_parents(version_id))
464
def get_ancestry(self, versions):
465
"""See VersionedFile.get_ancestry."""
466
if isinstance(versions, basestring):
467
versions = [versions]
470
self._check_versions_present(versions)
471
return self._index.get_ancestry(versions)
473
def _reannotate_line_delta(self, other, lines, new_version_id,
475
"""Re-annotate line-delta and return new delta."""
477
for start, end, count, contents \
478
in self.factory.parse_line_delta_iter(lines):
480
for origin, line in contents:
481
old_version_id = other._index.idx_to_name(origin)
482
if old_version_id == new_version_id:
483
idx = new_version_idx
485
idx = self._index.lookup(old_version_id)
486
new_lines.append((idx, line))
487
new_delta.append((start, end, count, new_lines))
489
return self.factory.lower_line_delta(new_delta)
491
def _reannotate_fulltext(self, other, lines, new_version_id,
493
"""Re-annotate fulltext and return new version."""
494
content = self.factory.parse_fulltext(lines, new_version_idx)
496
for origin, line in content.annotate_iter():
497
old_version_id = other._index.idx_to_name(origin)
498
if old_version_id == new_version_id:
499
idx = new_version_idx
501
idx = self._index.lookup(old_version_id)
502
new_lines.append((idx, line))
504
return self.factory.lower_fulltext(KnitContent(new_lines))
506
def join(self, other, pb=None, msg=None, version_ids=None):
507
"""See VersionedFile.join."""
508
assert isinstance(other, KnitVersionedFile)
510
if version_ids is None:
511
version_ids = other.versions()
516
from bzrlib.progress import DummyProgress
519
version_ids = list(version_ids)
520
if None in version_ids:
521
version_ids.remove(None)
523
other_ancestry = set(other.get_ancestry(version_ids))
524
this_versions = set(self._index.get_versions())
525
needed_versions = other_ancestry - this_versions
526
cross_check_versions = other_ancestry.intersection(this_versions)
527
mismatched_versions = set()
528
for version in cross_check_versions:
529
# scan to include needed parents.
530
n1 = set(self.get_parents(version))
531
n2 = set(other.get_parents(version))
533
# FIXME TEST this check for cycles being introduced works
534
# the logic is we have a cycle if in our graph we are an
535
# ancestor of any of the n2 revisions.
541
parent_ancestors = other.get_ancestry(parent)
542
if version in parent_ancestors:
543
raise errors.GraphCycleError([parent, version])
544
# ensure this parent will be available later.
545
new_parents = n2.difference(n1)
546
needed_versions.update(new_parents.difference(this_versions))
547
mismatched_versions.add(version)
549
if not needed_versions and not cross_check_versions:
551
full_list = topo_sort(other._index.get_graph())
553
version_list = [i for i in full_list if (not self.has_version(i)
554
and i in needed_versions)]
557
for version_id in version_list:
558
data_pos, data_size = other._index.get_position(version_id)
559
records.append((version_id, data_pos, data_size))
562
for version_id, lines, digest \
563
in other._data.read_records_iter(records):
564
options = other._index.get_options(version_id)
565
parents = other._index.get_parents(version_id)
567
for parent in parents:
568
assert self.has_version(parent)
570
if self.factory.annotated:
571
# FIXME jrydberg: it should be possible to skip
572
# re-annotating components if we know that we are
573
# going to pull all revisions in the same order.
574
new_version_id = version_id
575
new_version_idx = self._index.num_versions()
576
if 'fulltext' in options:
577
lines = self._reannotate_fulltext(other, lines,
578
new_version_id, new_version_idx)
579
elif 'line-delta' in options:
580
lines = self._reannotate_line_delta(other, lines,
581
new_version_id, new_version_idx)
584
pb.update(self.filename, count, len(version_list))
586
pos, size = self._data.add_record(version_id, digest, lines)
587
self._index.add_version(version_id, options, pos, size, parents)
589
for version in mismatched_versions:
590
n1 = set(self.get_parents(version))
591
n2 = set(other.get_parents(version))
592
# write a combined record to our history.
593
new_parents = self.get_parents(version) + list(n2.difference(n1))
594
current_values = self._index._cache[version]
595
self._index.add_version(version,
603
def walk(self, version_ids):
604
"""See VersionedFile.walk."""
605
# We take the short path here, and extract all relevant texts
606
# and put them in a weave and let that do all the work. Far
607
# from optimal, but is much simpler.
608
# FIXME RB 20060228 this really is inefficient!
609
from bzrlib.weave import Weave
611
w = Weave(self.filename)
612
ancestry = self.get_ancestry(version_ids)
613
sorted_graph = topo_sort(self._index.get_graph())
614
version_list = [vid for vid in sorted_graph if vid in ancestry]
616
for version_id in version_list:
617
lines = self.get_lines(version_id)
618
w.add_lines(version_id, self.get_parents(version_id), lines)
620
for lineno, insert_id, dset, line in w.walk(version_ids):
621
yield lineno, insert_id, dset, line
624
class _KnitComponentFile(object):
625
"""One of the files used to implement a knit database"""
627
def __init__(self, transport, filename, mode):
628
self._transport = transport
629
self._filename = filename
632
def write_header(self):
633
old_len = self._transport.append(self._filename, self.HEADER)
635
raise KnitCorrupt(self._filename, 'misaligned after writing header')
637
def check_header(self, fp):
638
line = fp.read(len(self.HEADER))
639
if line != self.HEADER:
640
raise KnitHeaderError(badline=line)
643
"""Commit is a nop."""
646
return '%s(%s)' % (self.__class__.__name__, self._filename)
649
class _KnitIndex(_KnitComponentFile):
650
"""Manages knit index file.
652
The index is already kept in memory and read on startup, to enable
653
fast lookups of revision information. The cursor of the index
654
file is always pointing to the end, making it easy to append
657
_cache is a cache for fast mapping from version id to a Index
660
_history is a cache for fast mapping from indexes to version ids.
662
The index data format is dictionary compressed when it comes to
663
parent references; a index entry may only have parents that with a
664
    lower index number. As a result, the index is topological sorted.
666
Duplicate entries may be written to the index for a single version id
667
if this is done then the latter one completely replaces the former:
668
this allows updates to correct version and parent information.
669
Note that the two entries may share the delta, and that successive
670
annotations and references MUST point to the first entry.
673
HEADER = "# bzr knit index 7\n"
675
def _cache_version(self, version_id, options, pos, size, parents):
676
val = (version_id, options, pos, size, parents)
677
self._cache[version_id] = val
678
if not version_id in self._history:
679
self._history.append(version_id)
681
def _iter_index(self, fp):
683
for l in lines.splitlines(False):
686
def __init__(self, transport, filename, mode):
687
_KnitComponentFile.__init__(self, transport, filename, mode)
689
# position in _history is the 'official' index for a revision
690
# but the values may have come from a newer entry.
691
        # so - wc -l of a knit index is != the number of unique names
695
fp = self._transport.get(self._filename)
696
self.check_header(fp)
697
for rec in self._iter_index(fp):
698
self._cache_version(rec[0], rec[1].split(','), int(rec[2]), int(rec[3]),
699
[self._history[int(i)] for i in rec[4:]])
700
except NoSuchFile, e:
707
for version_id, index in self._cache.iteritems():
708
graph.append((version_id, index[4]))
711
def get_ancestry(self, versions):
712
"""See VersionedFile.get_ancestry."""
714
for version_id in versions:
715
version_idxs.append(self._history.index(version_id))
717
for v in xrange(max(version_idxs), 0, -1):
718
if self._history[v] in i:
719
# include all its parents
720
i.update(self._cache[self._history[v]][4])
723
def num_versions(self):
724
return len(self._history)
726
__len__ = num_versions
728
def get_versions(self):
731
def idx_to_name(self, idx):
732
return self._history[idx]
734
def lookup(self, version_id):
735
assert version_id in self._cache
736
return self._history.index(version_id)
738
def add_version(self, version_id, options, pos, size, parents):
739
"""Add a version record to the index."""
740
self._cache_version(version_id, options, pos, size, parents)
742
content = "%s %s %s %s %s\n" % (version_id,
746
' '.join([str(self.lookup(vid)) for
748
self._transport.append(self._filename, content)
750
def has_version(self, version_id):
751
"""True if the version is in the index."""
752
return self._cache.has_key(version_id)
754
def get_position(self, version_id):
755
"""Return data position and size of specified version."""
756
return (self._cache[version_id][2], \
757
self._cache[version_id][3])
759
def get_method(self, version_id):
760
"""Return compression method of specified version."""
761
options = self._cache[version_id][1]
762
if 'fulltext' in options:
765
assert 'line-delta' in options
768
def get_options(self, version_id):
769
return self._cache[version_id][1]
771
def get_parents(self, version_id):
772
"""Return parents of specified version."""
773
return self._cache[version_id][4]
775
def check_versions_present(self, version_ids):
776
"""Check that all specified versions are present."""
777
version_ids = set(version_ids)
778
for version_id in list(version_ids):
779
if version_id in self._cache:
780
version_ids.remove(version_id)
782
raise RevisionNotPresent(list(version_ids)[0], self.filename)
785
class _KnitData(_KnitComponentFile):
786
"""Contents of the knit data file"""
788
HEADER = "# bzr knit data 7\n"
790
def __init__(self, transport, filename, mode):
791
_KnitComponentFile.__init__(self, transport, filename, mode)
793
self._checked = False
795
def _open_file(self):
796
if self._file is None:
798
self._file = self._transport.get(self._filename)
803
def add_record(self, version_id, digest, lines):
804
"""Write new text record to disk. Returns the position in the
805
file where it was written."""
807
data_file = GzipFile(None, mode='wb', fileobj=sio)
808
print >>data_file, "version %s %d %s" % (version_id, len(lines), digest)
809
data_file.writelines(lines)
810
print >>data_file, "end %s\n" % version_id
813
content = sio.getvalue()
814
start_pos = self._transport.append(self._filename, content)
815
return start_pos, len(content)
817
def _parse_record(self, version_id, data):
818
df = GzipFile(mode='rb', fileobj=StringIO(data))
819
rec = df.readline().split()
821
raise KnitCorrupt(self._filename, 'unexpected number of records')
822
if rec[1] != version_id:
823
raise KnitCorrupt(self.file.name,
824
'unexpected version, wanted %r' % version_id)
826
record_contents = self._read_record_contents(df, lines)
828
if l != 'end %s\n' % version_id:
829
raise KnitCorrupt(self._filename, 'unexpected version end line %r, wanted %r'
831
return record_contents, rec[3]
833
def _read_record_contents(self, df, record_lines):
834
"""Read and return n lines from datafile."""
836
for i in range(record_lines):
837
r.append(df.readline())
840
def read_records_iter(self, records):
841
"""Read text records from data file and yield result.
843
Each passed record is a tuple of (version_id, pos, len) and
844
will be read in the given order. Yields (version_id,
848
class ContinuousRange:
849
def __init__(self, rec_id, pos, size):
851
self.end_pos = pos + size
852
self.versions = [(rec_id, pos, size)]
854
def add(self, rec_id, pos, size):
855
if self.end_pos != pos:
857
self.end_pos = pos + size
858
self.versions.append((rec_id, pos, size))
862
for rec_id, pos, size in self.versions:
863
yield rec_id, fp.read(size)
865
fp = self._open_file()
867
# Loop through all records and try to collect as large
868
# continuous region as possible to read.
870
record_id, pos, size = records.pop(0)
871
continuous_range = ContinuousRange(record_id, pos, size)
873
record_id, pos, size = records[0]
874
if continuous_range.add(record_id, pos, size):
878
fp.seek(continuous_range.start_pos, 0)
879
for record_id, data in continuous_range.split(fp):
880
content, digest = self._parse_record(record_id, data)
881
yield record_id, content, digest
885
def read_records(self, records):
886
"""Read records into a dictionary."""
888
for record_id, content, digest in self.read_records_iter(records):
889
components[record_id] = (content, digest)