# Copyright (C) 2005, 2006, 2007 Canonical Ltd
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Knit versionedfile implementation.
A knit is a versioned file implementation that supports efficient append only writes.
lifeless: the data file is made up of "delta records". each delta record has a delta header
that contains; (1) a version id, (2) the size of the delta (in lines), and (3) the digest of
the -expanded data- (ie, the delta applied to the parent). the delta also ends with a
end-marker; simply "end VERSION"
delta can be line or full contents.
... the 8's there are the index number of the annotation.
version robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad 7 c7d23b2a5bd6ca00e8e266cec0ec228158ee9f9e
8 e.set('executable', 'yes')
8 if elt.get('executable') == 'yes':
8 ie.executable = True
end robertc@robertcollins.net-20051003014215-ee2990904cc4c7ad
09:33 < jrydberg> lifeless: each index is made up of a tuple of; version id, options, position, size, parents
09:33 < jrydberg> lifeless: the parents are currently dictionary compressed
09:33 < jrydberg> lifeless: (meaning it currently does not support ghosts)
09:33 < lifeless> right
09:33 < jrydberg> lifeless: the position and size is the range in the data file
so the index sequence is the dictionary compressed sequence number used
in the deltas to provide line annotation
# 10:16 < lifeless> make partial index writes safe
# 10:16 < lifeless> implement 'knit.check()' like weave.check()
# 10:17 < lifeless> record known ghosts so we can detect when they are filled in rather than the current 'reweave
# move sha1 out of the content so that join is faster at verifying parents
# record content length ?
from cStringIO import StringIO
from itertools import izip, chain
from zlib import Z_DEFAULT_COMPRESSION
from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
from bzrlib.errors import (
RevisionAlreadyPresent,
from bzrlib.graph import Graph
from bzrlib.osutils import (
from bzrlib.tsort import topo_sort
from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip
from bzrlib.versionedfile import (
AbsentContentFactory,
FulltextContentFactory,
# TODO: Split out code specific to this format into an associated object.
# TODO: Can we put in some kind of value to check that the index and data
# files belong together?
# TODO: accommodate binaries, perhaps by storing a byte count
# TODO: function to check whole file
# TODO: atomically append data, then measure backwards from the cursor
# position after writing to work out where it was located. we may need to
# bypass python file buffering.
DATA_SUFFIX = '.knit'
INDEX_SUFFIX = '.kndx'
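# Illustrative note (an assumption based on the suffixes above, not taken from
# the original file): a knit mapped via ConstantMapper('revisions') would keep
# its compressed texts in 'revisions.knit' and its index lines in
# 'revisions.kndx'.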
class KnitAdapter(object):
"""Base class for knit record adaption."""
def __init__(self, basis_vf):
"""Create an adapter which accesses full texts from basis_vf.
:param basis_vf: A versioned file to access basis texts of deltas from.
May be None for adapters that do not need to access basis texts.
self._data = KnitVersionedFiles(None, None)
self._annotate_factory = KnitAnnotateFactory()
self._plain_factory = KnitPlainFactory()
self._basis_vf = basis_vf
class FTAnnotatedToUnannotated(KnitAdapter):
"""An adapter from FT annotated knits to unannotated ones."""
def get_bytes(self, factory, annotated_compressed_bytes):
self._data._parse_record_unchecked(annotated_compressed_bytes)
content = self._annotate_factory.parse_fulltext(contents, rec[1])
size, bytes = self._data._record_to_data((rec[1],), rec[3], content.text())
class DeltaAnnotatedToUnannotated(KnitAdapter):
"""An adapter for deltas from annotated to unannotated."""
def get_bytes(self, factory, annotated_compressed_bytes):
self._data._parse_record_unchecked(annotated_compressed_bytes)
delta = self._annotate_factory.parse_line_delta(contents, rec[1],
contents = self._plain_factory.lower_line_delta(delta)
size, bytes = self._data._record_to_data((rec[1],), rec[3], contents)
class FTAnnotatedToFullText(KnitAdapter):
"""An adapter from FT annotated knits to fulltexts."""
def get_bytes(self, factory, annotated_compressed_bytes):
self._data._parse_record_unchecked(annotated_compressed_bytes)
content, delta = self._annotate_factory.parse_record(factory.key[-1],
contents, factory._build_details, None)
return ''.join(content.text())
class DeltaAnnotatedToFullText(KnitAdapter):
"""An adapter for deltas from annotated knits to fulltexts."""
def get_bytes(self, factory, annotated_compressed_bytes):
self._data._parse_record_unchecked(annotated_compressed_bytes)
delta = self._annotate_factory.parse_line_delta(contents, rec[1],
compression_parent = factory.parents[0]
basis_entry = self._basis_vf.get_record_stream(
[compression_parent], 'unordered', True).next()
if basis_entry.storage_kind == 'absent':
raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
basis_lines = split_lines(basis_entry.get_bytes_as('fulltext'))
# Manually apply the delta because we have one annotated content and
basis_content = PlainKnitContent(basis_lines, compression_parent)
basis_content.apply_delta(delta, rec[1])
basis_content._should_strip_eol = factory._build_details[1]
return ''.join(basis_content.text())
class FTPlainToFullText(KnitAdapter):
"""An adapter from FT plain knits to fulltexts."""
def get_bytes(self, factory, compressed_bytes):
self._data._parse_record_unchecked(compressed_bytes)
content, delta = self._plain_factory.parse_record(factory.key[-1],
contents, factory._build_details, None)
return ''.join(content.text())
class DeltaPlainToFullText(KnitAdapter):
"""An adapter for deltas from plain knits to fulltexts."""
def get_bytes(self, factory, compressed_bytes):
self._data._parse_record_unchecked(compressed_bytes)
delta = self._plain_factory.parse_line_delta(contents, rec[1])
compression_parent = factory.parents[0]
# XXX: string splitting overhead.
basis_entry = self._basis_vf.get_record_stream(
[compression_parent], 'unordered', True).next()
if basis_entry.storage_kind == 'absent':
raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
basis_lines = split_lines(basis_entry.get_bytes_as('fulltext'))
basis_content = PlainKnitContent(basis_lines, compression_parent)
# Manually apply the delta because we have one annotated content and
content, _ = self._plain_factory.parse_record(rec[1], contents,
factory._build_details, basis_content)
return ''.join(content.text())
class KnitContentFactory(ContentFactory):
"""Content factory for streaming from knits.
:seealso ContentFactory:
def __init__(self, key, parents, build_details, sha1, raw_record,
annotated, knit=None):
"""Create a KnitContentFactory for key.
:param parents: The parents.
:param build_details: The build details as returned from
:param sha1: The sha1 expected from the full text of this object.
:param raw_record: The bytes of the knit data from disk.
:param annotated: True if the raw data is annotated.
ContentFactory.__init__(self)
self.parents = parents
if build_details[0] == 'line-delta':
annotated_kind = 'annotated-'
self.storage_kind = 'knit-%s%s-gz' % (annotated_kind, kind)
self._raw_record = raw_record
self._build_details = build_details
def get_bytes_as(self, storage_kind):
if storage_kind == self.storage_kind:
return self._raw_record
if storage_kind == 'fulltext' and self._knit is not None:
return self._knit.get_text(self.key[0])
raise errors.UnavailableRepresentation(self.key, storage_kind,
class KnitContent(object):
"""Content of a knit version to which deltas can be applied.
This is always stored in memory as a list of lines with \n at the end,
plus a flag saying if the final ending is really there or not, because that
corresponds to the on-disk knit representation.
self._should_strip_eol = False
def apply_delta(self, delta, new_version_id):
"""Apply delta to this object to become new_version_id."""
raise NotImplementedError(self.apply_delta)
def line_delta_iter(self, new_lines):
"""Generate line-based delta from this content to new_lines."""
new_texts = new_lines.text()
old_texts = self.text()
s = patiencediff.PatienceSequenceMatcher(None, old_texts, new_texts)
for tag, i1, i2, j1, j2 in s.get_opcodes():
# ofrom, oto, length, data
yield i1, i2, j2 - j1, new_lines._lines[j1:j2]
def line_delta(self, new_lines):
return list(self.line_delta_iter(new_lines))
def get_line_delta_blocks(knit_delta, source, target):
"""Extract SequenceMatcher.get_matching_blocks() from a knit delta"""
target_len = len(target)
for s_begin, s_end, t_len, new_text in knit_delta:
true_n = s_begin - s_pos
# knit deltas do not provide reliable info about whether the
# last line of a file matches, due to eol handling.
if source[s_pos + n -1] != target[t_pos + n -1]:
yield s_pos, t_pos, n
t_pos += t_len + true_n
n = target_len - t_pos
if source[s_pos + n -1] != target[t_pos + n -1]:
yield s_pos, t_pos, n
yield s_pos + (target_len - t_pos), target_len, 0
class AnnotatedKnitContent(KnitContent):
"""Annotated content."""
def __init__(self, lines):
KnitContent.__init__(self)
"""Return a list of (origin, text) for each content line."""
lines = self._lines[:]
if self._should_strip_eol:
origin, last_line = lines[-1]
lines[-1] = (origin, last_line.rstrip('\n'))
def apply_delta(self, delta, new_version_id):
"""Apply delta to this object to become new_version_id."""
for start, end, count, delta_lines in delta:
lines[offset+start:offset+end] = delta_lines
offset = offset + (start - end) + count
lines = [text for origin, text in self._lines]
except ValueError, e:
# most commonly (only?) caused by the internal form of the knit
# missing annotation information because of a bug - see thread
raise KnitCorrupt(self,
"line in annotated knit missing annotation information: %s"
if self._should_strip_eol:
lines[-1] = lines[-1].rstrip('\n')
return AnnotatedKnitContent(self._lines[:])
class PlainKnitContent(KnitContent):
"""Unannotated content.
When annotate[_iter] is called on this content, the same version is reported
for all lines. Generally, annotate[_iter] is not useful on PlainKnitContent
def __init__(self, lines, version_id):
KnitContent.__init__(self)
self._version_id = version_id
"""Return a list of (origin, text) for each content line."""
return [(self._version_id, line) for line in self._lines]
def apply_delta(self, delta, new_version_id):
"""Apply delta to this object to become new_version_id."""
for start, end, count, delta_lines in delta:
lines[offset+start:offset+end] = delta_lines
offset = offset + (start - end) + count
self._version_id = new_version_id
return PlainKnitContent(self._lines[:], self._version_id)
if self._should_strip_eol:
lines[-1] = lines[-1].rstrip('\n')
class _KnitFactory(object):
"""Base class for common Factory functions."""
def parse_record(self, version_id, record, record_details,
base_content, copy_base_content=True):
"""Parse a record into a full content object.
:param version_id: The official version id for this content
:param record: The data returned by read_records_iter()
:param record_details: Details about the record returned by
:param base_content: If get_build_details returns a compression_parent,
you must provide a base_content here, else use None
:param copy_base_content: When building from the base_content, decide
you can either copy it and return a new object, or modify it in
:return: (content, delta) A Content object and possibly a line-delta,
method, noeol = record_details
if method == 'line-delta':
if copy_base_content:
content = base_content.copy()
content = base_content
delta = self.parse_line_delta(record, version_id)
content.apply_delta(delta, version_id)
content = self.parse_fulltext(record, version_id)
content._should_strip_eol = noeol
return (content, delta)
class KnitAnnotateFactory(_KnitFactory):
"""Factory for creating annotated Content objects."""
def make(self, lines, version_id):
num_lines = len(lines)
return AnnotatedKnitContent(zip([version_id] * num_lines, lines))
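# Illustrative example (a sketch, not from the original source):
#   KnitAnnotateFactory().make(['a\n', 'b\n'], 'rev-1')
# yields an AnnotatedKnitContent whose annotate() returns
#   [('rev-1', 'a\n'), ('rev-1', 'b\n')]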
def parse_fulltext(self, content, version_id):
"""Convert fulltext to internal representation
fulltext content is of the format
revid(utf8) plaintext\n
internal representation is of the format:
# TODO: jam 20070209 The tests expect this to be returned as tuples,
# but the code itself doesn't really depend on that.
# Figure out a way to not require the overhead of turning the
# list back into tuples.
lines = [tuple(line.split(' ', 1)) for line in content]
return AnnotatedKnitContent(lines)
def parse_line_delta_iter(self, lines):
return iter(self.parse_line_delta(lines))
def parse_line_delta(self, lines, version_id, plain=False):
"""Convert a line based delta into internal representation.
line delta is in the form of:
intstart intend intcount
revid(utf8) newline\n
internal representation is
(start, end, count, [1..count tuples (revid, newline)])
:param plain: If True, the lines are returned as a plain
list without annotations, not as a list of (origin, content) tuples, i.e.
(start, end, count, [1..count newline])
def cache_and_return(line):
origin, text = line.split(' ', 1)
return cache.setdefault(origin, origin), text
# walk through the lines parsing.
# Note that the plain test is explicitly pulled out of the
# loop to minimise any performance impact
start, end, count = [int(n) for n in header.split(',')]
contents = [next().split(' ', 1)[1] for i in xrange(count)]
result.append((start, end, count, contents))
start, end, count = [int(n) for n in header.split(',')]
contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
result.append((start, end, count, contents))
def get_fulltext_content(self, lines):
"""Extract just the content lines from a fulltext."""
return (line.split(' ', 1)[1] for line in lines)
def get_linedelta_content(self, lines):
"""Extract just the content from a line delta.
This doesn't return all of the extra information stored in a delta.
Only the actual content lines.
header = header.split(',')
count = int(header[2])
for i in xrange(count):
origin, text = next().split(' ', 1)
def lower_fulltext(self, content):
"""convert a fulltext content record into a serializable form.
see parse_fulltext which this inverts.
# TODO: jam 20070209 We only do the caching thing to make sure that
# the origin is a valid utf-8 line, eventually we could remove it
return ['%s %s' % (o, t) for o, t in content._lines]
def lower_line_delta(self, delta):
"""convert a delta into a serializable form.
See parse_line_delta which this inverts.
# TODO: jam 20070209 We only do the caching thing to make sure that
# the origin is a valid utf-8 line, eventually we could remove it
for start, end, c, lines in delta:
out.append('%d,%d,%d\n' % (start, end, c))
out.extend(origin + ' ' + text
for origin, text in lines)
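# Illustrative round-trip (a sketch, not taken from the original file): the
# internal delta [(1, 2, 1, [('rev-2', 'new text\n')])] lowers to the
# serialised lines ['1,2,1\n', 'rev-2 new text\n']; parse_line_delta() turns
# that serialised form back into the internal representation (or into
# [(1, 2, 1, ['new text\n'])] when plain=True).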
def annotate(self, knit, key):
content = knit._get_content(key)
# adjust for the fact that serialised annotations are only key suffixes
if type(key) == tuple:
origins = content.annotate()
for origin, line in origins:
result.append((prefix + (origin,), line))
# XXX: This smells a bit. Why would key ever be a non-tuple here?
# Aren't keys defined to be tuples? -- spiv 20080618
return content.annotate()
class KnitPlainFactory(_KnitFactory):
"""Factory for creating plain Content objects."""
def make(self, lines, version_id):
return PlainKnitContent(lines, version_id)
def parse_fulltext(self, content, version_id):
"""This parses an unannotated fulltext.
Note that this is not a noop - the internal representation
has (versionid, line) - it's just a constant versionid.
return self.make(content, version_id)
def parse_line_delta_iter(self, lines, version_id):
num_lines = len(lines)
while cur < num_lines:
start, end, c = [int(n) for n in header.split(',')]
yield start, end, c, lines[cur:cur+c]
def parse_line_delta(self, lines, version_id):
return list(self.parse_line_delta_iter(lines, version_id))
def get_fulltext_content(self, lines):
"""Extract just the content lines from a fulltext."""
def get_linedelta_content(self, lines):
"""Extract just the content from a line delta.
This doesn't return all of the extra information stored in a delta.
Only the actual content lines.
header = header.split(',')
count = int(header[2])
for i in xrange(count):
def lower_fulltext(self, content):
return content.text()
def lower_line_delta(self, delta):
for start, end, c, lines in delta:
out.append('%d,%d,%d\n' % (start, end, c))
def annotate(self, knit, key):
annotator = _KnitAnnotator(knit)
return annotator.annotate(key)
def make_file_factory(annotated, mapper):
"""Create a factory for creating a file based KnitVersionedFiles.
This is only functional enough to run interface tests; it doesn't try to
provide a full pack environment.
:param annotated: knit annotations are wanted.
:param mapper: The mapper from keys to paths.
def factory(transport):
index = _KndxIndex(transport, mapper, lambda:None, lambda:True, lambda:True)
access = _KnitKeyAccess(transport, mapper)
return KnitVersionedFiles(index, access, annotated=annotated)
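# Illustrative usage (a hedged sketch; the transport setup below is an
# assumption, not taken from the original file):
#   from bzrlib.transport import get_transport
#   factory = make_file_factory(annotated=True, mapper=ConstantMapper('revisions'))
#   vf = factory(get_transport('.'))   # KnitVersionedFiles over ./revisions.knit
#   vf.add_lines(('rev-1',), (), ['first line\n'])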
def make_pack_factory(graph, delta, keylength):
"""Create a factory for creating a pack based VersionedFiles.
This is only functional enough to run interface tests; it doesn't try to
provide a full pack environment.
:param graph: Store a graph.
:param delta: Delta compress contents.
:param keylength: How long keys should be.
def factory(transport):
parents = graph or delta
max_delta_chain = 200
graph_index = _mod_index.InMemoryGraphIndex(reference_lists=ref_length,
key_elements=keylength)
stream = transport.open_write_stream('newpack')
writer = pack.ContainerWriter(stream.write)
index = _KnitGraphIndex(graph_index, lambda:True, parents=parents,
deltas=delta, add_callback=graph_index.add_nodes)
access = _DirectPackAccess({})
access.set_writer(writer, graph_index, (transport, 'newpack'))
result = KnitVersionedFiles(index, access,
max_delta_chain=max_delta_chain)
result.stream = stream
result.writer = writer
def cleanup_pack_knit(versioned_files):
versioned_files.stream.close()
versioned_files.writer.end()
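# Illustrative usage (a hedged sketch; the transport is assumed, not from the
# original file):
#   factory = make_pack_factory(graph=True, delta=True, keylength=1)
#   vf = factory(transport)                     # pack-backed KnitVersionedFiles
#   vf.add_lines(('rev-1',), (), ['a line\n'])
#   cleanup_pack_knit(vf)                       # close the stream and end the container writer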
class KnitVersionedFiles(VersionedFiles):
"""Storage for many versioned files using knit compression.
Backend storage is managed by indices and data objects.
def __init__(self, index, data_access, max_delta_chain=200,
"""Create a KnitVersionedFiles with index and data_access.
:param index: The index for the knit data.
:param data_access: The access object to store and retrieve knit
:param max_delta_chain: The maximum number of deltas to permit during
insertion. Set to 0 to prohibit the use of deltas.
:param annotated: Set to True to cause annotations to be calculated and
stored during insertion.
self._access = data_access
self._max_delta_chain = max_delta_chain
self._factory = KnitAnnotateFactory()
self._factory = KnitPlainFactory()
def add_lines(self, key, parents, lines, parent_texts=None,
left_matching_blocks=None, nostore_sha=None, random_id=False,
"""See VersionedFiles.add_lines()."""
self._index._check_write_ok()
self._check_add(key, lines, random_id, check_content)
# The caller might pass None if there is no graph data, but kndx
# indexes can't directly store that, so we give them
# an empty tuple instead.
return self._add(key, lines, parents,
parent_texts, left_matching_blocks, nostore_sha, random_id)
def _add(self, key, lines, parents, parent_texts,
left_matching_blocks, nostore_sha, random_id):
"""Add a set of lines on top of version specified by parents.
Any versions not present will be converted into ghosts.
# first thing, if the content is something we don't need to store, find
line_bytes = ''.join(lines)
digest = sha_string(line_bytes)
if nostore_sha == digest:
raise errors.ExistingContent
if parent_texts is None:
# Do a single query to ascertain parent presence.
present_parent_map = self.get_parent_map(parents)
for parent in parents:
if parent in present_parent_map:
present_parents.append(parent)
# Currently we can only compress against the leftmost present parent.
if (len(present_parents) == 0 or
present_parents[0] != parents[0]):
# To speed the extract of texts the delta chain is limited
# to a fixed number of deltas. This should minimize both
# I/O and the time spent applying deltas.
delta = self._check_should_delta(present_parents[0])
text_length = len(line_bytes)
if lines[-1][-1] != '\n':
# copy the contents of lines.
options.append('no-eol')
lines[-1] = lines[-1] + '\n'
if type(element) != str:
raise TypeError("key contains non-strings: %r" % (key,))
# Knit hunks are still last-element only
content = self._factory.make(lines, version_id)
if 'no-eol' in options:
# Hint to the content object that its text() call should strip the
content._should_strip_eol = True
if delta or (self._factory.annotated and len(present_parents) > 0):
# Merge annotations from parent texts if needed.
delta_hunks = self._merge_annotations(content, present_parents,
parent_texts, delta, self._factory.annotated,
left_matching_blocks)
options.append('line-delta')
store_lines = self._factory.lower_line_delta(delta_hunks)
size, bytes = self._record_to_data(key, digest,
options.append('fulltext')
# isinstance is slower and we have no hierarchy.
if self._factory.__class__ == KnitPlainFactory:
# Use the already joined bytes saving iteration time in
size, bytes = self._record_to_data(key, digest,
# get mixed annotation + content and feed it into the
store_lines = self._factory.lower_fulltext(content)
size, bytes = self._record_to_data(key, digest,
access_memo = self._access.add_raw_records([(key, size)], bytes)[0]
self._index.add_records(
((key, options, access_memo, parents),),
return digest, text_length, content
def annotate(self, key):
"""See VersionedFiles.annotate."""
return self._factory.annotate(self, key)
def check(self, progress_bar=None):
"""See VersionedFiles.check()."""
# This doesn't actually test extraction of everything, but that will
# impact 'bzr check' substantially, and needs to be integrated with
# care. However, it does check for the obvious problem of a delta with
parent_map = self.get_parent_map(keys)
if self._index.get_method(key) != 'fulltext':
compression_parent = parent_map[key][0]
if compression_parent not in parent_map:
raise errors.KnitCorrupt(self,
"Missing basis parent %s for %s" % (
compression_parent, key))
def _check_add(self, key, lines, random_id, check_content):
"""check that version_id and lines are safe to add."""
if contains_whitespace(version_id):
raise InvalidRevisionId(version_id, self.filename)
self.check_not_reserved_id(version_id)
# TODO: If random_id==False and the key is already present, we should
# probably check that the existing content is identical to what is
# being inserted, and otherwise raise an exception. This would make
# the bundle code simpler.
self._check_lines_not_unicode(lines)
self._check_lines_are_lines(lines)
def _check_header(self, key, line):
rec = self._split_header(line)
self._check_header_version(rec, key[-1])
def _check_header_version(self, rec, version_id):
"""Checks the header version on original format knit records.
These have the last component of the key embedded in the record.
if rec[1] != version_id:
raise KnitCorrupt(self,
'unexpected version, wanted %r, got %r' % (version_id, rec[1]))
def _check_should_delta(self, parent):
"""Iterate back through the parent listing, looking for a fulltext.
This is used when we want to decide whether to add a delta or a new
fulltext. It searches for _max_delta_chain parents. When it finds a
fulltext parent, it sees if the total size of the deltas leading up to
it is large enough to indicate that we want a new full text anyway.
Return True if we should create a new delta, False if we should use a
for count in xrange(self._max_delta_chain):
# XXX: Collapse these two queries:
method = self._index.get_method(parent)
index, pos, size = self._index.get_position(parent)
if method == 'fulltext':
# We don't explicitly check for presence because this is in an
# inner loop, and if it's missing it'll fail anyhow.
# TODO: This should be asking for compression parent, not graph
parent = self._index.get_parent_map([parent])[parent][0]
# We couldn't find a fulltext, so we must create a new one
# Simple heuristic - if the total I/O would be greater as a delta than
# the originally installed fulltext, we create a new fulltext.
return fulltext_size > delta_size
def _build_details_to_components(self, build_details):
"""Convert a build_details tuple to a position tuple."""
# record_details, access_memo, compression_parent
return build_details[3], build_details[0], build_details[1]
def _get_components_positions(self, keys, allow_missing=False):
"""Produce a map of position data for the components of keys.
This data is intended to be used for retrieving the knit records.
A dict of key to (record_details, index_memo, next, parents) is
method is the way referenced data should be applied.
index_memo is the handle to pass to the data access to actually get the
next is the build-parent of the version, or None for fulltexts.
parents is the version_ids of the parents of this version
:param allow_missing: If True do not raise an error on a missing component,
pending_components = keys
while pending_components:
build_details = self._index.get_build_details(pending_components)
current_components = set(pending_components)
pending_components = set()
for key, details in build_details.iteritems():
(index_memo, compression_parent, parents,
record_details) = details
method = record_details[0]
if compression_parent is not None:
pending_components.add(compression_parent)
component_data[key] = self._build_details_to_components(details)
missing = current_components.difference(build_details)
if missing and not allow_missing:
raise errors.RevisionNotPresent(missing.pop(), self)
return component_data
def _get_content(self, key, parent_texts={}):
"""Returns a content object that makes up the specified
cached_version = parent_texts.get(key, None)
if cached_version is not None:
# Ensure the cache dict is valid.
if not self.get_parent_map([key]):
raise RevisionNotPresent(key, self)
return cached_version
text_map, contents_map = self._get_content_maps([key])
return contents_map[key]
def _get_content_maps(self, keys):
"""Produce maps of text and KnitContents
:return: (text_map, content_map) where text_map contains the texts for
the requested versions and content_map contains the KnitContents.
# FUTURE: This function could be improved for the 'extract many' case
# by tracking each component and only doing the copy when the number of
# children that need to apply deltas to it is > 1 or it is part of the
multiple_versions = len(keys) != 1
record_map = self._get_record_map(keys)
while cursor is not None:
record, record_details, digest, next = record_map[cursor]
components.append((cursor, record, record_details, digest))
if cursor in content_map:
for (component_id, record, record_details,
digest) in reversed(components):
if component_id in content_map:
content = content_map[component_id]
content, delta = self._factory.parse_record(key[-1],
record, record_details, content,
copy_base_content=multiple_versions)
if multiple_versions:
content_map[component_id] = content
final_content[key] = content
# digest here is the digest from the last applied component.
text = content.text()
actual_sha = sha_strings(text)
if actual_sha != digest:
raise KnitCorrupt(self,
'\n of reconstructed text does not match'
'\n for version %s' %
(actual_sha, digest, key))
text_map[key] = text
return text_map, final_content
def get_parent_map(self, keys):
"""Get a map of the parents of keys.
:param keys: The keys to look up parents for.
:return: A mapping from keys to parents. Absent keys are absent from
return self._index.get_parent_map(keys)
def _get_record_map(self, keys):
"""Produce a dictionary of knit records.
:return: {key:(record, record_details, digest, next)}
data returned from read_records
opaque information to pass to parse_record
SHA1 digest of the full text after all steps are done
build-parent of the version, i.e. the leftmost ancestor.
Will be None if the record is not a delta.
position_map = self._get_components_positions(keys)
# key = component_id, r = record_details, i_m = index_memo, n = next
records = [(key, i_m) for key, (r, i_m, n)
in position_map.iteritems()]
for key, record, digest in \
self._read_records_iter(records):
(record_details, index_memo, next) = position_map[key]
record_map[key] = record, record_details, digest, next
def get_record_stream(self, keys, ordering, include_delta_closure):
"""Get a stream of records for keys.
:param keys: The keys to include.
:param ordering: Either 'unordered' or 'topological'. A topologically
sorted stream has compression parents strictly before their
:param include_delta_closure: If True then the closure across any
compression parents will be included (in the opaque data).
:return: An iterator of ContentFactory objects, each of which is only
valid until the iterator is advanced.
# keys might be a generator
if not self._index.has_graph:
# Cannot topologically order when no graph has been stored.
ordering = 'unordered'
if include_delta_closure:
positions = self._get_components_positions(keys, allow_missing=True)
build_details = self._index.get_build_details(keys)
# (record_details, access_memo, compression_parent_key)
positions = dict((key, self._build_details_to_components(details))
for key, details in build_details.iteritems())
absent_keys = keys.difference(set(positions))
# There may be more absent keys: if we're missing the basis component
# and are trying to include the delta closure.
if include_delta_closure:
# Build up reconstructable_keys dict. key:True in this dict means
# the key can be reconstructed.
reconstructable_keys = {}
chain = [key, positions[key][2]]
absent_keys.add(key)
while chain[-1] is not None:
if chain[-1] in reconstructable_keys:
result = reconstructable_keys[chain[-1]]
chain.append(positions[chain[-1]][2])
# missing basis component
for chain_key in chain[:-1]:
reconstructable_keys[chain_key] = result
absent_keys.add(key)
for key in absent_keys:
yield AbsentContentFactory(key)
# restrict our view to the keys we can answer.
keys = keys - absent_keys
# Double index lookups here: need a unified api?
parent_map = self.get_parent_map(keys)
if ordering == 'topological':
present_keys = topo_sort(parent_map)
# XXX: Memory: TODO: batch data here to cap buffered data at (say) 1MB.
# XXX: At that point we need to consider double reads by utilising
# components multiple times.
if include_delta_closure:
# XXX: get_content_maps performs its own index queries; allow state
text_map, _ = self._get_content_maps(present_keys)
for key in present_keys:
yield FulltextContentFactory(key, parent_map[key], None,
''.join(text_map[key]))
records = [(key, positions[key][1]) for key in present_keys]
for key, raw_data, sha1 in self._read_records_iter_raw(records):
(record_details, index_memo, _) = positions[key]
yield KnitContentFactory(key, parent_map[key],
record_details, sha1, raw_data, self._factory.annotated, None)
def get_sha1s(self, keys):
"""See VersionedFiles.get_sha1s()."""
record_map = self._get_record_map(keys)
# record entry 2 is the 'digest'.
return [record_map[key][2] for key in keys]
def insert_record_stream(self, stream):
"""Insert a record stream into this container.
:param stream: A stream of records to insert.
:seealso VersionedFiles.get_record_stream:
def get_adapter(adapter_key):
return adapters[adapter_key]
adapter_factory = adapter_registry.get(adapter_key)
adapter = adapter_factory(self)
adapters[adapter_key] = adapter
if self._factory.annotated:
# self is annotated, we need annotated knits to use directly.
annotated = "annotated-"
# self is not annotated, but we can strip annotations cheaply.
convertibles = set(["knit-annotated-ft-gz"])
if self._max_delta_chain:
convertibles.add("knit-annotated-delta-gz")
# The set of types we can cheaply adapt without needing basis texts.
native_types = set()
if self._max_delta_chain:
native_types.add("knit-%sdelta-gz" % annotated)
native_types.add("knit-%sft-gz" % annotated)
knit_types = native_types.union(convertibles)
# Buffer all index entries that we can't add immediately because their
# basis parent is missing. We don't buffer all because generating
# annotations may require access to some of the new records. However we
# can't generate annotations from new deltas until their basis parent
# is present anyway, so we get away with not needing an index that
# includes the new keys.
# key = basis_parent, value = index entry to add
buffered_index_entries = {}
for record in stream:
parents = record.parents
# Raise an error when a record is missing.
if record.storage_kind == 'absent':
raise RevisionNotPresent([record.key], self)
if record.storage_kind in knit_types:
if record.storage_kind not in native_types:
adapter_key = (record.storage_kind, "knit-delta-gz")
adapter = get_adapter(adapter_key)
adapter_key = (record.storage_kind, "knit-ft-gz")
adapter = get_adapter(adapter_key)
bytes = adapter.get_bytes(
record, record.get_bytes_as(record.storage_kind))
bytes = record.get_bytes_as(record.storage_kind)
options = [record._build_details[0]]
if record._build_details[1]:
options.append('no-eol')
# Just blat it across.
# Note: This does end up adding data on duplicate keys. As
# modern repositories use atomic insertions this should not
# lead to excessive growth in the event of interrupted fetches.
# 'knit' repositories may suffer excessive growth, but as a
# deprecated format this is tolerable. It can be fixed if
# needed by having the kndx index raise on a duplicate
# add with identical parents and options.
access_memo = self._access.add_raw_records(
[(record.key, len(bytes))], bytes)[0]
index_entry = (record.key, options, access_memo, parents)
if 'fulltext' not in options:
basis_parent = parents[0]
# Note that pack backed knits don't need to buffer here
# because they buffer all writes to the transaction level,
# but we don't expose that difference at the index level. If
# the query here has sufficient cost to show up in
# profiling we should do that.
if basis_parent not in self.get_parent_map([basis_parent]):
pending = buffered_index_entries.setdefault(
pending.append(index_entry)
self._index.add_records([index_entry])
elif record.storage_kind == 'fulltext':
self.add_lines(record.key, parents,
split_lines(record.get_bytes_as('fulltext')))
adapter_key = record.storage_kind, 'fulltext'
adapter = get_adapter(adapter_key)
lines = split_lines(adapter.get_bytes(
record, record.get_bytes_as(record.storage_kind)))
self.add_lines(record.key, parents, lines)
except errors.RevisionAlreadyPresent:
# Add any records whose basis parent is now available.
added_keys = [record.key]
key = added_keys.pop(0)
if key in buffered_index_entries:
index_entries = buffered_index_entries[key]
self._index.add_records(index_entries)
[index_entry[0] for index_entry in index_entries])
del buffered_index_entries[key]
# If there were any deltas which had a missing basis parent, error.
if buffered_index_entries:
raise errors.RevisionNotPresent(buffered_index_entries.keys()[0],
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
"""Iterate over the lines in the versioned files from keys.
This may return lines from other keys. Each item the returned
iterator yields is a tuple of a line and a text version that that line
is present in (not introduced in).
Ordering of results is in whatever order is most suitable for the
underlying storage format.
If a progress bar is supplied, it may be used to indicate progress.
The caller is responsible for cleaning up progress bars (because this
* Lines are normalised by the underlying store: they will all have \n
* Lines are returned in arbitrary order.
:return: An iterator over (line, key).
pb = progress.DummyProgress()
# filter for available keys
parent_map = self.get_parent_map(keys)
if len(parent_map) != len(keys):
missing = set(parent_map) - requested_keys
raise RevisionNotPresent(key, self.filename)
# we don't care about inclusions, the caller cares.
# but we need to setup a list of records to visit.
# we need key, position, length
build_details = self._index.get_build_details(keys)
key_records.append((key, build_details[key][0]))
total = len(key_records)
records_iter = enumerate(self._read_records_iter(key_records))
for (key_idx, (key, data, sha_value)) in records_iter:
pb.update('Walking content.', key_idx, total)
compression_parent = build_details[key][1]
if compression_parent is None:
line_iterator = self._factory.get_fulltext_content(data)
line_iterator = self._factory.get_linedelta_content(data)
# XXX: It might be more efficient to yield (key,
# line_iterator) in the future. However for now, this is a simpler
# change to integrate into the rest of the codebase. RBC 20071110
for line in line_iterator:
pb.update('Walking content.', total, total)
def _make_line_delta(self, delta_seq, new_content):
"""Generate a line delta from delta_seq and new_content."""
for op in delta_seq.get_opcodes():
if op[0] == 'equal':
diff_hunks.append((op[1], op[2], op[4]-op[3], new_content._lines[op[3]:op[4]]))
def _merge_annotations(self, content, parents, parent_texts={},
delta=None, annotated=None,
left_matching_blocks=None):
"""Merge annotations for content and generate deltas.
This is done by comparing the annotations based on changes to the text
and generating a delta on the resulting full texts. If annotations are
not being created then a simple delta is created.
if left_matching_blocks is not None:
delta_seq = diff._PrematchedMatcher(left_matching_blocks)
for parent_key in parents:
merge_content = self._get_content(parent_key, parent_texts)
if (parent_key == parents[0] and delta_seq is not None):
seq = patiencediff.PatienceSequenceMatcher(
None, merge_content.text(), content.text())
for i, j, n in seq.get_matching_blocks():
# this copies (origin, text) pairs across to the new
# content for any line that matches the last-checked
content._lines[j:j+n] = merge_content._lines[i:i+n]
# XXX: Robert says the following block is a workaround for a
# now-fixed bug and it can probably be deleted. -- mbp 20080618
if content._lines and content._lines[-1][1][-1] != '\n':
# The copied annotation was from a line without a trailing EOL,
# reinstate one for the content object, to ensure correct
line = content._lines[-1][1] + '\n'
content._lines[-1] = (content._lines[-1][0], line)
if delta_seq is None:
reference_content = self._get_content(parents[0], parent_texts)
new_texts = content.text()
old_texts = reference_content.text()
delta_seq = patiencediff.PatienceSequenceMatcher(
None, old_texts, new_texts)
return self._make_line_delta(delta_seq, content)
def _parse_record(self, version_id, data):
"""Parse an original format knit record.
These have the last element of the key only present in the stored data.
rec, record_contents = self._parse_record_unchecked(data)
self._check_header_version(rec, version_id)
return record_contents, rec[3]
def _parse_record_header(self, key, raw_data):
"""Parse a record header for consistency.
:return: the header and the decompressor stream.
as (stream, header_record)
df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
rec = self._check_header(key, df.readline())
except Exception, e:
raise KnitCorrupt(self,
"While reading {%s} got %s(%s)"
% (key, e.__class__.__name__, str(e)))
def _parse_record_unchecked(self, data):
# 4168 calls in 2880 217 internal
# 4168 calls to _parse_record_header in 2121
# 4168 calls to readlines in 330
df = GzipFile(mode='rb', fileobj=StringIO(data))
record_contents = df.readlines()
except Exception, e:
raise KnitCorrupt(self, "Corrupt compressed record %r, got %s(%s)" %
(data, e.__class__.__name__, str(e)))
header = record_contents.pop(0)
rec = self._split_header(header)
last_line = record_contents.pop()
if len(record_contents) != int(rec[2]):
raise KnitCorrupt(self,
'incorrect number of lines %s != %s'
' for version {%s} %s'
% (len(record_contents), int(rec[2]),
rec[1], record_contents))
if last_line != 'end %s\n' % rec[1]:
raise KnitCorrupt(self,
'unexpected version end line %r, wanted %r'
% (last_line, rec[1]))
return rec, record_contents
def _read_records_iter(self, records):
"""Read text records from data file and yield result.
The result will be returned in whatever is the fastest to read.
Not by the order requested. Also, multiple requests for the same
record will only yield 1 response.
:param records: A list of (key, access_memo) entries
:return: Yields (key, contents, digest) in the order
read, not the order requested
# XXX: This smells wrong, IO may not be getting ordered right.
needed_records = sorted(set(records), key=operator.itemgetter(1))
if not needed_records:
# The transport optimizes the fetching as well
# (ie, reads continuous ranges.)
raw_data = self._access.get_raw_records(
[index_memo for key, index_memo in needed_records])
for (key, index_memo), data in \
izip(iter(needed_records), raw_data):
content, digest = self._parse_record(key[-1], data)
yield key, content, digest
def _read_records_iter_raw(self, records):
"""Read text records from data file and yield raw data.
This unpacks enough of the text record to validate the id is
as expected, but that's all.
Each item the iterator yields is (key, bytes, sha1_of_full_text).
# setup an iterator of the external records:
# uses readv so nice and fast we hope.
# grab the disk data needed.
needed_offsets = [index_memo for key, index_memo
raw_records = self._access.get_raw_records(needed_offsets)
for key, index_memo in records:
data = raw_records.next()
# validate the header (note that we can only use the suffix in
# current knit records).
df, rec = self._parse_record_header(key, data)
yield key, data, rec[3]
def _record_to_data(self, key, digest, lines, dense_lines=None):
"""Convert key, digest, lines into a raw data block.
:param key: The key of the record. Currently keys are always serialised
using just the trailing component.
:param dense_lines: The bytes of lines but in a denser form. For
instance, if lines is a list of 1000 bytestrings each ending in \n,
dense_lines may be a list with one line in it, containing all the
1000 lines and their \n's. Using dense_lines if it is already
known is a win because the string join to create bytes in this
function spends less time resizing the final string.
:return: (len, a byte string containing the gzipped record data.)
# Note: using a string copy here increases memory pressure with e.g.
# ISO's, but it is about 3 seconds faster on a 1.2GHz Intel machine
# when doing the initial commit of a mozilla tree. RBC 20070921
bytes = ''.join(chain(
["version %s %d %s\n" % (key[-1],
dense_lines or lines,
["end %s\n" % key[-1]]))
if type(bytes) != str:
raise AssertionError(
'data must be plain bytes was %s' % type(bytes))
if lines and lines[-1][-1] != '\n':
raise ValueError('corrupt lines value %r' % lines)
compressed_bytes = bytes_to_gzip(bytes)
return len(compressed_bytes), compressed_bytes
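# Illustrative layout (a sketch, not from the original file): for key
# ('rev-1',), a two-line fulltext lowered by the annotated factory gzips the
# bytes
#   version rev-1 2 <sha1 of the full text>
#   rev-1 first line
#   rev-1 second line
#   end rev-1
# where each of the lines above ends with '\n' before compression.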
def _split_header(self, line):
raise KnitCorrupt(self,
'unexpected number of elements in record header')
"""See VersionedFiles.keys."""
if 'evil' in debug.debug_flags:
trace.mutter_callsite(2, "keys scales with size of history")
return self._index.keys()
class _KndxIndex(object):
"""Manages knit index files
The index is kept in memory and read on startup, to enable
fast lookups of revision information. The cursor of the index
file is always pointing to the end, making it easy to append
_cache is a cache for fast mapping from version id to a Index
_history is a cache for fast mapping from indexes to version ids.
The index data format is dictionary compressed when it comes to
parent references; an index entry may only have parents with a
lower index number. As a result, the index is topologically sorted.
Duplicate entries may be written to the index for a single version id;
if this is done then the latter one completely replaces the former:
this allows updates to correct version and parent information.
Note that the two entries may share the delta, and that successive
annotations and references MUST point to the first entry.
The index file on disc contains a header, followed by one line per knit
record. The same revision can be present in an index file more than once.
The first occurrence gets assigned a sequence number starting from 0.
The format of a single line is
REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
REVISION_ID is a utf8-encoded revision id
FLAGS is a comma separated list of flags about the record. Values include
no-eol, line-delta, fulltext.
BYTE_OFFSET is the ascii representation of the byte offset in the data file
that the compressed data starts at.
LENGTH is the ascii representation of the length of the data file.
PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
revision id already in the knit that is a parent of REVISION_ID.
The ' :' marker is the end of record marker.
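For example (an illustrative line, not taken from a real index), a
line-delta record for 'rev-2' whose parent 'rev-1' is already present at
sequence number 0 might read:

rev-2 no-eol,line-delta 2484 173 0 :

while a parent not yet present in the index would be written as '.rev-1'.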
When a write to the index file is interrupted, it will result in a line
that does not end in ' :'. If the ' :' is not present at the end of a line,
or at the end of the file, then the record that is missing it will be
ignored by the parser.
When writing new records to the index file, the data is preceded by '\n'
to ensure that records always start on new lines even if the last write was
interrupted. As a result it's normal for the last line in the index to be
missing a trailing newline. One can be added with no harmful effects.
:ivar _kndx_cache: dict from prefix to the old state of KnitIndex objects,
where prefix is e.g. the (fileid,) for .texts instances or () for
constant-mapped things like .revisions, and the old state is
tuple(history_vector, cache_dict). This is used to prevent having an
ABI change with the C extension that reads .kndx files.
HEADER = "# bzr knit index 8\n"
def __init__(self, transport, mapper, get_scope, allow_writes, is_locked):
"""Create a _KndxIndex on transport using mapper."""
self._transport = transport
self._mapper = mapper
self._get_scope = get_scope
self._allow_writes = allow_writes
self._is_locked = is_locked
self.has_graph = True
def add_records(self, records, random_id=False):
"""Add multiple records to the index.
:param records: a list of tuples:
(key, options, access_memo, parents).
:param random_id: If True the ids being added were randomly generated
and no check for existence will be performed.
for record in records:
path = self._mapper.map(key) + '.kndx'
path_keys = paths.setdefault(path, (prefix, []))
path_keys[1].append(record)
for path in sorted(paths):
prefix, path_keys = paths[path]
self._load_prefixes([prefix])
orig_history = self._kndx_cache[prefix][1][:]
orig_cache = self._kndx_cache[prefix][0].copy()
for key, options, (_, pos, size), parents in path_keys:
# kndx indices cannot be parentless.
line = "\n%s %s %s %s %s :" % (
key[-1], ','.join(options), pos, size,
self._dictionary_compress(parents))
if type(line) != str:
raise AssertionError(
'data must be utf8 was %s' % type(line))
self._cache_key(key, options, pos, size, parents)
if len(orig_history):
self._transport.append_bytes(path, ''.join(lines))
self._init_index(path, lines)
# If any problems happen, restore the original values and re-raise
self._kndx_cache[prefix] = (orig_cache, orig_history)
def _cache_key(self, key, options, pos, size, parent_keys):
"""Cache a version record in the history array and index cache.
This is inlined into _load_data for performance. KEEP IN SYNC.
(It saves 60ms, 25% of the __init__ overhead on local 4000 record
version_id = key[-1]
# last-element only for compatibility with the C load_data.
parents = tuple(parent[-1] for parent in parent_keys)
for parent in parent_keys:
if parent[:-1] != prefix:
raise ValueError("mismatched prefixes for %r, %r" % (
cache, history = self._kndx_cache[prefix]
# only want the _history index to reference the 1st index entry
if version_id not in cache:
index = len(history)
history.append(version_id)
index = cache[version_id][5]
cache[version_id] = (version_id,
def check_header(self, fp):
line = fp.readline()
# An empty file can actually be treated as though the file doesn't
raise errors.NoSuchFile(self)
if line != self.HEADER:
raise KnitHeaderError(badline=line, filename=self)
def _check_read(self):
if not self._is_locked():
raise errors.ObjectNotLocked(self)
if self._get_scope() != self._scope:
def _check_write_ok(self):
"""Assert if writes are not permitted."""
if not self._is_locked():
raise errors.ObjectNotLocked(self)
if self._get_scope() != self._scope:
if self._mode != 'w':
raise errors.ReadOnlyObjectDirtiedError(self)
1679
def get_build_details(self, keys):
1680
"""Get the method, index_memo and compression parent for keys.
1682
Ghosts are omitted from the result.
1684
:param keys: An iterable of keys.
1685
:return: A dict of key:(index_memo, compression_parent, parents,
1688
opaque structure to pass to read_records to extract the raw
1691
Content that this record is built upon, may be None
1693
Logical parents of this node
1695
extra information about the content which needs to be passed to
1696
Factory.parse_record
1698
prefixes = self._partition_keys(keys)
1699
parent_map = self.get_parent_map(keys)
1702
if key not in parent_map:
1704
method = self.get_method(key)
1705
parents = parent_map[key]
1706
if method == 'fulltext':
1707
compression_parent = None
1709
compression_parent = parents[0]
1710
noeol = 'no-eol' in self.get_options(key)
1711
index_memo = self.get_position(key)
1712
result[key] = (index_memo, compression_parent,
1713
parents, (method, noeol))
1716
def get_method(self, key):
1717
"""Return compression method of specified key."""
1718
options = self.get_options(key)
1719
if 'fulltext' in options:
1721
elif 'line-delta' in options:
1724
raise errors.KnitIndexUnknownMethod(self, options)
1726
def get_options(self, key):
1727
"""Return a list representing options.
1731
prefix, suffix = self._split_key(key)
1732
self._load_prefixes([prefix])
1733
return self._kndx_cache[prefix][0][suffix][1]
1735
def get_parent_map(self, keys):
1736
"""Get a map of the parents of keys.
1738
:param keys: The keys to look up parents for.
1739
:return: A mapping from keys to parents. Absent keys are absent from
1742
# Parse what we need to up front, this potentially trades off I/O
1743
# locality (.kndx and .knit in the same block group for the same file
1744
# id) for less checking in inner loops.
1745
prefixes = set(key[:-1] for key in keys)
1746
self._load_prefixes(prefixes)
1751
suffix_parents = self._kndx_cache[prefix][0][key[-1]][4]
1755
result[key] = tuple(prefix + (suffix,) for
1756
suffix in suffix_parents)
1759
def get_position(self, key):
1760
"""Return details needed to access the version.
1762
:return: a tuple (key, data position, size) to hand to the access
1763
logic to get the record.
1765
prefix, suffix = self._split_key(key)
1766
self._load_prefixes([prefix])
1767
entry = self._kndx_cache[prefix][0][suffix]
1768
return key, entry[2], entry[3]
1770
    def _init_index(self, path, extra_lines=[]):
        """Initialize an index."""
        sio = StringIO()
        sio.write(self.HEADER)
        sio.writelines(extra_lines)
        sio.seek(0)
        self._transport.put_file_non_atomic(path, sio,
                            create_parent_dir=True)
                           # self._create_parent_dir)
                           # mode=self._file_mode,
                           # dir_mode=self._dir_mode)

    def keys(self):
        """Get all the keys in the collection.

        The keys are not ordered.
        """
        result = set()
        # Identify all key prefixes.
        # XXX: A bit hacky, needs polish.
        if type(self._mapper) == ConstantMapper:
            prefixes = [()]
        else:
            relpaths = set()
            for quoted_relpath in self._transport.iter_files_recursive():
                path, ext = os.path.splitext(quoted_relpath)
                relpaths.add(path)
            prefixes = [self._mapper.unmap(path) for path in relpaths]
        self._load_prefixes(prefixes)
        for prefix in prefixes:
            for suffix in self._kndx_cache[prefix][1]:
                result.add(prefix + (suffix,))
        return result

    def _load_prefixes(self, prefixes):
        """Load the indices for prefixes."""
        self._check_read()
        for prefix in prefixes:
            if prefix not in self._kndx_cache:
                # the load_data interface writes to these variables.
                self._cache = {}
                self._history = []
                self._filename = prefix
                try:
                    path = self._mapper.map(prefix) + '.kndx'
                    fp = self._transport.get(path)
                    try:
                        # _load_data may raise NoSuchFile if the target knit is
                        # completely empty.
                        _load_data(self, fp)
                    finally:
                        fp.close()
                    self._kndx_cache[prefix] = (self._cache, self._history)
                    del self._cache
                    del self._filename
                    del self._history
                except NoSuchFile:
                    self._kndx_cache[prefix] = ({}, [])
                    if type(self._mapper) == ConstantMapper:
                        # preserve behaviour for revisions.kndx etc.
                        self._init_index(path)
                    del self._cache
                    del self._filename
                    del self._history

    def _partition_keys(self, keys):
        """Turn keys into a dict of prefix:suffix_list."""
        result = {}
        for key in keys:
            prefix_keys = result.setdefault(key[:-1], [])
            prefix_keys.append(key[-1])
        return result

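    # Illustrative sketch (hypothetical keys, not from the original source):
    # _partition_keys groups suffixes under their prefix, e.g.
    #   [('f1', 'rev-1'), ('f1', 'rev-2'), ('f2', 'rev-1')]
    # becomes
    #   {('f1',): ['rev-1', 'rev-2'], ('f2',): ['rev-1']}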
    def _dictionary_compress(self, keys):
        """Dictionary compress keys.

        :param keys: The keys to generate references to.
        :return: A string representation of keys. keys which are present are
            dictionary compressed, and others are emitted as fulltext with a
            '.' prefix.
        """
        if not keys:
            return ''
        result_list = []
        prefix = keys[0][:-1]
        cache = self._kndx_cache[prefix][0]
        for key in keys:
            if key[:-1] != prefix:
                # kndx indices cannot refer across partitioned storage.
                raise ValueError("mismatched prefixes for %r" % keys)
            if key[-1] in cache:
                # -- inlined lookup() --
                result_list.append(str(cache[key[-1]][5]))
                # -- end lookup () --
            else:
                result_list.append('.' + key[-1])
        return ' '.join(result_list)

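    # Illustrative sketch (hypothetical keys, not from the original source):
    # if 'rev-1' is in the cache with dictionary index 0 and 'rev-3' is not
    # present, then
    #   self._dictionary_compress([('f1', 'rev-1'), ('f1', 'rev-3')])
    # would return the reference string '0 .rev-3'.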
    def _reset_cache(self):
        # Possibly this should be an LRU cache. A dictionary from key_prefix to
        # (cache_dict, history_vector) for parsed kndx files.
        self._kndx_cache = {}
        self._scope = self._get_scope()
        allow_writes = self._allow_writes()
        if allow_writes:
            self._mode = 'w'
        else:
            self._mode = 'r'

    def _split_key(self, key):
        """Split key into a prefix and suffix."""
        return key[:-1], key[-1]


class _KnitGraphIndex(object):
    """A KnitVersionedFiles index layered on GraphIndex."""

    def __init__(self, graph_index, is_locked, deltas=False, parents=True,
        add_callback=None):
        """Construct a KnitGraphIndex on a graph_index.

        :param graph_index: An implementation of bzrlib.index.GraphIndex.
        :param is_locked: A callback to check whether the object should answer
            queries; returns True if the index is locked and thus usable.
        :param deltas: Allow delta-compressed records.
        :param parents: If True, record knits parents, if not do not record
            parents.
        :param add_callback: If not None, allow additions to the index and call
            this callback with a list of added GraphIndex nodes:
            [(node, value, node_refs), ...]
        """
        self._add_callback = add_callback
        self._graph_index = graph_index
        self._deltas = deltas
        self._parents = parents
        if deltas and not parents:
            # XXX: TODO: Delta tree and parent graph should be conceptually
            # separate.
            raise KnitCorrupt(self, "Cannot do delta compression without "
                "parent tracking.")
        self.has_graph = parents
        self._is_locked = is_locked

    def add_records(self, records, random_id=False):
        """Add multiple records to the index.

        This function does not insert data into the Immutable GraphIndex
        backing the KnitGraphIndex, instead it prepares data for insertion by
        the caller and checks that it is safe to insert then calls
        self._add_callback with the prepared GraphIndex nodes.

        :param records: a list of tuples:
            (key, options, access_memo, parents).
        :param random_id: If True the ids being added were randomly generated
            and no check for existence will be performed.
        """
        if not self._add_callback:
            raise errors.ReadOnlyError(self)
        # we hope there are no repositories with inconsistent parentage
        # anymore.

        keys = {}
        for (key, options, access_memo, parents) in records:
            if self._parents:
                parents = tuple(parents)
            index, pos, size = access_memo
            if 'no-eol' in options:
                value = 'N'
            else:
                value = ' '
            value += "%d %d" % (pos, size)
            if not self._deltas:
                if 'line-delta' in options:
                    raise KnitCorrupt(self, "attempt to add line-delta in non-delta knit")
            if self._parents:
                if self._deltas:
                    if 'line-delta' in options:
                        node_refs = (parents, (parents[0],))
                    else:
                        node_refs = (parents, ())
                else:
                    node_refs = (parents, )
            else:
                if parents:
                    raise KnitCorrupt(self, "attempt to add node with parents "
                        "in parentless index.")
                node_refs = ()
            keys[key] = (value, node_refs)
        # check for dups
        if not random_id:
            present_nodes = self._get_entries(keys)
            for (index, key, value, node_refs) in present_nodes:
                if (value[0] != keys[key][0][0] or
                    node_refs != keys[key][1]):
                    raise KnitCorrupt(self, "inconsistent details in add_records"
                        ": %s %s" % ((value, node_refs), keys[key]))
                del keys[key]
        result = []
        if self._parents:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value, node_refs))
        else:
            for key, (value, node_refs) in keys.iteritems():
                result.append((key, value))
        self._add_callback(result)

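    # A minimal sketch of the node layout (hypothetical record, not from the
    # original source): a no-eol line-delta of 567 bytes at offset 1234, with
    # a single parent, added to a delta-compressing index with parents becomes
    # the GraphIndex value 'N1234 567' with node_refs
    #   ((parent_key,), (parent_key,))
    # where the second reference list names the compression parent.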
    def _check_read(self):
        """Raise if reads are not permitted."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

    def _check_write_ok(self):
        """Assert if writes are not permitted."""
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)

    def _compression_parent(self, an_entry):
        # return the key that an_entry is compressed against, or None
        # Grab the second parent list (as deltas implies parents currently)
        compression_parents = an_entry[3][1]
        if not compression_parents:
            return None
        if len(compression_parents) != 1:
            raise AssertionError(
                "Too many compression parents: %r" % compression_parents)
        return compression_parents[0]

    def get_build_details(self, keys):
        """Get the method, index_memo and compression parent for version_ids.

        Ghosts are omitted from the result.

        :param keys: An iterable of keys.
        :return: A dict of key:
            (index_memo, compression_parent, parents, record_details).
            index_memo
                opaque structure to pass to read_records to extract the raw
                data
            compression_parent
                Content that this record is built upon, may be None
            parents
                Logical parents of this node
            record_details
                extra information about the content which needs to be passed to
                Factory.parse_record
        """
        self._check_read()
        result = {}
        entries = self._get_entries(keys, False)
        for entry in entries:
            key = entry[1]
            if not self._parents:
                parents = ()
            else:
                parents = entry[3][0]
            if not self._deltas:
                compression_parent_key = None
            else:
                compression_parent_key = self._compression_parent(entry)
            noeol = (entry[2][0] == 'N')
            if compression_parent_key:
                method = 'line-delta'
            else:
                method = 'fulltext'
            result[key] = (self._node_to_position(entry),
                           compression_parent_key, parents,
                           (method, noeol))
        return result

    def _get_entries(self, keys, check_present=False):
        """Get the entries for keys.

        :param keys: An iterable of index key tuples.
        """
        keys = set(keys)
        found_keys = set()
        if self._parents:
            for node in self._graph_index.iter_entries(keys):
                yield node
                found_keys.add(node[1])
        else:
            # adapt parentless index to the rest of the code.
            for node in self._graph_index.iter_entries(keys):
                yield node[0], node[1], node[2], ()
                found_keys.add(node[1])
        if check_present:
            missing_keys = keys.difference(found_keys)
            if missing_keys:
                raise RevisionNotPresent(missing_keys.pop(), self)

    def get_method(self, key):
        """Return compression method of specified key."""
        return self._get_method(self._get_node(key))

    def _get_method(self, node):
        if not self._deltas:
            return 'fulltext'
        if self._compression_parent(node):
            return 'line-delta'
        else:
            return 'fulltext'

    def _get_node(self, key):
        try:
            return list(self._get_entries([key]))[0]
        except IndexError:
            raise RevisionNotPresent(key, self)

    def get_options(self, key):
        """Return a list representing options.

        e.g. ['foo', 'bar']
        """
        node = self._get_node(key)
        options = [self._get_method(node)]
        if node[2][0] == 'N':
            options.append('no-eol')
        return options

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        self._check_read()
        nodes = self._get_entries(keys)
        result = {}
        if self._parents:
            for node in nodes:
                result[node[1]] = node[3][0]
        else:
            for node in nodes:
                result[node[1]] = None
        return result

    def get_position(self, key):
        """Return details needed to access the version.

        :return: a tuple (index, data position, size) to hand to the access
            logic to get the record.
        """
        node = self._get_node(key)
        return self._node_to_position(node)

    def keys(self):
        """Get all the keys in the collection.

        The keys are not ordered.
        """
        self._check_read()
        return [node[1] for node in self._graph_index.iter_all_entries()]

    def _node_to_position(self, node):
        """Convert an index value to position details."""
        bits = node[2][1:].split(' ')
        return node[0], int(bits[0]), int(bits[1])

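    # Illustrative sketch (hypothetical value, not from the original source):
    # a node whose value string is 'N1234 567' (as produced by add_records)
    # parses to (index, 1234, 567), where index is the GraphIndex object the
    # node was read from.
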
class _KnitKeyAccess(object):
    """Access to records in .knit files."""

    def __init__(self, transport, mapper):
        """Create a _KnitKeyAccess with transport and mapper.

        :param transport: The transport the access object is rooted at.
        :param mapper: The mapper used to map keys to .knit files.
        """
        self._transport = transport
        self._mapper = mapper

    def add_raw_records(self, key_sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data item.

        :param key_sizes: An iterable of tuples containing the key and size of
            each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is an
            opaque index memo. For _KnitKeyAccess the memo is (key, pos,
            length), where the key is the record key.
        """
        if type(raw_data) != str:
            raise AssertionError(
                'data must be plain bytes was %s' % type(raw_data))
        result = []
        offset = 0
        # TODO: This can be tuned for writing to sftp and other servers where
        # append() is relatively expensive by grouping the writes to each key
        # prefix.
        for key, size in key_sizes:
            path = self._mapper.map(key)
            try:
                base = self._transport.append_bytes(path + '.knit',
                    raw_data[offset:offset+size])
            except errors.NoSuchFile:
                self._transport.mkdir(osutils.dirname(path))
                base = self._transport.append_bytes(path + '.knit',
                    raw_data[offset:offset+size])
            offset += size
            result.append((key, base, size))
        return result

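    # Illustrative sketch (hypothetical keys and sizes, not from the original
    # source): spooling a 10-byte and a 20-byte record for keys
    # ('f1', 'rev-1') and ('f2', 'rev-1') into empty files would return memos
    # like [(('f1', 'rev-1'), 0, 10), (('f2', 'rev-1'), 0, 20)], the middle
    # element being the offset at which the bytes were appended.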
    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for the records.

        :param memos_for_retrieval: An iterable containing the access memo for
            retrieving the bytes.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group into same-index request to minimise readv's issued.
        request_lists = []
        current_prefix = None
        for (key, offset, length) in memos_for_retrieval:
            if current_prefix == key[:-1]:
                current_list.append((offset, length))
            else:
                if current_prefix is not None:
                    request_lists.append((current_prefix, current_list))
                current_prefix = key[:-1]
                current_list = [(offset, length)]
        # handle the last entry
        if current_prefix is not None:
            request_lists.append((current_prefix, current_list))
        for prefix, read_vector in request_lists:
            path = self._mapper.map(prefix) + '.knit'
            for pos, data in self._transport.readv(path, read_vector):
                yield data

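    # Illustrative sketch (hypothetical memos, not from the original source):
    # consecutive memos that share a prefix are folded into a single readv
    # call, e.g.
    #   [(('f1', 'a'), 0, 10), (('f1', 'b'), 10, 20), (('f2', 'a'), 0, 5)]
    # produces request_lists of
    #   [(('f1',), [(0, 10), (10, 20)]), (('f2',), [(0, 5)])]
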
class _DirectPackAccess(object):
    """Access to data in one or more packs with less translation."""

    def __init__(self, index_to_packs):
        """Create a _DirectPackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        """
        self._container_writer = None
        self._write_index = None
        self._indices = index_to_packs

    def add_raw_records(self, key_sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data item.

        :param key_sizes: An iterable of tuples containing the key and size of
            each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is an
            opaque index memo. For _DirectPackAccess the memo is (index, pos,
            length), where the index field is the write_index object supplied
            to the PackAccess object.
        """
        if type(raw_data) != str:
            raise AssertionError(
                'data must be plain bytes was %s' % type(raw_data))
        result = []
        offset = 0
        for key, size in key_sizes:
            p_offset, p_length = self._container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self._write_index, p_offset, p_length))
        return result

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for the records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group into same-index requests
        request_lists = []
        current_index = None
        for (index, offset, length) in memos_for_retrieval:
            if current_index == index:
                current_list.append((offset, length))
            else:
                if current_index is not None:
                    request_lists.append((current_index, current_list))
                current_index = index
                current_list = [(offset, length)]
        # handle the last entry
        if current_index is not None:
            request_lists.append((current_index, current_list))
        for index, offsets in request_lists:
            transport, path = self._indices[index]
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)

    def set_writer(self, writer, index, transport_packname):
        """Set a writer to use for adding data."""
        if index is not None:
            self._indices[index] = transport_packname
        self._container_writer = writer
        self._write_index = index


# Deprecated, use PatienceSequenceMatcher instead
KnitSequenceMatcher = patiencediff.PatienceSequenceMatcher


def annotate_knit(knit, revision_id):
    """Annotate a knit with no cached annotations.

    This implementation is for knits with no cached annotations.
    It will work for knits with cached annotations, but this is not
    recommended.
    """
    annotator = _KnitAnnotator(knit)
    return iter(annotator.annotate(revision_id))

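# A usage sketch (hypothetical variables, not from the original source): given
# a knit versioned file 'vf' holding the key 'key', the annotation can be
# consumed as:
#   for origin, line in annotate_knit(vf, key):
#       print origin, line
# where each line of the fulltext is paired with the key that introduced it.
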
class _KnitAnnotator(object):
    """Build up the annotations for a text."""

    def __init__(self, knit):
        self._knit = knit

        # Content objects, which differ from fulltexts because of how final
        # newlines are treated by knits. The content objects here will always
        # have a final newline.
        self._fulltext_contents = {}

        # Annotated lines of specific revisions
        self._annotated_lines = {}

        # Track the raw data for nodes that we could not process yet.
        # This maps the revision_id of the base to a list of children that will
        # be annotated from it.
        self._pending_children = {}

        # Nodes which cannot be extracted
        self._ghosts = set()

        # Track how many children this node has, so we know if we need to keep
        # it
        self._annotate_children = {}

        self._compression_children = {}

        self._all_build_details = {}
        # The children => parent revision_id graph
        self._revision_id_graph = {}

        self._heads_provider = None

        self._nodes_to_keep_annotations = set()
        self._generations_until_keep = 100

    def set_generations_until_keep(self, value):
        """Set the number of generations before caching a node.

        Setting this to -1 will cache every merge node, setting this higher
        will cache fewer nodes.
        """
        self._generations_until_keep = value

    def _add_fulltext_content(self, revision_id, content_obj):
        self._fulltext_contents[revision_id] = content_obj
        # TODO: jam 20080305 It might be good to check the sha1digest here
        return content_obj.text()

    def _check_parents(self, child, nodes_to_annotate):
        """Check if all parents have been processed.

        :param child: A tuple of (rev_id, parents, raw_content)
        :param nodes_to_annotate: If child is ready, add it to
            nodes_to_annotate, otherwise put it back in self._pending_children
        """
        for parent_id in child[1]:
            if (parent_id not in self._annotated_lines):
                # This parent is not yet available, so the child is not ready
                # to be processed.
                self._pending_children.setdefault(parent_id,
                                                  []).append(child)
                break
        else:
            # This one is ready to be processed
            nodes_to_annotate.append(child)

    def _add_annotation(self, revision_id, fulltext, parent_ids,
                        left_matching_blocks=None):
        """Add an annotation entry.

        All parents should already have been annotated.
        :return: A list of children that now have their parents satisfied.
        """
        a = self._annotated_lines
        annotated_parent_lines = [a[p] for p in parent_ids]
        annotated_lines = list(annotate.reannotate(annotated_parent_lines,
            fulltext, revision_id, left_matching_blocks,
            heads_provider=self._get_heads_provider()))
        self._annotated_lines[revision_id] = annotated_lines
        for p in parent_ids:
            ann_children = self._annotate_children[p]
            ann_children.remove(revision_id)
            if (not ann_children
                and p not in self._nodes_to_keep_annotations):
                del self._annotated_lines[p]
                del self._all_build_details[p]
                if p in self._fulltext_contents:
                    del self._fulltext_contents[p]
        # Now that we've added this one, see if there are any pending
        # deltas to be done, certainly this parent is finished
        nodes_to_annotate = []
        for child in self._pending_children.pop(revision_id, []):
            self._check_parents(child, nodes_to_annotate)
        return nodes_to_annotate

    def _get_build_graph(self, key):
        """Get the graphs for building texts and annotations.

        The data you need for creating a full text may be different than the
        data you need to annotate that text. (At a minimum, you need both
        parents to create an annotation, but only need 1 parent to generate the
        fulltext.)

        :return: A list of (key, index_memo) records, suitable for
            passing to read_records_iter to start reading in the raw data from
            the pack file.
        """
        if key in self._annotated_lines:
            # Nothing to do
            return []
        pending = set([key])
        records = []
        generation = 0
        kept_generation = 0
        while pending:
            # get all pending nodes
            generation += 1
            this_iteration = pending
            build_details = self._knit._index.get_build_details(this_iteration)
            self._all_build_details.update(build_details)
            # new_nodes = self._knit._index._get_entries(this_iteration)
            pending = set()
            for key, details in build_details.iteritems():
                (index_memo, compression_parent, parents,
                 record_details) = details
                self._revision_id_graph[key] = parents
                records.append((key, index_memo))
                # Do we actually need to check _annotated_lines?
                pending.update(p for p in parents
                               if p not in self._all_build_details)
                if compression_parent:
                    self._compression_children.setdefault(compression_parent,
                        []).append(key)
                if parents:
                    for parent in parents:
                        self._annotate_children.setdefault(parent,
                            set()).add(key)
                    num_gens = generation - kept_generation
                    if ((num_gens >= self._generations_until_keep)
                        and len(parents) > 1):
                        kept_generation = generation
                        self._nodes_to_keep_annotations.add(key)

            missing_versions = this_iteration.difference(build_details.keys())
            self._ghosts.update(missing_versions)
            for missing_version in missing_versions:
                # add a key, no parents
                self._revision_id_graph[missing_version] = ()
                pending.discard(missing_version) # don't look for it
        if self._ghosts.intersection(self._compression_children):
            raise AssertionError(
                "We cannot have nodes which have a ghost compression parent:\n"
                "ghosts: %r\n"
                "compression children: %r"
                % (self._ghosts, self._compression_children))
        # Cleanout anything that depends on a ghost so that we don't wait for
        # the ghost to show up
        for node in self._ghosts:
            if node in self._annotate_children:
                # We won't be building this node
                del self._annotate_children[node]
        # Generally we will want to read the records in reverse order, because
        # we find the parent nodes after the children
        records.reverse()
        return records

    def _annotate_records(self, records):
        """Build the annotations for the listed records."""
        # We iterate in the order read, rather than a strict order requested
        # However, process what we can, and put off to the side things that
        # still need parents, cleaning them up when those parents are
        # processed.
        for (rev_id, record,
             digest) in self._knit._read_records_iter(records):
            if rev_id in self._annotated_lines:
                continue
            parent_ids = self._revision_id_graph[rev_id]
            parent_ids = [p for p in parent_ids if p not in self._ghosts]
            details = self._all_build_details[rev_id]
            (index_memo, compression_parent, parents,
             record_details) = details
            nodes_to_annotate = []
            # TODO: Remove the punning between compression parents, and
            #       parent_ids, we should be able to do this without assuming
            #       the fulltext parent is also the compression parent
            if len(parent_ids) == 0:
                # There are no parents for this node, so just add it
                # TODO: This probably needs to be decoupled
                fulltext_content, delta = self._knit._factory.parse_record(
                    rev_id, record, record_details, None)
                fulltext = self._add_fulltext_content(rev_id, fulltext_content)
                nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
                    parent_ids, left_matching_blocks=None))
            else:
                child = (rev_id, parent_ids, record)
                # Check if all the parents are present
                self._check_parents(child, nodes_to_annotate)
            while nodes_to_annotate:
                # Should we use a queue here instead of a stack?
                (rev_id, parent_ids, record) = nodes_to_annotate.pop()
                (index_memo, compression_parent, parents,
                 record_details) = self._all_build_details[rev_id]
                if compression_parent is not None:
                    comp_children = self._compression_children[compression_parent]
                    if rev_id not in comp_children:
                        raise AssertionError("%r not in compression children %r"
                            % (rev_id, comp_children))
                    # If there is only 1 child, it is safe to reuse this
                    # content
                    reuse_content = (len(comp_children) == 1
                        and compression_parent not in
                            self._nodes_to_keep_annotations)
                    if reuse_content:
                        # Remove it from the cache since it will be changing
                        parent_fulltext_content = self._fulltext_contents.pop(compression_parent)
                        # Make sure to copy the fulltext since it might be
                        # modified
                        parent_fulltext = list(parent_fulltext_content.text())
                    else:
                        parent_fulltext_content = self._fulltext_contents[compression_parent]
                        parent_fulltext = parent_fulltext_content.text()
                    comp_children.remove(rev_id)
                    fulltext_content, delta = self._knit._factory.parse_record(
                        rev_id, record, record_details,
                        parent_fulltext_content,
                        copy_base_content=(not reuse_content))
                    fulltext = self._add_fulltext_content(rev_id,
                        fulltext_content)
                    blocks = KnitContent.get_line_delta_blocks(delta,
                            parent_fulltext, fulltext)
                else:
                    fulltext_content = self._knit._factory.parse_fulltext(
                        record, rev_id)
                    fulltext = self._add_fulltext_content(rev_id,
                        fulltext_content)
                    blocks = None
                nodes_to_annotate.extend(
                    self._add_annotation(rev_id, fulltext, parent_ids,
                        left_matching_blocks=blocks))

    def _get_heads_provider(self):
        """Create a heads provider for resolving ancestry issues."""
        if self._heads_provider is not None:
            return self._heads_provider
        parent_provider = _mod_graph.DictParentsProvider(
            self._revision_id_graph)
        graph_obj = _mod_graph.Graph(parent_provider)
        head_cache = _mod_graph.FrozenHeadsCache(graph_obj)
        self._heads_provider = head_cache
        return head_cache

    def annotate(self, key):
        """Return the annotated fulltext at the given key.

        :param key: The key to annotate.
        """
        records = self._get_build_graph(key)
        if key in self._ghosts:
            raise errors.RevisionNotPresent(key, self._knit)
        self._annotate_records(records)
        return self._annotated_lines[key]


try:
    from bzrlib._knit_load_data_c import _load_data_c as _load_data
except ImportError:
    from bzrlib._knit_load_data_py import _load_data_py as _load_data