17
17
"""Core compression logic for compressing streams of related files."""
19
from __future__ import absolute_import
24
from ..lazy_import import lazy_import
25
lazy_import(globals(), """
37
from bzrlib.btree_index import BTreeBuilder
38
from bzrlib.lru_cache import LRUSizeCache
39
from bzrlib.tsort import topo_sort
40
from bzrlib.versionedfile import (
35
from breezy.bzr import (
41
from breezy.i18n import gettext
47
from .btree_index import BTreeBuilder
48
from ..lru_cache import LRUSizeCache
49
from ..sixish import (
55
from .versionedfile import (
42
58
AbsentContentFactory,
43
59
ChunkedContentFactory,
44
60
FulltextContentFactory,
61
VersionedFilesWithFallbacks,
48
64
# Minimum number of uncompressed bytes to try fetch at once when retrieving
49
65
# groupcompress blocks.
52
_USE_LZMA = False and (pylzma is not None)
68
# osutils.sha_string(b'')
69
_null_sha1 = b'da39a3ee5e6b4b0d3255bfef95601890afd80709'
54
# osutils.sha_string('')
55
_null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
57
72
def sort_gc_optimal(parent_map):
58
73
"""Sort and group the keys in parent_map into groupcompress order.
79
94
for prefix in sorted(per_prefix_map):
80
present_keys.extend(reversed(topo_sort(per_prefix_map[prefix])))
95
present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
81
96
return present_keys
99
class DecompressCorruption(errors.BzrError):
101
_fmt = "Corruption while decompressing repository file%(orig_error)s"
103
def __init__(self, orig_error=None):
104
if orig_error is not None:
105
self.orig_error = ", %s" % (orig_error,)
108
errors.BzrError.__init__(self)
84
111
# The max zlib window size is 32kB, so if we set 'max_size' output of the
85
112
# decompressor to the requested bytes + 32kB, then we should guarantee
86
113
# num_bytes coming out.
87
_ZLIB_DECOMP_WINDOW = 32*1024
114
_ZLIB_DECOMP_WINDOW = 32 * 1024
89
117
class GroupCompressBlock(object):
90
118
"""An object which maintains the internal structure of the compressed data.
132
160
# Expand the content if required
133
161
if self._content is None:
134
162
if self._content_chunks is not None:
135
self._content = ''.join(self._content_chunks)
163
self._content = b''.join(self._content_chunks)
136
164
self._content_chunks = None
137
165
if self._content is None:
138
if self._z_content is None:
166
# We join self._z_content_chunks here, because if we are
167
# decompressing, then it is *very* likely that we have a single
169
if self._z_content_chunks is None:
139
170
raise AssertionError('No content to decompress')
140
if self._z_content == '':
171
z_content = b''.join(self._z_content_chunks)
142
174
elif self._compressor_name == 'lzma':
143
175
# We don't do partial lzma decomp yet
144
self._content = pylzma.decompress(self._z_content)
177
self._content = pylzma.decompress(z_content)
145
178
elif self._compressor_name == 'zlib':
146
179
# Start a zlib decompressor
147
180
if num_bytes * 4 > self._content_length * 3:
148
181
# If we are requesting more that 3/4ths of the content,
149
182
# just extract the whole thing in a single pass
150
183
num_bytes = self._content_length
151
self._content = zlib.decompress(self._z_content)
184
self._content = zlib.decompress(z_content)
153
186
self._z_content_decompressor = zlib.decompressobj()
154
187
# Seed the decompressor with the uncompressed bytes, so
155
188
# that the rest of the code is simplified
156
189
self._content = self._z_content_decompressor.decompress(
157
self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)
190
z_content, num_bytes + _ZLIB_DECOMP_WINDOW)
158
191
if not self._z_content_decompressor.unconsumed_tail:
159
192
self._z_content_decompressor = None
197
230
# At present, we have 2 integers for the compressed and uncompressed
198
231
# content. In base10 (ascii) 14 bytes can represent > 1TB, so to avoid
199
232
# checking too far, cap the search to 14 bytes.
200
pos2 = bytes.index('\n', pos, pos + 14)
201
self._z_content_length = int(bytes[pos:pos2])
203
pos2 = bytes.index('\n', pos, pos + 14)
204
self._content_length = int(bytes[pos:pos2])
206
if len(bytes) != (pos + self._z_content_length):
233
pos2 = data.index(b'\n', pos, pos + 14)
234
self._z_content_length = int(data[pos:pos2])
236
pos2 = data.index(b'\n', pos, pos + 14)
237
self._content_length = int(data[pos:pos2])
239
if len(data) != (pos + self._z_content_length):
207
240
# XXX: Define some GCCorrupt error ?
208
241
raise AssertionError('Invalid bytes: (%d) != %d + %d' %
209
(len(bytes), pos, self._z_content_length))
210
self._z_content = bytes[pos:]
242
(len(data), pos, self._z_content_length))
243
self._z_content_chunks = (data[pos:],)
246
def _z_content(self):
247
"""Return z_content_chunks as a simple string.
249
Meant only to be used by the test suite.
251
if self._z_content_chunks is not None:
252
return b''.join(self._z_content_chunks)
213
256
def from_bytes(cls, bytes):
215
if bytes[:6] not in cls.GCB_KNOWN_HEADERS:
259
if header not in cls.GCB_KNOWN_HEADERS:
216
260
raise ValueError('bytes did not start with any of %r'
217
261
% (cls.GCB_KNOWN_HEADERS,))
218
# XXX: why not testing the whole header ?
262
if header == cls.GCB_HEADER:
220
263
out._compressor_name = 'zlib'
221
elif bytes[4] == 'l':
264
elif header == cls.GCB_LZ_HEADER:
222
265
out._compressor_name = 'lzma'
224
raise ValueError('unknown compressor: %r' % (bytes,))
267
raise ValueError('unknown compressor: %r' % (header,))
225
268
out._parse_bytes(bytes, 6)
233
276
:return: The bytes for the content
235
278
if start == end == 0:
237
280
self._ensure_content(end)
238
281
# The bytes are 'f' or 'd' for the type, then a variable-length
239
282
# base128 integer for the content size, then the actual content
240
283
# We know that the variable-length integer won't be longer than 5
241
284
# bytes (it takes 5 bytes to encode 2^32)
242
c = self._content[start]
285
c = self._content[start:start + 1]
244
287
type = 'fulltext'
247
290
raise ValueError('Unknown content control code: %s'
250
293
content_len, len_len = decode_base128_int(
251
self._content[start + 1:start + 6])
294
self._content[start + 1:start + 6])
252
295
content_start = start + 1 + len_len
253
296
if end != content_start + content_len:
254
297
raise ValueError('end != len according to field header'
255
' %s != %s' % (end, content_start + content_len))
257
bytes = self._content[content_start:end]
259
bytes = apply_delta_to_source(self._content, content_start, end)
298
' %s != %s' % (end, content_start + content_len))
300
return [self._content[content_start:end]]
301
# Must be type delta as checked above
302
return [apply_delta_to_source(self._content, content_start, end)]
262
304
def set_chunked_content(self, content_chunks, length):
263
305
"""Set the content of this block to the given chunks."""
269
311
self._content_length = length
270
312
self._content_chunks = content_chunks
271
313
self._content = None
272
self._z_content = None
314
self._z_content_chunks = None
274
316
def set_content(self, content):
275
317
"""Set the content of this block."""
276
318
self._content_length = len(content)
277
319
self._content = content
278
self._z_content = None
280
def _create_z_content_using_lzma(self):
281
if self._content_chunks is not None:
282
self._content = ''.join(self._content_chunks)
283
self._content_chunks = None
284
if self._content is None:
285
raise AssertionError('Nothing to compress')
286
self._z_content = pylzma.compress(self._content)
287
self._z_content_length = len(self._z_content)
289
def _create_z_content_from_chunks(self):
320
self._z_content_chunks = None
322
def _create_z_content_from_chunks(self, chunks):
290
323
compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)
291
compressed_chunks = map(compressor.compress, self._content_chunks)
324
# Peak in this point is 1 fulltext, 1 compressed text, + zlib overhead
325
# (measured peak is maybe 30MB over the above...)
326
compressed_chunks = list(map(compressor.compress, chunks))
292
327
compressed_chunks.append(compressor.flush())
293
self._z_content = ''.join(compressed_chunks)
294
self._z_content_length = len(self._z_content)
328
# Ignore empty chunks
329
self._z_content_chunks = [c for c in compressed_chunks if c]
330
self._z_content_length = sum(map(len, self._z_content_chunks))
296
332
def _create_z_content(self):
297
if self._z_content is not None:
300
self._create_z_content_using_lzma()
333
if self._z_content_chunks is not None:
302
335
if self._content_chunks is not None:
303
self._create_z_content_from_chunks()
305
self._z_content = zlib.compress(self._content)
306
self._z_content_length = len(self._z_content)
336
chunks = self._content_chunks
338
chunks = (self._content,)
339
self._create_z_content_from_chunks(chunks)
342
"""Create the byte stream as a series of 'chunks'"""
343
self._create_z_content()
344
header = self.GCB_HEADER
345
chunks = [b'%s%d\n%d\n'
346
% (header, self._z_content_length, self._content_length),
348
chunks.extend(self._z_content_chunks)
349
total_len = sum(map(len, chunks))
350
return total_len, chunks
308
352
def to_bytes(self):
309
353
"""Encode the information into a byte stream."""
310
self._create_z_content()
312
header = self.GCB_LZ_HEADER
314
header = self.GCB_HEADER
316
'%d\n%d\n' % (self._z_content_length, self._content_length),
319
return ''.join(chunks)
354
total_len, chunks = self.to_chunks()
355
return b''.join(chunks)
321
357
def _dump(self, include_text=False):
322
358
"""Take this block, and spit out a human-readable structure.
334
370
while pos < self._content_length:
335
kind = self._content[pos]
371
kind = self._content[pos:pos + 1]
337
if kind not in ('f', 'd'):
373
if kind not in (b'f', b'd'):
338
374
raise ValueError('invalid kind character: %r' % (kind,))
339
375
content_len, len_len = decode_base128_int(
340
self._content[pos:pos + 5])
376
self._content[pos:pos + 5])
342
378
if content_len + pos > self._content_length:
343
379
raise ValueError('invalid content_len %d for record @ pos %d'
344
380
% (content_len, pos - len_len - 1))
345
if kind == 'f': # Fulltext
381
if kind == b'f': # Fulltext
347
text = self._content[pos:pos+content_len]
348
result.append(('f', content_len, text))
383
text = self._content[pos:pos + content_len]
384
result.append((b'f', content_len, text))
350
result.append(('f', content_len))
351
elif kind == 'd': # Delta
352
delta_content = self._content[pos:pos+content_len]
386
result.append((b'f', content_len))
387
elif kind == b'd': # Delta
388
delta_content = self._content[pos:pos + content_len]
354
390
# The first entry in a delta is the decompressed length
355
391
decomp_len, delta_pos = decode_base128_int(delta_content)
356
result.append(('d', content_len, decomp_len, delta_info))
392
result.append((b'd', content_len, decomp_len, delta_info))
358
394
while delta_pos < content_len:
359
c = ord(delta_content[delta_pos])
395
c = indexbytes(delta_content, delta_pos)
363
399
delta_pos) = decode_copy_instruction(delta_content, c,
366
text = self._content[offset:offset+length]
367
delta_info.append(('c', offset, length, text))
402
text = self._content[offset:offset + length]
403
delta_info.append((b'c', offset, length, text))
369
delta_info.append(('c', offset, length))
405
delta_info.append((b'c', offset, length))
370
406
measured_len += length
373
txt = delta_content[delta_pos:delta_pos+c]
409
txt = delta_content[delta_pos:delta_pos + c]
376
delta_info.append(('i', c, txt))
412
delta_info.append((b'i', c, txt))
377
413
measured_len += c
379
415
if delta_pos != content_len:
422
459
def __repr__(self):
423
460
return '%s(%s, first=%s)' % (self.__class__.__name__,
424
self.key, self._first)
461
self.key, self._first)
463
def _extract_bytes(self):
464
# Grab and cache the raw bytes for this entry
465
# and break the ref-cycle with _manager since we don't need it
468
self._manager._prepare_for_extract()
469
except zlib.error as value:
470
raise DecompressCorruption("zlib: " + str(value))
471
block = self._manager._block
472
self._chunks = block.extract(self.key, self._start, self._end)
473
# There are code paths that first extract as fulltext, and then
474
# extract as storage_kind (smart fetch). So we don't break the
475
# refcycle here, but instead in manager.get_record_stream()
426
477
def get_bytes_as(self, storage_kind):
427
478
if storage_kind == self.storage_kind:
429
480
# wire bytes, something...
430
481
return self._manager._wire_bytes()
433
if storage_kind in ('fulltext', 'chunked'):
434
if self._bytes is None:
435
# Grab and cache the raw bytes for this entry
436
# and break the ref-cycle with _manager since we don't need it
438
self._manager._prepare_for_extract()
439
block = self._manager._block
440
self._bytes = block.extract(self.key, self._start, self._end)
441
# There are code paths that first extract as fulltext, and then
442
# extract as storage_kind (smart fetch). So we don't break the
443
# refcycle here, but instead in manager.get_record_stream()
484
if storage_kind in ('fulltext', 'chunked', 'lines'):
485
if self._chunks is None:
486
self._extract_bytes()
444
487
if storage_kind == 'fulltext':
488
return b''.join(self._chunks)
489
elif storage_kind == 'chunked':
492
return osutils.chunks_to_lines(self._chunks)
493
raise errors.UnavailableRepresentation(self.key, storage_kind,
496
def iter_bytes_as(self, storage_kind):
497
if self._chunks is None:
498
self._extract_bytes()
499
if storage_kind == 'chunked':
500
return iter(self._chunks)
501
elif storage_kind == 'lines':
502
return iter(osutils.chunks_to_lines(self._chunks))
448
503
raise errors.UnavailableRepresentation(self.key, storage_kind,
449
504
self.storage_kind)
452
507
class _LazyGroupContentManager(object):
453
508
"""This manages a group of _LazyGroupCompressFactory objects."""
455
_max_cut_fraction = 0.75 # We allow a block to be trimmed to 75% of
456
# current size, and still be considered
458
_full_block_size = 4*1024*1024
459
_full_mixed_block_size = 2*1024*1024
460
_full_enough_block_size = 3*1024*1024 # size at which we won't repack
461
_full_enough_mixed_block_size = 2*768*1024 # 1.5MB
510
_max_cut_fraction = 0.75 # We allow a block to be trimmed to 75% of
511
# current size, and still be considered
513
_full_block_size = 4 * 1024 * 1024
514
_full_mixed_block_size = 2 * 1024 * 1024
515
_full_enough_block_size = 3 * 1024 * 1024 # size at which we won't repack
516
_full_enough_mixed_block_size = 2 * 768 * 1024 # 1.5MB
463
def __init__(self, block):
518
def __init__(self, block, get_compressor_settings=None):
464
519
self._block = block
465
520
# We need to preserve the ordering
466
521
self._factories = []
467
522
self._last_byte = 0
523
self._get_settings = get_compressor_settings
524
self._compressor_settings = None
526
def _get_compressor_settings(self):
527
if self._compressor_settings is not None:
528
return self._compressor_settings
530
if self._get_settings is not None:
531
settings = self._get_settings()
533
vf = GroupCompressVersionedFiles
534
settings = vf._DEFAULT_COMPRESSOR_SETTINGS
535
self._compressor_settings = settings
536
return self._compressor_settings
469
538
def add_factory(self, key, parents, start, end):
470
539
if not self._factories:
503
572
new_block.set_content(self._block._content[:last_byte])
504
573
self._block = new_block
575
def _make_group_compressor(self):
576
return GroupCompressor(self._get_compressor_settings())
506
578
def _rebuild_block(self):
507
579
"""Create a new GroupCompressBlock with only the referenced texts."""
508
compressor = GroupCompressor()
580
compressor = self._make_group_compressor()
509
581
tstart = time.time()
510
582
old_length = self._block._content_length
512
584
for factory in self._factories:
513
bytes = factory.get_bytes_as('fulltext')
585
chunks = factory.get_bytes_as('chunked')
586
chunks_len = factory.size
587
if chunks_len is None:
588
chunks_len = sum(map(len, chunks))
514
589
(found_sha1, start_point, end_point,
515
type) = compressor.compress(factory.key, bytes, factory.sha1)
590
type) = compressor.compress(
591
factory.key, chunks, chunks_len, factory.sha1)
516
592
# Now update this factory with the new offsets, etc
517
593
factory.sha1 = found_sha1
518
594
factory._start = start_point
662
743
# 1 line for end byte
663
744
header_lines = []
664
745
for factory in self._factories:
665
key_bytes = '\x00'.join(factory.key)
746
key_bytes = b'\x00'.join(factory.key)
666
747
parents = factory.parents
667
748
if parents is None:
668
parent_bytes = 'None:'
749
parent_bytes = b'None:'
670
parent_bytes = '\t'.join('\x00'.join(key) for key in parents)
671
record_header = '%s\n%s\n%d\n%d\n' % (
751
parent_bytes = b'\t'.join(b'\x00'.join(key) for key in parents)
752
record_header = b'%s\n%s\n%d\n%d\n' % (
672
753
key_bytes, parent_bytes, factory._start, factory._end)
673
754
header_lines.append(record_header)
674
755
# TODO: Can we break the refcycle at this point and set
675
756
# factory._manager = None?
676
header_bytes = ''.join(header_lines)
757
header_bytes = b''.join(header_lines)
678
759
header_bytes_len = len(header_bytes)
679
760
z_header_bytes = zlib.compress(header_bytes)
681
762
z_header_bytes_len = len(z_header_bytes)
682
block_bytes = self._block.to_bytes()
683
lines.append('%d\n%d\n%d\n' % (z_header_bytes_len, header_bytes_len,
763
block_bytes_len, block_chunks = self._block.to_chunks()
764
lines.append(b'%d\n%d\n%d\n' % (
765
z_header_bytes_len, header_bytes_len, block_bytes_len))
685
766
lines.append(z_header_bytes)
686
lines.append(block_bytes)
687
del z_header_bytes, block_bytes
688
return ''.join(lines)
767
lines.extend(block_chunks)
768
del z_header_bytes, block_chunks
769
# TODO: This is a point where we will double the memory consumption. To
770
# avoid this, we probably have to switch to a 'chunked' api
771
return b''.join(lines)
691
774
def from_bytes(cls, bytes):
692
775
# TODO: This does extra string copying, probably better to do it a
776
# different way. At a minimum this creates 2 copies of the
694
778
(storage_kind, z_header_len, header_len,
695
block_len, rest) = bytes.split('\n', 4)
779
block_len, rest) = bytes.split(b'\n', 4)
697
if storage_kind != 'groupcompress-block':
781
if storage_kind != b'groupcompress-block':
698
782
raise ValueError('Unknown storage kind: %s' % (storage_kind,))
699
783
z_header_len = int(z_header_len)
700
784
if len(rest) < z_header_len:
723
807
block = GroupCompressBlock.from_bytes(block_bytes)
725
809
result = cls(block)
726
for start in xrange(0, len(header_lines), 4):
810
for start in range(0, len(header_lines), 4):
728
key = tuple(header_lines[start].split('\x00'))
729
parents_line = header_lines[start+1]
730
if parents_line == 'None:':
812
key = tuple(header_lines[start].split(b'\x00'))
813
parents_line = header_lines[start + 1]
814
if parents_line == b'None:':
733
parents = tuple([tuple(segment.split('\x00'))
734
for segment in parents_line.split('\t')
736
start_offset = int(header_lines[start+2])
737
end_offset = int(header_lines[start+3])
817
parents = tuple([tuple(segment.split(b'\x00'))
818
for segment in parents_line.split(b'\t')
820
start_offset = int(header_lines[start + 2])
821
end_offset = int(header_lines[start + 3])
738
822
result.add_factory(key, parents, start_offset, end_offset)
749
833
class _CommonGroupCompressor(object):
835
def __init__(self, settings=None):
752
836
"""Create a GroupCompressor."""
754
838
self._last = None
755
839
self.endpoint = 0
756
840
self.input_bytes = 0
757
841
self.labels_deltas = {}
758
self._delta_index = None # Set by the children
842
self._delta_index = None # Set by the children
759
843
self._block = GroupCompressBlock()
847
self._settings = settings
761
def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
849
def compress(self, key, chunks, length, expected_sha, nostore_sha=None,
762
851
"""Compress lines with label key.
764
853
:param key: A key tuple. It is stored in the output
765
854
for identification of the text during decompression. If the last
766
element is 'None' it is replaced with the sha1 of the text -
855
element is b'None' it is replaced with the sha1 of the text -
767
856
e.g. sha1:xxxxxxx.
768
:param bytes: The bytes to be compressed
857
:param chunks: Chunks of bytes to be compressed
858
:param length: Length of chunks
769
859
:param expected_sha: If non-None, the sha the lines are believed to
770
860
have. During compression the sha is calculated; a mismatch will
787
877
if expected_sha is not None:
788
878
sha1 = expected_sha
790
sha1 = osutils.sha_string(bytes)
880
sha1 = osutils.sha_strings(chunks)
791
881
if nostore_sha is not None:
792
882
if sha1 == nostore_sha:
793
883
raise errors.ExistingContent()
794
884
if key[-1] is None:
795
key = key[:-1] + ('sha1:' + sha1,)
885
key = key[:-1] + (b'sha1:' + sha1,)
797
start, end, type = self._compress(key, bytes, len(bytes) / 2, soft)
887
start, end, type = self._compress(key, chunks, length, length / 2, soft)
798
888
return sha1, start, end, type
800
def _compress(self, key, bytes, max_delta_size, soft=False):
890
def _compress(self, key, chunks, input_len, max_delta_size, soft=False):
801
891
"""Compress lines with label key.
803
893
:param key: A key tuple. It is stored in the output for identification
804
894
of the text during decompression.
806
:param bytes: The bytes to be compressed
896
:param chunks: The chunks of bytes to be compressed
898
:param input_len: The length of the chunks
808
900
:param max_delta_size: The size above which we issue a fulltext instead
820
912
"""Extract a key previously added to the compressor.
822
914
:param key: The key to extract.
823
:return: An iterable over bytes and the sha1.
915
:return: An iterable over chunks and the sha1.
825
(start_byte, start_chunk, end_byte, end_chunk) = self.labels_deltas[key]
917
(start_byte, start_chunk, end_byte,
918
end_chunk) = self.labels_deltas[key]
826
919
delta_chunks = self.chunks[start_chunk:end_chunk]
827
stored_bytes = ''.join(delta_chunks)
828
if stored_bytes[0] == 'f':
920
stored_bytes = b''.join(delta_chunks)
921
kind = stored_bytes[:1]
829
923
fulltext_len, offset = decode_base128_int(stored_bytes[1:10])
830
924
data_len = fulltext_len + 1 + offset
831
if data_len != len(stored_bytes):
925
if data_len != len(stored_bytes):
832
926
raise ValueError('Index claimed fulltext len, but stored bytes'
833
927
' claim %s != %s'
834
928
% (len(stored_bytes), data_len))
835
bytes = stored_bytes[offset + 1:]
929
data = [stored_bytes[offset + 1:]]
932
raise ValueError('Unknown content kind, bytes claim %s' % kind)
837
933
# XXX: This is inefficient at best
838
source = ''.join(self.chunks[:start_chunk])
839
if stored_bytes[0] != 'd':
840
raise ValueError('Unknown content kind, bytes claim %s'
841
% (stored_bytes[0],))
934
source = b''.join(self.chunks[:start_chunk])
842
935
delta_len, offset = decode_base128_int(stored_bytes[1:10])
843
936
data_len = delta_len + 1 + offset
844
937
if data_len != len(stored_bytes):
845
938
raise ValueError('Index claimed delta len, but stored bytes'
846
939
' claim %s != %s'
847
940
% (len(stored_bytes), data_len))
848
bytes = apply_delta(source, stored_bytes[offset + 1:])
849
bytes_sha1 = osutils.sha_string(bytes)
850
return bytes, bytes_sha1
941
data = [apply_delta(source, stored_bytes[offset + 1:])]
942
data_sha1 = osutils.sha_strings(data)
943
return data, data_sha1
853
946
"""Finish this group, creating a formatted stream.
855
948
After calling this, the compressor should no longer be used
857
# TODO: this causes us to 'bloat' to 2x the size of content in the
858
# group. This has an impact for 'commit' of large objects.
859
# One possibility is to use self._content_chunks, and be lazy and
860
# only fill out self._content as a full string when we actually
861
# need it. That would at least drop the peak memory consumption
862
# for 'commit' down to ~1x the size of the largest file, at a
863
# cost of increased complexity within this code. 2x is still <<
864
# 3x the size of the largest file, so we are doing ok.
865
950
self._block.set_chunked_content(self.chunks, self.endpoint)
866
951
self.chunks = None
867
952
self._delta_index = None
886
971
class PythonGroupCompressor(_CommonGroupCompressor):
973
def __init__(self, settings=None):
889
974
"""Create a GroupCompressor.
891
976
Used only if the pyrex version is not available.
893
super(PythonGroupCompressor, self).__init__()
978
super(PythonGroupCompressor, self).__init__(settings)
894
979
self._delta_index = LinesDeltaIndex([])
895
980
# The actual content is managed by LinesDeltaIndex
896
981
self.chunks = self._delta_index.lines
898
def _compress(self, key, bytes, max_delta_size, soft=False):
983
def _compress(self, key, chunks, input_len, max_delta_size, soft=False):
899
984
"""see _CommonGroupCompressor._compress"""
900
input_len = len(bytes)
901
new_lines = osutils.split_lines(bytes)
985
new_lines = osutils.chunks_to_lines(chunks)
902
986
out_lines, index_lines = self._delta_index.make_delta(
903
987
new_lines, bytes_length=input_len, soft=soft)
904
988
delta_length = sum(map(len, out_lines))
905
989
if delta_length > max_delta_size:
906
990
# The delta is longer than the fulltext, insert a fulltext
907
991
type = 'fulltext'
908
out_lines = ['f', encode_base128_int(input_len)]
992
out_lines = [b'f', encode_base128_int(input_len)]
909
993
out_lines.extend(new_lines)
910
994
index_lines = [False, False]
911
995
index_lines.extend([True] * len(new_lines))
913
997
# this is a worthy delta, output it
916
1000
# Update the delta_length to include those two encoded integers
917
1001
out_lines[1] = encode_base128_int(delta_length)
918
1002
# Before insertion
934
1018
It contains code very similar to SequenceMatcher because of having a similar
935
1019
task. However some key differences apply:
936
- there is no junk, we want a minimal edit not a human readable diff.
937
- we don't filter very common lines (because we don't know where a good
938
range will start, and after the first text we want to be emitting minmal
940
- we chain the left side, not the right side
941
- we incrementally update the adjacency matrix as new lines are provided.
942
- we look for matches in all of the left side, so the routine which does
943
the analagous task of find_longest_match does not need to filter on the
1021
* there is no junk, we want a minimal edit not a human readable diff.
1022
* we don't filter very common lines (because we don't know where a good
1023
range will start, and after the first text we want to be emitting minmal
1025
* we chain the left side, not the right side
1026
* we incrementally update the adjacency matrix as new lines are provided.
1027
* we look for matches in all of the left side, so the routine which does
1028
the analagous task of find_longest_match does not need to filter on the
948
super(PyrexGroupCompressor, self).__init__()
949
self._delta_index = DeltaIndex()
1032
def __init__(self, settings=None):
1033
super(PyrexGroupCompressor, self).__init__(settings)
1034
max_bytes_to_index = self._settings.get('max_bytes_to_index', 0)
1035
self._delta_index = DeltaIndex(max_bytes_to_index=max_bytes_to_index)
951
def _compress(self, key, bytes, max_delta_size, soft=False):
1037
def _compress(self, key, chunks, input_len, max_delta_size, soft=False):
952
1038
"""see _CommonGroupCompressor._compress"""
953
input_len = len(bytes)
954
1039
# By having action/label/sha1/len, we can parse the group if the index
955
1040
# was ever destroyed, we have the key in 'label', we know the final
956
1041
# bytes are valid from sha1, and we know where to find the end of this
962
1047
# new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
963
1048
if self._delta_index._source_offset != self.endpoint:
964
1049
raise AssertionError('_source_offset != endpoint'
965
' somehow the DeltaIndex got out of sync with'
1050
' somehow the DeltaIndex got out of sync with'
1051
' the output lines')
1052
bytes = b''.join(chunks)
967
1053
delta = self._delta_index.make_delta(bytes, max_delta_size)
969
1055
type = 'fulltext'
970
enc_length = encode_base128_int(len(bytes))
1056
enc_length = encode_base128_int(input_len)
971
1057
len_mini_header = 1 + len(enc_length)
972
1058
self._delta_index.add_source(bytes, len_mini_header)
973
new_chunks = ['f', enc_length, bytes]
1059
new_chunks = [b'f', enc_length] + chunks
976
1062
enc_length = encode_base128_int(len(delta))
977
1063
len_mini_header = 1 + len(enc_length)
978
new_chunks = ['d', enc_length, delta]
1064
new_chunks = [b'd', enc_length, delta]
979
1065
self._delta_index.add_delta_source(delta, len_mini_header)
980
1066
# Before insertion
981
1067
start = self.endpoint
1022
1108
graph_index = BTreeBuilder(reference_lists=ref_length,
1023
key_elements=keylength)
1109
key_elements=keylength)
1024
1110
stream = transport.open_write_stream('newpack')
1025
1111
writer = pack.ContainerWriter(stream.write)
1027
index = _GCGraphIndex(graph_index, lambda:True, parents=parents,
1028
add_callback=graph_index.add_nodes,
1029
inconsistency_fatal=inconsistency_fatal)
1030
access = knit._DirectPackAccess({})
1113
index = _GCGraphIndex(graph_index, lambda: True, parents=parents,
1114
add_callback=graph_index.add_nodes,
1115
inconsistency_fatal=inconsistency_fatal)
1116
access = pack_repo._DirectPackAccess({})
1031
1117
access.set_writer(writer, graph_index, (transport, 'newpack'))
1032
1118
result = GroupCompressVersionedFiles(index, access, delta)
1033
1119
result.stream = stream
1149
1237
self.total_bytes = 0
1152
class GroupCompressVersionedFiles(VersionedFiles):
1240
class GroupCompressVersionedFiles(VersionedFilesWithFallbacks):
1153
1241
"""A group-compress based VersionedFiles implementation."""
1155
def __init__(self, index, access, delta=True, _unadded_refs=None):
1243
# This controls how the GroupCompress DeltaIndex works. Basically, we
1244
# compute hash pointers into the source blocks (so hash(text) => text).
1245
# However each of these references costs some memory in trade against a
1246
# more accurate match result. For very large files, they either are
1247
# pre-compressed and change in bulk whenever they change, or change in just
1248
# local blocks. Either way, 'improved resolution' is not very helpful,
1249
# versus running out of memory trying to track everything. The default max
1250
# gives 100% sampling of a 1MB file.
1251
_DEFAULT_MAX_BYTES_TO_INDEX = 1024 * 1024
1252
_DEFAULT_COMPRESSOR_SETTINGS = {'max_bytes_to_index':
1253
_DEFAULT_MAX_BYTES_TO_INDEX}
1255
def __init__(self, index, access, delta=True, _unadded_refs=None,
1156
1257
"""Create a GroupCompressVersionedFiles object.
1158
1259
:param index: The index object storing access and graph data.
1159
1260
:param access: The access object storing raw data.
1160
1261
:param delta: Whether to delta compress or just entropy compress.
1161
1262
:param _unadded_refs: private parameter, don't use.
1263
:param _group_cache: private parameter, don't use.
1163
1265
self._index = index
1164
1266
self._access = access
1166
1268
if _unadded_refs is None:
1167
1269
_unadded_refs = {}
1168
1270
self._unadded_refs = _unadded_refs
1169
self._group_cache = LRUSizeCache(max_size=50*1024*1024)
1170
self._fallback_vfs = []
1271
if _group_cache is None:
1272
_group_cache = LRUSizeCache(max_size=50 * 1024 * 1024)
1273
self._group_cache = _group_cache
1274
self._immediate_fallback_vfs = []
1275
self._max_bytes_to_index = None
1172
1277
def without_fallbacks(self):
1173
1278
"""Return a clone of this object without any fallbacks configured."""
1174
1279
return GroupCompressVersionedFiles(self._index, self._access,
1175
self._delta, _unadded_refs=dict(self._unadded_refs))
1280
self._delta, _unadded_refs=dict(
1281
self._unadded_refs),
1282
_group_cache=self._group_cache)
1177
1284
def add_lines(self, key, parents, lines, parent_texts=None,
1178
left_matching_blocks=None, nostore_sha=None, random_id=False,
1179
check_content=True):
1285
left_matching_blocks=None, nostore_sha=None, random_id=False,
1286
check_content=True):
1180
1287
"""Add a text to the store.
1182
1289
:param key: The key tuple of the text to add.
1183
1290
:param parents: The parents key tuples of the text to add.
1184
1291
:param lines: A list of lines. Each line must be a bytestring. And all
1185
of them except the last must be terminated with \n and contain no
1186
other \n's. The last line may either contain no \n's or a single
1187
terminating \n. If the lines list does not meet this constraint the add
1188
routine may error or may succeed - but you will be unable to read
1189
the data back accurately. (Checking the lines have been split
1292
of them except the last must be terminated with \\n and contain no
1293
other \\n's. The last line may either contain no \\n's or a single
1294
terminating \\n. If the lines list does not meet this constraint the
1295
add routine may error or may succeed - but you will be unable to
1296
read the data back accurately. (Checking the lines have been split
1190
1297
correctly is expensive and extremely unlikely to catch bugs so it
1191
1298
is not done at runtime unless check_content is True.)
1192
1299
:param parent_texts: An optional dictionary containing the opaque
1211
1318
back to future add_lines calls in the parent_texts dictionary.
1213
1320
self._index._check_write_ok()
1214
self._check_add(key, lines, random_id, check_content)
1216
# The caller might pass None if there is no graph data, but kndx
1217
# indexes can't directly store that, so we give them
1218
# an empty tuple instead.
1220
# double handling for now. Make it work until then.
1221
length = sum(map(len, lines))
1222
record = ChunkedContentFactory(key, parents, None, lines)
1223
sha1 = list(self._insert_record_stream([record], random_id=random_id,
1224
nostore_sha=nostore_sha))[0]
1225
return sha1, length, None
1227
def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
1228
"""See VersionedFiles._add_text()."""
1322
self._check_lines_not_unicode(lines)
1323
self._check_lines_are_lines(lines)
1324
return self.add_content(
1325
ChunkedContentFactory(
1326
key, parents, osutils.sha_strings(lines), lines, chunks_are_lines=True),
1327
parent_texts, left_matching_blocks, nostore_sha, random_id)
1329
def add_content(self, factory, parent_texts=None,
1330
left_matching_blocks=None, nostore_sha=None,
1332
"""Add a text to the store.
1334
:param factory: A ContentFactory that can be used to retrieve the key,
1335
parents and contents.
1336
:param parent_texts: An optional dictionary containing the opaque
1337
representations of some or all of the parents of version_id to
1338
allow delta optimisations. VERY IMPORTANT: the texts must be those
1339
returned by add_lines or data corruption can be caused.
1340
:param left_matching_blocks: a hint about which areas are common
1341
between the text and its left-hand-parent. The format is
1342
the SequenceMatcher.get_matching_blocks format.
1343
:param nostore_sha: Raise ExistingContent and do not add the lines to
1344
the versioned file if the digest of the lines matches this.
1345
:param random_id: If True a random id has been selected rather than
1346
an id determined by some deterministic process such as a converter
1347
from a foreign VCS. When True the backend may choose not to check
1348
for uniqueness of the resulting key within the versioned file, so
1349
this should only be done when the result is expected to be unique
1351
:return: The text sha1, the number of bytes in the text, and an opaque
1352
representation of the inserted version which can be provided
1353
back to future add_lines calls in the parent_texts dictionary.
1229
1355
self._index._check_write_ok()
1230
self._check_add(key, None, random_id, check_content=False)
1231
if text.__class__ is not str:
1232
raise errors.BzrBadParameterUnicode("text")
1356
parents = factory.parents
1357
self._check_add(factory.key, random_id)
1233
1358
if parents is None:
1234
1359
# The caller might pass None if there is no graph data, but kndx
1235
1360
# indexes can't directly store that, so we give them
1236
1361
# an empty tuple instead.
1238
1363
# double handling for now. Make it work until then.
1240
record = FulltextContentFactory(key, parents, None, text)
1241
sha1 = list(self._insert_record_stream([record], random_id=random_id,
1242
nostore_sha=nostore_sha))[0]
1364
sha1, length = list(self._insert_record_stream(
1365
[factory], random_id=random_id, nostore_sha=nostore_sha))[0]
1243
1366
return sha1, length, None
1245
1368
def add_fallback_versioned_files(self, a_versioned_files):
1283
1407
# probably check that the existing content is identical to what is
1284
1408
# being inserted, and otherwise raise an exception. This would make
1285
1409
# the bundle code simpler.
1287
self._check_lines_not_unicode(lines)
1288
self._check_lines_are_lines(lines)
1290
def get_known_graph_ancestry(self, keys):
1291
"""Get a KnownGraph instance with the ancestry of keys."""
1292
# Note that this is identical to
1293
# KnitVersionedFiles.get_known_graph_ancestry, but they don't share
1295
parent_map, missing_keys = self._index.find_ancestry(keys)
1296
for fallback in self._fallback_vfs:
1297
if not missing_keys:
1299
(f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
1301
parent_map.update(f_parent_map)
1302
missing_keys = f_missing_keys
1303
kg = _mod_graph.KnownGraph(parent_map)
1306
1411
def get_parent_map(self, keys):
1307
1412
"""Get a map of the graph parents of keys.
1534
1640
key_to_source_map)
1535
1641
elif ordering == 'as-requested':
1536
1642
source_keys = self._get_as_requested_source_keys(orig_keys,
1537
locations, unadded_keys, key_to_source_map)
1643
locations, unadded_keys, key_to_source_map)
1539
1645
# We want to yield the keys in a semi-optimal (read-wise) ordering.
1540
1646
# Otherwise we thrash the _group_cache and destroy performance
1541
1647
source_keys = self._get_io_ordered_source_keys(locations,
1542
unadded_keys, source_result)
1648
unadded_keys, source_result)
1543
1649
for key in missing:
1544
1650
yield AbsentContentFactory(key)
1545
1651
# Batch up as many keys as we can until either:
1546
1652
# - we encounter an unadded ref, or
1547
1653
# - we run out of keys, or
1548
1654
# - the total bytes to retrieve for this batch > BATCH_SIZE
1549
batcher = _BatchingBlockFetcher(self, locations)
1655
batcher = _BatchingBlockFetcher(self, locations,
1656
get_compressor_settings=self._get_compressor_settings)
1550
1657
for source, keys in source_keys:
1551
1658
if source is self:
1552
1659
for key in keys:
1595
1702
# test_insert_record_stream_existing_keys fail for groupcompress and
1596
1703
# groupcompress-nograph, this needs to be revisited while addressing
1597
1704
# 'bzr branch' performance issues.
1598
for _ in self._insert_record_stream(stream, random_id=False):
1705
for _, _ in self._insert_record_stream(stream, random_id=False):
1708
def _get_compressor_settings(self):
1709
if self._max_bytes_to_index is None:
1710
# TODO: VersionedFiles don't know about their containing
1711
# repository, so they don't have much of an idea about their
1712
# location. So for now, this is only a global option.
1713
c = config.GlobalConfig()
1714
val = c.get_user_option('bzr.groupcompress.max_bytes_to_index')
1718
except ValueError as e:
1719
trace.warning('Value for '
1720
'"bzr.groupcompress.max_bytes_to_index"'
1721
' %r is not an integer'
1725
val = self._DEFAULT_MAX_BYTES_TO_INDEX
1726
self._max_bytes_to_index = val
1727
return {'max_bytes_to_index': self._max_bytes_to_index}
1729
def _make_group_compressor(self):
    """Return a new GroupCompressor built from the current settings."""
    # Settings come from _get_compressor_settings() on this same object.
    settings = self._get_compressor_settings()
    return GroupCompressor(settings)
1601
1732
def _insert_record_stream(self, stream, random_id=False, nostore_sha=None,
1602
1733
reuse_blocks=True):
1603
1734
"""Internal core to insert a record stream into this container.
1627
1759
# This will go up to fulltexts for gc to gc fetching, which isn't
1629
self._compressor = GroupCompressor()
1761
self._compressor = self._make_group_compressor()
1630
1762
self._unadded_refs = {}
1631
1763
keys_to_add = []
1633
bytes = self._compressor.flush().to_bytes()
1634
self._compressor = GroupCompressor()
1635
index, start, length = self._access.add_raw_records(
1636
[(None, len(bytes))], bytes)[0]
1766
bytes_len, chunks = self._compressor.flush().to_chunks()
1767
self._compressor = self._make_group_compressor()
1768
# Note: At this point we still have 1 copy of the fulltext (in
1769
# record and the var 'bytes'), and this generates 2 copies of
1770
# the compressed text (one for bytes, one in chunks)
1771
# TODO: Figure out how to indicate that we would be happy to free
1772
# the fulltext content at this point. Note that sometimes we
1773
# will want it later (streaming CHK pages), but most of the
1774
# time we won't (everything else)
1775
index, start, length = self._access.add_raw_record(
1776
None, bytes_len, chunks)
1638
1778
for key, reads, refs in keys_to_add:
1639
nodes.append((key, "%d %d %s" % (start, length, reads), refs))
1779
nodes.append((key, b"%d %d %s" % (start, length, reads), refs))
1640
1780
self._index.add_records(nodes, random_id=random_id)
1641
1781
self._unadded_refs = {}
1642
1782
del keys_to_add[:]
1689
1828
raise AssertionError('No insert_manager set')
1690
1829
if insert_manager is not record._manager:
1691
1830
raise AssertionError('insert_manager does not match'
1692
' the current record, we cannot be positive'
1693
' that the appropriate content was inserted.'
1695
value = "%d %d %d %d" % (block_start, block_length,
1696
record._start, record._end)
1831
' the current record, we cannot be positive'
1832
' that the appropriate content was inserted.'
1834
value = b"%d %d %d %d" % (block_start, block_length,
1835
record._start, record._end)
1697
1836
nodes = [(record.key, value, (record.parents,))]
1698
1837
# TODO: Consider buffering up many nodes to be added, not
1699
1838
# sure how much overhead this has, but we're seeing
1701
1840
self._index.add_records(nodes, random_id=random_id)
1704
bytes = record.get_bytes_as('fulltext')
1843
chunks = record.get_bytes_as('chunked')
1705
1844
except errors.UnavailableRepresentation:
1706
adapter_key = record.storage_kind, 'fulltext'
1845
adapter_key = record.storage_kind, 'chunked'
1707
1846
adapter = get_adapter(adapter_key)
1708
bytes = adapter.get_bytes(record)
1847
chunks = adapter.get_bytes(record, 'chunked')
1848
chunks_len = record.size
1849
if chunks_len is None:
1850
chunks_len = sum(map(len, chunks))
1709
1851
if len(record.key) > 1:
1710
1852
prefix = record.key[0]
1711
1853
soft = (prefix == last_prefix)
1715
if max_fulltext_len < len(bytes):
1716
max_fulltext_len = len(bytes)
1857
if max_fulltext_len < chunks_len:
1858
max_fulltext_len = chunks_len
1717
1859
max_fulltext_prefix = prefix
1718
1860
(found_sha1, start_point, end_point,
1719
type) = self._compressor.compress(record.key,
1720
bytes, record.sha1, soft=soft,
1721
nostore_sha=nostore_sha)
1722
# delta_ratio = float(len(bytes)) / (end_point - start_point)
1861
type) = self._compressor.compress(
1862
record.key, chunks, chunks_len, record.sha1, soft=soft,
1863
nostore_sha=nostore_sha)
1864
# delta_ratio = float(chunks_len) / (end_point - start_point)
1723
1865
# Check if we want to continue to include that text
1724
1866
if (prefix == max_fulltext_prefix
1725
and end_point < 2 * max_fulltext_len):
1867
and end_point < 2 * max_fulltext_len):
1726
1868
# As long as we are on the same file_id, we will fill at least
1727
1869
# 2 * max_fulltext_len
1728
1870
start_new_block = False
1729
elif end_point > 4*1024*1024:
1871
elif end_point > 4 * 1024 * 1024:
1730
1872
start_new_block = True
1731
1873
elif (prefix is not None and prefix != last_prefix
1732
and end_point > 2*1024*1024):
1874
and end_point > 2 * 1024 * 1024):
1733
1875
start_new_block = True
1735
1877
start_new_block = False
1737
1879
if start_new_block:
1738
1880
self._compressor.pop_last()
1740
max_fulltext_len = len(bytes)
1882
max_fulltext_len = chunks_len
1741
1883
(found_sha1, start_point, end_point,
1742
type) = self._compressor.compress(record.key, bytes,
1884
type) = self._compressor.compress(
1885
record.key, chunks, chunks_len, record.sha1)
1744
1886
if record.key[-1] is None:
1745
key = record.key[:-1] + ('sha1:' + found_sha1,)
1887
key = record.key[:-1] + (b'sha1:' + found_sha1,)
1747
1889
key = record.key
1748
1890
self._unadded_refs[key] = record.parents
1891
yield found_sha1, chunks_len
1750
1892
as_st = static_tuple.StaticTuple.from_sequence
1751
1893
if record.parents is not None:
1752
1894
parents = as_st([as_st(p) for p in record.parents])
1755
1897
refs = static_tuple.StaticTuple(parents)
1756
keys_to_add.append((key, '%d %d' % (start_point, end_point), refs))
1899
(key, b'%d %d' % (start_point, end_point), refs))
1757
1900
if len(keys_to_add):
1759
1902
self._compressor = None
1785
1928
# but we need to setup a list of records to visit.
1786
1929
# we need key, position, length
1787
1930
for key_idx, record in enumerate(self.get_record_stream(keys,
1788
'unordered', True)):
1931
'unordered', True)):
1789
1932
# XXX: todo - optimise to use less than full texts.
1790
1933
key = record.key
1791
1934
if pb is not None:
1792
1935
pb.update('Walking content', key_idx, total)
1793
1936
if record.storage_kind == 'absent':
1794
1937
raise errors.RevisionNotPresent(key, self)
1795
lines = osutils.split_lines(record.get_bytes_as('fulltext'))
1938
for line in record.iter_bytes_as('lines'):
1797
1939
yield line, key
1798
1940
if pb is not None:
1799
1941
pb.update('Walking content', total, total)
1802
1944
"""See VersionedFiles.keys."""
1803
1945
if 'evil' in debug.debug_flags:
1804
1946
trace.mutter_callsite(2, "keys scales with size of history")
1805
sources = [self._index] + self._fallback_vfs
1947
sources = [self._index] + self._immediate_fallback_vfs
1807
1949
for source in sources:
1808
1950
result.update(source.keys())
1954
class _GCBuildDetails(object):
1955
"""A blob of data about the build details.
1957
This stores the minimal data, which then allows compatibility with the old
1958
api, without taking as much memory.
1961
__slots__ = ('_index', '_group_start', '_group_end', '_basis_end',
1962
'_delta_end', '_parents')
1965
compression_parent = None
1967
def __init__(self, parents, position_info):
1968
self._parents = parents
1969
(self._index, self._group_start, self._group_end, self._basis_end,
1970
self._delta_end) = position_info
1973
return '%s(%s, %s)' % (self.__class__.__name__,
1974
self.index_memo, self._parents)
1977
def index_memo(self):
1978
return (self._index, self._group_start, self._group_end,
1979
self._basis_end, self._delta_end)
1982
def record_details(self):
1983
return static_tuple.StaticTuple(self.method, None)
1985
def __getitem__(self, offset):
1986
"""Compatibility thunk to act like a tuple."""
1988
return self.index_memo
1990
return self.compression_parent # Always None
1992
return self._parents
1994
return self.record_details
1996
raise IndexError('offset out of range')
1812
2002
class _GCGraphIndex(object):
1813
2003
"""Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""
1815
2005
def __init__(self, graph_index, is_locked, parents=True,
1816
add_callback=None, track_external_parent_refs=False,
1817
inconsistency_fatal=True, track_new_keys=False):
2006
add_callback=None, track_external_parent_refs=False,
2007
inconsistency_fatal=True, track_new_keys=False):
1818
2008
"""Construct a _GCGraphIndex on a graph_index.
1820
:param graph_index: An implementation of bzrlib.index.GraphIndex.
2010
:param graph_index: An implementation of breezy.index.GraphIndex.
1821
2011
:param is_locked: A callback, returns True if the index is locked and
1823
2013
:param parents: If True, record knits parents, if not do not record
1989
2179
:param keys: An iterable of keys.
1990
2180
:return: A dict of key:
1991
2181
(index_memo, compression_parent, parents, record_details).
1993
opaque structure to pass to read_records to extract the raw
1996
Content that this record is built upon, may be None
1998
Logical parents of this node
2000
extra information about the content which needs to be passed to
2001
Factory.parse_record
2183
* index_memo: opaque structure to pass to read_records to extract
2185
* compression_parent: Content that this record is built upon, may
2187
* parents: Logical parents of this node
2188
* record_details: extra information about the content which needs
2189
to be passed to Factory.parse_record
2003
2191
self._check_read()