22
from ..lazy_import import lazy_import
23
lazy_import(globals(), """
37
from bzrlib.btree_index import BTreeBuilder
38
from bzrlib.lru_cache import LRUSizeCache
39
from bzrlib.tsort import topo_sort
40
from bzrlib.versionedfile import (
33
from breezy.bzr import (
39
from breezy.i18n import gettext
45
from .btree_index import BTreeBuilder
46
from ..lru_cache import LRUSizeCache
47
from .versionedfile import (
42
50
AbsentContentFactory,
43
51
ChunkedContentFactory,
44
53
FulltextContentFactory,
54
VersionedFilesWithFallbacks,
55
UnavailableRepresentation,
48
58
# Minimum number of uncompressed bytes to try fetch at once when retrieving
49
59
# groupcompress blocks.
52
_USE_LZMA = False and (pylzma is not None)
62
# osutils.sha_string(b'')
63
_null_sha1 = b'da39a3ee5e6b4b0d3255bfef95601890afd80709'
54
# osutils.sha_string('')
55
_null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
57
66
def sort_gc_optimal(parent_map):
58
67
"""Sort and group the keys in parent_map into groupcompress order.
79
88
for prefix in sorted(per_prefix_map):
80
present_keys.extend(reversed(topo_sort(per_prefix_map[prefix])))
89
present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
81
90
return present_keys
93
class DecompressCorruption(errors.BzrError):
95
_fmt = "Corruption while decompressing repository file%(orig_error)s"
97
def __init__(self, orig_error=None):
98
if orig_error is not None:
99
self.orig_error = ", %s" % (orig_error,)
102
errors.BzrError.__init__(self)
84
105
# The max zlib window size is 32kB, so if we set 'max_size' output of the
85
106
# decompressor to the requested bytes + 32kB, then we should guarantee
86
107
# num_bytes coming out.
87
_ZLIB_DECOMP_WINDOW = 32*1024
108
_ZLIB_DECOMP_WINDOW = 32 * 1024
89
111
class GroupCompressBlock(object):
90
112
"""An object which maintains the internal structure of the compressed data.
132
154
# Expand the content if required
133
155
if self._content is None:
134
156
if self._content_chunks is not None:
135
self._content = ''.join(self._content_chunks)
157
self._content = b''.join(self._content_chunks)
136
158
self._content_chunks = None
137
159
if self._content is None:
138
if self._z_content is None:
160
# We join self._z_content_chunks here, because if we are
161
# decompressing, then it is *very* likely that we have a single
163
if self._z_content_chunks is None:
139
164
raise AssertionError('No content to decompress')
140
if self._z_content == '':
165
z_content = b''.join(self._z_content_chunks)
142
168
elif self._compressor_name == 'lzma':
143
169
# We don't do partial lzma decomp yet
144
self._content = pylzma.decompress(self._z_content)
171
self._content = pylzma.decompress(z_content)
145
172
elif self._compressor_name == 'zlib':
146
173
# Start a zlib decompressor
147
174
if num_bytes * 4 > self._content_length * 3:
148
175
# If we are requesting more that 3/4ths of the content,
149
176
# just extract the whole thing in a single pass
150
177
num_bytes = self._content_length
151
self._content = zlib.decompress(self._z_content)
178
self._content = zlib.decompress(z_content)
153
180
self._z_content_decompressor = zlib.decompressobj()
154
181
# Seed the decompressor with the uncompressed bytes, so
155
182
# that the rest of the code is simplified
156
183
self._content = self._z_content_decompressor.decompress(
157
self._z_content, num_bytes + _ZLIB_DECOMP_WINDOW)
184
z_content, num_bytes + _ZLIB_DECOMP_WINDOW)
158
185
if not self._z_content_decompressor.unconsumed_tail:
159
186
self._z_content_decompressor = None
197
224
# At present, we have 2 integers for the compressed and uncompressed
198
225
# content. In base10 (ascii) 14 bytes can represent > 1TB, so to avoid
199
226
# checking too far, cap the search to 14 bytes.
200
pos2 = bytes.index('\n', pos, pos + 14)
201
self._z_content_length = int(bytes[pos:pos2])
203
pos2 = bytes.index('\n', pos, pos + 14)
204
self._content_length = int(bytes[pos:pos2])
206
if len(bytes) != (pos + self._z_content_length):
227
pos2 = data.index(b'\n', pos, pos + 14)
228
self._z_content_length = int(data[pos:pos2])
230
pos2 = data.index(b'\n', pos, pos + 14)
231
self._content_length = int(data[pos:pos2])
233
if len(data) != (pos + self._z_content_length):
207
234
# XXX: Define some GCCorrupt error ?
208
235
raise AssertionError('Invalid bytes: (%d) != %d + %d' %
209
(len(bytes), pos, self._z_content_length))
210
self._z_content = bytes[pos:]
236
(len(data), pos, self._z_content_length))
237
self._z_content_chunks = (data[pos:],)
240
def _z_content(self):
241
"""Return z_content_chunks as a simple string.
243
Meant only to be used by the test suite.
245
if self._z_content_chunks is not None:
246
return b''.join(self._z_content_chunks)
213
250
def from_bytes(cls, bytes):
215
if bytes[:6] not in cls.GCB_KNOWN_HEADERS:
253
if header not in cls.GCB_KNOWN_HEADERS:
216
254
raise ValueError('bytes did not start with any of %r'
217
255
% (cls.GCB_KNOWN_HEADERS,))
218
# XXX: why not testing the whole header ?
256
if header == cls.GCB_HEADER:
220
257
out._compressor_name = 'zlib'
221
elif bytes[4] == 'l':
258
elif header == cls.GCB_LZ_HEADER:
222
259
out._compressor_name = 'lzma'
224
raise ValueError('unknown compressor: %r' % (bytes,))
261
raise ValueError('unknown compressor: %r' % (header,))
225
262
out._parse_bytes(bytes, 6)
233
270
:return: The bytes for the content
235
272
if start == end == 0:
237
274
self._ensure_content(end)
238
275
# The bytes are 'f' or 'd' for the type, then a variable-length
239
276
# base128 integer for the content size, then the actual content
240
277
# We know that the variable-length integer won't be longer than 5
241
278
# bytes (it takes 5 bytes to encode 2^32)
242
c = self._content[start]
279
c = self._content[start:start + 1]
244
281
type = 'fulltext'
247
284
raise ValueError('Unknown content control code: %s'
250
287
content_len, len_len = decode_base128_int(
251
self._content[start + 1:start + 6])
288
self._content[start + 1:start + 6])
252
289
content_start = start + 1 + len_len
253
290
if end != content_start + content_len:
254
291
raise ValueError('end != len according to field header'
255
' %s != %s' % (end, content_start + content_len))
257
bytes = self._content[content_start:end]
259
bytes = apply_delta_to_source(self._content, content_start, end)
292
' %s != %s' % (end, content_start + content_len))
294
return [self._content[content_start:end]]
295
# Must be type delta as checked above
296
return [apply_delta_to_source(self._content, content_start, end)]
262
298
def set_chunked_content(self, content_chunks, length):
263
299
"""Set the content of this block to the given chunks."""
269
305
self._content_length = length
270
306
self._content_chunks = content_chunks
271
307
self._content = None
272
self._z_content = None
308
self._z_content_chunks = None
274
310
def set_content(self, content):
275
311
"""Set the content of this block."""
276
312
self._content_length = len(content)
277
313
self._content = content
278
self._z_content = None
280
def _create_z_content_using_lzma(self):
281
if self._content_chunks is not None:
282
self._content = ''.join(self._content_chunks)
283
self._content_chunks = None
284
if self._content is None:
285
raise AssertionError('Nothing to compress')
286
self._z_content = pylzma.compress(self._content)
287
self._z_content_length = len(self._z_content)
289
def _create_z_content_from_chunks(self):
314
self._z_content_chunks = None
316
def _create_z_content_from_chunks(self, chunks):
290
317
compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)
291
compressed_chunks = map(compressor.compress, self._content_chunks)
318
# Peak in this point is 1 fulltext, 1 compressed text, + zlib overhead
319
# (measured peak is maybe 30MB over the above...)
320
compressed_chunks = list(map(compressor.compress, chunks))
292
321
compressed_chunks.append(compressor.flush())
293
self._z_content = ''.join(compressed_chunks)
294
self._z_content_length = len(self._z_content)
322
# Ignore empty chunks
323
self._z_content_chunks = [c for c in compressed_chunks if c]
324
self._z_content_length = sum(map(len, self._z_content_chunks))
296
326
def _create_z_content(self):
297
if self._z_content is not None:
300
self._create_z_content_using_lzma()
327
if self._z_content_chunks is not None:
302
329
if self._content_chunks is not None:
303
self._create_z_content_from_chunks()
305
self._z_content = zlib.compress(self._content)
306
self._z_content_length = len(self._z_content)
330
chunks = self._content_chunks
332
chunks = (self._content,)
333
self._create_z_content_from_chunks(chunks)
336
"""Create the byte stream as a series of 'chunks'"""
337
self._create_z_content()
338
header = self.GCB_HEADER
339
chunks = [b'%s%d\n%d\n'
340
% (header, self._z_content_length, self._content_length),
342
chunks.extend(self._z_content_chunks)
343
total_len = sum(map(len, chunks))
344
return total_len, chunks
308
346
def to_bytes(self):
309
347
"""Encode the information into a byte stream."""
310
self._create_z_content()
312
header = self.GCB_LZ_HEADER
314
header = self.GCB_HEADER
316
'%d\n%d\n' % (self._z_content_length, self._content_length),
319
return ''.join(chunks)
348
total_len, chunks = self.to_chunks()
349
return b''.join(chunks)
321
351
def _dump(self, include_text=False):
322
352
"""Take this block, and spit out a human-readable structure.
334
364
while pos < self._content_length:
335
kind = self._content[pos]
365
kind = self._content[pos:pos + 1]
337
if kind not in ('f', 'd'):
367
if kind not in (b'f', b'd'):
338
368
raise ValueError('invalid kind character: %r' % (kind,))
339
369
content_len, len_len = decode_base128_int(
340
self._content[pos:pos + 5])
370
self._content[pos:pos + 5])
342
372
if content_len + pos > self._content_length:
343
373
raise ValueError('invalid content_len %d for record @ pos %d'
344
374
% (content_len, pos - len_len - 1))
345
if kind == 'f': # Fulltext
375
if kind == b'f': # Fulltext
347
text = self._content[pos:pos+content_len]
348
result.append(('f', content_len, text))
377
text = self._content[pos:pos + content_len]
378
result.append((b'f', content_len, text))
350
result.append(('f', content_len))
351
elif kind == 'd': # Delta
352
delta_content = self._content[pos:pos+content_len]
380
result.append((b'f', content_len))
381
elif kind == b'd': # Delta
382
delta_content = self._content[pos:pos + content_len]
354
384
# The first entry in a delta is the decompressed length
355
385
decomp_len, delta_pos = decode_base128_int(delta_content)
356
result.append(('d', content_len, decomp_len, delta_info))
386
result.append((b'd', content_len, decomp_len, delta_info))
358
388
while delta_pos < content_len:
359
c = ord(delta_content[delta_pos])
389
c = delta_content[delta_pos]
363
393
delta_pos) = decode_copy_instruction(delta_content, c,
366
text = self._content[offset:offset+length]
367
delta_info.append(('c', offset, length, text))
396
text = self._content[offset:offset + length]
397
delta_info.append((b'c', offset, length, text))
369
delta_info.append(('c', offset, length))
399
delta_info.append((b'c', offset, length))
370
400
measured_len += length
373
txt = delta_content[delta_pos:delta_pos+c]
403
txt = delta_content[delta_pos:delta_pos + c]
376
delta_info.append(('i', c, txt))
406
delta_info.append((b'i', c, txt))
377
407
measured_len += c
379
409
if delta_pos != content_len:
422
453
def __repr__(self):
423
454
return '%s(%s, first=%s)' % (self.__class__.__name__,
424
self.key, self._first)
455
self.key, self._first)
457
def _extract_bytes(self):
458
# Grab and cache the raw bytes for this entry
459
# and break the ref-cycle with _manager since we don't need it
462
self._manager._prepare_for_extract()
463
except zlib.error as value:
464
raise DecompressCorruption("zlib: " + str(value))
465
block = self._manager._block
466
self._chunks = block.extract(self.key, self._start, self._end)
467
# There are code paths that first extract as fulltext, and then
468
# extract as storage_kind (smart fetch). So we don't break the
469
# refcycle here, but instead in manager.get_record_stream()
426
471
def get_bytes_as(self, storage_kind):
427
472
if storage_kind == self.storage_kind:
429
474
# wire bytes, something...
430
475
return self._manager._wire_bytes()
433
if storage_kind in ('fulltext', 'chunked'):
434
if self._bytes is None:
435
# Grab and cache the raw bytes for this entry
436
# and break the ref-cycle with _manager since we don't need it
438
self._manager._prepare_for_extract()
439
block = self._manager._block
440
self._bytes = block.extract(self.key, self._start, self._end)
441
# There are code paths that first extract as fulltext, and then
442
# extract as storage_kind (smart fetch). So we don't break the
443
# refcycle here, but instead in manager.get_record_stream()
478
if storage_kind in ('fulltext', 'chunked', 'lines'):
479
if self._chunks is None:
480
self._extract_bytes()
444
481
if storage_kind == 'fulltext':
482
return b''.join(self._chunks)
483
elif storage_kind == 'chunked':
448
raise errors.UnavailableRepresentation(self.key, storage_kind,
486
return osutils.chunks_to_lines(self._chunks)
487
raise UnavailableRepresentation(self.key, storage_kind,
490
def iter_bytes_as(self, storage_kind):
491
if self._chunks is None:
492
self._extract_bytes()
493
if storage_kind == 'chunked':
494
return iter(self._chunks)
495
elif storage_kind == 'lines':
496
return iter(osutils.chunks_to_lines(self._chunks))
497
raise UnavailableRepresentation(self.key, storage_kind,
449
498
self.storage_kind)
452
501
class _LazyGroupContentManager(object):
453
502
"""This manages a group of _LazyGroupCompressFactory objects."""
455
_max_cut_fraction = 0.75 # We allow a block to be trimmed to 75% of
456
# current size, and still be considered
458
_full_block_size = 4*1024*1024
459
_full_mixed_block_size = 2*1024*1024
460
_full_enough_block_size = 3*1024*1024 # size at which we won't repack
461
_full_enough_mixed_block_size = 2*768*1024 # 1.5MB
504
_max_cut_fraction = 0.75 # We allow a block to be trimmed to 75% of
505
# current size, and still be considered
507
_full_block_size = 4 * 1024 * 1024
508
_full_mixed_block_size = 2 * 1024 * 1024
509
_full_enough_block_size = 3 * 1024 * 1024 # size at which we won't repack
510
_full_enough_mixed_block_size = 2 * 768 * 1024 # 1.5MB
463
def __init__(self, block):
512
def __init__(self, block, get_compressor_settings=None):
464
513
self._block = block
465
514
# We need to preserve the ordering
466
515
self._factories = []
467
516
self._last_byte = 0
517
self._get_settings = get_compressor_settings
518
self._compressor_settings = None
520
def _get_compressor_settings(self):
521
if self._compressor_settings is not None:
522
return self._compressor_settings
524
if self._get_settings is not None:
525
settings = self._get_settings()
527
vf = GroupCompressVersionedFiles
528
settings = vf._DEFAULT_COMPRESSOR_SETTINGS
529
self._compressor_settings = settings
530
return self._compressor_settings
469
532
def add_factory(self, key, parents, start, end):
470
533
if not self._factories:
503
566
new_block.set_content(self._block._content[:last_byte])
504
567
self._block = new_block
569
def _make_group_compressor(self):
570
return GroupCompressor(self._get_compressor_settings())
506
572
def _rebuild_block(self):
507
573
"""Create a new GroupCompressBlock with only the referenced texts."""
508
compressor = GroupCompressor()
574
compressor = self._make_group_compressor()
509
575
tstart = time.time()
510
576
old_length = self._block._content_length
512
578
for factory in self._factories:
513
bytes = factory.get_bytes_as('fulltext')
579
chunks = factory.get_bytes_as('chunked')
580
chunks_len = factory.size
581
if chunks_len is None:
582
chunks_len = sum(map(len, chunks))
514
583
(found_sha1, start_point, end_point,
515
type) = compressor.compress(factory.key, bytes, factory.sha1)
584
type) = compressor.compress(
585
factory.key, chunks, chunks_len, factory.sha1)
516
586
# Now update this factory with the new offsets, etc
517
587
factory.sha1 = found_sha1
518
588
factory._start = start_point
662
737
# 1 line for end byte
663
738
header_lines = []
664
739
for factory in self._factories:
665
key_bytes = '\x00'.join(factory.key)
740
key_bytes = b'\x00'.join(factory.key)
666
741
parents = factory.parents
667
742
if parents is None:
668
parent_bytes = 'None:'
743
parent_bytes = b'None:'
670
parent_bytes = '\t'.join('\x00'.join(key) for key in parents)
671
record_header = '%s\n%s\n%d\n%d\n' % (
745
parent_bytes = b'\t'.join(b'\x00'.join(key) for key in parents)
746
record_header = b'%s\n%s\n%d\n%d\n' % (
672
747
key_bytes, parent_bytes, factory._start, factory._end)
673
748
header_lines.append(record_header)
674
749
# TODO: Can we break the refcycle at this point and set
675
750
# factory._manager = None?
676
header_bytes = ''.join(header_lines)
751
header_bytes = b''.join(header_lines)
678
753
header_bytes_len = len(header_bytes)
679
754
z_header_bytes = zlib.compress(header_bytes)
681
756
z_header_bytes_len = len(z_header_bytes)
682
block_bytes = self._block.to_bytes()
683
lines.append('%d\n%d\n%d\n' % (z_header_bytes_len, header_bytes_len,
757
block_bytes_len, block_chunks = self._block.to_chunks()
758
lines.append(b'%d\n%d\n%d\n' % (
759
z_header_bytes_len, header_bytes_len, block_bytes_len))
685
760
lines.append(z_header_bytes)
686
lines.append(block_bytes)
687
del z_header_bytes, block_bytes
688
return ''.join(lines)
761
lines.extend(block_chunks)
762
del z_header_bytes, block_chunks
763
# TODO: This is a point where we will double the memory consumption. To
764
# avoid this, we probably have to switch to a 'chunked' api
765
return b''.join(lines)
691
768
def from_bytes(cls, bytes):
692
769
# TODO: This does extra string copying, probably better to do it a
770
# different way. At a minimum this creates 2 copies of the
694
772
(storage_kind, z_header_len, header_len,
695
block_len, rest) = bytes.split('\n', 4)
773
block_len, rest) = bytes.split(b'\n', 4)
697
if storage_kind != 'groupcompress-block':
775
if storage_kind != b'groupcompress-block':
698
776
raise ValueError('Unknown storage kind: %s' % (storage_kind,))
699
777
z_header_len = int(z_header_len)
700
778
if len(rest) < z_header_len:
723
801
block = GroupCompressBlock.from_bytes(block_bytes)
725
803
result = cls(block)
726
for start in xrange(0, len(header_lines), 4):
804
for start in range(0, len(header_lines), 4):
728
key = tuple(header_lines[start].split('\x00'))
729
parents_line = header_lines[start+1]
730
if parents_line == 'None:':
806
key = tuple(header_lines[start].split(b'\x00'))
807
parents_line = header_lines[start + 1]
808
if parents_line == b'None:':
733
parents = tuple([tuple(segment.split('\x00'))
734
for segment in parents_line.split('\t')
736
start_offset = int(header_lines[start+2])
737
end_offset = int(header_lines[start+3])
811
parents = tuple([tuple(segment.split(b'\x00'))
812
for segment in parents_line.split(b'\t')
814
start_offset = int(header_lines[start + 2])
815
end_offset = int(header_lines[start + 3])
738
816
result.add_factory(key, parents, start_offset, end_offset)
749
827
class _CommonGroupCompressor(object):
829
def __init__(self, settings=None):
752
830
"""Create a GroupCompressor."""
754
832
self._last = None
755
833
self.endpoint = 0
756
834
self.input_bytes = 0
757
835
self.labels_deltas = {}
758
self._delta_index = None # Set by the children
836
self._delta_index = None # Set by the children
759
837
self._block = GroupCompressBlock()
841
self._settings = settings
761
def compress(self, key, bytes, expected_sha, nostore_sha=None, soft=False):
843
def compress(self, key, chunks, length, expected_sha, nostore_sha=None,
762
845
"""Compress lines with label key.
764
847
:param key: A key tuple. It is stored in the output
765
848
for identification of the text during decompression. If the last
766
element is 'None' it is replaced with the sha1 of the text -
849
element is b'None' it is replaced with the sha1 of the text -
767
850
e.g. sha1:xxxxxxx.
768
:param bytes: The bytes to be compressed
851
:param chunks: Chunks of bytes to be compressed
852
:param length: Length of chunks
769
853
:param expected_sha: If non-None, the sha the lines are believed to
770
854
have. During compression the sha is calculated; a mismatch will
780
864
:seealso VersionedFiles.add_lines:
782
if not bytes: # empty, like a dir entry, etc
866
if length == 0: # empty, like a dir entry, etc
783
867
if nostore_sha == _null_sha1:
784
raise errors.ExistingContent()
868
raise ExistingContent()
785
869
return _null_sha1, 0, 0, 'fulltext'
786
870
# we assume someone knew what they were doing when they passed it in
787
871
if expected_sha is not None:
788
872
sha1 = expected_sha
790
sha1 = osutils.sha_string(bytes)
874
sha1 = osutils.sha_strings(chunks)
791
875
if nostore_sha is not None:
792
876
if sha1 == nostore_sha:
793
raise errors.ExistingContent()
877
raise ExistingContent()
794
878
if key[-1] is None:
795
key = key[:-1] + ('sha1:' + sha1,)
879
key = key[:-1] + (b'sha1:' + sha1,)
797
start, end, type = self._compress(key, bytes, len(bytes) / 2, soft)
881
start, end, type = self._compress(key, chunks, length, length / 2, soft)
798
882
return sha1, start, end, type
800
def _compress(self, key, bytes, max_delta_size, soft=False):
884
def _compress(self, key, chunks, input_len, max_delta_size, soft=False):
801
885
"""Compress lines with label key.
803
887
:param key: A key tuple. It is stored in the output for identification
804
888
of the text during decompression.
806
:param bytes: The bytes to be compressed
890
:param chunks: The chunks of bytes to be compressed
892
:param input_len: The length of the chunks
808
894
:param max_delta_size: The size above which we issue a fulltext instead
820
906
"""Extract a key previously added to the compressor.
822
908
:param key: The key to extract.
823
:return: An iterable over bytes and the sha1.
909
:return: An iterable over chunks and the sha1.
825
(start_byte, start_chunk, end_byte, end_chunk) = self.labels_deltas[key]
911
(start_byte, start_chunk, end_byte,
912
end_chunk) = self.labels_deltas[key]
826
913
delta_chunks = self.chunks[start_chunk:end_chunk]
827
stored_bytes = ''.join(delta_chunks)
828
if stored_bytes[0] == 'f':
914
stored_bytes = b''.join(delta_chunks)
915
kind = stored_bytes[:1]
829
917
fulltext_len, offset = decode_base128_int(stored_bytes[1:10])
830
918
data_len = fulltext_len + 1 + offset
831
if data_len != len(stored_bytes):
919
if data_len != len(stored_bytes):
832
920
raise ValueError('Index claimed fulltext len, but stored bytes'
833
921
' claim %s != %s'
834
922
% (len(stored_bytes), data_len))
835
bytes = stored_bytes[offset + 1:]
923
data = [stored_bytes[offset + 1:]]
926
raise ValueError('Unknown content kind, bytes claim %s' % kind)
837
927
# XXX: This is inefficient at best
838
source = ''.join(self.chunks[:start_chunk])
839
if stored_bytes[0] != 'd':
840
raise ValueError('Unknown content kind, bytes claim %s'
841
% (stored_bytes[0],))
928
source = b''.join(self.chunks[:start_chunk])
842
929
delta_len, offset = decode_base128_int(stored_bytes[1:10])
843
930
data_len = delta_len + 1 + offset
844
931
if data_len != len(stored_bytes):
845
932
raise ValueError('Index claimed delta len, but stored bytes'
846
933
' claim %s != %s'
847
934
% (len(stored_bytes), data_len))
848
bytes = apply_delta(source, stored_bytes[offset + 1:])
849
bytes_sha1 = osutils.sha_string(bytes)
850
return bytes, bytes_sha1
935
data = [apply_delta(source, stored_bytes[offset + 1:])]
936
data_sha1 = osutils.sha_strings(data)
937
return data, data_sha1
853
940
"""Finish this group, creating a formatted stream.
855
942
After calling this, the compressor should no longer be used
857
# TODO: this causes us to 'bloat' to 2x the size of content in the
858
# group. This has an impact for 'commit' of large objects.
859
# One possibility is to use self._content_chunks, and be lazy and
860
# only fill out self._content as a full string when we actually
861
# need it. That would at least drop the peak memory consumption
862
# for 'commit' down to ~1x the size of the largest file, at a
863
# cost of increased complexity within this code. 2x is still <<
864
# 3x the size of the largest file, so we are doing ok.
865
944
self._block.set_chunked_content(self.chunks, self.endpoint)
866
945
self.chunks = None
867
946
self._delta_index = None
886
965
class PythonGroupCompressor(_CommonGroupCompressor):
967
def __init__(self, settings=None):
889
968
"""Create a GroupCompressor.
891
970
Used only if the pyrex version is not available.
893
super(PythonGroupCompressor, self).__init__()
972
super(PythonGroupCompressor, self).__init__(settings)
894
973
self._delta_index = LinesDeltaIndex([])
895
974
# The actual content is managed by LinesDeltaIndex
896
975
self.chunks = self._delta_index.lines
898
def _compress(self, key, bytes, max_delta_size, soft=False):
977
def _compress(self, key, chunks, input_len, max_delta_size, soft=False):
899
978
"""see _CommonGroupCompressor._compress"""
900
input_len = len(bytes)
901
new_lines = osutils.split_lines(bytes)
979
new_lines = osutils.chunks_to_lines(chunks)
902
980
out_lines, index_lines = self._delta_index.make_delta(
903
981
new_lines, bytes_length=input_len, soft=soft)
904
982
delta_length = sum(map(len, out_lines))
905
983
if delta_length > max_delta_size:
906
984
# The delta is longer than the fulltext, insert a fulltext
907
985
type = 'fulltext'
908
out_lines = ['f', encode_base128_int(input_len)]
986
out_lines = [b'f', encode_base128_int(input_len)]
909
987
out_lines.extend(new_lines)
910
988
index_lines = [False, False]
911
989
index_lines.extend([True] * len(new_lines))
913
991
# this is a worthy delta, output it
916
994
# Update the delta_length to include those two encoded integers
917
995
out_lines[1] = encode_base128_int(delta_length)
918
996
# Before insertion
934
1012
It contains code very similar to SequenceMatcher because of having a similar
935
1013
task. However some key differences apply:
936
- there is no junk, we want a minimal edit not a human readable diff.
937
- we don't filter very common lines (because we don't know where a good
938
range will start, and after the first text we want to be emitting minmal
940
- we chain the left side, not the right side
941
- we incrementally update the adjacency matrix as new lines are provided.
942
- we look for matches in all of the left side, so the routine which does
943
the analagous task of find_longest_match does not need to filter on the
1015
* there is no junk, we want a minimal edit not a human readable diff.
1016
* we don't filter very common lines (because we don't know where a good
1017
range will start, and after the first text we want to be emitting minmal
1019
* we chain the left side, not the right side
1020
* we incrementally update the adjacency matrix as new lines are provided.
1021
* we look for matches in all of the left side, so the routine which does
1022
the analagous task of find_longest_match does not need to filter on the
948
super(PyrexGroupCompressor, self).__init__()
949
self._delta_index = DeltaIndex()
1026
def __init__(self, settings=None):
1027
super(PyrexGroupCompressor, self).__init__(settings)
1028
max_bytes_to_index = self._settings.get('max_bytes_to_index', 0)
1029
self._delta_index = DeltaIndex(max_bytes_to_index=max_bytes_to_index)
951
def _compress(self, key, bytes, max_delta_size, soft=False):
1031
def _compress(self, key, chunks, input_len, max_delta_size, soft=False):
952
1032
"""see _CommonGroupCompressor._compress"""
953
input_len = len(bytes)
954
1033
# By having action/label/sha1/len, we can parse the group if the index
955
1034
# was ever destroyed, we have the key in 'label', we know the final
956
1035
# bytes are valid from sha1, and we know where to find the end of this
962
1041
# new_chunks = ['label:%s\nsha1:%s\n' % (label, sha1)]
963
1042
if self._delta_index._source_offset != self.endpoint:
964
1043
raise AssertionError('_source_offset != endpoint'
965
' somehow the DeltaIndex got out of sync with'
1044
' somehow the DeltaIndex got out of sync with'
1045
' the output lines')
1046
bytes = b''.join(chunks)
967
1047
delta = self._delta_index.make_delta(bytes, max_delta_size)
969
1049
type = 'fulltext'
970
enc_length = encode_base128_int(len(bytes))
1050
enc_length = encode_base128_int(input_len)
971
1051
len_mini_header = 1 + len(enc_length)
972
1052
self._delta_index.add_source(bytes, len_mini_header)
973
new_chunks = ['f', enc_length, bytes]
1053
new_chunks = [b'f', enc_length] + chunks
976
1056
enc_length = encode_base128_int(len(delta))
977
1057
len_mini_header = 1 + len(enc_length)
978
new_chunks = ['d', enc_length, delta]
1058
new_chunks = [b'd', enc_length, delta]
979
1059
self._delta_index.add_delta_source(delta, len_mini_header)
980
1060
# Before insertion
981
1061
start = self.endpoint
1022
1102
graph_index = BTreeBuilder(reference_lists=ref_length,
1023
key_elements=keylength)
1103
key_elements=keylength)
1024
1104
stream = transport.open_write_stream('newpack')
1025
1105
writer = pack.ContainerWriter(stream.write)
1027
index = _GCGraphIndex(graph_index, lambda:True, parents=parents,
1028
add_callback=graph_index.add_nodes,
1029
inconsistency_fatal=inconsistency_fatal)
1030
access = knit._DirectPackAccess({})
1107
index = _GCGraphIndex(graph_index, lambda: True, parents=parents,
1108
add_callback=graph_index.add_nodes,
1109
inconsistency_fatal=inconsistency_fatal)
1110
access = pack_repo._DirectPackAccess({})
1031
1111
access.set_writer(writer, graph_index, (transport, 'newpack'))
1032
1112
result = GroupCompressVersionedFiles(index, access, delta)
1033
1113
result.stream = stream
1149
1231
self.total_bytes = 0
1152
class GroupCompressVersionedFiles(VersionedFiles):
1234
class GroupCompressVersionedFiles(VersionedFilesWithFallbacks):
1153
1235
"""A group-compress based VersionedFiles implementation."""
1155
def __init__(self, index, access, delta=True, _unadded_refs=None):
1237
# This controls how the GroupCompress DeltaIndex works. Basically, we
1238
# compute hash pointers into the source blocks (so hash(text) => text).
1239
# However each of these references costs some memory in trade against a
1240
# more accurate match result. For very large files, they either are
1241
# pre-compressed and change in bulk whenever they change, or change in just
1242
# local blocks. Either way, 'improved resolution' is not very helpful,
1243
# versus running out of memory trying to track everything. The default max
1244
# gives 100% sampling of a 1MB file.
1245
_DEFAULT_MAX_BYTES_TO_INDEX = 1024 * 1024
1246
_DEFAULT_COMPRESSOR_SETTINGS = {'max_bytes_to_index':
1247
_DEFAULT_MAX_BYTES_TO_INDEX}
1249
def __init__(self, index, access, delta=True, _unadded_refs=None,
1156
1251
"""Create a GroupCompressVersionedFiles object.
1158
1253
:param index: The index object storing access and graph data.
1159
1254
:param access: The access object storing raw data.
1160
1255
:param delta: Whether to delta compress or just entropy compress.
1161
1256
:param _unadded_refs: private parameter, don't use.
1257
:param _group_cache: private parameter, don't use.
1163
1259
self._index = index
1164
1260
self._access = access
1166
1262
if _unadded_refs is None:
1167
1263
_unadded_refs = {}
1168
1264
self._unadded_refs = _unadded_refs
1169
self._group_cache = LRUSizeCache(max_size=50*1024*1024)
1170
self._fallback_vfs = []
1265
if _group_cache is None:
1266
_group_cache = LRUSizeCache(max_size=50 * 1024 * 1024)
1267
self._group_cache = _group_cache
1268
self._immediate_fallback_vfs = []
1269
self._max_bytes_to_index = None
1172
1271
def without_fallbacks(self):
1173
1272
"""Return a clone of this object without any fallbacks configured."""
1174
1273
return GroupCompressVersionedFiles(self._index, self._access,
1175
self._delta, _unadded_refs=dict(self._unadded_refs))
1274
self._delta, _unadded_refs=dict(
1275
self._unadded_refs),
1276
_group_cache=self._group_cache)
1177
1278
def add_lines(self, key, parents, lines, parent_texts=None,
1178
left_matching_blocks=None, nostore_sha=None, random_id=False,
1179
check_content=True):
1279
left_matching_blocks=None, nostore_sha=None, random_id=False,
1280
check_content=True):
1180
1281
"""Add a text to the store.
1182
1283
:param key: The key tuple of the text to add.
1183
1284
:param parents: The parents key tuples of the text to add.
1184
1285
:param lines: A list of lines. Each line must be a bytestring. And all
1185
of them except the last must be terminated with \n and contain no
1186
other \n's. The last line may either contain no \n's or a single
1187
terminating \n. If the lines list does meet this constraint the add
1188
routine may error or may succeed - but you will be unable to read
1189
the data back accurately. (Checking the lines have been split
1286
of them except the last must be terminated with \\n and contain no
1287
other \\n's. The last line may either contain no \\n's or a single
1288
terminating \\n. If the lines list does meet this constraint the
1289
add routine may error or may succeed - but you will be unable to
1290
read the data back accurately. (Checking the lines have been split
1190
1291
correctly is expensive and extremely unlikely to catch bugs so it
1191
1292
is not done at runtime unless check_content is True.)
1192
1293
:param parent_texts: An optional dictionary containing the opaque
1211
1312
back to future add_lines calls in the parent_texts dictionary.
1213
1314
self._index._check_write_ok()
1214
self._check_add(key, lines, random_id, check_content)
1216
# The caller might pass None if there is no graph data, but kndx
1217
# indexes can't directly store that, so we give them
1218
# an empty tuple instead.
1220
# double handling for now. Make it work until then.
1221
length = sum(map(len, lines))
1222
record = ChunkedContentFactory(key, parents, None, lines)
1223
sha1 = list(self._insert_record_stream([record], random_id=random_id,
1224
nostore_sha=nostore_sha))[0]
1225
return sha1, length, None
1227
def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
1228
"""See VersionedFiles._add_text()."""
1316
self._check_lines_not_unicode(lines)
1317
self._check_lines_are_lines(lines)
1318
return self.add_content(
1319
ChunkedContentFactory(
1320
key, parents, osutils.sha_strings(lines), lines, chunks_are_lines=True),
1321
parent_texts, left_matching_blocks, nostore_sha, random_id)
1323
def add_content(self, factory, parent_texts=None,
1324
left_matching_blocks=None, nostore_sha=None,
1326
"""Add a text to the store.
1328
:param factory: A ContentFactory that can be used to retrieve the key,
1329
parents and contents.
1330
:param parent_texts: An optional dictionary containing the opaque
1331
representations of some or all of the parents of version_id to
1332
allow delta optimisations. VERY IMPORTANT: the texts must be those
1333
returned by add_lines or data corruption can be caused.
1334
:param left_matching_blocks: a hint about which areas are common
1335
between the text and its left-hand-parent. The format is
1336
the SequenceMatcher.get_matching_blocks format.
1337
:param nostore_sha: Raise ExistingContent and do not add the lines to
1338
the versioned file if the digest of the lines matches this.
1339
:param random_id: If True a random id has been selected rather than
1340
an id determined by some deterministic process such as a converter
1341
from a foreign VCS. When True the backend may choose not to check
1342
for uniqueness of the resulting key within the versioned file, so
1343
this should only be done when the result is expected to be unique
1345
:return: The text sha1, the number of bytes in the text, and an opaque
1346
representation of the inserted version which can be provided
1347
back to future add_lines calls in the parent_texts dictionary.
1229
1349
self._index._check_write_ok()
1230
self._check_add(key, None, random_id, check_content=False)
1231
if text.__class__ is not str:
1232
raise errors.BzrBadParameterUnicode("text")
1350
parents = factory.parents
1351
self._check_add(factory.key, random_id)
1233
1352
if parents is None:
1234
1353
# The caller might pass None if there is no graph data, but kndx
1235
1354
# indexes can't directly store that, so we give them
1236
1355
# an empty tuple instead.
1238
1357
# double handling for now. Make it work until then.
1240
record = FulltextContentFactory(key, parents, None, text)
1241
sha1 = list(self._insert_record_stream([record], random_id=random_id,
1242
nostore_sha=nostore_sha))[0]
1358
sha1, length = list(self._insert_record_stream(
1359
[factory], random_id=random_id, nostore_sha=nostore_sha))[0]
1243
1360
return sha1, length, None
1245
1362
def add_fallback_versioned_files(self, a_versioned_files):
1283
1401
# probably check that the existing content is identical to what is
1284
1402
# being inserted, and otherwise raise an exception. This would make
1285
1403
# the bundle code simpler.
1287
self._check_lines_not_unicode(lines)
1288
self._check_lines_are_lines(lines)
1290
def get_known_graph_ancestry(self, keys):
1291
"""Get a KnownGraph instance with the ancestry of keys."""
1292
# Note that this is identical to
1293
# KnitVersionedFiles.get_known_graph_ancestry, but they don't share
1295
parent_map, missing_keys = self._index.find_ancestry(keys)
1296
for fallback in self._fallback_vfs:
1297
if not missing_keys:
1299
(f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
1301
parent_map.update(f_parent_map)
1302
missing_keys = f_missing_keys
1303
kg = _mod_graph.KnownGraph(parent_map)
1306
1405
def get_parent_map(self, keys):
1307
1406
"""Get a map of the graph parents of keys.
1534
1634
key_to_source_map)
1535
1635
elif ordering == 'as-requested':
1536
1636
source_keys = self._get_as_requested_source_keys(orig_keys,
1537
locations, unadded_keys, key_to_source_map)
1637
locations, unadded_keys, key_to_source_map)
1539
1639
# We want to yield the keys in a semi-optimal (read-wise) ordering.
1540
1640
# Otherwise we thrash the _group_cache and destroy performance
1541
1641
source_keys = self._get_io_ordered_source_keys(locations,
1542
unadded_keys, source_result)
1642
unadded_keys, source_result)
1543
1643
for key in missing:
1544
1644
yield AbsentContentFactory(key)
1545
1645
# Batch up as many keys as we can until either:
1546
1646
# - we encounter an unadded ref, or
1547
1647
# - we run out of keys, or
1548
1648
# - the total bytes to retrieve for this batch > BATCH_SIZE
1549
batcher = _BatchingBlockFetcher(self, locations)
1649
batcher = _BatchingBlockFetcher(self, locations,
1650
get_compressor_settings=self._get_compressor_settings)
1550
1651
for source, keys in source_keys:
1551
1652
if source is self:
1552
1653
for key in keys:
1595
1696
# test_insert_record_stream_existing_keys fail for groupcompress and
1596
1697
# groupcompress-nograph, this needs to be revisited while addressing
1597
1698
# 'bzr branch' performance issues.
1598
for _ in self._insert_record_stream(stream, random_id=False):
1699
for _, _ in self._insert_record_stream(stream, random_id=False):
1702
def _get_compressor_settings(self):
    """Return the settings dict used to build a group compressor.

    On first use the 'bzr.groupcompress.max_bytes_to_index' user option is
    read from the global config and cached on ``self._max_bytes_to_index``;
    later calls reuse the cached value.

    :return: A dict of the form ``{'max_bytes_to_index': <value>}``.
    """
    if self._max_bytes_to_index is None:
        # TODO: VersionedFiles don't know about their containing
        #       repository, so they don't have much of an idea about their
        #       location. So for now, this is only a global option.
        c = config.GlobalConfig()
        val = c.get_user_option('bzr.groupcompress.max_bytes_to_index')
        # NOTE(review): the int() conversion and the None fallback guard
        # were elided in the reviewed view and are restored from the visible
        # except/warning structure; confirm against the repository copy.
        if val is not None:
            try:
                val = int(val)
            except ValueError:
                trace.warning('Value for '
                              '"bzr.groupcompress.max_bytes_to_index"'
                              ' %r is not an integer'
                              % (val,))
                val = None
        if val is None:
            val = self._DEFAULT_MAX_BYTES_TO_INDEX
        self._max_bytes_to_index = val
    return {'max_bytes_to_index': self._max_bytes_to_index}
1723
def _make_group_compressor(self):
    """Return a new GroupCompressor built from _get_compressor_settings()."""
    return GroupCompressor(self._get_compressor_settings())
1601
1726
def _insert_record_stream(self, stream, random_id=False, nostore_sha=None,
1602
1727
reuse_blocks=True):
1603
1728
"""Internal core to insert a record stream into this container.
1627
1753
# This will go up to fulltexts for gc to gc fetching, which isn't
1629
self._compressor = GroupCompressor()
1755
self._compressor = self._make_group_compressor()
1630
1756
self._unadded_refs = {}
1631
1757
keys_to_add = []
1633
bytes = self._compressor.flush().to_bytes()
1634
self._compressor = GroupCompressor()
1635
index, start, length = self._access.add_raw_records(
1636
[(None, len(bytes))], bytes)[0]
1760
bytes_len, chunks = self._compressor.flush().to_chunks()
1761
self._compressor = self._make_group_compressor()
1762
# Note: At this point we still have 1 copy of the fulltext (in
1763
# record and the var 'bytes'), and this generates 2 copies of
1764
# the compressed text (one for bytes, one in chunks)
1765
# TODO: Figure out how to indicate that we would be happy to free
1766
# the fulltext content at this point. Note that sometimes we
1767
# will want it later (streaming CHK pages), but most of the
1768
# time we won't (everything else)
1769
index, start, length = self._access.add_raw_record(
1770
None, bytes_len, chunks)
1638
1772
for key, reads, refs in keys_to_add:
1639
nodes.append((key, "%d %d %s" % (start, length, reads), refs))
1773
nodes.append((key, b"%d %d %s" % (start, length, reads), refs))
1640
1774
self._index.add_records(nodes, random_id=random_id)
1641
1775
self._unadded_refs = {}
1642
1776
del keys_to_add[:]
1689
1822
raise AssertionError('No insert_manager set')
1690
1823
if insert_manager is not record._manager:
1691
1824
raise AssertionError('insert_manager does not match'
1692
' the current record, we cannot be positive'
1693
' that the appropriate content was inserted.'
1695
value = "%d %d %d %d" % (block_start, block_length,
1696
record._start, record._end)
1825
' the current record, we cannot be positive'
1826
' that the appropriate content was inserted.'
1828
value = b"%d %d %d %d" % (block_start, block_length,
1829
record._start, record._end)
1697
1830
nodes = [(record.key, value, (record.parents,))]
1698
1831
# TODO: Consider buffering up many nodes to be added, not
1699
1832
# sure how much overhead this has, but we're seeing
1701
1834
self._index.add_records(nodes, random_id=random_id)
1704
bytes = record.get_bytes_as('fulltext')
1705
except errors.UnavailableRepresentation:
1706
adapter_key = record.storage_kind, 'fulltext'
1837
chunks = record.get_bytes_as('chunked')
1838
except UnavailableRepresentation:
1839
adapter_key = record.storage_kind, 'chunked'
1707
1840
adapter = get_adapter(adapter_key)
1708
bytes = adapter.get_bytes(record)
1841
chunks = adapter.get_bytes(record, 'chunked')
1842
chunks_len = record.size
1843
if chunks_len is None:
1844
chunks_len = sum(map(len, chunks))
1709
1845
if len(record.key) > 1:
1710
1846
prefix = record.key[0]
1711
1847
soft = (prefix == last_prefix)
1715
if max_fulltext_len < len(bytes):
1716
max_fulltext_len = len(bytes)
1851
if max_fulltext_len < chunks_len:
1852
max_fulltext_len = chunks_len
1717
1853
max_fulltext_prefix = prefix
1718
1854
(found_sha1, start_point, end_point,
1719
type) = self._compressor.compress(record.key,
1720
bytes, record.sha1, soft=soft,
1721
nostore_sha=nostore_sha)
1722
# delta_ratio = float(len(bytes)) / (end_point - start_point)
1855
type) = self._compressor.compress(
1856
record.key, chunks, chunks_len, record.sha1, soft=soft,
1857
nostore_sha=nostore_sha)
1858
# delta_ratio = float(chunks_len) / (end_point - start_point)
1723
1859
# Check if we want to continue to include that text
1724
1860
if (prefix == max_fulltext_prefix
1725
and end_point < 2 * max_fulltext_len):
1861
and end_point < 2 * max_fulltext_len):
1726
1862
# As long as we are on the same file_id, we will fill at least
1727
1863
# 2 * max_fulltext_len
1728
1864
start_new_block = False
1729
elif end_point > 4*1024*1024:
1865
elif end_point > 4 * 1024 * 1024:
1730
1866
start_new_block = True
1731
1867
elif (prefix is not None and prefix != last_prefix
1732
and end_point > 2*1024*1024):
1868
and end_point > 2 * 1024 * 1024):
1733
1869
start_new_block = True
1735
1871
start_new_block = False
1737
1873
if start_new_block:
1738
1874
self._compressor.pop_last()
1740
max_fulltext_len = len(bytes)
1876
max_fulltext_len = chunks_len
1741
1877
(found_sha1, start_point, end_point,
1742
type) = self._compressor.compress(record.key, bytes,
1878
type) = self._compressor.compress(
1879
record.key, chunks, chunks_len, record.sha1)
1744
1880
if record.key[-1] is None:
1745
key = record.key[:-1] + ('sha1:' + found_sha1,)
1881
key = record.key[:-1] + (b'sha1:' + found_sha1,)
1747
1883
key = record.key
1748
1884
self._unadded_refs[key] = record.parents
1885
yield found_sha1, chunks_len
1750
1886
as_st = static_tuple.StaticTuple.from_sequence
1751
1887
if record.parents is not None:
1752
1888
parents = as_st([as_st(p) for p in record.parents])
1755
1891
refs = static_tuple.StaticTuple(parents)
1756
keys_to_add.append((key, '%d %d' % (start_point, end_point), refs))
1893
(key, b'%d %d' % (start_point, end_point), refs))
1757
1894
if len(keys_to_add):
1759
1896
self._compressor = None
1785
1922
# but we need to setup a list of records to visit.
1786
1923
# we need key, position, length
1787
1924
for key_idx, record in enumerate(self.get_record_stream(keys,
1788
'unordered', True)):
1925
'unordered', True)):
1789
1926
# XXX: todo - optimise to use less than full texts.
1790
1927
key = record.key
1791
1928
if pb is not None:
1792
1929
pb.update('Walking content', key_idx, total)
1793
1930
if record.storage_kind == 'absent':
1794
1931
raise errors.RevisionNotPresent(key, self)
1795
lines = osutils.split_lines(record.get_bytes_as('fulltext'))
1932
for line in record.iter_bytes_as('lines'):
1797
1933
yield line, key
1798
1934
if pb is not None:
1799
1935
pb.update('Walking content', total, total)
1802
1938
"""See VersionedFiles.keys."""
1803
1939
if 'evil' in debug.debug_flags:
1804
1940
trace.mutter_callsite(2, "keys scales with size of history")
1805
sources = [self._index] + self._fallback_vfs
1941
sources = [self._index] + self._immediate_fallback_vfs
1807
1943
for source in sources:
1808
1944
result.update(source.keys())
1948
class _GCBuildDetails(object):
1949
"""A blob of data about the build details.
1951
This stores the minimal data, which then allows compatibility with the old
1952
api, without taking as much memory.
1955
__slots__ = ('_index', '_group_start', '_group_end', '_basis_end',
1956
'_delta_end', '_parents')
1959
compression_parent = None
1961
def __init__(self, parents, position_info):
1962
self._parents = parents
1963
(self._index, self._group_start, self._group_end, self._basis_end,
1964
self._delta_end) = position_info
1967
return '%s(%s, %s)' % (self.__class__.__name__,
1968
self.index_memo, self._parents)
1971
def index_memo(self):
1972
return (self._index, self._group_start, self._group_end,
1973
self._basis_end, self._delta_end)
1976
def record_details(self):
1977
return static_tuple.StaticTuple(self.method, None)
1979
def __getitem__(self, offset):
1980
"""Compatibility thunk to act like a tuple."""
1982
return self.index_memo
1984
return self.compression_parent # Always None
1986
return self._parents
1988
return self.record_details
1990
raise IndexError('offset out of range')
1812
1996
class _GCGraphIndex(object):
1813
1997
"""Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""
1815
1999
def __init__(self, graph_index, is_locked, parents=True,
1816
add_callback=None, track_external_parent_refs=False,
1817
inconsistency_fatal=True, track_new_keys=False):
2000
add_callback=None, track_external_parent_refs=False,
2001
inconsistency_fatal=True, track_new_keys=False):
1818
2002
"""Construct a _GCGraphIndex on a graph_index.
1820
:param graph_index: An implementation of bzrlib.index.GraphIndex.
2004
:param graph_index: An implementation of breezy.index.GraphIndex.
1821
2005
:param is_locked: A callback, returns True if the index is locked and
1823
2007
:param parents: If True, record knits parents, if not do not record
1989
2173
:param keys: An iterable of keys.
1990
2174
:return: A dict of key:
1991
2175
(index_memo, compression_parent, parents, record_details).
1993
opaque structure to pass to read_records to extract the raw
1996
Content that this record is built upon, may be None
1998
Logical parents of this node
2000
extra information about the content which needs to be passed to
2001
Factory.parse_record
2177
* index_memo: opaque structure to pass to read_records to extract
2179
* compression_parent: Content that this record is built upon, may
2181
* parents: Logical parents of this node
2182
* record_details: extra information about the content which needs
2183
to be passed to Factory.parse_record
2003
2185
self._check_read()