                 % (num_bytes, self._content_length))
         # Expand the content if required
         if self._content is None:
+            if self._content_chunks is not None:
+                self._content = ''.join(self._content_chunks)
+                self._content_chunks = None
+        if self._content is None:
             if self._z_content is None:
                 raise AssertionError('No content to decompress')
             if self._z_content == '':
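The hunk above makes `_ensure_content()` aware of chunked storage: a block built from a chunk list is only flattened when a whole string is actually needed. A minimal standalone sketch of the pattern (the class and names here are illustrative, not bzrlib's):

    class LazyContent(object):
        """Hold content as chunks; flatten on first whole-string access."""

        def __init__(self, chunks):
            self._content_chunks = chunks
            self._content = None

        def content(self):
            if self._content is None:
                self._content = ''.join(self._content_chunks)
                self._content_chunks = None  # free the list once flattened
            return self._content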
...
             bytes = apply_delta_to_source(self._content, content_start, end)
...
+
+    def set_chunked_content(self, content_chunks, length):
+        """Set the content of this block to the given chunks."""
+        # If we have lots of short lines, it may be more efficient to join
+        # the content ahead of time. If the content is <10MiB, we don't really
+        # care about the extra memory consumption, so we can just pack it and
+        # be done. However, timing showed 18s => 17.9s for repacking 1k revs of
+        # mysql, which is below the noise margin.
+        self._content_length = length
+        self._content_chunks = content_chunks
+        self._content = None
+        self._z_content = None

     def set_content(self, content):
         """Set the content of this block."""
         self._content_length = len(content)
         self._content = content
         self._z_content = None
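A hedged usage sketch of the two setters (`GroupCompressBlock` is the class these methods belong to; the chunk values are invented for illustration):

    from bzrlib.groupcompress import GroupCompressBlock

    chunks = ['first line\n', 'second line\n']
    block = GroupCompressBlock()
    # Hand over the chunk list without joining it; the caller already
    # knows the total length, so the block never has to compute it.
    block.set_chunked_content(chunks, sum(len(c) for c in chunks))
    # set_content() remains the path for callers holding a single string:
    # block.set_content(''.join(chunks))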
+
+    def _create_z_content_using_lzma(self):
+        if self._content_chunks is not None:
+            self._content = ''.join(self._content_chunks)
+            self._content_chunks = None
+        if self._content is None:
+            raise AssertionError('Nothing to compress')
+        self._z_content = pylzma.compress(self._content)
+        self._z_content_length = len(self._z_content)
+
+    def _create_z_content_from_chunks(self):
+        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)
+        compressed_chunks = map(compressor.compress, self._content_chunks)
+        compressed_chunks.append(compressor.flush())
+        self._z_content = ''.join(compressed_chunks)
+        self._z_content_length = len(self._z_content)
+
+    def _create_z_content(self):
+        if self._z_content is not None:
+            return
+        if _USE_LZMA:
+            self._create_z_content_using_lzma()
+            return
+        if self._content_chunks is not None:
+            self._create_z_content_from_chunks()
+            return
+        self._z_content = zlib.compress(self._content)
+        self._z_content_length = len(self._z_content)
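The chunk-wise path is what lets `flush()` (further down) hand the block a chunk list without joining it first: `zlib.compressobj` streams the pieces and still produces output that decompresses to the joined string. A standalone sketch (Python 2 byte strings, as in bzrlib; note that `map()` returns a list there, which is why the `append()` in the method above works):

    import zlib

    chunks = ['chunk one\n', 'chunk two\n']
    compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)
    compressed = map(compressor.compress, chunks)
    compressed.append(compressor.flush())
    z_content = ''.join(compressed)
    assert zlib.decompress(z_content) == ''.join(chunks)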

     def to_bytes(self):
         """Encode the information into a byte stream."""
-        compress = zlib.compress
-        if _USE_LZMA:
-            compress = pylzma.compress
-        if self._z_content is None:
-            if self._content is None:
-                raise AssertionError('Nothing to compress')
-            self._z_content = compress(self._content)
-            self._z_content_length = len(self._z_content)
+        self._create_z_content()
         if _USE_LZMA:
             header = self.GCB_LZ_HEADER
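With compression factored out, `to_bytes()` reduces to a single call. A hedged round-trip sketch, assuming the existing `GroupCompressBlock.from_bytes()` counterpart accepts this serialized form:

    from bzrlib.groupcompress import GroupCompressBlock

    block = GroupCompressBlock()
    block.set_content('some content\n')
    data = block.to_bytes()    # compresses lazily via _create_z_content()
    copy = GroupCompressBlock.from_bytes(data)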
...
         # for 'commit' down to ~1x the size of the largest file, at a
         # cost of increased complexity within this code. 2x is still <<
         # 3x the size of the largest file, so we are doing ok.
-        content = ''.join(self.chunks)
+        self._block.set_chunked_content(self.chunks, self.endpoint)
         self.chunks = None
         self._delta_index = None
-        self._block.set_content(content)
         return self._block

     def pop_last(self):
...
         self.endpoint = endpoint


-def make_pack_factory(graph, delta, keylength):
+def make_pack_factory(graph, delta, keylength, inconsistency_fatal=True):
     """Create a factory for creating a pack based groupcompress.

     This is only functional enough to run interface tests, it doesn't try to
     provide a full pack environment.
...
         writer = pack.ContainerWriter(stream.write)
         index = _GCGraphIndex(graph_index, lambda:True, parents=parents,
-            add_callback=graph_index.add_nodes)
+            add_callback=graph_index.add_nodes,
+            inconsistency_fatal=inconsistency_fatal)
         access = knit._DirectPackAccess({})
         access.set_writer(writer, graph_index, (transport, 'newpack'))
         result = GroupCompressVersionedFiles(index, access, delta)
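A sketch of how a caller might opt out of strict checking through the new parameter (hedged: the memory transport and argument values are illustrative, patterned on how interface tests drive this factory):

    from bzrlib import transport
    from bzrlib.groupcompress import make_pack_factory

    trans = transport.get_transport('memory:///')
    factory = make_pack_factory(graph=True, delta=False, keylength=1,
                                inconsistency_fatal=False)
    vf = factory(trans)    # a GroupCompressVersionedFiles instance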
...
             nostore_sha=nostore_sha))[0]
         return sha1, length, None

+    def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
+        """See VersionedFiles._add_text()."""
+        self._index._check_write_ok()
+        self._check_add(key, None, random_id, check_content=False)
+        if text.__class__ is not str:
+            raise errors.BzrBadParameterUnicode("text")
+        if parents is None:
+            # The caller might pass None if there is no graph data, but kndx
+            # indexes can't directly store that, so we give them
+            # an empty tuple instead.
+            parents = ()
+        # double handling for now. Make it work until then.
+        length = len(text)
+        record = FulltextContentFactory(key, parents, None, text)
+        sha1 = list(self._insert_record_stream([record], random_id=random_id,
+                                               nostore_sha=nostore_sha))[0]
+        return sha1, length, None
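`_add_text()` takes a single byte string rather than a list of lines. A hedged sketch reusing `vf` from the factory example (key and parent values are invented):

    # text must be a plain str; unicode raises BzrBadParameterUnicode.
    sha1, length, _ = vf._add_text(('text-1',), (), 'one line\nanother\n')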

     def add_fallback_versioned_files(self, a_versioned_files):
         """Add a source of texts for texts not present in this knit.
...
     def annotate(self, key):
         """See VersionedFiles.annotate."""
-        graph = Graph(self)
-        parent_map = self.get_parent_map([key])
-        if not parent_map:
-            raise errors.RevisionNotPresent(key, self)
-        if parent_map[key] is not None:
-            search = graph._make_breadth_first_searcher([key])
-            keys = set()
-            while True:
-                try:
-                    present, ghosts = search.next_with_ghosts()
-                except StopIteration:
-                    break
-                keys.update(present)
-            parent_map = self.get_parent_map(keys)
-        else:
-            keys = [key]
-            parent_map = {key:()}
-        # So we used Graph(self) to load the parent_map, but now that we have
-        # it, we can just query the parent map directly, so create a new Graph
-        # object
-        graph = _mod_graph.Graph(_mod_graph.DictParentsProvider(parent_map))
-        head_cache = _mod_graph.FrozenHeadsCache(graph)
-        parent_cache = {}
-        reannotate = annotate.reannotate
-        for record in self.get_record_stream(keys, 'topological', True):
-            key = record.key
-            lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
-            parent_lines = [parent_cache[parent] for parent in parent_map[key]]
-            parent_cache[key] = list(
-                reannotate(parent_lines, lines, key, None, head_cache))
-        return parent_cache[key]
+        ann = annotate.Annotator(self)
+        return ann.annotate_flat(key)
+
+    def get_annotator(self):
+        return annotate.Annotator(self)
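`annotate()` is now a thin wrapper; `Annotator.annotate_flat()` returns one `(origin_key, line)` pair per line of the text. Sketch, reusing `vf` from above:

    for origin, line in vf.annotate(('text-1',)):
        # origin is the key of the revision that introduced this line.
        print origin, line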

     def check(self, progress_bar=None, keys=None):
         """See VersionedFiles.check()."""
...
                 'unordered', True)):
             # XXX: todo - optimise to use less than full texts.
             key = record.key
-            pb.update('Walking content', key_idx, total)
+            if pb is not None:
+                pb.update('Walking content', key_idx, total)
             if record.storage_kind == 'absent':
                 raise errors.RevisionNotPresent(key, self)
             lines = osutils.split_lines(record.get_bytes_as('fulltext'))
             for line in lines:
                 yield line, key
-        pb.update('Walking content', total, total)
+        if pb is not None:
+            pb.update('Walking content', total, total)

     def keys(self):
         """See VersionedFiles.keys."""
...
     """Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

     def __init__(self, graph_index, is_locked, parents=True,
-        add_callback=None, track_external_parent_refs=False):
+        add_callback=None, track_external_parent_refs=False,
+        inconsistency_fatal=True):
         """Construct a _GCGraphIndex on a graph_index.

         :param graph_index: An implementation of bzrlib.index.GraphIndex.
...
         :param track_external_parent_refs: As keys are added, keep track of the
             keys they reference, so that we can query get_missing_parents(),
+        :param inconsistency_fatal: When asked to add records that are already
+            present, and the details are inconsistent with the existing
+            record, raise an exception instead of warning (and skipping the
+            record).
         """
         self._add_callback = add_callback
         self._graph_index = graph_index
         self._parents = parents
         self.has_graph = parents
         self._is_locked = is_locked
+        self._inconsistency_fatal = inconsistency_fatal
         if track_external_parent_refs:
             self._key_dependencies = knit._KeyRefs()
...
             present_nodes = self._get_entries(keys)
             for (index, key, value, node_refs) in present_nodes:
                 if node_refs != keys[key][1]:
-                    raise errors.KnitCorrupt(self, "inconsistent details in add_records"
-                        ": %s %s" % ((value, node_refs), keys[key]))
+                    details = '%s %s %s' % (key, (value, node_refs), keys[key])
+                    if self._inconsistency_fatal:
+                        raise errors.KnitCorrupt(self, "inconsistent details"
+                                                 " in add_records: %s" %
+                                                 (details,))
+                    else:
+                        trace.warning("inconsistent details in skipped"
+                                      " record: %s", details)
...
         if check_present:
             missing_keys = keys.difference(found_keys)
             if missing_keys:
-                raise RevisionNotPresent(missing_keys.pop(), self)
+                raise errors.RevisionNotPresent(missing_keys.pop(), self)

     def get_parent_map(self, keys):
         """Get a map of the parents of keys.