                 % (num_bytes, self._content_length))
         # Expand the content if required
         if self._content is None:
+            if self._content_chunks is not None:
+                self._content = ''.join(self._content_chunks)
+                self._content_chunks = None
+        if self._content is None:
             if self._z_content is None:
                 raise AssertionError('No content to decompress')
             if self._z_content == '':
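The hunk above makes the block expand a pending chunk list before falling back to decompression. A minimal standalone sketch of that expand-on-demand pattern (Python 3, with a hypothetical LazyContent class standing in for bzrlib's GroupCompressBlock):

    class LazyContent(object):
        def __init__(self, chunks):
            self._content = None           # joined form, built on demand
            self._content_chunks = chunks  # raw chunks as delivered

        def ensure_content(self):
            # Join the chunks exactly once, then drop the chunk list so
            # the data is not held in memory twice.
            if self._content is None:
                if self._content_chunks is not None:
                    self._content = b''.join(self._content_chunks)
                    self._content_chunks = None
            if self._content is None:
                raise AssertionError('No content to expand')
            return self._content

    print(LazyContent([b'line one\n', b'line two\n']).ensure_content())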
             bytes = apply_delta_to_source(self._content, content_start, end)
+    def set_chunked_content(self, content_chunks, length):
+        """Set the content of this block to the given chunks."""
+        # If we have lots of short lines, it may be more efficient to join
+        # the content ahead of time. If the content is <10MiB, we don't really
+        # care about the extra memory consumption, so we can just pack it and
+        # be done. However, timing showed 18s => 17.9s for repacking 1k revs of
+        # mysql, which is below the noise margin.
+        self._content_length = length
+        self._content_chunks = content_chunks
+        self._content = None
+        self._z_content = None
+
     def set_content(self, content):
         """Set the content of this block."""
         self._content_length = len(content)
         self._content = content
         self._z_content = None
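set_chunked_content takes the length as a parameter because the caller already tracks it, so no join or len() pass is needed at hand-off time. A small sketch of that caller-side bookkeeping (hypothetical variable names, mirroring the endpoint counter used by the compressor later in this diff):

    chunks = []
    endpoint = 0
    for chunk in (b'header\n', b'body\n', b'trailer\n'):
        chunks.append(chunk)
        endpoint += len(chunk)  # running total, maintained as chunks arrive
    # block.set_chunked_content(chunks, endpoint)  # length known up front
    assert endpoint == len(b''.join(chunks))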
+    def _create_z_content_using_lzma(self):
+        if self._content_chunks is not None:
+            self._content = ''.join(self._content_chunks)
+            self._content_chunks = None
+        if self._content is None:
+            raise AssertionError('Nothing to compress')
+        self._z_content = pylzma.compress(self._content)
+        self._z_content_length = len(self._z_content)
+
+    def _create_z_content_from_chunks(self):
+        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)
+        compressed_chunks = map(compressor.compress, self._content_chunks)
+        compressed_chunks.append(compressor.flush())
+        self._z_content = ''.join(compressed_chunks)
+        self._z_content_length = len(self._z_content)
+
+    def _create_z_content(self):
+        if self._z_content is not None:
+            return
+        if _USE_LZMA:
+            self._create_z_content_using_lzma()
+            return
+        if self._content_chunks is not None:
+            self._create_z_content_from_chunks()
+            return
+        self._z_content = zlib.compress(self._content)
+        self._z_content_length = len(self._z_content)
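The _create_z_content_from_chunks path compresses the chunk list incrementally instead of joining it first. A standalone Python 3 demonstration of the same zlib.compressobj technique (note that under Python 3 a list comprehension replaces the original's map, whose result no longer supports .append):

    import zlib

    chunks = [b'first chunk\n', b'second chunk\n' * 100]

    # Feed each chunk through a streaming compressor; only the final
    # flush() terminates the zlib stream.
    compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION)
    compressed_chunks = [compressor.compress(c) for c in chunks]
    compressed_chunks.append(compressor.flush())
    z_content = b''.join(compressed_chunks)

    # Round-trips to the same plaintext as compressing the joined string,
    # without ever materializing the join on the input side.
    assert zlib.decompress(z_content) == b''.join(chunks)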
     def to_bytes(self):
         """Encode the information into a byte stream."""
-        compress = zlib.compress
-        if _USE_LZMA:
-            compress = pylzma.compress
-        if self._z_content is None:
-            if self._content is None:
-                raise AssertionError('Nothing to compress')
-            self._z_content = compress(self._content)
-            self._z_content_length = len(self._z_content)
+        self._create_z_content()
         if _USE_LZMA:
             header = self.GCB_LZ_HEADER
         # for 'commit' down to ~1x the size of the largest file, at a
         # cost of increased complexity within this code. 2x is still <<
         # 3x the size of the largest file, so we are doing ok.
-        content = ''.join(self.chunks)
+        self._block.set_chunked_content(self.chunks, self.endpoint)
         self.chunks = None
         self._delta_index = None
-        self._block.set_content(content)
         return self._block

     def pop_last(self):
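The comment block above records that joining ahead of time was within the noise margin (18s => 17.9s over 1k mysql revisions); handing the chunk list over unjoined defers that cost until compression actually needs it. For intuition only, a rough micro-benchmark sketch of join cost versus keeping the chunk list; absolute numbers vary by machine and say nothing about bzr's end-to-end times:

    import timeit

    chunks = [b'x' * 120] * 10000  # ~1.2MB in many small pieces

    join_time = timeit.timeit(lambda: b''.join(chunks), number=100)
    # Passing the existing list through is essentially free.
    keep_time = timeit.timeit(lambda: chunks, number=100)
    print('join: %.4fs  keep chunk list: %.6fs' % (join_time, keep_time))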
         self.endpoint = endpoint
-def make_pack_factory(graph, delta, keylength):
+def make_pack_factory(graph, delta, keylength, inconsistency_fatal=True):
     """Create a factory for creating a pack based groupcompress.

     This is only functional enough to run interface tests, it doesn't try to

         writer = pack.ContainerWriter(stream.write)

         index = _GCGraphIndex(graph_index, lambda:True, parents=parents,
-            add_callback=graph_index.add_nodes)
+            add_callback=graph_index.add_nodes,
+            inconsistency_fatal=inconsistency_fatal)
         access = knit._DirectPackAccess({})
         access.set_writer(writer, graph_index, (transport, 'newpack'))
         result = GroupCompressVersionedFiles(index, access, delta)
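The new inconsistency_fatal keyword is captured by the factory closure and forwarded to _GCGraphIndex, so every store built by one factory shares one policy. A self-contained sketch of that configuration-capturing factory pattern, with hypothetical names in place of the bzrlib classes:

    def make_store_factory(graph, delta, keylength, inconsistency_fatal=True):
        # Configuration is bound once, here; the inner function closes
        # over it and applies it to every store it constructs.
        def factory(name):
            return {'name': name, 'graph': graph, 'delta': delta,
                    'keylength': keylength,
                    'inconsistency_fatal': inconsistency_fatal}
        return factory

    lenient = make_store_factory(True, False, 1, inconsistency_fatal=False)
    assert lenient('newpack')['inconsistency_fatal'] is False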
             nostore_sha=nostore_sha))[0]
         return sha1, length, None

+    def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
+        """See VersionedFiles._add_text()."""
+        self._index._check_write_ok()
+        self._check_add(key, None, random_id, check_content=False)
+        if text.__class__ is not str:
+            raise errors.BzrBadParameterUnicode("text")
+        if parents is None:
+            # The caller might pass None if there is no graph data, but kndx
+            # indexes can't directly store that, so we give them
+            # an empty tuple instead.
+            parents = ()
+        # double handling for now. Make it work until then.
+        length = len(text)
+        record = FulltextContentFactory(key, parents, None, text)
+        sha1 = list(self._insert_record_stream([record], random_id=random_id,
+            nostore_sha=nostore_sha))[0]
+        return sha1, length, None
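_add_text avoids a second storage code path by wrapping the plain string in a fulltext record and reusing the record-stream inserter. A self-contained sketch of that shape, with hypothetical stand-ins for FulltextContentFactory and _insert_record_stream:

    import hashlib

    class FulltextRecord(object):
        def __init__(self, key, parents, text):
            self.key = key
            self.parents = parents
            self.text = text

    def insert_record_stream(records):
        # Stand-in for the real inserter: yields one sha1 per record.
        for record in records:
            yield hashlib.sha1(record.text).hexdigest()

    def add_text(key, parents, text):
        if parents is None:
            parents = ()  # index layers cannot store "no graph data" directly
        record = FulltextRecord(key, parents, text)
        sha1 = list(insert_record_stream([record]))[0]
        return sha1, len(text), None

    print(add_text(('file-id', 'rev-1'), None, b'hello\n'))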
     def add_fallback_versioned_files(self, a_versioned_files):
         """Add a source of texts for texts not present in this knit.

     def annotate(self, key):
         """See VersionedFiles.annotate."""
-        graph = Graph(self)
-        parent_map = self.get_parent_map([key])
-        if key not in parent_map:
-            raise errors.RevisionNotPresent(key, self)
-        if parent_map[key] is not None:
-            search = graph._make_breadth_first_searcher([key])
-            keys = set()
-            while True:
-                try:
-                    present, ghosts = search.next_with_ghosts()
-                except StopIteration:
-                    break
-                keys.update(present)
-            parent_map = self.get_parent_map(keys)
-        else:
-            keys = [key]
-            parent_map = {key:()}
-        # So we used Graph(self) to load the parent_map, but now that we have
-        # it, we can just query the parent map directly, so create a new Graph
-        # object
-        graph = _mod_graph.Graph(_mod_graph.DictParentsProvider(parent_map))
-        head_cache = _mod_graph.FrozenHeadsCache(graph)
-        parent_cache = {}
-        reannotate = annotate.reannotate
-        for record in self.get_record_stream(keys, 'topological', True):
-            key = record.key
-            lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
-            parent_lines = [parent_cache[parent] for parent in parent_map[key]]
-            parent_cache[key] = list(
-                reannotate(parent_lines, lines, key, None, head_cache))
-        return parent_cache[key]
+        ann = annotate.Annotator(self)
+        return ann.annotate_flat(key)
+
+    def get_annotator(self):
+        return annotate.Annotator(self)

     def check(self, progress_bar=None):
         """See VersionedFiles.check()."""
             'unordered', True)):
             # XXX: todo - optimise to use less than full texts.
             key = record.key
-            pb.update('Walking content', key_idx, total)
+            if pb is not None:
+                pb.update('Walking content', key_idx, total)
             if record.storage_kind == 'absent':
                 raise errors.RevisionNotPresent(key, self)
             lines = osutils.split_lines(record.get_bytes_as('fulltext'))
             for line in lines:
                 yield line, key
-        pb.update('Walking content', total, total)
+        if pb is not None:
+            pb.update('Walking content', total, total)

     def keys(self):
         """See VersionedFiles.keys."""
 class _GCGraphIndex(object):
     """Mapper from GroupCompressVersionedFiles needs into GraphIndex storage."""

     def __init__(self, graph_index, is_locked, parents=True,
-        add_callback=None, track_external_parent_refs=False):
+        add_callback=None, track_external_parent_refs=False,
+        inconsistency_fatal=True):
         """Construct a _GCGraphIndex on a graph_index.

         :param graph_index: An implementation of bzrlib.index.GraphIndex.

         :param track_external_parent_refs: As keys are added, keep track of the
             keys they reference, so that we can query get_missing_parents(),
+        :param inconsistency_fatal: When asked to add records that are already
+            present, and the details are inconsistent with the existing
+            record, raise an exception instead of warning (and skipping the
+            record).
         """
         self._add_callback = add_callback
         self._graph_index = graph_index
         self._parents = parents
         self.has_graph = parents
         self._is_locked = is_locked
+        self._inconsistency_fatal = inconsistency_fatal
         if track_external_parent_refs:
             self._key_dependencies = knit._KeyRefs()
             present_nodes = self._get_entries(keys)
             for (index, key, value, node_refs) in present_nodes:
                 if node_refs != keys[key][1]:
-                    raise errors.KnitCorrupt(self, "inconsistent details in add_records"
-                        ": %s %s" % ((value, node_refs), keys[key]))
+                    details = '%s %s %s' % (key, (value, node_refs), keys[key])
+                    if self._inconsistency_fatal:
+                        raise errors.KnitCorrupt(self, "inconsistent details"
+                                                 " in add_records: %s" %
+                                                 (details,))
+                    else:
+                        trace.warning("inconsistent details in skipped"
+                                      " record: %s", details)
         if check_present:
             missing_keys = keys.difference(found_keys)
             if missing_keys:
-                raise RevisionNotPresent(missing_keys.pop(), self)
+                raise errors.RevisionNotPresent(missing_keys.pop(), self)

     def get_parent_map(self, keys):
         """Get a map of the parents of keys.