20
17
"""Versioned text file storage api."""
19
from __future__ import absolute_import
22
21
from copy import copy
23
from cStringIO import StringIO
26
25
from zlib import adler32
28
from bzrlib.lazy_import import lazy_import
27
from ..lazy_import import lazy_import
29
28
lazy_import(globals(), """
35
33
graph as _mod_graph,
45
from bzrlib.graph import DictParentsProvider, Graph, StackedParentsProvider
46
from bzrlib.transport.memory import MemoryTransport
40
from breezy.bzr import (
48
from bzrlib.registry import Registry
49
from bzrlib.symbol_versioning import *
50
from bzrlib.textmerge import TextMerge
51
from bzrlib import bencode
46
from ..registry import Registry
47
from ..sixish import (
53
from ..textmerge import TextMerge
54
56
# Registry mapping (source storage kind, target storage kind) -> adapter.
# Adapters are registered lazily so breezy.bzr.knit is only imported when a
# conversion is actually requested.
adapter_registry = Registry()
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'breezy.bzr.knit',
    'DeltaPlainToFullText')
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'breezy.bzr.knit',
    'FTPlainToFullText')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
    'breezy.bzr.knit', 'DeltaAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
    'breezy.bzr.knit', 'DeltaAnnotatedToFullText')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
    'breezy.bzr.knit', 'FTAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
    'breezy.bzr.knit', 'FTAnnotatedToFullText')
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
#     'breezy.bzr.knit', 'FTAnnotatedToChunked')
71
73
class ContentFactory(object):
211
class _MPDiffGenerator(object):
    """Pull out the functionality for generating mp_diffs."""

    def __init__(self, vf, keys):
        # The VersionedFiles we pull texts and parent information from
        self.vf = vf
        # This is the order the keys were requested in
        self.ordered_keys = tuple(keys)
        # keys + their parents, what we need to compute the diffs
        self.needed_keys = ()
        # Map from key: mp_diff
        self.diffs = {}
        # Map from key: parents_needed (may have ghosts)
        self.parent_map = {}
        # Parents that aren't present
        self.ghost_parents = ()
        # Map from parent_key => number of children for this text
        self.refcounts = {}
        # Content chunks that are cached while we still need them
        self.chunks = {}

    def _find_needed_keys(self):
        """Find the set of keys we need to request.

        This includes all the original keys passed in, and the non-ghost
        parents of those keys.

        :return: (needed_keys, refcounts)
            needed_keys is the set of all texts we need to extract
            refcounts is a dict of {key: num_children} letting us know when we
                no longer need to cache a given parent text
        """
        # All the keys and their parents
        needed_keys = set(self.ordered_keys)
        parent_map = self.vf.get_parent_map(needed_keys)
        self.parent_map = parent_map
        # TODO: Should we be using a different construct here? I think this
        #       uses difference_update internally, and we expect the result to
        #       be tiny
        missing_keys = needed_keys.difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
        # Parents that might be missing. They are allowed to be ghosts, but we
        # should check for them
        refcounts = {}
        setdefault = refcounts.setdefault
        just_parents = set()
        for child_key, parent_keys in viewitems(parent_map):
            if parent_keys is None:
                # parent_keys may be None if a given VersionedFile claims to
                # not support graph operations.
                continue
            just_parents.update(parent_keys)
            needed_keys.update(parent_keys)
            for p in parent_keys:
                refcounts[p] = setdefault(p, 0) + 1
        just_parents.difference_update(parent_map)
        # Remove any parents that are actually ghosts from the needed set
        self.present_parents = set(self.vf.get_parent_map(just_parents))
        self.ghost_parents = just_parents.difference(self.present_parents)
        needed_keys.difference_update(self.ghost_parents)
        self.needed_keys = needed_keys
        self.refcounts = refcounts
        return needed_keys, refcounts

    def _compute_diff(self, key, parent_lines, lines):
        """Compute a single mp_diff, and store it in self._diffs"""
        if len(parent_lines) > 0:
            # XXX: _extract_blocks is not usefully defined anywhere...
            #      It was meant to extract the left-parent diff without
            #      having to recompute it for Knit content (pack-0.92,
            #      etc). That seems to have regressed somewhere
            left_parent_blocks = self.vf._extract_blocks(key,
                parent_lines[0], lines)
        else:
            left_parent_blocks = None
        diff = multiparent.MultiParent.from_lines(lines,
            parent_lines, left_parent_blocks)
        self.diffs[key] = diff

    def _process_one_record(self, key, this_chunks):
        # Compute the diff for one record, releasing cached parent chunks
        # as their refcounts drop to zero.
        if key in self.parent_map:
            # This record should be ready to diff, since we requested
            # content in 'topological' order
            parent_keys = self.parent_map.pop(key)
            # If a VersionedFile claims 'no-graph' support, then it may return
            # None for any parent request, so we replace it with an empty tuple
            if parent_keys is None:
                parent_keys = ()
            parent_lines = []
            for p in parent_keys:
                # Alternatively we could check p not in self.needed_keys, but
                # ghost_parents should be tiny versus huge
                if p in self.ghost_parents:
                    continue
                refcount = self.refcounts[p]
                if refcount == 1: # Last child reference
                    self.refcounts.pop(p)
                    parent_chunks = self.chunks.pop(p)
                else:
                    self.refcounts[p] = refcount - 1
                    parent_chunks = self.chunks[p]
                p_lines = osutils.chunks_to_lines(parent_chunks)
                # TODO: Should we cache the line form? We did the
                #       computation to get it, but storing it this way will
                #       be less memory efficient...
                parent_lines.append(p_lines)
                del p_lines
            lines = osutils.chunks_to_lines(this_chunks)
            # Since we needed the lines, we'll go ahead and cache them this way
            this_chunks = lines
            self._compute_diff(key, parent_lines, lines)
            del lines
        # Is this content required for any more children?
        if key in self.refcounts:
            self.chunks[key] = this_chunks

    def _extract_diffs(self):
        # Stream the needed texts in topological order and diff each one
        # against its (cached) parents as soon as it arrives.
        needed_keys, refcounts = self._find_needed_keys()
        for record in self.vf.get_record_stream(needed_keys,
                                                'topological', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self.vf)
            self._process_one_record(record.key,
                                     record.get_bytes_as('chunked'))

    def compute_diffs(self):
        self._extract_diffs()
        dpop = self.diffs.pop
        return [dpop(k) for k in self.ordered_keys]
209
343
class VersionedFile(object):
210
344
"""Versioned text file storage.
401
539
for version, parent_ids, expected_sha1, mpdiff in records:
402
540
needed_parents.update(p for p in parent_ids
403
541
if not mpvf.has_version(p))
404
present_parents = set(self.get_parent_map(needed_parents).keys())
542
present_parents = set(self.get_parent_map(needed_parents))
405
543
for parent_id, lines in zip(present_parents,
406
self._get_lf_split_line_list(present_parents)):
544
self._get_lf_split_line_list(present_parents)):
407
545
mpvf.add_version(lines, parent_id, [])
408
for (version, parent_ids, expected_sha1, mpdiff), lines in\
409
zip(records, mpvf.get_line_list(versions)):
546
for (version, parent_ids, expected_sha1, mpdiff), lines in zip(
547
records, mpvf.get_line_list(versions)):
410
548
if len(parent_ids) == 1:
411
549
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
412
550
mpvf.get_diff(parent_ids[0]).num_lines()))
790
928
The keyspace is expressed via simple tuples. Any instance of VersionedFiles
791
929
may have a different length key-size, but that size will be constant for
792
all texts added to or retrieved from it. For instance, bzrlib uses
930
all texts added to or retrieved from it. For instance, breezy uses
793
931
instances with a key-size of 2 for storing user files in a repository, with
794
932
the first element the fileid, and the second the version of that file.
796
934
The use of tuples allows a single code base to support several different
797
935
uses with only the mapping logic changing from instance to instance.
937
:ivar _immediate_fallback_vfs: For subclasses that support stacking,
938
this is a list of other VersionedFiles immediately underneath this
939
one. They may in turn each have further fallbacks.
800
942
def add_lines(self, key, parents, lines, parent_texts=None,
1048
1190
def make_mpdiffs(self, keys):
    """Create multiparent diffs for specified keys.

    :param keys: The keys to generate diffs for; each text is diffed
        against its non-ghost parents.
    :return: A list of mp_diffs in the same order as ``keys``.
    """
    # The fetching, parent refcounting and diff computation all live in
    # _MPDiffGenerator; the old inline implementation that used to sit
    # here has been superseded by it.
    generator = _MPDiffGenerator(self, keys)
    return generator.compute_diffs()
1195
def get_annotator(self):
    """Return an Annotator that can annotate texts from this store."""
    return annotate.Annotator(self)
1088
1198
missing_keys = index._missing_keys_from_parent_map
1090
1200
def _extract_blocks(self, version_id, source, target):
1203
def _transitive_fallbacks(self):
    """Return the whole stack of fallback versionedfiles.

    This VersionedFiles may have a list of fallbacks, but it doesn't
    necessarily know about the whole stack going down, and it can't know
    at open time because they may change after the objects are opened.
    """
    all_fallbacks = []
    for a_vfs in self._immediate_fallback_vfs:
        all_fallbacks.append(a_vfs)
        # Recurse so that fallbacks-of-fallbacks are included as well.
        all_fallbacks.extend(a_vfs._transitive_fallbacks())
    return all_fallbacks
1094
1217
class ThunkedVersionedFiles(VersionedFiles):
1095
1218
"""Storage for many versioned files thunked onto a 'VersionedFile' class.
1434
class VersionedFilesWithFallbacks(VersionedFiles):
    """A VersionedFiles store that can defer to stacked fallback stores."""

    def without_fallbacks(self):
        """Return a clone of this object without any fallbacks configured."""
        raise NotImplementedError(self.without_fallbacks)

    def add_fallback_versioned_files(self, a_versioned_files):
        """Add a source of texts for texts not present in this knit.

        :param a_versioned_files: A VersionedFiles object.
        """
        raise NotImplementedError(self.add_fallback_versioned_files)

    def get_known_graph_ancestry(self, keys):
        """Get a KnownGraph instance with the ancestry of keys."""
        parent_map, missing_keys = self._index.find_ancestry(keys)
        # Ask each fallback in turn for whatever is still missing, stopping
        # as soon as nothing is left to resolve.
        for fallback in self._transitive_fallbacks():
            if not missing_keys:
                break
            (f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
                missing_keys)
            parent_map.update(f_parent_map)
            missing_keys = f_missing_keys
        kg = _mod_graph.KnownGraph(parent_map)
        return kg
1314
1461
class _PlanMergeVersionedFile(VersionedFiles):
1315
1462
"""A VersionedFile for uncommitted and committed texts.
1775
class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    is already present.
    """

    def __init__(self, store):
        # The wrapped VersionedFiles-like store all calls are delegated to.
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
            left_matching_blocks=None, nostore_sha=None, random_id=False,
            check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.
        """
        if nostore_sha:
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key[-1] is None:
            # Content-addressed key requested: derive it from the text's sha1.
            sha1 = osutils.sha_strings(lines)
            key = ("sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
            parent_texts=parent_texts,
            left_matching_blocks=left_matching_blocks,
            nostore_sha=nostore_sha, random_id=random_id,
            check_content=check_content)

    def __getattr__(self, name):
        # Everything other than add_lines is delegated to the wrapped store.
        return getattr(self._store, name)
1627
1815
def network_bytes_to_kind_and_offset(network_bytes):
1628
1816
"""Strip of a record kind from the front of network_bytes.
1720
1908
for prefix in sorted(per_prefix_map):
1721
1909
present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
1722
1910
return present_keys
1913
class _KeyRefs(object):
1915
def __init__(self, track_new_keys=False):
1916
# dict mapping 'key' to 'set of keys referring to that key'
1919
# set remembering all new keys
1920
self.new_keys = set()
1922
self.new_keys = None
1928
self.new_keys.clear()
1930
def add_references(self, key, refs):
1931
# Record the new references
1932
for referenced in refs:
1934
needed_by = self.refs[referenced]
1936
needed_by = self.refs[referenced] = set()
1938
# Discard references satisfied by the new key
1941
def get_new_keys(self):
1942
return self.new_keys
1944
def get_unsatisfied_refs(self):
1945
return self.refs.keys()
1947
def _satisfy_refs_for_key(self, key):
1951
# No keys depended on this key. That's ok.
1954
def add_key(self, key):
1955
# satisfy refs for key, and remember that we've seen this key.
1956
self._satisfy_refs_for_key(key)
1957
if self.new_keys is not None:
1958
self.new_keys.add(key)
1960
def satisfy_refs_for_keys(self, keys):
1962
self._satisfy_refs_for_key(key)
1964
def get_referrers(self):
1965
return set(itertools.chain.from_iterable(viewvalues(self.refs)))