45
from bzrlib.graph import DictParentsProvider, Graph, StackedParentsProvider
46
from bzrlib.transport.memory import MemoryTransport
48
from bzrlib.registry import Registry
49
from bzrlib.symbol_versioning import *
50
from bzrlib.textmerge import TextMerge
51
from bzrlib import bencode
44
from .registry import Registry
51
from .textmerge import TextMerge
54
54
# Registry of storage-kind adapters: maps (source_kind, target_kind) to a
# lazily-loaded adapter class in breezy.knit.  Lazy registration avoids
# importing the knit module until an adaptation is actually requested.
adapter_registry = Registry()
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'breezy.knit',
    'DeltaPlainToFullText')
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'breezy.knit',
    'FTPlainToFullText')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
    'breezy.knit', 'DeltaAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
    'breezy.knit', 'DeltaAnnotatedToFullText')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
    'breezy.knit', 'FTAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
    'breezy.knit', 'FTAnnotatedToFullText')
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
#     'breezy.knit', 'FTAnnotatedToChunked')
71
71
class ContentFactory(object):
209
class _MPDiffGenerator(object):
    """Pull out the functionality for generating mp_diffs.

    Computes multiparent diffs for a set of keys against their parents,
    streaming content in topological order so each parent text is held in
    memory only while children still need it (tracked via refcounts).
    """

    def __init__(self, vf, keys):
        """Create a generator for the given VersionedFiles and keys.

        :param vf: The VersionedFiles to extract texts from.
        :param keys: The keys to generate mp_diffs for, in the order the
            diffs should be returned by compute_diffs().
        """
        self.vf = vf
        # This is the order the keys were requested in
        self.ordered_keys = tuple(keys)
        # keys + their parents, what we need to compute the diffs
        self.needed_keys = ()
        # Map from key: mp_diff
        self.diffs = {}
        # Map from key: parents_needed (may have ghosts)
        self.parent_map = {}
        # Parents that aren't present
        self.ghost_parents = ()
        # Map from parent_key => number of children for this text
        self.refcounts = {}
        # Content chunks that are cached while we still need them
        self.chunks = {}

    def _find_needed_keys(self):
        """Find the set of keys we need to request.

        This includes all the original keys passed in, and the non-ghost
        parents of those keys.

        :return: (needed_keys, refcounts)
            needed_keys is the set of all texts we need to extract
            refcounts is a dict of {key: num_children} letting us know when we
                no longer need to cache a given parent text
        :raises errors.RevisionNotPresent: if any requested key is absent.
        """
        # All the keys and their parents
        needed_keys = set(self.ordered_keys)
        parent_map = self.vf.get_parent_map(needed_keys)
        self.parent_map = parent_map
        # Requested keys that the store knows nothing about are a hard error
        # (parents merely missing are handled as ghosts below).
        missing_keys = needed_keys.difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
        # Parents that might be missing. They are allowed to be ghosts, but we
        # should check for them
        refcounts = {}
        setdefault = refcounts.setdefault
        just_parents = set()
        for child_key, parent_keys in parent_map.items():
            if parent_keys is None:
                # parent_keys may be None if a given VersionedFile claims to
                # not support graph operations.
                continue
            just_parents.update(parent_keys)
            needed_keys.update(parent_keys)
            for p in parent_keys:
                refcounts[p] = setdefault(p, 0) + 1
        just_parents.difference_update(parent_map)
        # Remove any parents that are actually ghosts from the needed set
        self.present_parents = set(self.vf.get_parent_map(just_parents))
        self.ghost_parents = just_parents.difference(self.present_parents)
        needed_keys.difference_update(self.ghost_parents)
        self.needed_keys = needed_keys
        self.refcounts = refcounts
        return needed_keys, refcounts

    def _compute_diff(self, key, parent_lines, lines):
        """Compute a single mp_diff, and store it in self.diffs.

        :param key: The key the diff is for.
        :param parent_lines: List of line-lists for each present parent.
        :param lines: The lines of the text itself.
        """
        if len(parent_lines) > 0:
            # XXX: _extract_blocks is not usefully defined anywhere...
            #      It was meant to extract the left-parent diff without
            #      having to recompute it for Knit content (pack-0.92,
            #      etc). That seems to have regressed somewhere
            left_parent_blocks = self.vf._extract_blocks(key,
                parent_lines[0], lines)
        else:
            left_parent_blocks = None
        diff = multiparent.MultiParent.from_lines(lines,
            parent_lines, left_parent_blocks)
        self.diffs[key] = diff

    def _process_one_record(self, key, this_chunks):
        """Diff one streamed record and update the parent-content cache.

        :param key: The key of the record.
        :param this_chunks: The content chunks for the record.
        """
        parent_keys = None
        if key in self.parent_map:
            # This record should be ready to diff, since we requested
            # content in 'topological' order
            parent_keys = self.parent_map.pop(key)
            # If a VersionedFile claims 'no-graph' support, then it may return
            # None for any parent request, so we replace it with an empty tuple
            if parent_keys is None:
                parent_keys = ()
            parent_lines = []
            for p in parent_keys:
                # Alternatively we could check p not in self.needed_keys, but
                # ghost_parents should be tiny versus huge
                if p in self.ghost_parents:
                    continue
                refcount = self.refcounts[p]
                if refcount == 1: # Last child reference
                    self.refcounts.pop(p)
                    parent_chunks = self.chunks.pop(p)
                else:
                    self.refcounts[p] = refcount - 1
                    parent_chunks = self.chunks[p]
                p_lines = osutils.chunks_to_lines(parent_chunks)
                # TODO: Should we cache the line form? We did the
                #       computation to get it, but storing it this way will
                #       be less memory efficient...
                parent_lines.append(p_lines)
            lines = osutils.chunks_to_lines(this_chunks)
            # Since we needed the lines, we'll go ahead and cache them this way
            this_chunks = lines
            self._compute_diff(key, parent_lines, lines)
            del lines
        # Is this content required for any more children?
        if key in self.refcounts:
            self.chunks[key] = this_chunks

    def _extract_diffs(self):
        """Stream all needed texts and compute a diff for each requested key.

        :raises errors.RevisionNotPresent: if a streamed record is absent.
        """
        needed_keys, refcounts = self._find_needed_keys()
        for record in self.vf.get_record_stream(needed_keys,
                                                'topological', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self.vf)
            self._process_one_record(record.key,
                                     record.get_bytes_as('chunked'))

    def compute_diffs(self):
        """Compute and return the diffs, in the originally requested order."""
        self._extract_diffs()
        dpop = self.diffs.pop
        return [dpop(k) for k in self.ordered_keys]
209
341
class VersionedFile(object):
210
342
"""Versioned text file storage.
401
537
for version, parent_ids, expected_sha1, mpdiff in records:
402
538
needed_parents.update(p for p in parent_ids
403
539
if not mpvf.has_version(p))
404
present_parents = set(self.get_parent_map(needed_parents).keys())
540
present_parents = set(self.get_parent_map(needed_parents))
405
541
for parent_id, lines in zip(present_parents,
406
self._get_lf_split_line_list(present_parents)):
542
self._get_lf_split_line_list(present_parents)):
407
543
mpvf.add_version(lines, parent_id, [])
408
for (version, parent_ids, expected_sha1, mpdiff), lines in\
409
zip(records, mpvf.get_line_list(versions)):
544
for (version, parent_ids, expected_sha1, mpdiff), lines in zip(
545
records, mpvf.get_line_list(versions)):
410
546
if len(parent_ids) == 1:
411
547
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
412
548
mpvf.get_diff(parent_ids[0]).num_lines()))
790
926
The keyspace is expressed via simple tuples. Any instance of VersionedFiles
791
927
may have a different length key-size, but that size will be constant for
792
all texts added to or retrieved from it. For instance, bzrlib uses
928
all texts added to or retrieved from it. For instance, breezy uses
793
929
instances with a key-size of 2 for storing user files in a repository, with
794
930
the first element the fileid, and the second the version of that file.
796
932
The use of tuples allows a single code base to support several different
797
933
uses with only the mapping logic changing from instance to instance.
935
:ivar _immediate_fallback_vfs: For subclasses that support stacking,
936
this is a list of other VersionedFiles immediately underneath this
937
one. They may in turn each have further fallbacks.
800
940
def add_lines(self, key, parents, lines, parent_texts=None,
1048
1188
def make_mpdiffs(self, keys):
    """Create multiparent diffs for specified keys.

    Delegates to _MPDiffGenerator, which streams the needed texts in
    topological order and caches parent content only while child keys
    still need it.

    :param keys: The keys to generate diffs for, in the order the diffs
        should be returned.
    :return: A list of MultiParent diff objects, one per requested key.
    """
    generator = _MPDiffGenerator(self, keys)
    return generator.compute_diffs()
1193
def get_annotator(self):
    """Return an Annotator that can annotate texts in this store."""
    return annotate.Annotator(self)
1088
1196
missing_keys = index._missing_keys_from_parent_map
1090
1198
def _extract_blocks(self, version_id, source, target):
1201
def _transitive_fallbacks(self):
1202
"""Return the whole stack of fallback versionedfiles.
1204
This VersionedFiles may have a list of fallbacks, but it doesn't
1205
necessarily know about the whole stack going down, and it can't know
1206
at open time because they may change after the objects are opened.
1209
for a_vfs in self._immediate_fallback_vfs:
1210
all_fallbacks.append(a_vfs)
1211
all_fallbacks.extend(a_vfs._transitive_fallbacks())
1212
return all_fallbacks
1094
1215
class ThunkedVersionedFiles(VersionedFiles):
1095
1216
"""Storage for many versioned files thunked onto a 'VersionedFile' class.
1432
class VersionedFilesWithFallbacks(VersionedFiles):
    """A VersionedFiles that can fall back to other VersionedFiles stores.

    Subclasses provide a stack of fallback stores consulted for texts not
    present locally (see _transitive_fallbacks on the base class).
    """

    def without_fallbacks(self):
        """Return a clone of this object without any fallbacks configured."""
        raise NotImplementedError(self.without_fallbacks)

    def add_fallback_versioned_files(self, a_versioned_files):
        """Add a source of texts for texts not present in this knit.

        :param a_versioned_files: A VersionedFiles object.
        """
        raise NotImplementedError(self.add_fallback_versioned_files)

    def get_known_graph_ancestry(self, keys):
        """Get a KnownGraph instance with the ancestry of keys.

        Keys whose ancestry is not found locally are looked up in each
        fallback in turn until none are missing (remaining misses are
        treated as ghosts by KnownGraph).
        """
        parent_map, missing_keys = self._index.find_ancestry(keys)
        for fallback in self._transitive_fallbacks():
            if not missing_keys:
                break
            (f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
                missing_keys)
            parent_map.update(f_parent_map)
            missing_keys = f_missing_keys
        kg = _mod_graph.KnownGraph(parent_map)
        return kg
1314
1459
class _PlanMergeVersionedFile(VersionedFiles):
1315
1460
"""A VersionedFile for uncommitted and committed texts.
1773
class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    is already present.

    All other attribute access is delegated to the wrapped store.
    """

    def __init__(self, store):
        # The wrapped VersionedFiles store.
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
            left_matching_blocks=None, nostore_sha=None, random_id=False,
            check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.

        :raises NotImplementedError: if nostore_sha is supplied; that
            behaviour is not implemented by this decorator.
        """
        if nostore_sha:
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key[-1] is None:
            # Content-addressed key: derive it from the text's sha1.
            sha1 = osutils.sha_strings(lines)
            key = ("sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
                parent_texts=parent_texts,
                left_matching_blocks=left_matching_blocks,
                nostore_sha=nostore_sha, random_id=random_id,
                check_content=check_content)

    def __getattr__(self, name):
        # Delegate everything else to the wrapped store.
        return getattr(self._store, name)
1627
1813
def network_bytes_to_kind_and_offset(network_bytes):
1628
1814
"""Strip of a record kind from the front of network_bytes.
1720
1906
for prefix in sorted(per_prefix_map):
1721
1907
present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
1722
1908
return present_keys
1911
class _KeyRefs(object):
1913
def __init__(self, track_new_keys=False):
1914
# dict mapping 'key' to 'set of keys referring to that key'
1917
# set remembering all new keys
1918
self.new_keys = set()
1920
self.new_keys = None
1926
self.new_keys.clear()
1928
def add_references(self, key, refs):
1929
# Record the new references
1930
for referenced in refs:
1932
needed_by = self.refs[referenced]
1934
needed_by = self.refs[referenced] = set()
1936
# Discard references satisfied by the new key
1939
def get_new_keys(self):
1940
return self.new_keys
1942
def get_unsatisfied_refs(self):
1943
return self.refs.keys()
1945
def _satisfy_refs_for_key(self, key):
1949
# No keys depended on this key. That's ok.
1952
def add_key(self, key):
1953
# satisfy refs for key, and remember that we've seen this key.
1954
self._satisfy_refs_for_key(key)
1955
if self.new_keys is not None:
1956
self.new_keys.add(key)
1958
def satisfy_refs_for_keys(self, keys):
1960
self._satisfy_refs_for_key(key)
1962
def get_referrers(self):
1963
return set(itertools.chain.from_iterable(viewvalues(self.refs)))