45
from bzrlib.graph import DictParentsProvider, Graph, StackedParentsProvider
46
from bzrlib.transport.memory import MemoryTransport
48
from bzrlib.registry import Registry
49
from bzrlib.symbol_versioning import *
50
from bzrlib.textmerge import TextMerge
51
from bzrlib import bencode
44
from .registry import Registry
48
from .textmerge import TextMerge
54
51
adapter_registry = Registry()
55
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
52
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'breezy.knit',
56
53
'DeltaPlainToFullText')
57
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
54
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'breezy.knit',
58
55
'FTPlainToFullText')
59
56
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
60
'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
57
'breezy.knit', 'DeltaAnnotatedToUnannotated')
61
58
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
62
'bzrlib.knit', 'DeltaAnnotatedToFullText')
59
'breezy.knit', 'DeltaAnnotatedToFullText')
63
60
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
64
'bzrlib.knit', 'FTAnnotatedToUnannotated')
61
'breezy.knit', 'FTAnnotatedToUnannotated')
65
62
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
66
'bzrlib.knit', 'FTAnnotatedToFullText')
63
'breezy.knit', 'FTAnnotatedToFullText')
67
64
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
68
# 'bzrlib.knit', 'FTAnnotatedToChunked')
65
# 'breezy.knit', 'FTAnnotatedToChunked')
71
68
class ContentFactory(object):
206
class _MPDiffGenerator(object):
207
"""Pull out the functionality for generating mp_diffs."""
209
def __init__(self, vf, keys):
211
# This is the order the keys were requested in
212
self.ordered_keys = tuple(keys)
213
# keys + their parents, what we need to compute the diffs
214
self.needed_keys = ()
215
# Map from key: mp_diff
217
# Map from key: parents_needed (may have ghosts)
219
# Parents that aren't present
220
self.ghost_parents = ()
221
# Map from parent_key => number of children for this text
223
# Content chunks that are cached while we still need them
226
def _find_needed_keys(self):
227
"""Find the set of keys we need to request.
229
This includes all the original keys passed in, and the non-ghost
230
parents of those keys.
232
:return: (needed_keys, refcounts)
233
needed_keys is the set of all texts we need to extract
234
refcounts is a dict of {key: num_children} letting us know when we
235
no longer need to cache a given parent text
237
# All the keys and their parents
238
needed_keys = set(self.ordered_keys)
239
parent_map = self.vf.get_parent_map(needed_keys)
240
self.parent_map = parent_map
241
# TODO: Should we be using a different construct here? I think this
242
# uses difference_update internally, and we expect the result to
244
missing_keys = needed_keys.difference(parent_map)
246
raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
247
# Parents that might be missing. They are allowed to be ghosts, but we
248
# should check for them
250
setdefault = refcounts.setdefault
252
for child_key, parent_keys in parent_map.iteritems():
254
# parent_keys may be None if a given VersionedFile claims to
255
# not support graph operations.
257
just_parents.update(parent_keys)
258
needed_keys.update(parent_keys)
259
for p in parent_keys:
260
refcounts[p] = setdefault(p, 0) + 1
261
just_parents.difference_update(parent_map)
262
# Remove any parents that are actually ghosts from the needed set
263
self.present_parents = set(self.vf.get_parent_map(just_parents))
264
self.ghost_parents = just_parents.difference(self.present_parents)
265
needed_keys.difference_update(self.ghost_parents)
266
self.needed_keys = needed_keys
267
self.refcounts = refcounts
268
return needed_keys, refcounts
270
def _compute_diff(self, key, parent_lines, lines):
271
"""Compute a single mp_diff, and store it in self._diffs"""
272
if len(parent_lines) > 0:
273
# XXX: _extract_blocks is not usefully defined anywhere...
274
# It was meant to extract the left-parent diff without
275
# having to recompute it for Knit content (pack-0.92,
276
# etc). That seems to have regressed somewhere
277
left_parent_blocks = self.vf._extract_blocks(key,
278
parent_lines[0], lines)
280
left_parent_blocks = None
281
diff = multiparent.MultiParent.from_lines(lines,
282
parent_lines, left_parent_blocks)
283
self.diffs[key] = diff
285
def _process_one_record(self, key, this_chunks):
287
if key in self.parent_map:
288
# This record should be ready to diff, since we requested
289
# content in 'topological' order
290
parent_keys = self.parent_map.pop(key)
291
# If a VersionedFile claims 'no-graph' support, then it may return
292
# None for any parent request, so we replace it with an empty tuple
293
if parent_keys is None:
296
for p in parent_keys:
297
# Alternatively we could check p not in self.needed_keys, but
298
# ghost_parents should be tiny versus huge
299
if p in self.ghost_parents:
301
refcount = self.refcounts[p]
302
if refcount == 1: # Last child reference
303
self.refcounts.pop(p)
304
parent_chunks = self.chunks.pop(p)
306
self.refcounts[p] = refcount - 1
307
parent_chunks = self.chunks[p]
308
p_lines = osutils.chunks_to_lines(parent_chunks)
309
# TODO: Should we cache the line form? We did the
310
# computation to get it, but storing it this way will
311
# be less memory efficient...
312
parent_lines.append(p_lines)
314
lines = osutils.chunks_to_lines(this_chunks)
315
# Since we needed the lines, we'll go ahead and cache them this way
317
self._compute_diff(key, parent_lines, lines)
319
# Is this content required for any more children?
320
if key in self.refcounts:
321
self.chunks[key] = this_chunks
323
def _extract_diffs(self):
324
needed_keys, refcounts = self._find_needed_keys()
325
for record in self.vf.get_record_stream(needed_keys,
326
'topological', True):
327
if record.storage_kind == 'absent':
328
raise errors.RevisionNotPresent(record.key, self.vf)
329
self._process_one_record(record.key,
330
record.get_bytes_as('chunked'))
332
def compute_diffs(self):
333
self._extract_diffs()
334
dpop = self.diffs.pop
335
return [dpop(k) for k in self.ordered_keys]
209
338
class VersionedFile(object):
210
339
"""Versioned text file storage.
403
536
if not mpvf.has_version(p))
404
537
present_parents = set(self.get_parent_map(needed_parents).keys())
405
538
for parent_id, lines in zip(present_parents,
406
self._get_lf_split_line_list(present_parents)):
539
self._get_lf_split_line_list(present_parents)):
407
540
mpvf.add_version(lines, parent_id, [])
408
for (version, parent_ids, expected_sha1, mpdiff), lines in\
409
zip(records, mpvf.get_line_list(versions)):
541
for (version, parent_ids, expected_sha1, mpdiff), lines in zip(
542
records, mpvf.get_line_list(versions)):
410
543
if len(parent_ids) == 1:
411
544
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
412
545
mpvf.get_diff(parent_ids[0]).num_lines()))
790
923
The keyspace is expressed via simple tuples. Any instance of VersionedFiles
791
924
may have a different length key-size, but that size will be constant for
792
all texts added to or retrieved from it. For instance, bzrlib uses
925
all texts added to or retrieved from it. For instance, breezy uses
793
926
instances with a key-size of 2 for storing user files in a repository, with
794
927
the first element the fileid, and the second the version of that file.
796
929
The use of tuples allows a single code base to support several different
797
930
uses with only the mapping logic changing from instance to instance.
932
:ivar _immediate_fallback_vfs: For subclasses that support stacking,
933
this is a list of other VersionedFiles immediately underneath this
934
one. They may in turn each have further fallbacks.
800
937
def add_lines(self, key, parents, lines, parent_texts=None,
1048
1185
def make_mpdiffs(self, keys):
1049
1186
"""Create multiparent diffs for specified keys."""
1050
keys_order = tuple(keys)
1051
keys = frozenset(keys)
1052
knit_keys = set(keys)
1053
parent_map = self.get_parent_map(keys)
1054
for parent_keys in parent_map.itervalues():
1056
knit_keys.update(parent_keys)
1057
missing_keys = keys - set(parent_map)
1059
raise errors.RevisionNotPresent(list(missing_keys)[0], self)
1060
# We need to filter out ghosts, because we can't diff against them.
1061
maybe_ghosts = knit_keys - keys
1062
ghosts = maybe_ghosts - set(self.get_parent_map(maybe_ghosts))
1063
knit_keys.difference_update(ghosts)
1065
chunks_to_lines = osutils.chunks_to_lines
1066
for record in self.get_record_stream(knit_keys, 'topological', True):
1067
lines[record.key] = chunks_to_lines(record.get_bytes_as('chunked'))
1068
# line_block_dict = {}
1069
# for parent, blocks in record.extract_line_blocks():
1070
# line_blocks[parent] = blocks
1071
# line_blocks[record.key] = line_block_dict
1073
for key in keys_order:
1075
parents = parent_map[key] or []
1076
# Note that filtering knit_keys can lead to a parent difference
1077
# between the creation and the application of the mpdiff.
1078
parent_lines = [lines[p] for p in parents if p in knit_keys]
1079
if len(parent_lines) > 0:
1080
left_parent_blocks = self._extract_blocks(key, parent_lines[0],
1083
left_parent_blocks = None
1084
diffs.append(multiparent.MultiParent.from_lines(target,
1085
parent_lines, left_parent_blocks))
1187
generator = _MPDiffGenerator(self, keys)
1188
return generator.compute_diffs()
1190
def get_annotator(self):
1191
return annotate.Annotator(self)
1088
1193
missing_keys = index._missing_keys_from_parent_map
1090
1195
def _extract_blocks(self, version_id, source, target):
1198
def _transitive_fallbacks(self):
1199
"""Return the whole stack of fallback versionedfiles.
1201
This VersionedFiles may have a list of fallbacks, but it doesn't
1202
necessarily know about the whole stack going down, and it can't know
1203
at open time because they may change after the objects are opened.
1206
for a_vfs in self._immediate_fallback_vfs:
1207
all_fallbacks.append(a_vfs)
1208
all_fallbacks.extend(a_vfs._transitive_fallbacks())
1209
return all_fallbacks
1094
1212
class ThunkedVersionedFiles(VersionedFiles):
1095
1213
"""Storage for many versioned files thunked onto a 'VersionedFile' class.
1429
class VersionedFilesWithFallbacks(VersionedFiles):
1431
def without_fallbacks(self):
1432
"""Return a clone of this object without any fallbacks configured."""
1433
raise NotImplementedError(self.without_fallbacks)
1435
def add_fallback_versioned_files(self, a_versioned_files):
1436
"""Add a source of texts for texts not present in this knit.
1438
:param a_versioned_files: A VersionedFiles object.
1440
raise NotImplementedError(self.add_fallback_versioned_files)
1442
def get_known_graph_ancestry(self, keys):
1443
"""Get a KnownGraph instance with the ancestry of keys."""
1444
parent_map, missing_keys = self._index.find_ancestry(keys)
1445
for fallback in self._transitive_fallbacks():
1446
if not missing_keys:
1448
(f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
1450
parent_map.update(f_parent_map)
1451
missing_keys = f_missing_keys
1452
kg = _mod_graph.KnownGraph(parent_map)
1314
1456
class _PlanMergeVersionedFile(VersionedFiles):
1315
1457
"""A VersionedFile for uncommitted and committed texts.
1770
class NoDupeAddLinesDecorator(object):
1771
"""Decorator for a VersionedFiles that skips doing an add_lines if the key
1775
def __init__(self, store):
1778
def add_lines(self, key, parents, lines, parent_texts=None,
1779
left_matching_blocks=None, nostore_sha=None, random_id=False,
1780
check_content=True):
1781
"""See VersionedFiles.add_lines.
1783
This implementation may return None as the third element of the return
1784
value when the original store wouldn't.
1787
raise NotImplementedError(
1788
"NoDupeAddLinesDecorator.add_lines does not implement the "
1789
"nostore_sha behaviour.")
1791
sha1 = osutils.sha_strings(lines)
1792
key = ("sha1:" + sha1,)
1795
if key in self._store.get_parent_map([key]):
1796
# This key has already been inserted, so don't do it again.
1798
sha1 = osutils.sha_strings(lines)
1799
return sha1, sum(map(len, lines)), None
1800
return self._store.add_lines(key, parents, lines,
1801
parent_texts=parent_texts,
1802
left_matching_blocks=left_matching_blocks,
1803
nostore_sha=nostore_sha, random_id=random_id,
1804
check_content=check_content)
1806
def __getattr__(self, name):
1807
return getattr(self._store, name)
1627
1810
def network_bytes_to_kind_and_offset(network_bytes):
1628
1811
"""Strip of a record kind from the front of network_bytes.
1720
1903
for prefix in sorted(per_prefix_map):
1721
1904
present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
1722
1905
return present_keys
1908
class _KeyRefs(object):
1910
def __init__(self, track_new_keys=False):
1911
# dict mapping 'key' to 'set of keys referring to that key'
1914
# set remembering all new keys
1915
self.new_keys = set()
1917
self.new_keys = None
1923
self.new_keys.clear()
1925
def add_references(self, key, refs):
1926
# Record the new references
1927
for referenced in refs:
1929
needed_by = self.refs[referenced]
1931
needed_by = self.refs[referenced] = set()
1933
# Discard references satisfied by the new key
1936
def get_new_keys(self):
1937
return self.new_keys
1939
def get_unsatisfied_refs(self):
1940
return self.refs.iterkeys()
1942
def _satisfy_refs_for_key(self, key):
1946
# No keys depended on this key. That's ok.
1949
def add_key(self, key):
1950
# satisfy refs for key, and remember that we've seen this key.
1951
self._satisfy_refs_for_key(key)
1952
if self.new_keys is not None:
1953
self.new_keys.add(key)
1955
def satisfy_refs_for_keys(self, keys):
1957
self._satisfy_refs_for_key(key)
1959
def get_referrers(self):
1961
for referrers in self.refs.itervalues():
1962
result.update(referrers)