"""Versioned text file storage api."""

from copy import copy
from io import BytesIO
import itertools
import struct
from zlib import adler32

from ..lazy_import import lazy_import
lazy_import(globals(), """
from breezy import (
    annotate,
    bencode,
    graph as _mod_graph,
    multiparent,
    revision,
    tsort,
    urlutils,
    )
from breezy.bzr import (
    groupcompress,
    index,
    knit,
    )
""")
from .. import (
    errors,
    osutils,
    )
from ..registry import Registry
from ..textmerge import TextMerge


adapter_registry = Registry()
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
                               'breezy.bzr.knit', 'DeltaAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
                               'breezy.bzr.knit', 'FTAnnotatedToUnannotated')
for target_storage_kind in ('fulltext', 'chunked', 'lines'):
    adapter_registry.register_lazy(('knit-delta-gz', target_storage_kind),
                                   'breezy.bzr.knit', 'DeltaPlainToFullText')
    adapter_registry.register_lazy(('knit-ft-gz', target_storage_kind),
                                   'breezy.bzr.knit', 'FTPlainToFullText')
    adapter_registry.register_lazy(('knit-annotated-ft-gz', target_storage_kind),
                                   'breezy.bzr.knit', 'FTAnnotatedToFullText')
    adapter_registry.register_lazy(('knit-annotated-delta-gz', target_storage_kind),
                                   'breezy.bzr.knit', 'DeltaAnnotatedToFullText')
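
# Example sketch: how a caller can obtain an adapter registered above.  The
# adapter classes live in breezy.bzr.knit; their constructor/get_bytes()
# interface is assumed here, so treat this as an outline, not working code:
#
#   adapter_factory = adapter_registry.get(('knit-delta-gz', 'fulltext'))
#   adapter = adapter_factory(versioned_files)  # 'versioned_files' is hypothetical
#   fulltext = adapter.get_bytes(record)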


class UnavailableRepresentation(errors.InternalBzrError):

    _fmt = ("The encoding '%(wanted)s' is not available for key %(key)s which "
            "is encoded as '%(native)s'.")

    def __init__(self, key, wanted, native):
        errors.InternalBzrError.__init__(self)
        self.wanted = wanted
        self.native = native
        self.key = key


class ExistingContent(errors.BzrError):

    _fmt = "The content being inserted is already present."


class ContentFactory(object):
    """Abstract interface for insertion and retrieval from a VersionedFile.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar size: None, or the size of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. One of
        'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
        'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
        'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self):
        """Create a ContentFactory."""
        self.sha1 = None
        self.size = None
        self.storage_kind = None
        self.key = None
        self.parents = None


class ChunkedContentFactory(ContentFactory):
    """Static data content factory.

    This takes a 'chunked' list of strings. The only requirement on 'chunked'
    is that no implicit terminators are added to the chunks; so windows-style
    text files would have their line endings included in each chunk.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar size: The size of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'chunked'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    :ivar chunks_are_lines: Whether the chunks are known to already be split
        into lines.
    """

    def __init__(self, key, parents, sha1, chunks, chunks_are_lines=None):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.size = sum(map(len, chunks))
        self.storage_kind = 'chunked'
        self.key = key
        self.parents = parents
        self._chunks = chunks
        self._chunks_are_lines = chunks_are_lines

    def get_bytes_as(self, storage_kind):
        if storage_kind == 'chunked':
            return self._chunks
        elif storage_kind == 'fulltext':
            return b''.join(self._chunks)
        elif storage_kind == 'lines':
            if self._chunks_are_lines:
                return self._chunks
            return list(osutils.chunks_to_lines(self._chunks))
        raise UnavailableRepresentation(self.key, storage_kind,
                                        self.storage_kind)

    def iter_bytes_as(self, storage_kind):
        if storage_kind == 'chunked':
            return iter(self._chunks)
        elif storage_kind == 'lines':
            if self._chunks_are_lines:
                return iter(self._chunks)
            return iter(osutils.chunks_to_lines(self._chunks))
        raise UnavailableRepresentation(self.key, storage_kind,
                                        self.storage_kind)
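
# Example sketch (not from the original source): one factory can serve the
# same content in several representations.
#
#   factory = ChunkedContentFactory(
#       (b'file-id', b'rev-1'), (), None, [b'one\n', b'two\n'])
#   factory.get_bytes_as('fulltext')  # -> b'one\ntwo\n'
#   factory.get_bytes_as('lines')     # -> [b'one\n', b'two\n']
#   factory.get_bytes_as('mpdiff')    # raises UnavailableRepresentation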


class FulltextContentFactory(ContentFactory):
    """Static data content factory.

    This takes a fulltext when created and just returns that during
    get_bytes_as('fulltext').

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'fulltext'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, text):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.size = len(text)
        self.storage_kind = 'fulltext'
        self.key = key
        self.parents = parents
        self._text = text

    def get_bytes_as(self, storage_kind):
        if storage_kind == self.storage_kind:
            return self._text
        elif storage_kind == 'chunked':
            return [self._text]
        elif storage_kind == 'lines':
            return osutils.split_lines(self._text)
        raise UnavailableRepresentation(self.key, storage_kind,
                                        self.storage_kind)

    def iter_bytes_as(self, storage_kind):
        if storage_kind == 'chunked':
            return iter([self._text])
        elif storage_kind == 'lines':
            return iter(osutils.split_lines(self._text))
        raise UnavailableRepresentation(self.key, storage_kind,
                                        self.storage_kind)


class FileContentFactory(ContentFactory):
    """File-based content factory."""

    def __init__(self, key, parents, fileobj, sha1=None, size=None):
        self.key = key
        self.parents = parents
        self.file = fileobj
        self.storage_kind = 'file'
        self.sha1 = sha1
        self.size = size

    def get_bytes_as(self, storage_kind):
        self.file.seek(0)
        if storage_kind == 'fulltext':
            return self.file.read()
        elif storage_kind == 'chunked':
            return list(osutils.file_iterator(self.file))
        elif storage_kind == 'lines':
            return list(self.file.readlines())
        raise UnavailableRepresentation(self.key, storage_kind,
                                        self.storage_kind)

    def iter_bytes_as(self, storage_kind):
        self.file.seek(0)
        if storage_kind == 'chunked':
            return osutils.file_iterator(self.file)
        elif storage_kind == 'lines':
            return self.file
        raise UnavailableRepresentation(self.key, storage_kind,
                                        self.storage_kind)


class AbsentContentFactory(ContentFactory):
    """A placeholder content factory for unavailable texts.

    :ivar sha1: None.
    :ivar storage_kind: 'absent'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: None.
    """

    def __init__(self, key):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = 'absent'
        self.key = key
        self.parents = None

    def get_bytes_as(self, storage_kind):
        raise ValueError('A request was made for key: %s, but that'
                         ' content is not available, and the calling'
                         ' code does not handle if it is missing.'
                         % (self.key,))


class _MPDiffGenerator(object):
    """Pull out the functionality for generating mp_diffs."""

    def __init__(self, vf, keys):
        self.vf = vf
        # This is the order the keys were requested in
        self.ordered_keys = tuple(keys)
        # keys + their parents, what we need to compute the diffs
        self.needed_keys = ()
        # Map from key: mp_diff
        self.diffs = {}
        # Map from key: parents_needed (may have ghosts)
        self.parent_map = {}
        # Parents that aren't present
        self.ghost_parents = ()
        # Map from parent_key => number of children for this text
        self.refcounts = {}
        # Content chunks that are cached while we still need them
        self.chunks = {}

    def _find_needed_keys(self):
        """Find the set of keys we need to request.

        This includes all the original keys passed in, and the non-ghost
        parents of those keys.

        :return: (needed_keys, refcounts)
            needed_keys is the set of all texts we need to extract
            refcounts is a dict of {key: num_children} letting us know when we
                no longer need to cache a given parent text
        """
        # All the keys and their parents
        needed_keys = set(self.ordered_keys)
        parent_map = self.vf.get_parent_map(needed_keys)
        self.parent_map = parent_map
        # TODO: Should we be using a different construct here? I think this
        #       uses difference_update internally, and we expect the result to
        #       be tiny
        missing_keys = needed_keys.difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
        # Parents that might be missing. They are allowed to be ghosts, but we
        # should check for them
        refcounts = {}
        setdefault = refcounts.setdefault
        just_parents = set()
        for child_key, parent_keys in parent_map.items():
            if parent_keys is None:
                # parent_keys may be None if a given VersionedFile claims to
                # not support graph operations.
                continue
            just_parents.update(parent_keys)
            needed_keys.update(parent_keys)
            for p in parent_keys:
                refcounts[p] = setdefault(p, 0) + 1
        just_parents.difference_update(parent_map)
        # Remove any parents that are actually ghosts from the needed set
        self.present_parents = set(self.vf.get_parent_map(just_parents))
        self.ghost_parents = just_parents.difference(self.present_parents)
        needed_keys.difference_update(self.ghost_parents)
        self.needed_keys = needed_keys
        self.refcounts = refcounts
        return needed_keys, refcounts
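
    # Worked example (sketch, not in the original source): if key C has
    # parents (A, B) and key D has parent (A,), then _find_needed_keys()
    # returns needed_keys == {A, B, C, D} (assuming no ghosts) and
    # refcounts == {A: 2, B: 1}; A's cached chunks can be dropped once its
    # second child has been diffed.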

    def _compute_diff(self, key, parent_lines, lines):
        """Compute a single mp_diff, and store it in self._diffs"""
        if len(parent_lines) > 0:
            # XXX: _extract_blocks is not usefully defined anywhere...
            #      It was meant to extract the left-parent diff without
            #      having to recompute it for Knit content (pack-0.92,
            #      etc). That seems to have regressed somewhere
            left_parent_blocks = self.vf._extract_blocks(key,
                                                         parent_lines[0], lines)
        else:
            left_parent_blocks = None
        diff = multiparent.MultiParent.from_lines(lines,
                                                  parent_lines,
                                                  left_parent_blocks)
        self.diffs[key] = diff

    def _process_one_record(self, key, this_chunks):
        parent_lines = []
        if key in self.parent_map:
            # This record should be ready to diff, since we requested
            # content in 'topological' order
            parent_keys = self.parent_map.pop(key)
            # If a VersionedFile claims 'no-graph' support, then it may return
            # None for any parent request, so we replace it with an empty tuple
            if parent_keys is None:
                parent_keys = ()
            for p in parent_keys:
                # Alternatively we could check p not in self.needed_keys, but
                # ghost_parents should be tiny versus huge
                if p in self.ghost_parents:
                    continue
                refcount = self.refcounts[p]
                if refcount == 1:  # Last child reference
                    self.refcounts.pop(p)
                    parent_chunks = self.chunks.pop(p)
                else:
                    self.refcounts[p] = refcount - 1
                    parent_chunks = self.chunks[p]
                p_lines = osutils.chunks_to_lines(parent_chunks)
                # TODO: Should we cache the line form? We did the
                #       computation to get it, but storing it this way will
                #       be less memory efficient...
                parent_lines.append(p_lines)
            lines = osutils.chunks_to_lines(this_chunks)
            # Since we needed the lines, we'll go ahead and cache them this way
            this_chunks = lines
            self._compute_diff(key, parent_lines, lines)
            del lines
        # Is this content required for any more children?
        if key in self.refcounts:
            self.chunks[key] = this_chunks

    def _extract_diffs(self):
        needed_keys, refcounts = self._find_needed_keys()
        for record in self.vf.get_record_stream(needed_keys,
                                                'topological', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self.vf)
            self._process_one_record(record.key,
                                     record.get_bytes_as('chunked'))

    def compute_diffs(self):
        self._extract_diffs()
        dpop = self.diffs.pop
        return [dpop(k) for k in self.ordered_keys]
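
    # Usage sketch (not in the original source): VersionedFiles.make_mpdiffs()
    # below drives this class as
    #
    #   generator = _MPDiffGenerator(versioned_files, keys)
    #   diffs = generator.compute_diffs()  # one MultiParent per key, in order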


class VersionedFile(object):
    """Versioned text file storage.

    A versioned file manages versions of line-based text files, keeping track
    of the originating version for each line.
    """

    def add_lines(self, version_id, parents, lines, parent_texts=None,
                  left_matching_blocks=None, nostore_sha=None, random_id=False,
                  check_content=True):
        """Add a single text on top of the versioned file."""
        self._check_write_ok()
        return self._add_lines(version_id, parents, lines, parent_texts,
                               left_matching_blocks, nostore_sha, random_id,
                               check_content)

    def _add_lines(self, version_id, parents, lines, parent_texts,
                   left_matching_blocks, nostore_sha, random_id,
                   check_content):
        """Helper to do the class specific add_lines."""
        raise NotImplementedError(self.add_lines)

    def add_lines_with_ghosts(self, version_id, parents, lines,
                              parent_texts=None, nostore_sha=None,
                              random_id=False, check_content=True,
                              left_matching_blocks=None):
        """Add lines to the versioned file, allowing ghosts to be present.

        This takes the same parameters as add_lines and returns the same.
        """
        self._check_write_ok()
        return self._add_lines_with_ghosts(version_id, parents, lines,
                                           parent_texts, nostore_sha,
                                           random_id, check_content,
                                           left_matching_blocks)

    def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
                               nostore_sha, random_id, check_content,
                               left_matching_blocks):
        """Helper to do class specific add_lines_with_ghosts."""
        raise NotImplementedError(self.add_lines_with_ghosts)
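
    # Example sketch (not in the original source): implementations return a
    # (sha1, text_length, opaque_representation) tuple, e.g.
    #
    #   sha1, length, _ = vf.add_lines(b'rev-2', [b'rev-1'],
    #                                  [b'hello\n', b'world\n'])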

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """
        vf_parents = {}
        mpvf = multiparent.MultiMemoryVersionedFile()
        versions = []
        for version, parent_ids, expected_sha1, mpdiff in records:
            versions.append(version)
            mpvf.add_diff(mpdiff, version, parent_ids)
        needed_parents = set()
        for version, parent_ids, expected_sha1, mpdiff in records:
            needed_parents.update(p for p in parent_ids
                                  if not mpvf.has_version(p))
        present_parents = set(self.get_parent_map(needed_parents))
        for parent_id, lines in zip(present_parents,
                                    self._get_lf_split_line_list(present_parents)):
            mpvf.add_version(lines, parent_id, [])
        for (version, parent_ids, expected_sha1, mpdiff), lines in zip(
                records, mpvf.get_line_list(versions)):
            if len(parent_ids) == 1:
                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                    mpvf.get_diff(parent_ids[0]).num_lines()))
            else:
                left_matching_blocks = None
            try:
                _, _, version_text = self.add_lines_with_ghosts(version,
                    parent_ids, lines, vf_parents,
                    left_matching_blocks=left_matching_blocks)
            except NotImplementedError:
                # The vf can't handle ghosts, so add lines normally, which will
                # (reasonably) fail if there are ghosts in the data.
                _, _, version_text = self.add_lines(version,
                    parent_ids, lines, vf_parents,
                    left_matching_blocks=left_matching_blocks)
            vf_parents[version] = version_text
        sha1s = self.get_sha1s(versions)
        for version, parent_ids, expected_sha1, mpdiff in records:
            sha1 = sha1s[version]
            if sha1 != expected_sha1:
                raise errors.VersionedFileInvalidChecksum(version)

    def get_lines(self, version_id):
        """Return version contents as a sequence of lines.

        Must raise RevisionNotPresent if the given version is
        not present in file history.
        """
        raise NotImplementedError(self.get_lines)

    def _get_lf_split_line_list(self, version_ids):
        return [BytesIO(t).readlines() for t in self.get_texts(version_ids)]

    def get_ancestry(self, version_ids):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history."""
        raise NotImplementedError(self.get_ancestry)

    def get_ancestry_with_ghosts(self, version_ids):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history.

        Ghosts that are known about will be included in the ancestry list,
        but are not explicitly marked.
        """
        raise NotImplementedError(self.get_ancestry_with_ghosts)


class RecordingVersionedFilesDecorator(object):
    """A minimal shim extending VersionedFiles, recording requests made to it.

    Only enough methods have been added to support tests using it to date.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to it.
    """

    def __init__(self, backing_vf):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        """
        self._backing_vf = backing_vf
        self.calls = []

    def add_lines(self, key, parents, lines, parent_texts=None,
                  left_matching_blocks=None, nostore_sha=None, random_id=False,
                  check_content=True):
        self.calls.append(("add_lines", key, parents, lines, parent_texts,
                           left_matching_blocks, nostore_sha, random_id,
                           check_content))
        return self._backing_vf.add_lines(key, parents, lines, parent_texts,
                                          left_matching_blocks, nostore_sha,
                                          random_id, check_content)

    def add_content(self, factory, parent_texts=None,
                    left_matching_blocks=None, nostore_sha=None,
                    random_id=False, check_content=True):
        self.calls.append(("add_content", factory, parent_texts,
                           left_matching_blocks, nostore_sha, random_id,
                           check_content))
        return self._backing_vf.add_content(
            factory, parent_texts, left_matching_blocks, nostore_sha,
            random_id, check_content)

    def check(self):
        self._backing_vf.check()


class HashEscapedPrefixMapper(HashPrefixMapper):
    """Combines the escaped first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    _safe = bytearray(b"abcdefghijklmnopqrstuvwxyz0123456789-_@,.")

    def _escape(self, prefix):
        """Turn a key element into a filesystem safe string.

        This is similar to a plain urlutils.quote, except
        it uses specific safe characters, so that it doesn't
        have to translate a lot of valid file ids.
        """
        # @ does not get escaped. This is because it is a valid
        # filesystem character we use all the time, and it looks
        # a lot better than seeing %40 all the time.
        r = [(c in self._safe) and chr(c) or ('%%%02x' % c)
             for c in bytearray(prefix)]
        return ''.join(r).encode('ascii')

    def _unescape(self, basename):
        """Escaped names are easily unescaped by urlutils."""
        return urlutils.unquote(basename)
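
    # Example (derived from _escape above): bytes listed in _safe pass through
    # unchanged and everything else is %-escaped, so
    # _escape(b'foo id') -> b'foo%20id', while b'@' survives as itself;
    # _unescape() reverses this via urlutils.unquote().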


def make_versioned_files_factory(versioned_file_factory, mapper):
    """Create a ThunkedVersionedFiles factory.

    This will create a callable which when called creates a
    ThunkedVersionedFiles on a transport, using mapper to access individual
    versioned files, and versioned_file_factory to create each individual
    file.
    """
    def factory(transport):
        return ThunkedVersionedFiles(transport, versioned_file_factory,
                                     mapper, lambda: True)
    return factory


class VersionedFiles(object):
    """Storage for many versioned files.

    This object allows a single keyspace for accessing the contents of named
    bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.

    The keyspace is expressed via simple tuples. Any instance of VersionedFiles
    may have a different length key-size, but that size will be constant for
    all texts added to or retrieved from it. For instance, breezy uses
    instances with a key-size of 2 for storing user files in a repository, with
    the first element the fileid, and the second the version of that file.

    The use of tuples allows a single code base to support several different
    uses with only the mapping logic changing from instance to instance.

    :ivar _immediate_fallback_vfs: For subclasses that support stacking,
        this is a list of other VersionedFiles immediately underneath this
        one. They may in turn each have further fallbacks.
    """
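
    # Example sketch (not in the original source): with a key-size of 2 the
    # same store holds texts for many files, e.g.
    #
    #   key = (b'file-id', b'revision-id')
    #   vf.add_lines(key, [(b'file-id', b'parent-rev')], [b'a line\n'])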

    def add_lines(self, key, parents, lines, parent_texts=None,
                  left_matching_blocks=None, nostore_sha=None, random_id=False,
                  check_content=True):
        """Add a text to the store.

        :param key: The key tuple of the text to add. If the last element is
            None, a CHK string will be generated during the addition.
        :param parents: The parents key tuples of the text to add.
        :param lines: A list of lines. Each line must be a bytestring.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            a version id allocated by the versioned file itself.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future add_lines calls in the parent_texts dictionary.
        """
        raise NotImplementedError(self.add_lines)

    def add_content(self, factory, parent_texts=None,
                    left_matching_blocks=None, nostore_sha=None,
                    random_id=False, check_content=True):
        """Add a text to the store from a chunk iterable.

        :param factory: A ContentFactory providing the key, parents and
            chunks of the text to add.
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations. VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent. The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            a version id allocated by the versioned file itself.
        """
        raise NotImplementedError(self.add_content)

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """
        vf_parents = {}
        mpvf = multiparent.MultiMemoryVersionedFile()
        versions = []
        for version, parent_keys, expected_sha1, mpdiff in records:
            versions.append(version)
            mpvf.add_diff(mpdiff, version, parent_keys)
        needed_parents = set()
        for version, parent_keys, expected_sha1, mpdiff in records:
            needed_parents.update(p for p in parent_keys
                                  if not mpvf.has_version(p))
        # It seems likely that adding all the present parents as fulltexts can
        # easily exhaust memory.
        for record in self.get_record_stream(needed_parents, 'unordered',
                                             True):
            if record.storage_kind == 'absent':
                continue
            mpvf.add_version(record.get_bytes_as('lines'), record.key, [])
        for (key, parent_keys, expected_sha1, mpdiff), lines in zip(
                records, mpvf.get_line_list(versions)):
            if len(parent_keys) == 1:
                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                    mpvf.get_diff(parent_keys[0]).num_lines()))
            else:
                left_matching_blocks = None
            version_sha1, _, version_text = self.add_lines(key,
                parent_keys, lines, vf_parents,
                left_matching_blocks=left_matching_blocks)
            if version_sha1 != expected_sha1:
                raise errors.VersionedFileInvalidChecksum(key)
            vf_parents[key] = version_text

    def make_mpdiffs(self, keys):
        """Create multiparent diffs for specified keys."""
        generator = _MPDiffGenerator(self, keys)
        return generator.compute_diffs()

    def get_annotator(self):
        return annotate.Annotator(self)

    missing_keys = index._missing_keys_from_parent_map

    def _extract_blocks(self, version_id, source, target):
        return None

    def _transitive_fallbacks(self):
        """Return the whole stack of fallback versionedfiles.

        This VersionedFiles may have a list of fallbacks, but it doesn't
        necessarily know about the whole stack going down, and it can't know
        at open time because they may change after the objects are opened.
        """
        all_fallbacks = []
        for a_vfs in self._immediate_fallback_vfs:
            all_fallbacks.append(a_vfs)
            all_fallbacks.extend(a_vfs._transitive_fallbacks())
        return all_fallbacks


class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class.

    This object allows a single keyspace for accessing the contents of named
    bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.
    """

    def __init__(self, transport, file_factory, mapper, is_locked):
        """Create a ThunkedVersionedFiles."""
        self._transport = transport
        self._file_factory = file_factory
        self._mapper = mapper
        self._is_locked = is_locked

    def add_content(self, factory, parent_texts=None,
                    left_matching_blocks=None, nostore_sha=None,
                    random_id=False):
        """See VersionedFiles.add_content()."""
        lines = factory.get_bytes_as('lines')
        return self.add_lines(
            factory.key, factory.parents, lines,
            parent_texts=parent_texts,
            left_matching_blocks=left_matching_blocks,
            nostore_sha=nostore_sha,
            random_id=random_id,
            check_content=True)

    def add_lines(self, key, parents, lines, parent_texts=None,
                  left_matching_blocks=None, nostore_sha=None, random_id=False,
                  check_content=True):
        """See VersionedFiles.add_lines()."""
        path = self._mapper.map(key)
        version_id = key[-1]
        parents = [parent[-1] for parent in parents]
        vf = self._get_vf(path)
        try:
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                                                parent_texts=parent_texts,
                                                left_matching_blocks=left_matching_blocks,
                                                nostore_sha=nostore_sha,
                                                random_id=random_id,
                                                check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                                    parent_texts=parent_texts,
                                    left_matching_blocks=left_matching_blocks,
                                    nostore_sha=nostore_sha,
                                    random_id=random_id,
                                    check_content=check_content)
        except errors.NoSuchFile:
            # parent directory may be missing, try again.
            self._transport.mkdir(osutils.dirname(path))
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                                                parent_texts=parent_texts,
                                                left_matching_blocks=left_matching_blocks,
                                                nostore_sha=nostore_sha,
                                                random_id=random_id,
                                                check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                                    parent_texts=parent_texts,
                                    left_matching_blocks=left_matching_blocks,
                                    nostore_sha=nostore_sha,
                                    random_id=random_id,
                                    check_content=check_content)

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        prefix = key[:-1]
        path = self._mapper.map(prefix)
        vf = self._get_vf(path)
        origins = vf.annotate(key[-1])
        result = []
        for origin, line in origins:
            result.append((prefix + (origin,), line))
        return result


class VersionedFilesWithFallbacks(VersionedFiles):

    def without_fallbacks(self):
        """Return a clone of this object without any fallbacks configured."""
        raise NotImplementedError(self.without_fallbacks)

    def add_fallback_versioned_files(self, a_versioned_files):
        """Add a source of texts for texts not present in this knit.

        :param a_versioned_files: A VersionedFiles object.
        """
        raise NotImplementedError(self.add_fallback_versioned_files)

    def get_known_graph_ancestry(self, keys):
        """Get a KnownGraph instance with the ancestry of keys."""
        parent_map, missing_keys = self._index.find_ancestry(keys)
        for fallback in self._transitive_fallbacks():
            if not missing_keys:
                break
            (f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
                missing_keys)
            parent_map.update(f_parent_map)
            missing_keys = f_missing_keys
        kg = _mod_graph.KnownGraph(parent_map)
        return kg


class _PlanMergeVersionedFile(VersionedFiles):
    """A VersionedFile for uncommitted and committed texts.

    It is intended to allow merges to be planned with working tree texts.
    It implements only the small part of the VersionedFiles interface used by
    PlanMerge. It falls back to multiple versionedfiles for data not stored
    in _PlanMergeVersionedFile itself.

    :ivar: fallback_versionedfiles a list of VersionedFiles objects that can
        be queried for missing texts.
    """

    def __init__(self, file_id):
        """Create a _PlanMergeVersionedFile.

        :param file_id: Used with _PlanMerge code which is not yet fully
            tuple-keyspace aware.
        """
        self._file_id = file_id
        # fallback locations
        self.fallback_versionedfiles = []
        # Parents for locally held keys.
        self._parents = {}
        # line data for locally held keys.
        self._lines = {}
        # key lookup providers
        self._providers = [_mod_graph.DictParentsProvider(self._parents)]
1336
def plan_merge(self, ver_a, ver_b, base=None):
1589
1337
"""See VersionedFile.plan_merge"""
1590
from ..merge import _PlanMerge
1338
from bzrlib.merge import _PlanMerge
1591
1339
if base is None:
1592
1340
return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
1593
old_plan = list(_PlanMerge(ver_a, base, self,
1594
(self._file_id,)).plan_merge())
1595
new_plan = list(_PlanMerge(ver_a, ver_b, self,
1596
(self._file_id,)).plan_merge())
1341
old_plan = list(_PlanMerge(ver_a, base, self, (self._file_id,)).plan_merge())
1342
new_plan = list(_PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge())
1597
1343
return _PlanMerge._subtract_plans(old_plan, new_plan)
1599
1345
def plan_lca_merge(self, ver_a, ver_b, base=None):
1600
from ..merge import _PlanLCAMerge
1601
graph = _mod_graph.Graph(self)
1602
new_plan = _PlanLCAMerge(
1603
ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
1346
from bzrlib.merge import _PlanLCAMerge
1348
new_plan = _PlanLCAMerge(ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
1604
1349
if base is None:
1605
1350
return new_plan
1606
old_plan = _PlanLCAMerge(
1607
ver_a, base, self, (self._file_id,), graph).plan_merge()
1351
old_plan = _PlanLCAMerge(ver_a, base, self, (self._file_id,), graph).plan_merge()
1608
1352
return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))

    def add_content(self, factory):
        return self.add_lines(
            factory.key, factory.parents, factory.get_bytes_as('lines'))

    def add_lines(self, key, parents, lines):
        """See VersionedFiles.add_lines.

        Lines are added locally, not to fallback versionedfiles. Also, ghosts
        are permitted. Only reserved ids are permitted.
        """
        if not isinstance(key, tuple):
            raise TypeError(key)
        if not revision.is_reserved_id(key[-1]):
            raise ValueError('Only reserved ids may be used')
        if parents is None:
            raise ValueError('Parents may not be None')
        if lines is None:
            raise ValueError('Lines may not be None')
        self._parents[key] = tuple(parents)
        self._lines[key] = lines

            elif state == 'killed-both':
                # This counts as a change, even though there is no associated
                # line
                ch_b = ch_a = True
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                                 'killed-base'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct

    def base_from_plan(self):
        """Construct a BASE file from the plan text."""
        base_lines = []
        for state, line in self.plan:
            if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
                # If unchanged, then this line is straight from base. If a or b
                # or both killed the line, then it *used* to be in base.
                base_lines.append(line)
            else:
                if state not in ('killed-base', 'irrelevant',
                                 'ghost-a', 'ghost-b',
                                 'new-a', 'new-b',
                                 'conflicted-a', 'conflicted-b'):
                    # killed-base, irrelevant means it doesn't apply
                    # ghost-a/ghost-b are harder to say for sure, but they
                    # aren't in the 'inc_c' which means they aren't in the
                    # shared base of a & b. So we don't include them. And
                    # obviously if the line is newly inserted, it isn't in base
                    # If 'conflicted-a' or b, then it is new vs one base, but
                    # old versus another base. However, if we make it present
                    # in the base, it will be deleted from the target, and it
                    # seems better to get a line doubled in the merge result,
                    # rather than have it deleted entirely.
                    # Example, each node is the 'text' at that point:
                    #           MN
                    #          /   \
                    #        MaN   MbN
                    #         |  X  |
                    #        MabN MbaN
                    #          \   /
                    #           ???
                    # There was a criss-cross conflict merge. Both sides
                    # include the other, but put themselves first.
                    # Weave marks this as a 'clean' merge, picking OTHER over
                    # THIS. (Though the details depend on order inserted into
                    # weave, etc.)
                    # LCA generates a plan:
                    # [('unchanged', M),
                    #  ('conflicted-b', b),
                    #  ('unchanged', a),
                    #  ('conflicted-a', b),
                    #  ('unchanged', N)]
                    # If you mark 'conflicted-*' as part of BASE, then a 3-way
                    # merge tool will cleanly generate "MaN" (as BASE vs THIS
                    # removes one 'b', and BASE vs OTHER removes the other)
                    # If you include neither, 3-way creates a clean "MbabN" as
                    # THIS adds one 'b', and OTHER does too.
                    # It seems that having the line 2 times is better than
                    # having it omitted. (Easier to manually delete than notice
                    # it needs to be added.)
                    raise AssertionError('Unknown state: %s' % (state,))
        return base_lines
1487
class WeaveMerge(PlanWeaveMerge):
1805
1488
"""Weave merge that takes a VersionedFile and two versions as its input."""
1807
1490
def __init__(self, versionedfile, ver_a, ver_b,
1808
a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
1491
a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
1809
1492
plan = versionedfile.plan_merge(ver_a, ver_b)
1810
1493
PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
1884
class NoDupeAddLinesDecorator(object):
1885
"""Decorator for a VersionedFiles that skips doing an add_lines if the key
1889
def __init__(self, store):
1892
def add_lines(self, key, parents, lines, parent_texts=None,
1893
left_matching_blocks=None, nostore_sha=None, random_id=False,
1894
check_content=True):
1895
"""See VersionedFiles.add_lines.
1897
This implementation may return None as the third element of the return
1898
value when the original store wouldn't.
1901
raise NotImplementedError(
1902
"NoDupeAddLinesDecorator.add_lines does not implement the "
1903
"nostore_sha behaviour.")
1905
sha1 = osutils.sha_strings(lines)
1906
key = (b"sha1:" + sha1,)
1909
if key in self._store.get_parent_map([key]):
1910
# This key has already been inserted, so don't do it again.
1912
sha1 = osutils.sha_strings(lines)
1913
return sha1, sum(map(len, lines)), None
1914
return self._store.add_lines(key, parents, lines,
1915
parent_texts=parent_texts,
1916
left_matching_blocks=left_matching_blocks,
1917
nostore_sha=nostore_sha, random_id=random_id,
1918
check_content=check_content)
1920
def __getattr__(self, name):
1921
return getattr(self._store, name)
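
# Usage sketch (not in the original source): the decorator is transparent for
# everything except add_lines, so wrapping a store as
# NoDupeAddLinesDecorator(store) makes repeated add_lines calls for the same
# CHK key cheap no-ops.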


def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off the record kind from the front of network_bytes.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    line_end = network_bytes.find(b'\n')
    storage_kind = network_bytes[:line_end].decode('ascii')
    return storage_kind, line_end + 1
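
# Example (derived from the function above):
#   network_bytes_to_kind_and_offset(b'fulltext\n<rest of record>')
#   -> ('fulltext', 9), since the kind line b'fulltext\n' occupies bytes 0-8.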


class NetworkRecordStream(object):
    """A record stream which reconstitutes a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator is a record as returned by the
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        self._kind_factory = {
            'fulltext': fulltext_network_to_record,
            'groupcompress-block': groupcompress.network_block_to_records,
            'knit-ft-gz': knit.knit_network_to_record,
            'knit-delta-gz': knit.knit_network_to_record,
            'knit-annotated-ft-gz': knit.knit_network_to_record,
            'knit-annotated-delta-gz': knit.knit_network_to_record,
            'knit-delta-closure': knit.knit_delta_closure_to_records,
            }

    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
        for bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
            for record in self._kind_factory[storage_kind](
                    storage_kind, bytes, line_end):
                yield record


def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record."""
    meta_len, = struct.unpack('!L', bytes[line_end:line_end + 4])
    record_meta = bytes[line_end + 4:line_end + 4 + meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    if parents == b'nil':
        parents = None
    fulltext = bytes[line_end + 4 + meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]
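
# Wire format sketch (derived from the parser above): a fulltext record is
#
#   b'fulltext\n' + struct.pack('!L', len(meta)) + meta + fulltext
#
# where meta is the bencoded (key, parents) tuple, with parents b'nil' when
# the record carries no parent information.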


def sort_groupcompress(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the key prefix.

    :return: A sorted-list of keys
    """
    # gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
    per_prefix_map = {}
    for item in parent_map.items():
        key = item[0]
        if isinstance(key, bytes) or len(key) == 1:
            prefix = b''
        else:
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]

    present_keys = []
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys
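
# Example (sketch): given parent_map
#   {(b'f1', b'b'): ((b'f1', b'a'),), (b'f1', b'a'): (), (b'f2', b'x'): ()}
# the keys are grouped by prefix and each group is emitted reverse-
# topologically: [(b'f1', b'b'), (b'f1', b'a'), (b'f2', b'x')].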


class _KeyRefs(object):

    def __init__(self, track_new_keys=False):
        # dict mapping 'key' to 'set of keys referring to that key'
        self.refs = {}
        if track_new_keys:
            # set remembering all new keys
            self.new_keys = set()
        else:
            self.new_keys = None

    def clear(self):
        if self.refs:
            self.refs.clear()
        if self.new_keys:
            self.new_keys.clear()

    def add_references(self, key, refs):
        # Record the new references
        for referenced in refs:
            try:
                needed_by = self.refs[referenced]
            except KeyError:
                needed_by = self.refs[referenced] = set()
            needed_by.add(key)
        # Discard references satisfied by the new key
        self.add_key(key)

    def get_new_keys(self):
        return self.new_keys

    def get_unsatisfied_refs(self):
        return self.refs.keys()

    def _satisfy_refs_for_key(self, key):
        try:
            del self.refs[key]
        except KeyError:
            # No keys depended on this key. That's ok.
            pass

    def add_key(self, key):
        # satisfy refs for key, and remember that we've seen this key.
        self._satisfy_refs_for_key(key)
        if self.new_keys is not None:
            self.new_keys.add(key)

    def satisfy_refs_for_keys(self, keys):
        for key in keys:
            self._satisfy_refs_for_key(key)

    def get_referrers(self):
        return set(itertools.chain.from_iterable(self.refs.values()))
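
# Usage sketch (not in the original source): track which parents are still
# awaited while streaming records.
#
#   refs = _KeyRefs()
#   refs.add_references((b'rev-2',), [(b'rev-1',)])  # rev-2 refers to rev-1
#   list(refs.get_unsatisfied_refs())                # -> [(b'rev-1',)]
#   refs.add_key((b'rev-1',))                        # rev-1 arrives
#   list(refs.get_unsatisfied_refs())                # -> []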