    'bzrlib.knit', 'FTAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
    'bzrlib.knit', 'FTAnnotatedToFullText')
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
#     'bzrlib.knit', 'FTAnnotatedToChunked')


class ContentFactory(object):
    """Abstract interface for insertion and retrieval from a VersionedFile.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. One of
        'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
        'knit-delta', 'chunked', 'fulltext', 'knit-annotated-ft-gz',
        'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = None
        self.key = None
        self.parents = None


class ChunkedContentFactory(ContentFactory):
    """Static data content factory.

    This takes a 'chunked' list of strings. The only requirement on 'chunked'
    is that ''.join(chunks) becomes a valid fulltext. A tuple of a single
    string satisfies this, as does a list of lines.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'chunked'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, chunks):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.storage_kind = 'chunked'
        self.key = key
        self.parents = parents
        self._chunks = chunks

    def get_bytes_as(self, storage_kind):
        if storage_kind == 'chunked':
            return self._chunks
        elif storage_kind == 'fulltext':
            return ''.join(self._chunks)
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)
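

# Illustrative sketch, not part of the original module: how a
# ChunkedContentFactory serves the two storage kinds it supports. The key,
# parents and chunk values below are invented for the example.
def _demo_chunked_content_factory():
    chunks = ['first line\n', 'second line\n']
    factory = ChunkedContentFactory(('file-id', 'rev-1'), (), None, chunks)
    # 'chunked' hands back the stored chunk list unchanged.
    assert factory.get_bytes_as('chunked') is chunks
    # 'fulltext' joins the chunks into a single string.
    assert factory.get_bytes_as('fulltext') == 'first line\nsecond line\n'
    # Any other storage kind raises errors.UnavailableRepresentation.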


class FulltextContentFactory(ContentFactory):
    """Static data content factory.

    This takes a fulltext when created and just returns that during
    get_bytes_as('fulltext').

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'fulltext'.
    """

    def get_ancestry(self, version_ids, topo_sorted=True):
        if isinstance(version_ids, basestring):
            version_ids = [version_ids]
        raise NotImplementedError(self.get_ancestry)

    def get_ancestry_with_ghosts(self, version_ids):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history.

        Ghosts that are known about will be included in the ancestry list,
        but are not explicitly marked.
        """
        raise NotImplementedError(self.get_ancestry_with_ghosts)

    def get_parent_map(self, version_ids):
        """Get a map of the parents of version_ids.

        :param version_ids: The version ids to look up parents for.
        :return: A mapping from version id to parents.
        """
        raise NotImplementedError(self.get_parent_map)

    def keys(self):
        return self._backing_vf.keys()


class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
    """A VF that records calls, and returns keys in specific order.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to its value.
    """

    def __init__(self, backing_vf, key_priority):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        :param key_priority: A dictionary defining what order keys should be
            returned from an 'unordered' get_record_stream request.
            Keys with lower priority are returned first, keys not present in
            the map get an implicit priority of 0, and are returned in
            lexicographical order.
        """
        RecordingVersionedFilesDecorator.__init__(self, backing_vf)
        self._key_priority = key_priority

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", list(keys), sort_order,
            include_delta_closure))
        if sort_order == 'unordered':
            def sort_key(key):
                return (self._key_priority.get(key, 0), key)
            # Use a defined order by asking for the keys one-by-one from the
            # backing_vf.
            for key in sorted(keys, key=sort_key):
                for record in self._backing_vf.get_record_stream([key],
                        'unordered', include_delta_closure):
                    yield record
        else:
            for record in self._backing_vf.get_record_stream(keys, sort_order,
                    include_delta_closure):
                yield record
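

# Illustrative sketch, not part of the original module: how key_priority
# drives an 'unordered' get_record_stream request. Keys absent from the
# map default to priority 0; ties fall back to lexicographical order. The
# keys and priorities below are invented for the example.
def _demo_key_priority_ordering():
    key_priority = {('b',): 2, ('c',): 1}
    def sort_key(key):
        return (key_priority.get(key, 0), key)
    keys = [('b',), ('c',), ('a',), ('d',)]
    # ('a',) and ('d',) share the implicit priority 0, so they come first
    # in lexicographical order, then ('c',) and finally ('b',).
    assert sorted(keys, key=sort_key) == [('a',), ('d',), ('c',), ('b',)]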


class KeyMapper(object):
    """KeyMappers map between keys and underlying partitioned storage."""

        raise NotImplementedError(self.add_lines)

    def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
        """Add a text to the store.

        This is a private function for use by CommitBuilder.

        :param key: The key tuple of the text to add. If the last element is
            None, a CHK string will be generated during the addition.
        :param parents: The parents key tuples of the text to add.
        :param text: A string containing the text to be committed.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future _add_text calls in the parent_texts dictionary.
        """
        # The default implementation just thunks over to .add_lines(),
        # which is inefficient, but it works.
        return self.add_lines(key, parents, osutils.split_lines(text),
                              nostore_sha=nostore_sha,
                              random_id=random_id)

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of (key, parents, expected_sha1, mpdiff)
        tuples; each mpdiff should be a MultiParent instance.
        """
        needed_parents.update(p for p in parent_keys
                              if not mpvf.has_version(p))
        # It seems likely that adding all the present parents as fulltexts can
        # easily exhaust memory.
        chunks_to_lines = osutils.chunks_to_lines
        for record in self.get_record_stream(needed_parents, 'unordered',
                True):
            if record.storage_kind == 'absent':
                continue
            mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
                record.key, [])
        for (key, parent_keys, expected_sha1, mpdiff), lines in \
                zip(records, mpvf.get_line_list(versions)):

        raise NotImplementedError(self.annotate)

    def check(self, progress_bar=None, keys=None):
        """Check this object for integrity.

        :param progress_bar: A progress bar to output as the check progresses.
        :param keys: Specific keys within the VersionedFiles to check. When
            this parameter is not None, check() becomes a generator as per
            get_record_stream. The difference to get_record_stream is that
            more or deeper checks will be performed.
        :return: None, or if keys was supplied a generator as per
            get_record_stream.
        """
        raise NotImplementedError(self.check)

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def clear_cache(self):
        """Clear whatever caches this VersionedFile holds.

        This is generally called after an operation has been performed, when we
        don't expect to be using this versioned file again soon.
        """

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

        raise NotImplementedError(self.get_sha1s)

    has_key = index._has_key_from_parent_map

    def get_missing_compression_parent_keys(self):
        """Return an iterable of keys of missing compression parents.

        Check this after calling insert_record_stream to find out if there are
        any missing compression parents. If there are, the records that
        depend on them are not able to be inserted safely. The precise
        behaviour depends on the concrete VersionedFiles class in use.

        Classes that do not support this will raise NotImplementedError.
        """
        raise NotImplementedError(self.get_missing_compression_parent_keys)

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError(self.insert_record_stream)

            result.append((prefix + (origin,), line))
        return result

    def get_annotator(self):
        return annotate.Annotator(self)

    def check(self, progress_bar=None, keys=None):
        """See VersionedFiles.check()."""
        # XXX: This is over-enthusiastic but as we only thunk for Weaves today
        # this is tolerable. Ideally we'd pass keys down to check() and
        # have the older VersionedFile interface updated too.
        for prefix, vf in self._iter_all_components():
            vf.check()
        if keys is not None:
            return self.get_record_stream(keys, 'unordered', True)

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """

            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            elif state == 'killed-both':
                # This counts as a change, even though there is no associated
                # line.
                ch_b = ch_a = True
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                        'killed-base'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct

    def base_from_plan(self):
        """Construct a BASE file from the plan text."""
        base_lines = []
        for state, line in self.plan:
            if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
                # If unchanged, then this line is straight from base. If a or b
                # or both killed the line, then it *used* to be in base.
                base_lines.append(line)
            else:
                if state not in ('killed-base', 'irrelevant',
                                 'ghost-a', 'ghost-b',
                                 'new-a', 'new-b',
                                 'conflicted-a', 'conflicted-b'):
                    # killed-base, irrelevant means it doesn't apply
                    # ghost-a/ghost-b are harder to say for sure, but they
                    # aren't in the 'inc_c' which means they aren't in the
                    # shared base of a & b. So we don't include them. And
                    # obviously if the line is newly inserted, it isn't in base
                    # If 'conflicted-a' or b, then it is new vs one base, but
                    # old versus another base. However, if we make it present
                    # in the base, it will be deleted from the target, and it
                    # seems better to get a line doubled in the merge result,
                    # rather than have it deleted entirely.
                    # Example, each node is the 'text' at that point:
                    #           MN
                    #          /   \
                    #        MaN   MbN
                    #         |  X  |
                    #        MabN MbaN
                    #          \   /
                    #           ???
                    # There was a criss-cross conflict merge. Both sides
                    # include the other, but put themselves first.
                    # Weave marks this as a 'clean' merge, picking OTHER over
                    # THIS. (Though the details depend on order inserted into
                    # weave, etc.)
                    # LCA generates a plan:
                    # [('unchanged', M),
                    #  ('conflicted-b', b),
                    #  ('unchanged', a),
                    #  ('conflicted-a', b),
                    #  ('unchanged', N)]
                    # If you mark 'conflicted-*' as part of BASE, then a 3-way
                    # merge tool will cleanly generate "MaN" (as BASE vs THIS
                    # removes one 'b', and BASE vs OTHER removes the other)
                    # If you include neither, 3-way creates a clean "MbabN" as
                    # THIS adds one 'b', and OTHER does too.
                    # It seems that having the line 2 times is better than
                    # having it omitted. (Easier to manually delete than notice
                    # it needs to be added.)
                    raise AssertionError('Unknown state: %s' % (state,))
        return base_lines
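
    # Illustrative sketch, not part of the original module: for the
    # criss-cross plan discussed above, base_from_plan() keeps only the
    # 'unchanged' lines, so BASE becomes "MaN" and a 3-way merge re-adds
    # both conflicted 'b' lines ("MbabN"):
    #   plan = [('unchanged', 'M\n'), ('conflicted-b', 'b\n'),
    #           ('unchanged', 'a\n'), ('conflicted-a', 'b\n'),
    #           ('unchanged', 'N\n')]
    #   PlanWeaveMerge(plan).base_from_plan() == ['M\n', 'a\n', 'N\n']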


class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)


class VirtualVersionedFiles(VersionedFiles):
    """Dummy implementation for VersionedFiles that uses other functions for
    obtaining fulltexts and parent maps.

    This is always on the bottom of the stack and uses string keys
    (rather than tuples) internally.
    """

            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                yield ChunkedContentFactory((k,), None,
                    sha1=osutils.sha_strings(lines),
                    chunks=lines)
            else:
                yield AbsentContentFactory((k,))

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        for i, (key,) in enumerate(keys):
            if pb is not None:
                pb.update("Finding changed lines", i, len(keys))
            for l in self._get_lines(key):
                yield (l, key)


def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off a record kind from the front of network_bytes.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes).
    """
    line_end = network_bytes.find('\n')
    storage_kind = network_bytes[:line_end]
    return storage_kind, line_end + 1
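

# Illustrative sketch, not part of the original module: the storage kind
# is everything before the first newline, and the returned offset points
# just past that newline.
def _demo_kind_and_offset():
    assert network_bytes_to_kind_and_offset('fulltext\n...') == ('fulltext', 9)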


class NetworkRecordStream(object):
    """A record_stream which reconstitutes a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator should have been obtained from a record_stream's
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        self._kind_factory = {
            'fulltext': fulltext_network_to_record,
            'groupcompress-block': groupcompress.network_block_to_records,
            'knit-ft-gz': knit.knit_network_to_record,
            'knit-delta-gz': knit.knit_network_to_record,
            'knit-annotated-ft-gz': knit.knit_network_to_record,
            'knit-annotated-delta-gz': knit.knit_network_to_record,
            'knit-delta-closure': knit.knit_delta_closure_to_records,
            }

    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
        for bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
            for record in self._kind_factory[storage_kind](
                    storage_kind, bytes, line_end):
                yield record


def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record."""
    meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
    record_meta = bytes[line_end+4:line_end+4+meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    if parents == 'nil':
        parents = None
    fulltext = bytes[line_end+4+meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]


def _length_prefix(bytes):
    return struct.pack('!L', len(bytes))


def record_to_fulltext_bytes(record):
    if record.parents is None:
        parents = 'nil'
    else:
        parents = record.parents
    record_meta = bencode.bencode((record.key, parents))
    record_content = record.get_bytes_as('fulltext')
    return "fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)
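

# Illustrative sketch, not part of the original module: a fulltext record
# survives a round trip through the network encoding. The key and text
# below are invented for the example.
def _demo_fulltext_round_trip():
    record = FulltextContentFactory(('file-id', 'rev-1'), None, None,
        'some text\n')
    wire_bytes = record_to_fulltext_bytes(record)
    kind, offset = network_bytes_to_kind_and_offset(wire_bytes)
    decoded, = fulltext_network_to_record(kind, wire_bytes, offset)
    assert decoded.key == ('file-id', 'rev-1')
    assert decoded.parents is None
    assert decoded.get_bytes_as('fulltext') == 'some text\n'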


def sort_groupcompress(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the key prefix.

    :return: A sorted list of keys.
    """
    # gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
    per_prefix_map = {}
    for item in parent_map.iteritems():
        key = item[0]
        if isinstance(key, str) or len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]

    present_keys = []
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys
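

# Illustrative sketch, not part of the original module: keys are grouped
# by prefix, and within each group children sort before their parents.
# The file ids and revision names below are invented for the example.
def _demo_sort_groupcompress():
    parent_map = {
        ('f1', 'rev-2'): (('f1', 'rev-1'),),
        ('f1', 'rev-1'): (),
        ('f2', 'rev-1'): (),
        }
    keys = sort_groupcompress(parent_map)
    # ('f1', 'rev-2') depends on ('f1', 'rev-1'), so it comes first.
    assert keys.index(('f1', 'rev-2')) < keys.index(('f1', 'rev-1'))
    # The 'f1' group is emitted before the 'f2' group.
    assert keys[-1] == ('f2', 'rev-1')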