67
65
# FIXME: the conflict markers should be *7* characters
69
67
from copy import copy
70
from cStringIO import StringIO
68
from io import BytesIO
73
from bzrlib.lazy_import import lazy_import
72
from ..lazy_import import lazy_import
74
73
lazy_import(globals(), """
75
from bzrlib import tsort
74
from breezy import tsort
81
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
82
RevisionAlreadyPresent,
84
UnavailableRepresentation,
86
from bzrlib.osutils import dirname, sha, sha_strings, split_lines
87
import bzrlib.patiencediff
88
from bzrlib.revision import NULL_REVISION
89
from bzrlib.symbol_versioning import *
90
from bzrlib.trace import mutter
91
from bzrlib.versionedfile import (
80
from ..errors import (
81
RevisionAlreadyPresent,
84
from ..osutils import dirname, sha, sha_strings, split_lines
85
from ..revision import NULL_REVISION
86
from ..trace import mutter
87
from .versionedfile import (
92
88
AbsentContentFactory,
95
92
sort_groupcompress,
93
UnavailableRepresentation,
98
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
96
from .weavefile import _read_weave_v5, write_weave_v5
99
class WeaveError(errors.BzrError):
101
_fmt = "Error in processing weave: %(msg)s"
103
def __init__(self, msg=None):
104
errors.BzrError.__init__(self)
108
class WeaveRevisionAlreadyPresent(WeaveError):
110
_fmt = "Revision {%(revision_id)s} already present in %(weave)s"
112
def __init__(self, revision_id, weave):
114
WeaveError.__init__(self)
115
self.revision_id = revision_id
119
class WeaveRevisionNotPresent(WeaveError):
121
_fmt = "Revision {%(revision_id)s} not present in %(weave)s"
123
def __init__(self, revision_id, weave):
124
WeaveError.__init__(self)
125
self.revision_id = revision_id
129
class WeaveFormatError(WeaveError):
131
_fmt = "Weave invariant violated: %(what)s"
133
def __init__(self, what):
134
WeaveError.__init__(self)
138
class WeaveParentMismatch(WeaveError):
140
_fmt = "Parents are mismatched between two revisions. %(msg)s"
143
class WeaveInvalidChecksum(WeaveError):
145
_fmt = "Text did not match its checksum: %(msg)s"
148
class WeaveTextDiffers(WeaveError):
150
_fmt = ("Weaves differ on text content. Revision:"
151
" {%(revision_id)s}, %(weave_a)s, %(weave_b)s")
153
def __init__(self, revision_id, weave_a, weave_b):
154
WeaveError.__init__(self)
155
self.revision_id = revision_id
156
self.weave_a = weave_a
157
self.weave_b = weave_b
101
160
class WeaveContentFactory(ContentFactory):
117
176
def get_bytes_as(self, storage_kind):
118
177
if storage_kind == 'fulltext':
119
178
return self._weave.get_text(self.key[-1])
120
elif storage_kind == 'chunked':
179
elif storage_kind in ('chunked', 'lines'):
121
180
return self._weave.get_lines(self.key[-1])
123
182
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
184
def iter_bytes_as(self, storage_kind):
185
if storage_kind in ('chunked', 'lines'):
186
return iter(self._weave.get_lines(self.key[-1]))
188
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
126
191
class Weave(VersionedFile):
127
192
"""weave - versioned text file storage.
359
425
raise RevisionNotPresent([record.key[0]], self)
360
426
# adapt to non-tuple interface
361
427
parents = [parent[0] for parent in record.parents]
362
if (record.storage_kind == 'fulltext'
363
or record.storage_kind == 'chunked'):
364
self.add_lines(record.key[0], parents,
365
osutils.chunks_to_lines(record.get_bytes_as('chunked')))
428
if record.storage_kind in ('fulltext', 'chunked', 'lines'):
430
record.key[0], parents,
431
record.get_bytes_as('lines'))
367
adapter_key = record.storage_kind, 'fulltext'
433
adapter_key = record.storage_kind, 'lines'
369
435
adapter = adapters[adapter_key]
371
437
adapter_factory = adapter_registry.get(adapter_key)
372
438
adapter = adapter_factory(self)
373
439
adapters[adapter_key] = adapter
374
lines = split_lines(adapter.get_bytes(record))
440
lines = adapter.get_bytes(record, 'lines')
376
442
self.add_lines(record.key[0], parents, lines)
377
443
except RevisionAlreadyPresent:
385
451
idx = self._lookup(name)
386
452
if sorted(self._parents[idx]) != sorted(parents) \
387
or sha1 != self._sha1s[idx]:
453
or sha1 != self._sha1s[idx]:
388
454
raise RevisionAlreadyPresent(name, self._weave_name)
391
457
def _add_lines(self, version_id, parents, lines, parent_texts,
392
left_matching_blocks, nostore_sha, random_id, check_content):
458
left_matching_blocks, nostore_sha, random_id,
393
460
"""See VersionedFile.add_lines."""
394
idx = self._add(version_id, lines, map(self._lookup, parents),
395
nostore_sha=nostore_sha)
461
idx = self._add(version_id, lines, list(map(self._lookup, parents)),
462
nostore_sha=nostore_sha)
396
463
return sha_strings(lines), sum(map(len, lines)), idx
398
465
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
419
486
sha1 = sha_strings(lines)
420
487
if sha1 == nostore_sha:
421
raise errors.ExistingContent
488
raise ExistingContent
422
489
if version_id is None:
423
version_id = "sha1:" + sha1
490
version_id = b"sha1:" + sha1
424
491
if version_id in self._name_map:
425
492
return self._check_repeated_add(version_id, parents, lines, sha1)
427
494
self._check_versions(parents)
428
## self._check_lines(lines)
429
495
new_version = len(self._parents)
431
# if we abort after here the (in-memory) weave will be corrupt because only
432
# some fields are updated
497
# if we abort after here the (in-memory) weave will be corrupt because
498
# only some fields are updated
433
499
# XXX: FIXME implement a succeed-or-fail of the rest of this routine.
434
500
# - Robert Collins 20060226
435
501
self._parents.append(parents[:])
437
503
self._names.append(version_id)
438
504
self._name_map[version_id] = new_version
442
507
# special case; adding with no parents revision; can do
443
508
# this more quickly by just appending unconditionally.
444
509
# even more specially, if we're adding an empty text we
445
510
# need do nothing at all.
447
self._weave.append(('{', new_version))
512
self._weave.append((b'{', new_version))
448
513
self._weave.extend(lines)
449
self._weave.append(('}', None))
514
self._weave.append((b'}', None))
450
515
return new_version
452
517
if len(parents) == 1:
479
543
# matches the end of the file? the current code says it's the
480
544
# last line of the weave?
482
#print 'basis_lines:', basis_lines
483
#print 'new_lines: ', lines
546
# print 'basis_lines:', basis_lines
547
# print 'new_lines: ', lines
485
549
s = self._matcher(None, basis_lines, lines)
487
551
# offset gives the number of lines that have been inserted
488
# into the weave up to the current point; if the original edit instruction
489
# says to change line A then we actually change (A+offset)
552
# into the weave up to the current point; if the original edit
553
# instruction says to change line A then we actually change (A+offset)
492
556
for tag, i1, i2, j1, j2 in s.get_opcodes():
493
# i1,i2 are given in offsets within basis_lines; we need to map them
494
# back to offsets within the entire weave
495
#print 'raw match', tag, i1, i2, j1, j2
557
# i1,i2 are given in offsets within basis_lines; we need to map
558
# them back to offsets within the entire weave print 'raw match',
559
# tag, i1, i2, j1, j2
496
560
if tag == 'equal':
498
562
i1 = basis_lineno[i1]
500
564
# the deletion and insertion are handled separately.
501
565
# first delete the region.
503
self._weave.insert(i1+offset, ('[', new_version))
504
self._weave.insert(i2+offset+1, (']', new_version))
567
self._weave.insert(i1 + offset, (b'[', new_version))
568
self._weave.insert(i2 + offset + 1, (b']', new_version))
509
573
# i2; we want to insert after this region to make sure
510
574
# we don't destroy ourselves
512
self._weave[i:i] = ([('{', new_version)]
576
self._weave[i:i] = ([(b'{', new_version)] +
515
579
offset += 2 + (j2 - j1)
516
580
return new_version
518
582
def _inclusions(self, versions):
519
583
"""Return set of all ancestors of given version(s)."""
520
584
if not len(versions):
522
586
i = set(versions)
523
for v in xrange(max(versions), 0, -1):
587
for v in range(max(versions), 0, -1):
525
589
# include all its parents
526
590
i.update(self._parents[v])
528
## except IndexError:
529
## raise ValueError("version %d not present in weave" % v)
531
593
def get_ancestry(self, version_ids, topo_sorted=True):
532
594
"""See VersionedFile.get_ancestry."""
533
if isinstance(version_ids, basestring):
595
if isinstance(version_ids, bytes):
534
596
version_ids = [version_ids]
535
597
i = self._inclusions([self._lookup(v) for v in version_ids])
536
return [self._idx_to_name(v) for v in i]
538
def _check_lines(self, text):
539
if not isinstance(text, list):
540
raise ValueError("text should be a list, not %s" % type(text))
543
if not isinstance(l, basestring):
544
raise ValueError("text line should be a string or unicode, not %s"
598
return set(self._idx_to_name(v) for v in i)
549
600
def _check_versions(self, indexes):
550
601
"""Check everything in the sequence of indexes is valid"""
576
627
if version_ids is None:
577
628
version_ids = self.versions()
578
629
version_ids = set(version_ids)
579
for lineno, inserted, deletes, line in self._walk_internal(version_ids):
580
if inserted not in version_ids: continue
582
yield line + '\n', inserted
630
for lineno, inserted, deletes, line in self._walk_internal(
632
if inserted not in version_ids:
634
if not line.endswith(b'\n'):
635
yield line + b'\n', inserted
584
637
yield line, inserted
625
677
Weave lines present in none of them are skipped entirely.
627
inc_a = set(self.get_ancestry([ver_a]))
628
inc_b = set(self.get_ancestry([ver_b]))
679
inc_a = self.get_ancestry([ver_a])
680
inc_b = self.get_ancestry([ver_b])
629
681
inc_c = inc_a & inc_b
631
for lineno, insert, deleteset, line in self._walk_internal([ver_a, ver_b]):
683
for lineno, insert, deleteset, line in self._walk_internal(
632
685
if deleteset & inc_c:
633
686
# killed in parent; can't be in either a or b
634
687
# not relevant to our work
688
WFE = WeaveFormatError
691
# 449 0 4474.6820 2356.5590 bzrlib.weave:556(_extract)
742
# 449 0 4474.6820 2356.5590 breezy.weave:556(_extract)
692
743
# +285282 0 1676.8040 1676.8040 +<isinstance>
693
744
# 1.6 seconds in 'isinstance'.
694
745
# changing the first isinstance:
695
# 449 0 2814.2660 1577.1760 bzrlib.weave:556(_extract)
746
# 449 0 2814.2660 1577.1760 breezy.weave:556(_extract)
696
747
# +140414 0 762.8050 762.8050 +<isinstance>
697
748
# note that the inline time actually dropped (less function calls)
698
749
# and total processing time was halved.
699
750
# we're still spending ~1/4 of the method in isinstance though.
700
751
# so lets hard code the acceptable string classes we expect:
701
# 449 0 1202.9420 786.2930 bzrlib.weave:556(_extract)
752
# 449 0 1202.9420 786.2930 breezy.weave:556(_extract)
702
753
# +71352 0 377.5560 377.5560 +<method 'append' of 'list'
704
755
# yay, down to ~1/4 the initial extract time, and our inline time
705
756
# has shrunk again, with isinstance no longer dominating.
706
757
# tweaking the stack inclusion test to use a set gives:
707
# 449 0 1122.8030 713.0080 bzrlib.weave:556(_extract)
758
# 449 0 1122.8030 713.0080 breezy.weave:556(_extract)
708
759
# +71352 0 354.9980 354.9980 +<method 'append' of 'list'
710
761
# - a 5% win, or possibly just noise. However with large istacks that
711
# 'in' test could dominate, so I'm leaving this change in place -
712
# when its fast enough to consider profiling big datasets we can review.
762
# 'in' test could dominate, so I'm leaving this change in place - when
763
# its fast enough to consider profiling big datasets we can review.
717
765
for l in self._weave:
718
766
if l.__class__ == tuple:
725
773
iset.remove(istack.pop())
727
775
if v in included:
730
778
if v in included:
733
781
raise AssertionError()
735
783
if isactive is None:
736
isactive = (not dset) and istack and (istack[-1] in included)
784
isactive = (not dset) and istack and (
785
istack[-1] in included)
738
787
result.append((istack[-1], lineno, l))
741
790
raise WeaveFormatError("unclosed insertion blocks "
742
"at end of weave: %s" % istack)
791
"at end of weave: %s" % istack)
744
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
793
raise WeaveFormatError(
794
"unclosed deletion blocks at end of weave: %s" % dset)
748
797
def _maybe_lookup(self, name_or_index):
758
809
def get_lines(self, version_id):
759
810
"""See VersionedFile.get_lines()."""
760
811
int_index = self._maybe_lookup(version_id)
761
result = [line for (origin, lineno, line) in self._extract([int_index])]
812
result = [line for (origin, lineno, line)
813
in self._extract([int_index])]
762
814
expected_sha1 = self._sha1s[int_index]
763
815
measured_sha1 = sha_strings(result)
764
816
if measured_sha1 != expected_sha1:
765
raise errors.WeaveInvalidChecksum(
766
'file %s, revision %s, expected: %s, measured %s'
767
% (self._weave_name, version_id,
768
expected_sha1, measured_sha1))
817
raise WeaveInvalidChecksum(
818
'file %s, revision %s, expected: %s, measured %s'
819
% (self._weave_name, version_id,
820
expected_sha1, measured_sha1))
771
823
def get_sha1s(self, version_ids):
805
857
name = self._idx_to_name(i)
806
858
sha1s[name] = sha()
808
new_inc = set([name])
809
861
for p in self._parents[i]:
810
862
new_inc.update(inclusions[self._idx_to_name(p)])
812
if set(new_inc) != set(self.get_ancestry(name)):
864
if new_inc != self.get_ancestry(name):
813
865
raise AssertionError(
814
866
'failed %s != %s'
815
% (set(new_inc), set(self.get_ancestry(name))))
867
% (new_inc, self.get_ancestry(name)))
816
868
inclusions[name] = new_inc
818
870
nlines = len(self._weave)
831
883
# The active inclusion must be an ancestor,
832
884
# and no ancestors must have deleted this line,
833
885
# because we don't support resurrection.
834
if (insert in name_inclusions) and not (deleteset & name_inclusions):
886
if ((insert in name_inclusions) and
887
not (deleteset & name_inclusions)):
835
888
sha1s[name].update(line)
837
890
for i in range(nv):
838
891
version = self._idx_to_name(i)
839
hd = sha1s[version].hexdigest()
892
hd = sha1s[version].hexdigest().encode()
840
893
expected = self._sha1s[i]
841
894
if hd != expected:
842
raise errors.WeaveInvalidChecksum(
843
"mismatched sha1 for version %s: "
844
"got %s, expected %s"
845
% (version, hd, expected))
895
raise WeaveInvalidChecksum(
896
"mismatched sha1 for version %s: "
897
"got %s, expected %s"
898
% (version, hd, expected))
847
900
# TODO: check insertions are properly nested, that there are
848
901
# no lines outside of insertion blocks, that deletions are
875
928
this_idx = self._name_map.get(name, -1)
876
929
if this_idx != -1:
877
930
if self._sha1s[this_idx] != other._sha1s[other_idx]:
878
raise errors.WeaveTextDiffers(name, self, other)
931
raise WeaveTextDiffers(name, self, other)
879
932
self_parents = self._parents[this_idx]
880
933
other_parents = other._parents[other_idx]
881
n1 = set([self._names[i] for i in self_parents])
882
n2 = set([other._names[i] for i in other_parents])
934
n1 = {self._names[i] for i in self_parents}
935
n2 = {other._names[i] for i in other_parents}
883
936
if not self._compatible_parents(n1, n2):
884
raise WeaveParentMismatch("inconsistent parents "
937
raise WeaveParentMismatch(
938
"inconsistent parents "
885
939
"for version {%s}: %s vs %s" % (name, n1, n2))
887
941
return True # ok!
911
965
WEAVE_SUFFIX = '.weave'
913
def __init__(self, name, transport, filemode=None, create=False, access_mode='w', get_scope=None):
967
def __init__(self, name, transport, filemode=None, create=False,
968
access_mode='w', get_scope=None):
914
969
"""Create a WeaveFile.
916
971
:param create: If not True, only open an existing knit.
918
973
super(WeaveFile, self).__init__(name, access_mode, get_scope=get_scope,
919
allow_reserved=False)
974
allow_reserved=False)
920
975
self._transport = transport
921
976
self._filemode = filemode
923
_read_weave_v5(self._transport.get(name + WeaveFile.WEAVE_SUFFIX), self)
978
f = self._transport.get(name + WeaveFile.WEAVE_SUFFIX)
979
_read_weave_v5(BytesIO(f.read()), self)
924
980
except errors.NoSuchFile:
930
986
def _add_lines(self, version_id, parents, lines, parent_texts,
931
left_matching_blocks, nostore_sha, random_id, check_content):
987
left_matching_blocks, nostore_sha, random_id,
932
989
"""Add a version and save the weave."""
933
990
self.check_not_reserved_id(version_id)
934
result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
935
parent_texts, left_matching_blocks, nostore_sha, random_id,
991
result = super(WeaveFile, self)._add_lines(
992
version_id, parents, lines, parent_texts, left_matching_blocks,
993
nostore_sha, random_id, check_content)
940
997
def copy_to(self, name, transport):
941
998
"""See VersionedFile.copy_to()."""
942
999
# as we are all in memory always, just serialise to the new place.
944
1001
write_weave_v5(self, sio)
946
1003
transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
985
1042
:param msg: An optional message for the progress
989
queue_a = range(wa.num_versions())
990
queue_b = range(wb.num_versions())
991
1045
# first determine combined parents of all versions
992
1046
# map from version name -> all parent names
993
1047
combined_parents = _reweave_parent_graphs(wa, wb)
994
1048
mutter("combined parents: %r", combined_parents)
995
order = tsort.topo_sort(combined_parents.iteritems())
1049
order = tsort.topo_sort(combined_parents.items())
996
1050
mutter("order to reweave: %r", order)
998
1052
if pb and not msg:
1010
1064
mutter('weaves: %s, %s', wa._weave_name, wb._weave_name)
1012
1066
lines = list(difflib.unified_diff(lines, lines_b,
1013
wa._weave_name, wb._weave_name))
1067
wa._weave_name, wb._weave_name))
1014
1068
mutter('lines:\n%s', ''.join(lines))
1015
raise errors.WeaveTextDiffers(name, wa, wb)
1069
raise WeaveTextDiffers(name, wa, wb)
1017
1071
lines = wb.get_lines(name)
1018
1072
wr._add(name, lines, [wr._lookup(i) for i in combined_parents[name]])