65
67
# FIXME: the conflict markers should be *7* characters
67
69
from copy import copy
68
from io import BytesIO
70
from cStringIO import StringIO
72
from ..lazy_import import lazy_import
75
from bzrlib.lazy_import import lazy_import
73
76
lazy_import(globals(), """
74
from breezy import tsort
77
from bzrlib import tsort
80
from ..errors import (
81
RevisionAlreadyPresent,
84
from ..osutils import dirname, sha, sha_strings, split_lines
85
from ..revision import NULL_REVISION
86
from ..trace import mutter
87
from .versionedfile import (
84
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
85
RevisionAlreadyPresent,
87
UnavailableRepresentation,
88
WeaveRevisionAlreadyPresent,
89
WeaveRevisionNotPresent,
91
from bzrlib.osutils import dirname, sha, sha_strings, split_lines
92
import bzrlib.patiencediff
93
from bzrlib.revision import NULL_REVISION
94
from bzrlib.symbol_versioning import *
95
from bzrlib.trace import mutter
96
from bzrlib.versionedfile import (
88
97
AbsentContentFactory,
92
100
sort_groupcompress,
93
UnavailableRepresentation,
96
from .weavefile import _read_weave_v5, write_weave_v5
99
class WeaveError(errors.BzrError):
101
_fmt = "Error in processing weave: %(msg)s"
103
def __init__(self, msg=None):
104
errors.BzrError.__init__(self)
108
class WeaveRevisionAlreadyPresent(WeaveError):
110
_fmt = "Revision {%(revision_id)s} already present in %(weave)s"
112
def __init__(self, revision_id, weave):
114
WeaveError.__init__(self)
115
self.revision_id = revision_id
119
class WeaveRevisionNotPresent(WeaveError):
121
_fmt = "Revision {%(revision_id)s} not present in %(weave)s"
123
def __init__(self, revision_id, weave):
124
WeaveError.__init__(self)
125
self.revision_id = revision_id
129
class WeaveFormatError(WeaveError):
131
_fmt = "Weave invariant violated: %(what)s"
133
def __init__(self, what):
134
WeaveError.__init__(self)
138
class WeaveParentMismatch(WeaveError):
140
_fmt = "Parents are mismatched between two revisions. %(msg)s"
143
class WeaveInvalidChecksum(WeaveError):
145
_fmt = "Text did not match its checksum: %(msg)s"
148
class WeaveTextDiffers(WeaveError):
150
_fmt = ("Weaves differ on text content. Revision:"
151
" {%(revision_id)s}, %(weave_a)s, %(weave_b)s")
153
def __init__(self, revision_id, weave_a, weave_b):
154
WeaveError.__init__(self)
155
self.revision_id = revision_id
156
self.weave_a = weave_a
157
self.weave_b = weave_b
103
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
160
106
class WeaveContentFactory(ContentFactory):
176
122
def get_bytes_as(self, storage_kind):
177
123
if storage_kind == 'fulltext':
178
124
return self._weave.get_text(self.key[-1])
179
elif storage_kind in ('chunked', 'lines'):
125
elif storage_kind == 'chunked':
180
126
return self._weave.get_lines(self.key[-1])
182
128
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
184
def iter_bytes_as(self, storage_kind):
185
if storage_kind in ('chunked', 'lines'):
186
return iter(self._weave.get_lines(self.key[-1]))
188
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
191
131
class Weave(VersionedFile):
192
132
"""weave - versioned text file storage.
425
364
raise RevisionNotPresent([record.key[0]], self)
426
365
# adapt to non-tuple interface
427
366
parents = [parent[0] for parent in record.parents]
428
if record.storage_kind in ('fulltext', 'chunked', 'lines'):
430
record.key[0], parents,
431
record.get_bytes_as('lines'))
367
if (record.storage_kind == 'fulltext'
368
or record.storage_kind == 'chunked'):
369
self.add_lines(record.key[0], parents,
370
osutils.chunks_to_lines(record.get_bytes_as('chunked')))
433
adapter_key = record.storage_kind, 'lines'
372
adapter_key = record.storage_kind, 'fulltext'
435
374
adapter = adapters[adapter_key]
437
376
adapter_factory = adapter_registry.get(adapter_key)
438
377
adapter = adapter_factory(self)
439
378
adapters[adapter_key] = adapter
440
lines = adapter.get_bytes(record, 'lines')
379
lines = split_lines(adapter.get_bytes(record))
442
381
self.add_lines(record.key[0], parents, lines)
443
382
except RevisionAlreadyPresent:
451
390
idx = self._lookup(name)
452
391
if sorted(self._parents[idx]) != sorted(parents) \
453
or sha1 != self._sha1s[idx]:
392
or sha1 != self._sha1s[idx]:
454
393
raise RevisionAlreadyPresent(name, self._weave_name)
457
396
def _add_lines(self, version_id, parents, lines, parent_texts,
458
left_matching_blocks, nostore_sha, random_id,
397
left_matching_blocks, nostore_sha, random_id, check_content):
460
398
"""See VersionedFile.add_lines."""
461
idx = self._add(version_id, lines, list(map(self._lookup, parents)),
462
nostore_sha=nostore_sha)
399
idx = self._add(version_id, lines, map(self._lookup, parents),
400
nostore_sha=nostore_sha)
463
401
return sha_strings(lines), sum(map(len, lines)), idx
465
403
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
486
424
sha1 = sha_strings(lines)
487
425
if sha1 == nostore_sha:
488
raise ExistingContent
426
raise errors.ExistingContent
489
427
if version_id is None:
490
version_id = b"sha1:" + sha1
428
version_id = "sha1:" + sha1
491
429
if version_id in self._name_map:
492
430
return self._check_repeated_add(version_id, parents, lines, sha1)
494
432
self._check_versions(parents)
433
## self._check_lines(lines)
495
434
new_version = len(self._parents)
497
# if we abort after here the (in-memory) weave will be corrupt because
498
# only some fields are updated
436
# if we abort after here the (in-memory) weave will be corrupt because only
437
# some fields are updated
499
438
# XXX: FIXME implement a succeed-or-fail of the rest of this routine.
500
439
# - Robert Collins 20060226
501
440
self._parents.append(parents[:])
503
442
self._names.append(version_id)
504
443
self._name_map[version_id] = new_version
507
447
# special case; adding with no parents revision; can do
508
448
# this more quickly by just appending unconditionally.
509
449
# even more specially, if we're adding an empty text we
510
450
# need do nothing at all.
512
self._weave.append((b'{', new_version))
452
self._weave.append(('{', new_version))
513
453
self._weave.extend(lines)
514
self._weave.append((b'}', None))
454
self._weave.append(('}', None))
515
455
return new_version
517
457
if len(parents) == 1:
543
484
# matches the end of the file? the current code says it's the
544
485
# last line of the weave?
546
# print 'basis_lines:', basis_lines
547
# print 'new_lines: ', lines
487
#print 'basis_lines:', basis_lines
488
#print 'new_lines: ', lines
549
490
s = self._matcher(None, basis_lines, lines)
551
492
# offset gives the number of lines that have been inserted
552
# into the weave up to the current point; if the original edit
553
# instruction says to change line A then we actually change (A+offset)
493
# into the weave up to the current point; if the original edit instruction
494
# says to change line A then we actually change (A+offset)
556
497
for tag, i1, i2, j1, j2 in s.get_opcodes():
557
# i1,i2 are given in offsets within basis_lines; we need to map
558
# them back to offsets within the entire weave print 'raw match',
559
# tag, i1, i2, j1, j2
498
# i1,i2 are given in offsets within basis_lines; we need to map them
499
# back to offsets within the entire weave
500
#print 'raw match', tag, i1, i2, j1, j2
560
501
if tag == 'equal':
562
503
i1 = basis_lineno[i1]
564
505
# the deletion and insertion are handled separately.
565
506
# first delete the region.
567
self._weave.insert(i1 + offset, (b'[', new_version))
568
self._weave.insert(i2 + offset + 1, (b']', new_version))
508
self._weave.insert(i1+offset, ('[', new_version))
509
self._weave.insert(i2+offset+1, (']', new_version))
573
514
# i2; we want to insert after this region to make sure
574
515
# we don't destroy ourselves
576
self._weave[i:i] = ([(b'{', new_version)] +
517
self._weave[i:i] = ([('{', new_version)]
579
520
offset += 2 + (j2 - j1)
580
521
return new_version
582
523
def _inclusions(self, versions):
583
524
"""Return set of all ancestors of given version(s)."""
584
525
if not len(versions):
586
527
i = set(versions)
587
for v in range(max(versions), 0, -1):
528
for v in xrange(max(versions), 0, -1):
589
530
# include all its parents
590
531
i.update(self._parents[v])
533
## except IndexError:
534
## raise ValueError("version %d not present in weave" % v)
593
536
def get_ancestry(self, version_ids, topo_sorted=True):
594
537
"""See VersionedFile.get_ancestry."""
595
if isinstance(version_ids, bytes):
538
if isinstance(version_ids, basestring):
596
539
version_ids = [version_ids]
597
540
i = self._inclusions([self._lookup(v) for v in version_ids])
598
return set(self._idx_to_name(v) for v in i)
541
return [self._idx_to_name(v) for v in i]
543
def _check_lines(self, text):
544
if not isinstance(text, list):
545
raise ValueError("text should be a list, not %s" % type(text))
548
if not isinstance(l, basestring):
549
raise ValueError("text line should be a string or unicode, not %s"
600
554
def _check_versions(self, indexes):
601
555
"""Check everything in the sequence of indexes is valid"""
627
581
if version_ids is None:
628
582
version_ids = self.versions()
629
583
version_ids = set(version_ids)
630
for lineno, inserted, deletes, line in self._walk_internal(
632
if inserted not in version_ids:
634
if not line.endswith(b'\n'):
635
yield line + b'\n', inserted
584
for lineno, inserted, deletes, line in self._walk_internal(version_ids):
585
if inserted not in version_ids: continue
587
yield line + '\n', inserted
637
589
yield line, inserted
677
630
Weave lines present in none of them are skipped entirely.
679
inc_a = self.get_ancestry([ver_a])
680
inc_b = self.get_ancestry([ver_b])
632
inc_a = set(self.get_ancestry([ver_a]))
633
inc_b = set(self.get_ancestry([ver_b]))
681
634
inc_c = inc_a & inc_b
683
for lineno, insert, deleteset, line in self._walk_internal(
636
for lineno, insert, deleteset, line in self._walk_internal([ver_a, ver_b]):
685
637
if deleteset & inc_c:
686
638
# killed in parent; can't be in either a or b
687
639
# not relevant to our work
693
WFE = WeaveFormatError
742
# 449 0 4474.6820 2356.5590 breezy.weave:556(_extract)
696
# 449 0 4474.6820 2356.5590 bzrlib.weave:556(_extract)
743
697
# +285282 0 1676.8040 1676.8040 +<isinstance>
744
698
# 1.6 seconds in 'isinstance'.
745
699
# changing the first isinstance:
746
# 449 0 2814.2660 1577.1760 breezy.weave:556(_extract)
700
# 449 0 2814.2660 1577.1760 bzrlib.weave:556(_extract)
747
701
# +140414 0 762.8050 762.8050 +<isinstance>
748
702
# note that the inline time actually dropped (less function calls)
749
703
# and total processing time was halved.
750
704
# we're still spending ~1/4 of the method in isinstance though.
751
705
# so lets hard code the acceptable string classes we expect:
752
# 449 0 1202.9420 786.2930 breezy.weave:556(_extract)
706
# 449 0 1202.9420 786.2930 bzrlib.weave:556(_extract)
753
707
# +71352 0 377.5560 377.5560 +<method 'append' of 'list'
755
709
# yay, down to ~1/4 the initial extract time, and our inline time
756
710
# has shrunk again, with isinstance no longer dominating.
757
711
# tweaking the stack inclusion test to use a set gives:
758
# 449 0 1122.8030 713.0080 breezy.weave:556(_extract)
712
# 449 0 1122.8030 713.0080 bzrlib.weave:556(_extract)
759
713
# +71352 0 354.9980 354.9980 +<method 'append' of 'list'
761
715
# - a 5% win, or possibly just noise. However with large istacks that
762
# 'in' test could dominate, so I'm leaving this change in place - when
763
# its fast enough to consider profiling big datasets we can review.
716
# 'in' test could dominate, so I'm leaving this change in place -
717
# when its fast enough to consider profiling big datasets we can review.
765
722
for l in self._weave:
766
723
if l.__class__ == tuple:
773
730
iset.remove(istack.pop())
775
732
if v in included:
778
735
if v in included:
781
738
raise AssertionError()
783
740
if isactive is None:
784
isactive = (not dset) and istack and (
785
istack[-1] in included)
741
isactive = (not dset) and istack and (istack[-1] in included)
787
743
result.append((istack[-1], lineno, l))
790
746
raise WeaveFormatError("unclosed insertion blocks "
791
"at end of weave: %s" % istack)
747
"at end of weave: %s" % istack)
793
raise WeaveFormatError(
794
"unclosed deletion blocks at end of weave: %s" % dset)
749
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
797
753
def _maybe_lookup(self, name_or_index):
809
763
def get_lines(self, version_id):
810
764
"""See VersionedFile.get_lines()."""
811
765
int_index = self._maybe_lookup(version_id)
812
result = [line for (origin, lineno, line)
813
in self._extract([int_index])]
766
result = [line for (origin, lineno, line) in self._extract([int_index])]
814
767
expected_sha1 = self._sha1s[int_index]
815
768
measured_sha1 = sha_strings(result)
816
769
if measured_sha1 != expected_sha1:
817
raise WeaveInvalidChecksum(
818
'file %s, revision %s, expected: %s, measured %s'
819
% (self._weave_name, version_id,
820
expected_sha1, measured_sha1))
770
raise errors.WeaveInvalidChecksum(
771
'file %s, revision %s, expected: %s, measured %s'
772
% (self._weave_name, version_id,
773
expected_sha1, measured_sha1))
823
776
def get_sha1s(self, version_ids):
857
810
name = self._idx_to_name(i)
858
811
sha1s[name] = sha()
813
new_inc = set([name])
861
814
for p in self._parents[i]:
862
815
new_inc.update(inclusions[self._idx_to_name(p)])
864
if new_inc != self.get_ancestry(name):
817
if set(new_inc) != set(self.get_ancestry(name)):
865
818
raise AssertionError(
866
819
'failed %s != %s'
867
% (new_inc, self.get_ancestry(name)))
820
% (set(new_inc), set(self.get_ancestry(name))))
868
821
inclusions[name] = new_inc
870
823
nlines = len(self._weave)
883
836
# The active inclusion must be an ancestor,
884
837
# and no ancestors must have deleted this line,
885
838
# because we don't support resurrection.
886
if ((insert in name_inclusions) and
887
not (deleteset & name_inclusions)):
839
if (insert in name_inclusions) and not (deleteset & name_inclusions):
888
840
sha1s[name].update(line)
890
842
for i in range(nv):
891
843
version = self._idx_to_name(i)
892
hd = sha1s[version].hexdigest().encode()
844
hd = sha1s[version].hexdigest()
893
845
expected = self._sha1s[i]
894
846
if hd != expected:
895
raise WeaveInvalidChecksum(
896
"mismatched sha1 for version %s: "
897
"got %s, expected %s"
898
% (version, hd, expected))
847
raise errors.WeaveInvalidChecksum(
848
"mismatched sha1 for version %s: "
849
"got %s, expected %s"
850
% (version, hd, expected))
900
852
# TODO: check insertions are properly nested, that there are
901
853
# no lines outside of insertion blocks, that deletions are
928
880
this_idx = self._name_map.get(name, -1)
929
881
if this_idx != -1:
930
882
if self._sha1s[this_idx] != other._sha1s[other_idx]:
931
raise WeaveTextDiffers(name, self, other)
883
raise errors.WeaveTextDiffers(name, self, other)
932
884
self_parents = self._parents[this_idx]
933
885
other_parents = other._parents[other_idx]
934
n1 = {self._names[i] for i in self_parents}
935
n2 = {other._names[i] for i in other_parents}
886
n1 = set([self._names[i] for i in self_parents])
887
n2 = set([other._names[i] for i in other_parents])
936
888
if not self._compatible_parents(n1, n2):
937
raise WeaveParentMismatch(
938
"inconsistent parents "
889
raise WeaveParentMismatch("inconsistent parents "
939
890
"for version {%s}: %s vs %s" % (name, n1, n2))
941
892
return True # ok!
965
916
WEAVE_SUFFIX = '.weave'
967
def __init__(self, name, transport, filemode=None, create=False,
968
access_mode='w', get_scope=None):
918
def __init__(self, name, transport, filemode=None, create=False, access_mode='w', get_scope=None):
969
919
"""Create a WeaveFile.
971
921
:param create: If not True, only open an existing knit.
973
923
super(WeaveFile, self).__init__(name, access_mode, get_scope=get_scope,
974
allow_reserved=False)
924
allow_reserved=False)
975
925
self._transport = transport
976
926
self._filemode = filemode
978
f = self._transport.get(name + WeaveFile.WEAVE_SUFFIX)
979
_read_weave_v5(BytesIO(f.read()), self)
928
_read_weave_v5(self._transport.get(name + WeaveFile.WEAVE_SUFFIX), self)
980
929
except errors.NoSuchFile:
986
935
def _add_lines(self, version_id, parents, lines, parent_texts,
987
left_matching_blocks, nostore_sha, random_id,
936
left_matching_blocks, nostore_sha, random_id, check_content):
989
937
"""Add a version and save the weave."""
990
938
self.check_not_reserved_id(version_id)
991
result = super(WeaveFile, self)._add_lines(
992
version_id, parents, lines, parent_texts, left_matching_blocks,
993
nostore_sha, random_id, check_content)
939
result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
940
parent_texts, left_matching_blocks, nostore_sha, random_id,
997
945
def copy_to(self, name, transport):
998
946
"""See VersionedFile.copy_to()."""
999
947
# as we are all in memory always, just serialise to the new place.
1001
949
write_weave_v5(self, sio)
1003
951
transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
1042
990
:param msg: An optional message for the progress
994
queue_a = range(wa.num_versions())
995
queue_b = range(wb.num_versions())
1045
996
# first determine combined parents of all versions
1046
997
# map from version name -> all parent names
1047
998
combined_parents = _reweave_parent_graphs(wa, wb)
1048
999
mutter("combined parents: %r", combined_parents)
1049
order = tsort.topo_sort(combined_parents.items())
1000
order = tsort.topo_sort(combined_parents.iteritems())
1050
1001
mutter("order to reweave: %r", order)
1052
1003
if pb and not msg:
1064
1015
mutter('weaves: %s, %s', wa._weave_name, wb._weave_name)
1066
1017
lines = list(difflib.unified_diff(lines, lines_b,
1067
wa._weave_name, wb._weave_name))
1018
wa._weave_name, wb._weave_name))
1068
1019
mutter('lines:\n%s', ''.join(lines))
1069
raise WeaveTextDiffers(name, wa, wb)
1020
raise errors.WeaveTextDiffers(name, wa, wb)
1071
1022
lines = wb.get_lines(name)
1072
1023
wr._add(name, lines, [wr._lookup(i) for i in combined_parents[name]])