67
67
# FIXME: the conflict markers should be *7* characters
69
69
from copy import copy
70
from cStringIO import StringIO
73
from ..lazy_import import lazy_import
73
from bzrlib.lazy_import import lazy_import
74
74
lazy_import(globals(), """
75
from breezy import tsort
75
from bzrlib import tsort
81
from ..errors import (
82
RevisionAlreadyPresent,
84
UnavailableRepresentation,
86
from ..osutils import dirname, sha, sha_strings, split_lines
87
from ..revision import NULL_REVISION
88
from ..sixish import (
91
from ..trace import mutter
92
from .versionedfile import (
81
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
82
RevisionAlreadyPresent,
84
UnavailableRepresentation,
86
from bzrlib.osutils import dirname, sha, sha_strings, split_lines
87
import bzrlib.patiencediff
88
from bzrlib.revision import NULL_REVISION
89
from bzrlib.symbol_versioning import *
90
from bzrlib.trace import mutter
91
from bzrlib.versionedfile import (
93
92
AbsentContentFactory,
96
95
sort_groupcompress,
99
from .weavefile import _read_weave_v5, write_weave_v5
102
class WeaveError(errors.BzrError):
104
_fmt = "Error in processing weave: %(msg)s"
106
def __init__(self, msg=None):
107
errors.BzrError.__init__(self)
111
class WeaveRevisionAlreadyPresent(WeaveError):
113
_fmt = "Revision {%(revision_id)s} already present in %(weave)s"
115
def __init__(self, revision_id, weave):
117
WeaveError.__init__(self)
118
self.revision_id = revision_id
122
class WeaveRevisionNotPresent(WeaveError):
124
_fmt = "Revision {%(revision_id)s} not present in %(weave)s"
126
def __init__(self, revision_id, weave):
127
WeaveError.__init__(self)
128
self.revision_id = revision_id
132
class WeaveFormatError(WeaveError):
134
_fmt = "Weave invariant violated: %(what)s"
136
def __init__(self, what):
137
WeaveError.__init__(self)
141
class WeaveParentMismatch(WeaveError):
143
_fmt = "Parents are mismatched between two revisions. %(msg)s"
146
class WeaveInvalidChecksum(WeaveError):
148
_fmt = "Text did not match its checksum: %(msg)s"
151
class WeaveTextDiffers(WeaveError):
153
_fmt = ("Weaves differ on text content. Revision:"
154
" {%(revision_id)s}, %(weave_a)s, %(weave_b)s")
156
def __init__(self, revision_id, weave_a, weave_b):
157
WeaveError.__init__(self)
158
self.revision_id = revision_id
159
self.weave_a = weave_a
160
self.weave_b = weave_b
98
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
163
101
class WeaveContentFactory(ContentFactory):
179
117
def get_bytes_as(self, storage_kind):
180
118
if storage_kind == 'fulltext':
181
119
return self._weave.get_text(self.key[-1])
182
elif storage_kind in ('chunked', 'lines'):
120
elif storage_kind == 'chunked':
183
121
return self._weave.get_lines(self.key[-1])
185
123
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
187
def iter_bytes_as(self, storage_kind):
188
if storage_kind in ('chunked', 'lines'):
189
return iter(self._weave.get_lines(self.key[-1]))
191
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
194
126
class Weave(VersionedFile):
195
127
"""weave - versioned text file storage.
428
359
raise RevisionNotPresent([record.key[0]], self)
429
360
# adapt to non-tuple interface
430
361
parents = [parent[0] for parent in record.parents]
431
if record.storage_kind in ('fulltext', 'chunked', 'lines'):
433
record.key[0], parents,
434
record.get_bytes_as('lines'))
362
if (record.storage_kind == 'fulltext'
363
or record.storage_kind == 'chunked'):
364
self.add_lines(record.key[0], parents,
365
osutils.chunks_to_lines(record.get_bytes_as('chunked')))
436
adapter_key = record.storage_kind, 'lines'
367
adapter_key = record.storage_kind, 'fulltext'
438
369
adapter = adapters[adapter_key]
440
371
adapter_factory = adapter_registry.get(adapter_key)
441
372
adapter = adapter_factory(self)
442
373
adapters[adapter_key] = adapter
443
lines = adapter.get_bytes(record, 'lines')
374
lines = split_lines(adapter.get_bytes(record))
445
376
self.add_lines(record.key[0], parents, lines)
446
377
except RevisionAlreadyPresent:
454
385
idx = self._lookup(name)
455
386
if sorted(self._parents[idx]) != sorted(parents) \
456
or sha1 != self._sha1s[idx]:
387
or sha1 != self._sha1s[idx]:
457
388
raise RevisionAlreadyPresent(name, self._weave_name)
460
391
def _add_lines(self, version_id, parents, lines, parent_texts,
461
left_matching_blocks, nostore_sha, random_id,
392
left_matching_blocks, nostore_sha, random_id, check_content):
463
393
"""See VersionedFile.add_lines."""
464
idx = self._add(version_id, lines, list(map(self._lookup, parents)),
465
nostore_sha=nostore_sha)
394
idx = self._add(version_id, lines, map(self._lookup, parents),
395
nostore_sha=nostore_sha)
466
396
return sha_strings(lines), sum(map(len, lines)), idx
468
398
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
490
420
if sha1 == nostore_sha:
491
421
raise errors.ExistingContent
492
422
if version_id is None:
493
version_id = b"sha1:" + sha1
423
version_id = "sha1:" + sha1
494
424
if version_id in self._name_map:
495
425
return self._check_repeated_add(version_id, parents, lines, sha1)
497
427
self._check_versions(parents)
428
## self._check_lines(lines)
498
429
new_version = len(self._parents)
500
# if we abort after here the (in-memory) weave will be corrupt because
501
# only some fields are updated
431
# if we abort after here the (in-memory) weave will be corrupt because only
432
# some fields are updated
502
433
# XXX: FIXME implement a succeed-or-fail of the rest of this routine.
503
434
# - Robert Collins 20060226
504
435
self._parents.append(parents[:])
506
437
self._names.append(version_id)
507
438
self._name_map[version_id] = new_version
510
442
# special case; adding with no parents revision; can do
511
443
# this more quickly by just appending unconditionally.
512
444
# even more specially, if we're adding an empty text we
513
445
# need do nothing at all.
515
self._weave.append((b'{', new_version))
447
self._weave.append(('{', new_version))
516
448
self._weave.extend(lines)
517
self._weave.append((b'}', None))
449
self._weave.append(('}', None))
518
450
return new_version
520
452
if len(parents) == 1:
546
479
# matches the end of the file? the current code says it's the
547
480
# last line of the weave?
549
# print 'basis_lines:', basis_lines
550
# print 'new_lines: ', lines
482
#print 'basis_lines:', basis_lines
483
#print 'new_lines: ', lines
552
485
s = self._matcher(None, basis_lines, lines)
554
487
# offset gives the number of lines that have been inserted
555
# into the weave up to the current point; if the original edit
556
# instruction says to change line A then we actually change (A+offset)
488
# into the weave up to the current point; if the original edit instruction
489
# says to change line A then we actually change (A+offset)
559
492
for tag, i1, i2, j1, j2 in s.get_opcodes():
560
# i1,i2 are given in offsets within basis_lines; we need to map
561
# them back to offsets within the entire weave print 'raw match',
562
# tag, i1, i2, j1, j2
493
# i1,i2 are given in offsets within basis_lines; we need to map them
494
# back to offsets within the entire weave
495
#print 'raw match', tag, i1, i2, j1, j2
563
496
if tag == 'equal':
565
498
i1 = basis_lineno[i1]
567
500
# the deletion and insertion are handled separately.
568
501
# first delete the region.
570
self._weave.insert(i1 + offset, (b'[', new_version))
571
self._weave.insert(i2 + offset + 1, (b']', new_version))
503
self._weave.insert(i1+offset, ('[', new_version))
504
self._weave.insert(i2+offset+1, (']', new_version))
587
520
if not len(versions):
589
522
i = set(versions)
590
for v in range(max(versions), 0, -1):
523
for v in xrange(max(versions), 0, -1):
592
525
# include all its parents
593
526
i.update(self._parents[v])
528
## except IndexError:
529
## raise ValueError("version %d not present in weave" % v)
596
531
def get_ancestry(self, version_ids, topo_sorted=True):
597
532
"""See VersionedFile.get_ancestry."""
598
if isinstance(version_ids, bytes):
533
if isinstance(version_ids, basestring):
599
534
version_ids = [version_ids]
600
535
i = self._inclusions([self._lookup(v) for v in version_ids])
601
536
return [self._idx_to_name(v) for v in i]
538
def _check_lines(self, text):
539
if not isinstance(text, list):
540
raise ValueError("text should be a list, not %s" % type(text))
543
if not isinstance(l, basestring):
544
raise ValueError("text line should be a string or unicode, not %s"
603
549
def _check_versions(self, indexes):
604
550
"""Check everything in the sequence of indexes is valid"""
605
551
for i in indexes:
630
576
if version_ids is None:
631
577
version_ids = self.versions()
632
578
version_ids = set(version_ids)
633
for lineno, inserted, deletes, line in self._walk_internal(
635
if inserted not in version_ids:
637
if not line.endswith(b'\n'):
638
yield line + b'\n', inserted
579
for lineno, inserted, deletes, line in self._walk_internal(version_ids):
580
if inserted not in version_ids: continue
582
yield line + '\n', inserted
640
584
yield line, inserted
688
WFE = WeaveFormatError
745
# 449 0 4474.6820 2356.5590 breezy.weave:556(_extract)
691
# 449 0 4474.6820 2356.5590 bzrlib.weave:556(_extract)
746
692
# +285282 0 1676.8040 1676.8040 +<isinstance>
747
693
# 1.6 seconds in 'isinstance'.
748
694
# changing the first isinstance:
749
# 449 0 2814.2660 1577.1760 breezy.weave:556(_extract)
695
# 449 0 2814.2660 1577.1760 bzrlib.weave:556(_extract)
750
696
# +140414 0 762.8050 762.8050 +<isinstance>
751
697
# note that the inline time actually dropped (less function calls)
752
698
# and total processing time was halved.
753
699
# we're still spending ~1/4 of the method in isinstance though.
754
700
# so lets hard code the acceptable string classes we expect:
755
# 449 0 1202.9420 786.2930 breezy.weave:556(_extract)
701
# 449 0 1202.9420 786.2930 bzrlib.weave:556(_extract)
756
702
# +71352 0 377.5560 377.5560 +<method 'append' of 'list'
758
704
# yay, down to ~1/4 the initial extract time, and our inline time
759
705
# has shrunk again, with isinstance no longer dominating.
760
706
# tweaking the stack inclusion test to use a set gives:
761
# 449 0 1122.8030 713.0080 breezy.weave:556(_extract)
707
# 449 0 1122.8030 713.0080 bzrlib.weave:556(_extract)
762
708
# +71352 0 354.9980 354.9980 +<method 'append' of 'list'
764
710
# - a 5% win, or possibly just noise. However with large istacks that
765
# 'in' test could dominate, so I'm leaving this change in place - when
766
# its fast enough to consider profiling big datasets we can review.
711
# 'in' test could dominate, so I'm leaving this change in place -
712
# when its fast enough to consider profiling big datasets we can review.
768
717
for l in self._weave:
769
718
if l.__class__ == tuple:
776
725
iset.remove(istack.pop())
778
727
if v in included:
781
730
if v in included:
784
733
raise AssertionError()
786
735
if isactive is None:
787
isactive = (not dset) and istack and (
788
istack[-1] in included)
736
isactive = (not dset) and istack and (istack[-1] in included)
790
738
result.append((istack[-1], lineno, l))
793
741
raise WeaveFormatError("unclosed insertion blocks "
794
"at end of weave: %s" % istack)
742
"at end of weave: %s" % istack)
796
raise WeaveFormatError(
797
"unclosed deletion blocks at end of weave: %s" % dset)
744
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
800
748
def _maybe_lookup(self, name_or_index):
812
758
def get_lines(self, version_id):
813
759
"""See VersionedFile.get_lines()."""
814
760
int_index = self._maybe_lookup(version_id)
815
result = [line for (origin, lineno, line)
816
in self._extract([int_index])]
761
result = [line for (origin, lineno, line) in self._extract([int_index])]
817
762
expected_sha1 = self._sha1s[int_index]
818
763
measured_sha1 = sha_strings(result)
819
764
if measured_sha1 != expected_sha1:
820
raise WeaveInvalidChecksum(
821
'file %s, revision %s, expected: %s, measured %s'
822
% (self._weave_name, version_id,
823
expected_sha1, measured_sha1))
765
raise errors.WeaveInvalidChecksum(
766
'file %s, revision %s, expected: %s, measured %s'
767
% (self._weave_name, version_id,
768
expected_sha1, measured_sha1))
826
771
def get_sha1s(self, version_ids):
886
831
# The active inclusion must be an ancestor,
887
832
# and no ancestors must have deleted this line,
888
833
# because we don't support resurrection.
889
if ((insert in name_inclusions) and
890
not (deleteset & name_inclusions)):
834
if (insert in name_inclusions) and not (deleteset & name_inclusions):
891
835
sha1s[name].update(line)
893
837
for i in range(nv):
894
838
version = self._idx_to_name(i)
895
hd = sha1s[version].hexdigest().encode()
839
hd = sha1s[version].hexdigest()
896
840
expected = self._sha1s[i]
897
841
if hd != expected:
898
raise WeaveInvalidChecksum(
899
"mismatched sha1 for version %s: "
900
"got %s, expected %s"
901
% (version, hd, expected))
842
raise errors.WeaveInvalidChecksum(
843
"mismatched sha1 for version %s: "
844
"got %s, expected %s"
845
% (version, hd, expected))
903
847
# TODO: check insertions are properly nested, that there are
904
848
# no lines outside of insertion blocks, that deletions are
931
875
this_idx = self._name_map.get(name, -1)
932
876
if this_idx != -1:
933
877
if self._sha1s[this_idx] != other._sha1s[other_idx]:
934
raise WeaveTextDiffers(name, self, other)
878
raise errors.WeaveTextDiffers(name, self, other)
935
879
self_parents = self._parents[this_idx]
936
880
other_parents = other._parents[other_idx]
937
n1 = {self._names[i] for i in self_parents}
938
n2 = {other._names[i] for i in other_parents}
881
n1 = set([self._names[i] for i in self_parents])
882
n2 = set([other._names[i] for i in other_parents])
939
883
if not self._compatible_parents(n1, n2):
940
raise WeaveParentMismatch(
941
"inconsistent parents "
884
raise WeaveParentMismatch("inconsistent parents "
942
885
"for version {%s}: %s vs %s" % (name, n1, n2))
944
887
return True # ok!
968
911
WEAVE_SUFFIX = '.weave'
970
def __init__(self, name, transport, filemode=None, create=False,
971
access_mode='w', get_scope=None):
913
def __init__(self, name, transport, filemode=None, create=False, access_mode='w', get_scope=None):
972
914
"""Create a WeaveFile.
974
916
:param create: If not True, only open an existing knit.
976
918
super(WeaveFile, self).__init__(name, access_mode, get_scope=get_scope,
977
allow_reserved=False)
919
allow_reserved=False)
978
920
self._transport = transport
979
921
self._filemode = filemode
981
f = self._transport.get(name + WeaveFile.WEAVE_SUFFIX)
982
_read_weave_v5(BytesIO(f.read()), self)
923
_read_weave_v5(self._transport.get(name + WeaveFile.WEAVE_SUFFIX), self)
983
924
except errors.NoSuchFile:
989
930
def _add_lines(self, version_id, parents, lines, parent_texts,
990
left_matching_blocks, nostore_sha, random_id,
931
left_matching_blocks, nostore_sha, random_id, check_content):
992
932
"""Add a version and save the weave."""
993
933
self.check_not_reserved_id(version_id)
994
result = super(WeaveFile, self)._add_lines(
995
version_id, parents, lines, parent_texts, left_matching_blocks,
996
nostore_sha, random_id, check_content)
934
result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
935
parent_texts, left_matching_blocks, nostore_sha, random_id,
1000
940
def copy_to(self, name, transport):
1001
941
"""See VersionedFile.copy_to()."""
1002
942
# as we are all in memory always, just serialise to the new place.
1004
944
write_weave_v5(self, sio)
1006
946
transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
1045
985
:param msg: An optional message for the progress
989
queue_a = range(wa.num_versions())
990
queue_b = range(wb.num_versions())
1048
991
# first determine combined parents of all versions
1049
992
# map from version name -> all parent names
1050
993
combined_parents = _reweave_parent_graphs(wa, wb)
1051
994
mutter("combined parents: %r", combined_parents)
1052
order = tsort.topo_sort(combined_parents.items())
995
order = tsort.topo_sort(combined_parents.iteritems())
1053
996
mutter("order to reweave: %r", order)
1055
998
if pb and not msg:
1067
1010
mutter('weaves: %s, %s', wa._weave_name, wb._weave_name)
1069
1012
lines = list(difflib.unified_diff(lines, lines_b,
1070
wa._weave_name, wb._weave_name))
1013
wa._weave_name, wb._weave_name))
1071
1014
mutter('lines:\n%s', ''.join(lines))
1072
raise WeaveTextDiffers(name, wa, wb)
1015
raise errors.WeaveTextDiffers(name, wa, wb)
1074
1017
lines = wb.get_lines(name)
1075
1018
wr._add(name, lines, [wr._lookup(i) for i in combined_parents[name]])