160
169
each version; the parent's parents are implied.
163
List of hex SHA-1 of each version, or None if not recorded.
172
List of hex SHA-1 of each version.
175
List of symbolic names for each version. Each should be unique.
178
For each name, the version number.
181
Descriptive name of this weave; typically the filename if known.
166
__slots__ = ['_weave', '_parents', '_sha1s']
185
__slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map',
186
'_weave_name', '_matcher']
188
def __init__(self, weave_name=None, access_mode='w', matcher=None, get_scope=None):
189
super(Weave, self).__init__(access_mode)
170
191
self._parents = []
195
self._weave_name = weave_name
197
self._matcher = bzrlib.patiencediff.PatienceSequenceMatcher
199
self._matcher = matcher
200
if get_scope is None:
201
get_scope = lambda:None
202
self.get_scope = get_scope
203
self.scope = get_scope()
204
self._access_mode = access_mode
207
return "Weave(%r)" % self._weave_name
209
def _check_write_ok(self):
210
"""Is the versioned file marked as 'finished' ? Raise if it is."""
211
if self.get_scope() != self.scope:
212
raise errors.OutSideTransaction()
213
if self._access_mode != 'w':
214
raise errors.ReadOnlyObjectDirtiedError(self)
217
"""Return a deep copy of self.
219
The copy can be modified without affecting the original weave."""
221
other._weave = self._weave[:]
222
other._parents = self._parents[:]
223
other._sha1s = self._sha1s[:]
224
other._names = self._names[:]
225
other._name_map = self._name_map.copy()
226
other._weave_name = self._weave_name
174
229
def __eq__(self, other):
175
230
if not isinstance(other, Weave):
177
232
return self._parents == other._parents \
178
and self._weave == other._weave
233
and self._weave == other._weave \
234
and self._sha1s == other._sha1s
181
236
def __ne__(self, other):
182
237
return not self.__eq__(other)
185
def add(self, parents, text):
239
def _idx_to_name(self, version):
240
return self._names[version]
242
def _lookup(self, name):
243
"""Convert symbolic version name to index."""
244
self.check_not_reserved_id(name)
246
return self._name_map[name]
248
raise RevisionNotPresent(name, self._weave_name)
251
"""See VersionedFile.versions."""
252
return self._names[:]
254
def has_version(self, version_id):
255
"""See VersionedFile.has_version."""
256
return (version_id in self._name_map)
258
__contains__ = has_version
260
def get_parent_map(self, version_ids):
261
"""See VersionedFile.get_parent_map."""
263
for version_id in version_ids:
265
result[version_id] = tuple(
266
map(self._idx_to_name, self._parents[self._lookup(version_id)]))
267
except RevisionNotPresent:
271
def get_parents_with_ghosts(self, version_id):
272
raise NotImplementedError(self.get_parents_with_ghosts)
274
def _check_repeated_add(self, name, parents, text, sha1):
275
"""Check that a duplicated add is OK.
277
If it is, return the (old) index; otherwise raise an exception.
279
idx = self._lookup(name)
280
if sorted(self._parents[idx]) != sorted(parents) \
281
or sha1 != self._sha1s[idx]:
282
raise RevisionAlreadyPresent(name, self._weave_name)
285
def _add_lines(self, version_id, parents, lines, parent_texts,
286
left_matching_blocks, nostore_sha, random_id, check_content):
287
"""See VersionedFile.add_lines."""
288
idx = self._add(version_id, lines, map(self._lookup, parents),
289
nostore_sha=nostore_sha)
290
return sha_strings(lines), sum(map(len, lines)), idx
292
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
186
293
"""Add a single text on top of the weave.
188
295
Returns the index number of the newly added version.
298
Symbolic name for this version.
299
(Typically the revision-id of the revision that added it.)
191
302
List or set of direct parent version numbers.
194
Sequence of lines to be added in the new version."""
305
Sequence of lines to be added in the new version.
307
:param nostore_sha: See VersionedFile.add_lines.
309
assert isinstance(version_id, basestring)
310
self._check_lines_not_unicode(lines)
311
self._check_lines_are_lines(lines)
313
sha1 = sha_strings(lines)
314
if sha1 == nostore_sha:
315
raise errors.ExistingContent
316
if version_id in self._name_map:
317
return self._check_repeated_add(version_id, parents, lines, sha1)
196
319
self._check_versions(parents)
197
## self._check_lines(text)
320
## self._check_lines(lines)
198
321
new_version = len(self._parents)
206
# if we abort after here the weave will be corrupt
207
self._parents.append(frozenset(parents))
323
# if we abort after here the (in-memory) weave will be corrupt because only
324
# some fields are updated
325
# XXX: FIXME implement a succeed-or-fail of the rest of this routine.
326
# - Robert Collins 20060226
327
self._parents.append(parents[:])
208
328
self._sha1s.append(sha1)
329
self._names.append(version_id)
330
self._name_map[version_id] = new_version
446
assert isinstance(l, basestring)
648
assert l.__class__ in (str, unicode)
447
649
if isactive is None:
448
650
isactive = (not dset) and istack and (istack[-1] in included)
450
652
result.append((istack[-1], lineno, l))
454
raise WFE("unclosed insertion blocks at end of weave",
655
raise WeaveFormatError("unclosed insertion blocks "
656
"at end of weave: %s" % istack)
457
raise WFE("unclosed deletion blocks at end of weave",
464
def get_iter(self, version):
465
"""Yield lines for the specified version."""
466
for origin, lineno, line in self._extract([version]):
470
def get(self, index):
471
return list(self.get_iter(index))
474
def mash_iter(self, included):
475
"""Return composed version of multiple included versions."""
476
for origin, lineno, text in self._extract(included):
480
def dump(self, to_file):
481
from pprint import pprint
482
print >>to_file, "Weave._weave = ",
483
pprint(self._weave, to_file)
484
print >>to_file, "Weave._parents = ",
485
pprint(self._parents, to_file)
489
def numversions(self):
658
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
662
def _maybe_lookup(self, name_or_index):
663
"""Convert possible symbolic name to index, or pass through indexes.
667
if isinstance(name_or_index, (int, long)):
670
return self._lookup(name_or_index)
672
def get_lines(self, version_id):
673
"""See VersionedFile.get_lines()."""
674
int_index = self._maybe_lookup(version_id)
675
result = [line for (origin, lineno, line) in self._extract([int_index])]
676
expected_sha1 = self._sha1s[int_index]
677
measured_sha1 = sha_strings(result)
678
if measured_sha1 != expected_sha1:
679
raise errors.WeaveInvalidChecksum(
680
'file %s, revision %s, expected: %s, measured %s'
681
% (self._weave_name, version_id,
682
expected_sha1, measured_sha1))
685
def get_sha1(self, version_id):
686
"""See VersionedFile.get_sha1()."""
687
return self._sha1s[self._lookup(version_id)]
689
def get_sha1s(self, version_ids):
690
"""See VersionedFile.get_sha1s()."""
691
return [self._sha1s[self._lookup(v)] for v in version_ids]
693
def num_versions(self):
694
"""How many versions are in this weave?"""
490
695
l = len(self._parents)
491
696
assert l == len(self._sha1s)
496
return self.numversions()
699
__len__ = num_versions
499
701
def check(self, progress_bar=None):
500
# check no circular inclusions
501
for version in range(self.numversions()):
702
# TODO evaluate performance hit of using string sets in this routine.
703
# TODO: check no circular inclusions
704
# TODO: create a nested progress bar
705
for version in range(self.num_versions()):
502
706
inclusions = list(self._parents[version])
504
708
inclusions.sort()
506
710
raise WeaveFormatError("invalid included version %d for index %d"
507
711
% (inclusions[-1], version))
509
# try extracting all versions; this is a bit slow and parallel
510
# extraction could be used
512
nv = self.numversions()
513
for version in range(nv):
713
# try extracting all versions; parallel extraction is used
714
nv = self.num_versions()
719
# For creating the ancestry, IntSet is much faster (3.7s vs 0.17s)
720
# The problem is that set membership is much more expensive
721
name = self._idx_to_name(i)
722
sha1s[name] = sha.new()
724
new_inc = set([name])
725
for p in self._parents[i]:
726
new_inc.update(inclusions[self._idx_to_name(p)])
728
assert set(new_inc) == set(self.get_ancestry(name)), \
729
'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
730
inclusions[name] = new_inc
732
nlines = len(self._weave)
734
update_text = 'checking weave'
736
short_name = os.path.basename(self._weave_name)
737
update_text = 'checking %s' % (short_name,)
738
update_text = update_text[:25]
740
for lineno, insert, deleteset, line in self._walk_internal():
515
progress_bar.update('checking text', version, nv)
517
for l in self.get_iter(version):
520
expected = self._sha1s[version]
742
progress_bar.update(update_text, lineno, nlines)
744
for name, name_inclusions in inclusions.items():
745
# The active inclusion must be an ancestor,
746
# and no ancestors must have deleted this line,
747
# because we don't support resurrection.
748
if (insert in name_inclusions) and not (deleteset & name_inclusions):
749
sha1s[name].update(line)
752
version = self._idx_to_name(i)
753
hd = sha1s[version].hexdigest()
754
expected = self._sha1s[i]
521
755
if hd != expected:
522
raise WeaveError("mismatched sha1 for version %d; "
523
"got %s, expected %s"
524
% (version, hd, expected))
756
raise errors.WeaveInvalidChecksum(
757
"mismatched sha1 for version %s: "
758
"got %s, expected %s"
759
% (version, hd, expected))
526
761
# TODO: check insertions are properly nested, that there are
527
762
# no lines outside of insertion blocks, that deletions are
528
763
# properly paired, etc.
532
def merge(self, merge_versions):
533
"""Automerge and mark conflicts between versions.
535
This returns a sequence, each entry describing alternatives
536
for a chunk of the file. Each of the alternatives is given as
539
If there is a chunk of the file where there's no diagreement,
540
only one alternative is given.
543
# approach: find the included versions common to all the
545
raise NotImplementedError()
549
def _delta(self, included, lines):
550
"""Return changes from basis to new revision.
552
The old text for comparison is the union of included revisions.
554
This is used in inserting a new text.
556
Delta is returned as a sequence of
557
(weave1, weave2, newlines).
559
This indicates that weave1:weave2 of the old weave should be
560
replaced by the sequence of lines in newlines. Note that
561
these line numbers are positions in the total weave and don't
562
correspond to the lines in any extracted version, or even the
563
extracted union of included versions.
565
If line1=line2, this is a pure insert; if newlines=[] this is a
566
pure delete. (Similar to difflib.)
571
def plan_merge(self, ver_a, ver_b):
572
"""Return pseudo-annotation indicating how the two versions merge.
574
This is computed between versions a and b and their common
577
Weave lines present in none of them are skipped entirely.
579
inc_a = self.inclusions([ver_a])
580
inc_b = self.inclusions([ver_b])
581
inc_c = inc_a & inc_b
583
for lineno, insert, deleteset, line in self._walk():
584
if deleteset & inc_c:
585
# killed in parent; can't be in either a or b
586
# not relevant to our work
587
yield 'killed-base', line
588
elif insert in inc_c:
589
# was inserted in base
590
killed_a = bool(deleteset & inc_a)
591
killed_b = bool(deleteset & inc_b)
592
if killed_a and killed_b:
593
yield 'killed-both', line
595
yield 'killed-a', line
597
yield 'killed-b', line
599
yield 'unchanged', line
600
elif insert in inc_a:
601
if deleteset & inc_a:
602
yield 'ghost-a', line
606
elif insert in inc_b:
607
if deleteset & inc_b:
608
yield 'ghost-b', line
612
# not in either revision
613
yield 'irrelevant', line
615
yield 'unchanged', '' # terminator
619
def weave_merge(self, plan):
624
for state, line in plan:
625
if state == 'unchanged' or state == 'killed-both':
626
# resync and flush queued conflicts changes if any
627
if not lines_a and not lines_b:
629
elif ch_a and not ch_b:
631
for l in lines_a: yield l
632
elif ch_b and not ch_a:
633
for l in lines_b: yield l
634
elif lines_a == lines_b:
635
for l in lines_a: yield l
638
for l in lines_a: yield l
640
for l in lines_b: yield l
647
if state == 'unchanged':
650
elif state == 'killed-a':
653
elif state == 'killed-b':
656
elif state == 'new-a':
659
elif state == 'new-b':
663
assert state in ('irrelevant', 'ghost-a', 'ghost-b', 'killed-base',
674
"""Show some text information about the weave."""
675
print '%6s %40s %20s' % ('ver', 'sha1', 'parents')
676
for i in (6, 40, 20):
765
def _join(self, other, pb, msg, version_ids, ignore_missing):
766
"""Worker routine for join()."""
767
if not other.versions():
768
return # nothing to update, easy
771
# versions is never none, InterWeave checks this.
774
# two loops so that we do not change ourselves before verifying it
776
# work through in index order to make sure we get all dependencies
779
# get the selected versions only that are in other.versions.
780
version_ids = set(other.versions()).intersection(set(version_ids))
781
# pull in the referenced graph.
782
version_ids = other.get_ancestry(version_ids)
783
pending_parents = other.get_parent_map(version_ids)
784
pending_graph = pending_parents.items()
785
if len(pending_graph) != len(version_ids):
786
raise RevisionNotPresent(
787
set(version_ids) - set(pending_parents.keys()), self)
788
for name in topo_sort(pending_graph):
789
other_idx = other._name_map[name]
790
# returns True if we have it, False if we need it.
791
if not self._check_version_consistent(other, other_idx, name):
792
names_to_join.append((other_idx, name))
800
for other_idx, name in names_to_join:
801
# TODO: If all the parents of the other version are already
802
# present then we can avoid some work by just taking the delta
803
# and adjusting the offsets.
804
new_parents = self._imported_parents(other, other_idx)
805
sha1 = other._sha1s[other_idx]
810
pb.update(msg, merged, len(names_to_join))
812
lines = other.get_lines(other_idx)
813
self._add(name, lines, new_parents, sha1)
815
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
816
merged, processed, self._weave_name, time.time()-time0))
818
def _imported_parents(self, other, other_idx):
819
"""Return list of parents in self corresponding to indexes in other."""
821
for parent_idx in other._parents[other_idx]:
822
parent_name = other._names[parent_idx]
823
if parent_name not in self._name_map:
824
# should not be possible
825
raise WeaveError("missing parent {%s} of {%s} in %r"
826
% (parent_name, other._name_map[other_idx], self))
827
new_parents.append(self._name_map[parent_name])
830
def _check_version_consistent(self, other, other_idx, name):
831
"""Check if a version in consistent in this and other.
833
To be consistent it must have:
836
* the same direct parents (by name, not index, and disregarding
839
If present & correct return True;
840
if not present in self return False;
841
if inconsistent raise error."""
842
this_idx = self._name_map.get(name, -1)
844
if self._sha1s[this_idx] != other._sha1s[other_idx]:
845
raise errors.WeaveTextDiffers(name, self, other)
846
self_parents = self._parents[this_idx]
847
other_parents = other._parents[other_idx]
848
n1 = set([self._names[i] for i in self_parents])
849
n2 = set([other._names[i] for i in other_parents])
850
if not self._compatible_parents(n1, n2):
851
raise WeaveParentMismatch("inconsistent parents "
852
"for version {%s}: %s vs %s" % (name, n1, n2))
858
def _reweave(self, other, pb, msg):
859
"""Reweave self with other - internal helper for join().
861
:param other: The other weave to merge
862
:param pb: An optional progress bar, indicating how far done we are
863
:param msg: An optional message for the progress
865
new_weave = _reweave(self, other, pb=pb, msg=msg)
866
self._copy_weave_content(new_weave)
868
def _copy_weave_content(self, otherweave):
869
"""adsorb the content from otherweave."""
870
for attr in self.__slots__:
871
if attr != '_weave_name':
872
setattr(self, attr, copy(getattr(otherweave, attr)))
875
class WeaveFile(Weave):
876
"""A WeaveFile represents a Weave on disk and writes on change."""
878
WEAVE_SUFFIX = '.weave'
880
def __init__(self, name, transport, filemode=None, create=False, access_mode='w', get_scope=None):
881
"""Create a WeaveFile.
883
:param create: If not True, only open an existing knit.
885
super(WeaveFile, self).__init__(name, access_mode, get_scope=get_scope)
886
self._transport = transport
887
self._filemode = filemode
889
_read_weave_v5(self._transport.get(name + WeaveFile.WEAVE_SUFFIX), self)
890
except errors.NoSuchFile:
896
def _add_lines(self, version_id, parents, lines, parent_texts,
897
left_matching_blocks, nostore_sha, random_id, check_content):
898
"""Add a version and save the weave."""
899
self.check_not_reserved_id(version_id)
900
result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
901
parent_texts, left_matching_blocks, nostore_sha, random_id,
906
def _clone_text(self, new_version_id, old_version_id, parents):
907
"""See VersionedFile.clone_text."""
908
super(WeaveFile, self)._clone_text(new_version_id, old_version_id, parents)
911
def copy_to(self, name, transport):
912
"""See VersionedFile.copy_to()."""
913
# as we are all in memory always, just serialise to the new place.
915
write_weave_v5(self, sio)
917
transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
920
"""Save the weave."""
921
self._check_write_ok()
923
write_weave_v5(self, sio)
925
self._transport.put_file(self._weave_name + WeaveFile.WEAVE_SUFFIX,
931
"""See VersionedFile.get_suffixes()."""
932
return [WeaveFile.WEAVE_SUFFIX]
934
def join(self, other, pb=None, msg=None, version_ids=None,
935
ignore_missing=False):
936
"""Join other into self and save."""
937
super(WeaveFile, self).join(other, pb, msg, version_ids, ignore_missing)
941
def _reweave(wa, wb, pb=None, msg=None):
942
"""Combine two weaves and return the result.
944
This works even if a revision R has different parents in
945
wa and wb. In the resulting weave all the parents are given.
947
This is done by just building up a new weave, maintaining ordering
948
of the versions in the two inputs. More efficient approaches
949
might be possible but it should only be necessary to do
950
this operation rarely, when a new previously ghost version is
953
:param pb: An optional progress bar, indicating how far done we are
954
:param msg: An optional message for the progress
958
queue_a = range(wa.num_versions())
959
queue_b = range(wb.num_versions())
960
# first determine combined parents of all versions
961
# map from version name -> all parent names
962
combined_parents = _reweave_parent_graphs(wa, wb)
963
mutter("combined parents: %r", combined_parents)
964
order = topo_sort(combined_parents.iteritems())
965
mutter("order to reweave: %r", order)
970
for idx, name in enumerate(order):
972
pb.update(msg, idx, len(order))
973
if name in wa._name_map:
974
lines = wa.get_lines(name)
975
if name in wb._name_map:
976
lines_b = wb.get_lines(name)
978
mutter('Weaves differ on content. rev_id {%s}', name)
979
mutter('weaves: %s, %s', wa._weave_name, wb._weave_name)
981
lines = list(difflib.unified_diff(lines, lines_b,
982
wa._weave_name, wb._weave_name))
983
mutter('lines:\n%s', ''.join(lines))
984
raise errors.WeaveTextDiffers(name, wa, wb)
986
lines = wb.get_lines(name)
987
wr._add(name, lines, [wr._lookup(i) for i in combined_parents[name]])
990
def _reweave_parent_graphs(wa, wb):
991
"""Return combined parent ancestry for two weaves.
993
Returned as a list of (version_name, set(parent_names))"""
995
for weave in [wa, wb]:
996
for idx, name in enumerate(weave._names):
997
p = combined.setdefault(name, set())
998
p.update(map(weave._idx_to_name, weave._parents[idx]))
1003
"""Show the weave's table-of-contents"""
1004
print '%6s %50s %10s %10s' % ('ver', 'name', 'sha1', 'parents')
1005
for i in (6, 50, 10, 10):
679
for i in range(w.numversions()):
1008
for i in range(w.num_versions()):
680
1009
sha1 = w._sha1s[i]
681
print '%6d %40s %s' % (i, sha1, ' '.join(map(str, w._parents[i])))
685
def weave_stats(weave_file):
686
from bzrlib.progress import ProgressBar
1011
parent_str = ' '.join(map(str, w._parents[i]))
1012
print '%6d %-50.50s %10.10s %s' % (i, name, sha1, parent_str)
1016
def weave_stats(weave_file, pb):
687
1017
from bzrlib.weavefile import read_weave
691
1019
wf = file(weave_file, 'rb')
692
1020
w = read_weave(wf)
693
1021
# FIXME: doesn't work on pipes