3
# Copyright (C) 2005 Canonical Ltd
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU General Public License for more details.
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
# Author: Martin Pool <mbp@canonical.com>
22
"""Weave - storage of related text file versions"""
25
# XXX: If we do weaves this way, will a merge still behave the same
26
# way if it's done in a different order? That's a pretty desirable
29
# TODO: Nothing here so far assumes the lines are really \n newlines,
30
# rather than being split up in some other way. We could accomodate
31
# binaries, perhaps by naively splitting on \n or perhaps using
32
# something like a rolling checksum.
34
# TODO: End marker for each version so we can stop reading?
36
# TODO: Check that no insertion occurs inside a deletion that was
37
# active in the version of the insertion.
39
# TODO: In addition to the SHA-1 check, perhaps have some code that
40
# checks structural constraints of the weave: ie that insertions are
41
# properly nested, that there is no text outside of an insertion, that
42
# insertions or deletions are not repeated, etc.
44
# TODO: Parallel-extract that passes back each line along with a
45
# description of which revisions include it. Nice for checking all
46
# shas or calculating stats in parallel.
48
# TODO: Using a single _extract routine and then processing the output
49
# is probably inefficient. It's simple enough that we can afford to
50
# have slight specializations for different ways its used: annotate,
51
# basis for add, get, etc.
53
# TODO: Probably the API should work only in names to hide the integer
54
# indexes from the user.
56
# TODO: Is there any potential performance win by having an add()
57
# variant that is passed a pre-cooked version of the single basis
60
# TODO: Reweave can possibly be made faster by remembering diffs
61
# where the basis and destination are unchanged.
63
# FIXME: Sometimes we will be given a parents list for a revision
64
# that includes some redundant parents (i.e. already a parent of
65
# something in the list.) We should eliminate them. This can
66
# be done fairly efficiently because the sequence numbers constrain
67
# the possible relationships.
71
from cStringIO import StringIO
72
from difflib import SequenceMatcher
77
from bzrlib.trace import mutter
78
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
79
RevisionAlreadyPresent,
81
WeaveRevisionAlreadyPresent,
82
WeaveRevisionNotPresent,
84
import bzrlib.errors as errors
85
from bzrlib.osutils import sha_strings
86
from bzrlib.symbol_versioning import *
87
from bzrlib.tsort import topo_sort
88
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
89
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
92
class Weave(VersionedFile):
93
"""weave - versioned text file storage.
95
A Weave manages versions of line-based text files, keeping track
96
of the originating version for each line.
98
To clients the "lines" of the file are represented as a list of strings.
99
These strings will typically have terminal newline characters, but
100
this is not required. In particular files commonly do not have a newline
101
at the end of the file.
103
Texts can be identified in either of two ways:
105
* a nonnegative index number.
107
* a version-id string.
109
Typically the index number will be valid only inside this weave and
110
the version-id is used to reference it in the larger world.
112
The weave is represented as a list mixing edit instructions and
113
literal text. Each entry in _weave can be either a string (or
114
unicode), or a tuple. If a string, it means that the given line
115
should be output in the currently active revisions.
117
If a tuple, it gives a processing instruction saying in which
118
revisions the enclosed lines are active. The tuple has the form
119
(instruction, version).
121
The instruction can be '{' or '}' for an insertion block, and '['
122
and ']' for a deletion block respectively. The version is the
123
integer version index. There is no replace operator, only deletes
124
and inserts. For '}', the end of an insertion, there is no
125
version parameter because it always closes the most recently
130
* A later version can delete lines that were introduced by any
131
number of ancestor versions; this implies that deletion
132
instructions can span insertion blocks without regard to the
133
insertion block's nesting.
135
* Similarly, deletions need not be properly nested with regard to
136
each other, because they might have been generated by
137
independent revisions.
139
* Insertions are always made by inserting a new bracketed block
140
into a single point in the previous weave. This implies they
141
can nest but not overlap, and the nesting must always have later
142
insertions on the inside.
144
* It doesn't seem very useful to have an active insertion
145
inside an inactive insertion, but it might happen.
147
* Therefore, all instructions are always"considered"; that
148
is passed onto and off the stack. An outer inactive block
149
doesn't disable an inner block.
151
* Lines are enabled if the most recent enclosing insertion is
152
active and none of the enclosing deletions are active.
154
* There is no point having a deletion directly inside its own
155
insertion; you might as well just not write it. And there
156
should be no way to get an earlier version deleting a later
160
Text of the weave; list of control instruction tuples and strings.
163
List of parents, indexed by version number.
164
It is only necessary to store the minimal set of parents for
165
each version; the parent's parents are implied.
168
List of hex SHA-1 of each version.
171
List of symbolic names for each version. Each should be unique.
174
For each name, the version number.
177
Descriptive name of this weave; typically the filename if known.
181
__slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map',
184
def __init__(self, weave_name=None):
190
self._weave_name = weave_name
193
return "Weave(%r)" % self._weave_name
196
"""Return a deep copy of self.
198
The copy can be modified without affecting the original weave."""
200
other._weave = self._weave[:]
201
other._parents = self._parents[:]
202
other._sha1s = self._sha1s[:]
203
other._names = self._names[:]
204
other._name_map = self._name_map.copy()
205
other._weave_name = self._weave_name
208
def __eq__(self, other):
209
if not isinstance(other, Weave):
211
return self._parents == other._parents \
212
and self._weave == other._weave \
213
and self._sha1s == other._sha1s
215
def __ne__(self, other):
216
return not self.__eq__(other)
218
@deprecated_method(zero_eight)
219
def idx_to_name(self, index):
220
"""Old public interface, the public interface is all names now."""
223
def _idx_to_name(self, version):
224
return self._names[version]
226
@deprecated_method(zero_eight)
227
def lookup(self, name):
228
"""Backwards compatability thunk:
230
Return name, as name is valid in the api now, and spew deprecation
235
def _lookup(self, name):
236
"""Convert symbolic version name to index."""
238
return self._name_map[name]
240
raise RevisionNotPresent(name, self._weave_name)
242
@deprecated_method(zero_eight)
243
def iter_names(self):
244
"""Deprecated convenience function, please see VersionedFile.names()."""
245
return iter(self.names())
247
@deprecated_method(zero_eight)
249
"""See Weave.versions for the current api."""
250
return self.versions()
253
"""See VersionedFile.versions."""
254
return self._names[:]
256
def has_version(self, version_id):
257
"""See VersionedFile.has_version."""
258
return self._name_map.has_key(version_id)
260
__contains__ = has_version
262
def get_parents(self, version_id):
263
"""See VersionedFile.get_parent."""
264
return map(self._idx_to_name, self._parents[self._lookup(version_id)])
266
def _check_repeated_add(self, name, parents, text, sha1):
267
"""Check that a duplicated add is OK.
269
If it is, return the (old) index; otherwise raise an exception.
271
idx = self._lookup(name)
272
if sorted(self._parents[idx]) != sorted(parents) \
273
or sha1 != self._sha1s[idx]:
274
raise RevisionAlreadyPresent(name, self._weave_name)
277
@deprecated_method(zero_eight)
278
def add_identical(self, old_rev_id, new_rev_id, parents):
279
"""Please use Weave.clone_text now."""
280
return self.clone_text(new_rev_id, old_rev_id, parents)
282
def add_lines(self, version_id, parents, lines):
283
"""See VersionedFile.add_lines."""
284
return self._add(version_id, lines, map(self._lookup, parents))
286
@deprecated_method(zero_eight)
287
def add(self, name, parents, text, sha1=None):
288
"""See VersionedFile.add_lines for the non deprecated api."""
289
return self._add(name, text, map(self._maybe_lookup, parents), sha1)
291
def _add(self, version_id, lines, parents, sha1=None):
292
"""Add a single text on top of the weave.
294
Returns the index number of the newly added version.
297
Symbolic name for this version.
298
(Typically the revision-id of the revision that added it.)
301
List or set of direct parent version numbers.
304
Sequence of lines to be added in the new version.
307
assert isinstance(version_id, basestring)
309
sha1 = sha_strings(lines)
310
if version_id in self._name_map:
311
return self._check_repeated_add(version_id, parents, lines, sha1)
313
self._check_versions(parents)
314
## self._check_lines(lines)
315
new_version = len(self._parents)
317
# if we abort after here the (in-memory) weave will be corrupt because only
318
# some fields are updated
319
# XXX: FIXME implement a succeed-or-fail of the rest of this routine.
320
# - Robert Collins 20060226
321
self._parents.append(parents[:])
322
self._sha1s.append(sha1)
323
self._names.append(version_id)
324
self._name_map[version_id] = new_version
328
# special case; adding with no parents revision; can do
329
# this more quickly by just appending unconditionally.
330
# even more specially, if we're adding an empty text we
331
# need do nothing at all.
333
self._weave.append(('{', new_version))
334
self._weave.extend(lines)
335
self._weave.append(('}', None))
338
if len(parents) == 1:
339
pv = list(parents)[0]
340
if sha1 == self._sha1s[pv]:
341
# special case: same as the single parent
345
ancestors = self._inclusions(parents)
349
# basis a list of (origin, lineno, line)
352
for origin, lineno, line in self._extract(ancestors):
353
basis_lineno.append(lineno)
354
basis_lines.append(line)
356
# another small special case: a merge, producing the same text
358
if lines == basis_lines:
361
# add a sentinal, because we can also match against the final line
362
basis_lineno.append(len(self._weave))
364
# XXX: which line of the weave should we really consider
365
# matches the end of the file? the current code says it's the
366
# last line of the weave?
368
#print 'basis_lines:', basis_lines
369
#print 'new_lines: ', lines
371
s = SequenceMatcher(None, basis_lines, lines)
373
# offset gives the number of lines that have been inserted
374
# into the weave up to the current point; if the original edit instruction
375
# says to change line A then we actually change (A+offset)
378
for tag, i1, i2, j1, j2 in s.get_opcodes():
379
# i1,i2 are given in offsets within basis_lines; we need to map them
380
# back to offsets within the entire weave
381
#print 'raw match', tag, i1, i2, j1, j2
385
i1 = basis_lineno[i1]
386
i2 = basis_lineno[i2]
388
assert 0 <= j1 <= j2 <= len(lines)
390
#print tag, i1, i2, j1, j2
392
# the deletion and insertion are handled separately.
393
# first delete the region.
395
self._weave.insert(i1+offset, ('[', new_version))
396
self._weave.insert(i2+offset+1, (']', new_version))
400
# there may have been a deletion spanning up to
401
# i2; we want to insert after this region to make sure
402
# we don't destroy ourselves
404
self._weave[i:i] = ([('{', new_version)]
407
offset += 2 + (j2 - j1)
410
def clone_text(self, new_version_id, old_version_id, parents):
411
"""See VersionedFile.clone_text."""
412
old_lines = self.get_text(old_version_id)
413
self.add_lines(new_version_id, parents, old_lines)
415
def _inclusions(self, versions):
416
"""Return set of all ancestors of given version(s)."""
417
if not len(versions):
420
for v in xrange(max(versions), 0, -1):
422
# include all its parents
423
i.update(self._parents[v])
425
## except IndexError:
426
## raise ValueError("version %d not present in weave" % v)
428
@deprecated_method(zero_eight)
429
def inclusions(self, version_ids):
430
"""Deprecated - see VersionedFile.get_ancestry for the replacement."""
433
if isinstance(version_ids[0], int):
434
return [self._idx_to_name(v) for v in self._inclusions(version_ids)]
436
return self.get_ancestry(version_ids)
438
def get_ancestry(self, version_ids):
439
"""See VersionedFile.get_ancestry."""
440
if isinstance(version_ids, basestring):
441
version_ids = [version_ids]
442
i = self._inclusions([self._lookup(v) for v in version_ids])
443
return [self._idx_to_name(v) for v in i]
445
def _check_lines(self, text):
446
if not isinstance(text, list):
447
raise ValueError("text should be a list, not %s" % type(text))
450
if not isinstance(l, basestring):
451
raise ValueError("text line should be a string or unicode, not %s"
456
def _check_versions(self, indexes):
457
"""Check everything in the sequence of indexes is valid"""
462
raise IndexError("invalid version number %r" % i)
464
def _compatible_parents(self, my_parents, other_parents):
465
"""During join check that other_parents are joinable with my_parents.
467
Joinable is defined as 'is a subset of' - supersets may require
468
regeneration of diffs, but subsets do not.
470
return len(other_parents.difference(my_parents)) == 0
472
def annotate(self, version_id):
473
if isinstance(version_id, int):
474
warn('Weave.annotate(int) is deprecated. Please use version names'
475
' in all circumstances as of 0.8',
480
for origin, lineno, text in self._extract([version_id]):
481
result.append((origin, text))
484
return super(Weave, self).annotate(version_id)
486
def annotate_iter(self, version_id):
487
"""Yield list of (version-id, line) pairs for the specified version.
489
The index indicates when the line originated in the weave."""
490
incls = [self._lookup(version_id)]
491
for origin, lineno, text in self._extract(incls):
492
yield self._idx_to_name(origin), text
494
@deprecated_method(zero_eight)
496
"""_walk has become visit, a supported api."""
497
return self._walk_internal()
499
def iter_lines_added_or_present_in_versions(self, version_ids=None):
500
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
501
if version_ids is None:
502
version_ids = self.versions()
503
version_ids = set(version_ids)
504
for lineno, inserted, deletes, line in self._walk_internal(version_ids):
505
# if inserted not in version_ids then it was inserted before the
506
# versions we care about, but because weaves cannot represent ghosts
507
# properly, we do not filter down to that
508
# if inserted not in version_ids: continue
514
#@deprecated_method(zero_eight)
515
def walk(self, version_ids=None):
516
"""See VersionedFile.walk."""
517
return self._walk_internal(version_ids)
519
def _walk_internal(self, version_ids=None):
520
"""Helper method for weave actions."""
525
lineno = 0 # line of weave, 0-based
527
for l in self._weave:
528
if isinstance(l, tuple):
532
istack.append(self._idx_to_name(v))
536
assert self._idx_to_name(v) not in dset
537
dset.add(self._idx_to_name(v))
539
dset.remove(self._idx_to_name(v))
541
raise WeaveFormatError('unexpected instruction %r' % v)
543
assert isinstance(l, basestring)
545
yield lineno, istack[-1], dset.copy(), l
549
raise WeaveFormatError("unclosed insertion blocks "
550
"at end of weave: %s" % istack)
552
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
555
def _extract(self, versions):
556
"""Yield annotation of lines in included set.
558
Yields a sequence of tuples (origin, lineno, text), where
559
origin is the origin version, lineno the index in the weave,
560
and text the text of the line.
562
The set typically but not necessarily corresponds to a version.
565
if not isinstance(i, int):
568
included = self._inclusions(versions)
573
lineno = 0 # line of weave, 0-based
579
WFE = WeaveFormatError
581
for l in self._weave:
582
if isinstance(l, tuple):
586
assert v not in istack
600
assert isinstance(l, basestring)
602
isactive = (not dset) and istack and (istack[-1] in included)
604
result.append((istack[-1], lineno, l))
607
raise WeaveFormatError("unclosed insertion blocks "
608
"at end of weave: %s" % istack)
610
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
614
@deprecated_method(zero_eight)
615
def get_iter(self, name_or_index):
616
"""Deprecated, please do not use. Lookups are not not needed.
618
Please use get_lines now.
620
return self._get_iter(self._maybe_lookup(name_or_index))
622
@deprecated_method(zero_eight)
623
def maybe_lookup(self, name_or_index):
624
"""Deprecated, please do not use. Lookups are not not needed."""
625
return self._maybe_lookup(name_or_index)
627
def _maybe_lookup(self, name_or_index):
628
"""Convert possible symbolic name to index, or pass through indexes.
632
if isinstance(name_or_index, (int, long)):
635
return self._lookup(name_or_index)
637
def _get_iter(self, version_id):
638
"""Yield lines for the specified version."""
639
incls = [self._maybe_lookup(version_id)]
644
# We don't have sha1 sums for multiple entries
646
for origin, lineno, line in self._extract(incls):
651
expected_sha1 = self._sha1s[index]
652
measured_sha1 = cur_sha.hexdigest()
653
if measured_sha1 != expected_sha1:
654
raise errors.WeaveInvalidChecksum(
655
'file %s, revision %s, expected: %s, measured %s'
656
% (self._weave_name, self._names[index],
657
expected_sha1, measured_sha1))
659
@deprecated_method(zero_eight)
660
def get(self, version_id):
661
"""Please use either Weave.get_text or Weave.get_lines as desired."""
662
return self.get_lines(version_id)
664
def get_lines(self, version_id):
665
"""See VersionedFile.get_lines()."""
666
return list(self._get_iter(version_id))
668
def get_sha1(self, name):
669
"""Get the stored sha1 sum for the given revision.
671
:param name: The name of the version to lookup
673
return self._sha1s[self._lookup(name)]
675
@deprecated_method(zero_eight)
676
def numversions(self):
677
"""How many versions are in this weave?
679
Deprecated in favour of num_versions.
681
return self.num_versions()
683
def num_versions(self):
684
"""How many versions are in this weave?"""
685
l = len(self._parents)
686
assert l == len(self._sha1s)
689
__len__ = num_versions
691
def check(self, progress_bar=None):
692
# TODO evaluate performance hit of using string sets in this routine.
693
# check no circular inclusions
694
for version in range(self.num_versions()):
695
inclusions = list(self._parents[version])
698
if inclusions[-1] >= version:
699
raise WeaveFormatError("invalid included version %d for index %d"
700
% (inclusions[-1], version))
702
# try extracting all versions; parallel extraction is used
703
nv = self.num_versions()
708
# For creating the ancestry, IntSet is much faster (3.7s vs 0.17s)
709
# The problem is that set membership is much more expensive
710
name = self._idx_to_name(i)
711
sha1s[name] = sha.new()
713
new_inc = set([name])
714
for p in self._parents[i]:
715
new_inc.update(inclusions[self._idx_to_name(p)])
717
assert set(new_inc) == set(self.get_ancestry(name)), \
718
'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
719
inclusions[name] = new_inc
721
nlines = len(self._weave)
723
update_text = 'checking weave'
725
short_name = os.path.basename(self._weave_name)
726
update_text = 'checking %s' % (short_name,)
727
update_text = update_text[:25]
729
for lineno, insert, deleteset, line in self._walk_internal():
731
progress_bar.update(update_text, lineno, nlines)
733
for name, name_inclusions in inclusions.items():
734
# The active inclusion must be an ancestor,
735
# and no ancestors must have deleted this line,
736
# because we don't support resurrection.
737
if (insert in name_inclusions) and not (deleteset & name_inclusions):
738
sha1s[name].update(line)
741
version = self._idx_to_name(i)
742
hd = sha1s[version].hexdigest()
743
expected = self._sha1s[i]
745
raise errors.WeaveInvalidChecksum(
746
"mismatched sha1 for version %s: "
747
"got %s, expected %s"
748
% (version, hd, expected))
750
# TODO: check insertions are properly nested, that there are
751
# no lines outside of insertion blocks, that deletions are
752
# properly paired, etc.
754
def _join(self, other, pb, msg, version_ids, ignore_missing):
755
"""Worker routine for join()."""
756
if not other.versions():
757
return # nothing to update, easy
760
for version_id in version_ids:
761
if not other.has_version(version_id) and not ignore_missing:
762
raise RevisionNotPresent(version_id, self._weave_name)
764
version_ids = other.versions()
766
# two loops so that we do not change ourselves before verifying it
768
# work through in index order to make sure we get all dependencies
771
# get the selected versions only that are in other.versions.
772
version_ids = set(other.versions()).intersection(set(version_ids))
773
# pull in the referenced graph.
774
version_ids = other.get_ancestry(version_ids)
775
pending_graph = [(version, other.get_parents(version)) for
776
version in version_ids]
777
for name in topo_sort(pending_graph):
778
other_idx = other._name_map[name]
779
self._check_version_consistent(other, other_idx, name)
780
sha1 = other._sha1s[other_idx]
784
if name in self._name_map:
785
idx = self._lookup(name)
786
n1 = set(map(other._idx_to_name, other._parents[other_idx]))
787
n2 = set(map(self._idx_to_name, self._parents[idx]))
788
if sha1 == self._sha1s[idx] and n1 == n2:
791
names_to_join.append((other_idx, name))
798
for other_idx, name in names_to_join:
799
# TODO: If all the parents of the other version are already
800
# present then we can avoid some work by just taking the delta
801
# and adjusting the offsets.
802
new_parents = self._imported_parents(other, other_idx)
803
sha1 = other._sha1s[other_idx]
808
pb.update(msg, merged, len(names_to_join))
810
lines = other.get_lines(other_idx)
811
self._add(name, lines, new_parents, sha1)
813
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
814
merged, processed, self._weave_name, time.time()-time0))
816
def _imported_parents(self, other, other_idx):
817
"""Return list of parents in self corresponding to indexes in other."""
819
for parent_idx in other._parents[other_idx]:
820
parent_name = other._names[parent_idx]
821
if parent_name not in self._names:
822
# should not be possible
823
raise WeaveError("missing parent {%s} of {%s} in %r"
824
% (parent_name, other._name_map[other_idx], self))
825
new_parents.append(self._name_map[parent_name])
828
def _check_version_consistent(self, other, other_idx, name):
829
"""Check if a version in consistent in this and other.
831
To be consistent it must have:
834
* the same direct parents (by name, not index, and disregarding
837
If present & correct return True;
838
if not present in self return False;
839
if inconsistent raise error."""
840
this_idx = self._name_map.get(name, -1)
842
if self._sha1s[this_idx] != other._sha1s[other_idx]:
843
raise errors.WeaveTextDiffers(name, self, other)
844
self_parents = self._parents[this_idx]
845
other_parents = other._parents[other_idx]
846
n1 = set([self._names[i] for i in self_parents])
847
n2 = set([other._names[i] for i in other_parents])
848
if not self._compatible_parents(n1, n2):
849
raise WeaveParentMismatch("inconsistent parents "
850
"for version {%s}: %s vs %s" % (name, n1, n2))
856
@deprecated_method(zero_eight)
857
def reweave(self, other, pb=None, msg=None):
858
"""reweave has been superceded by plain use of join."""
859
return self.join(other, pb, msg)
861
def _reweave(self, other, pb, msg):
862
"""Reweave self with other - internal helper for join().
864
:param other: The other weave to merge
865
:param pb: An optional progress bar, indicating how far done we are
866
:param msg: An optional message for the progress
868
new_weave = _reweave(self, other, pb=pb, msg=msg)
869
self._copy_weave_content(new_weave)
871
def _copy_weave_content(self, otherweave):
872
"""adsorb the content from otherweave."""
873
for attr in self.__slots__:
874
if attr != '_weave_name':
875
setattr(self, attr, copy(getattr(otherweave, attr)))
878
class WeaveFile(Weave):
879
"""A WeaveFile represents a Weave on disk and writes on change."""
881
WEAVE_SUFFIX = '.weave'
883
def __init__(self, name, transport, mode=None, create=False):
884
"""Create a WeaveFile.
886
:param create: If not True, only open an existing knit.
888
super(WeaveFile, self).__init__(name)
889
self._transport = transport
892
_read_weave_v5(self._transport.get(name + WeaveFile.WEAVE_SUFFIX), self)
893
except errors.NoSuchFile:
899
def add_lines(self, version_id, parents, lines):
900
"""Add a version and save the weave."""
901
super(WeaveFile, self).add_lines(version_id, parents, lines)
904
def copy_to(self, name, transport):
905
"""See VersionedFile.copy_to()."""
906
# as we are all in memory always, just serialise to the new place.
908
write_weave_v5(self, sio)
910
transport.put(name + WeaveFile.WEAVE_SUFFIX, sio, self._mode)
912
def create_empty(self, name, transport, mode=None):
913
return WeaveFile(name, transport, mode, create=True)
916
"""Save the weave."""
918
write_weave_v5(self, sio)
920
self._transport.put(self._weave_name + WeaveFile.WEAVE_SUFFIX,
926
"""See VersionedFile.get_suffixes()."""
927
return [WeaveFile.WEAVE_SUFFIX]
929
def join(self, other, pb=None, msg=None, version_ids=None,
930
ignore_missing=False):
931
"""Join other into self and save."""
932
super(WeaveFile, self).join(other, pb, msg, version_ids, ignore_missing)
936
@deprecated_function(zero_eight)
937
def reweave(wa, wb, pb=None, msg=None):
938
"""reweaving is deprecation, please just use weave.join()."""
939
_reweave(wa, wb, pb, msg)
941
def _reweave(wa, wb, pb=None, msg=None):
942
"""Combine two weaves and return the result.
944
This works even if a revision R has different parents in
945
wa and wb. In the resulting weave all the parents are given.
947
This is done by just building up a new weave, maintaining ordering
948
of the versions in the two inputs. More efficient approaches
949
might be possible but it should only be necessary to do
950
this operation rarely, when a new previously ghost version is
953
:param pb: An optional progress bar, indicating how far done we are
954
:param msg: An optional message for the progress
958
queue_a = range(wa.num_versions())
959
queue_b = range(wb.num_versions())
960
# first determine combined parents of all versions
961
# map from version name -> all parent names
962
combined_parents = _reweave_parent_graphs(wa, wb)
963
mutter("combined parents: %r", combined_parents)
964
order = topo_sort(combined_parents.iteritems())
965
mutter("order to reweave: %r", order)
970
for idx, name in enumerate(order):
972
pb.update(msg, idx, len(order))
973
if name in wa._name_map:
974
lines = wa.get_lines(name)
975
if name in wb._name_map:
976
lines_b = wb.get_lines(name)
978
mutter('Weaves differ on content. rev_id {%s}', name)
979
mutter('weaves: %s, %s', wa._weave_name, wb._weave_name)
981
lines = list(difflib.unified_diff(lines, lines_b,
982
wa._weave_name, wb._weave_name))
983
mutter('lines:\n%s', ''.join(lines))
984
raise errors.WeaveTextDiffers(name, wa, wb)
986
lines = wb.get_lines(name)
987
wr._add(name, lines, [wr._lookup(i) for i in combined_parents[name]])
990
def _reweave_parent_graphs(wa, wb):
991
"""Return combined parent ancestry for two weaves.
993
Returned as a list of (version_name, set(parent_names))"""
995
for weave in [wa, wb]:
996
for idx, name in enumerate(weave._names):
997
p = combined.setdefault(name, set())
998
p.update(map(weave._idx_to_name, weave._parents[idx]))
1003
"""Show the weave's table-of-contents"""
1004
print '%6s %50s %10s %10s' % ('ver', 'name', 'sha1', 'parents')
1005
for i in (6, 50, 10, 10):
1008
for i in range(w.num_versions()):
1011
parent_str = ' '.join(map(str, w._parents[i]))
1012
print '%6d %-50.50s %10.10s %s' % (i, name, sha1, parent_str)
1016
def weave_stats(weave_file, pb):
1017
from bzrlib.weavefile import read_weave
1019
wf = file(weave_file, 'rb')
1020
w = read_weave(wf, WeaveVersionedFile)
1021
# FIXME: doesn't work on pipes
1022
weave_size = wf.tell()
1026
for i in range(vers):
1027
pb.update('checking sizes', i, vers)
1028
for origin, lineno, line in w._extract([i]):
1033
print 'versions %9d' % vers
1034
print 'weave file %9d bytes' % weave_size
1035
print 'total contents %9d bytes' % total
1036
print 'compression ratio %9.2fx' % (float(total) / float(weave_size))
1039
print 'average size %9d bytes' % avg
1040
print 'relative size %9.2fx' % (float(weave_size) / float(avg))
1044
print """bzr weave tool
1046
Experimental tool for weave algorithm.
1049
weave init WEAVEFILE
1050
Create an empty weave file
1051
weave get WEAVEFILE VERSION
1052
Write out specified version.
1053
weave check WEAVEFILE
1054
Check consistency of all versions.
1056
Display table of contents.
1057
weave add WEAVEFILE NAME [BASE...] < NEWTEXT
1058
Add NEWTEXT, with specified parent versions.
1059
weave annotate WEAVEFILE VERSION
1060
Display origin of each line.
1061
weave merge WEAVEFILE VERSION1 VERSION2 > OUT
1062
Auto-merge two versions and display conflicts.
1063
weave diff WEAVEFILE VERSION1 VERSION2
1064
Show differences between two versions.
1068
% weave init foo.weave
1070
% weave add foo.weave ver0 < foo.txt
1073
(create updated version)
1075
% weave get foo.weave 0 | diff -u - foo.txt
1076
% weave add foo.weave ver1 0 < foo.txt
1079
% weave get foo.weave 0 > foo.txt (create forked version)
1081
% weave add foo.weave ver2 0 < foo.txt
1084
% weave merge foo.weave 1 2 > foo.txt (merge them)
1085
% vi foo.txt (resolve conflicts)
1086
% weave add foo.weave merged 1 2 < foo.txt (commit merged version)
1098
# in case we're run directly from the subdirectory
1099
sys.path.append('..')
1101
from bzrlib.weavefile import write_weave, read_weave
1102
from bzrlib.progress import ProgressBar
1117
return read_weave(file(argv[2], 'rb'))
1123
# at the moment, based on everything in the file
1125
parents = map(int, argv[4:])
1126
lines = sys.stdin.readlines()
1127
ver = w.add(name, parents, lines)
1128
write_weave(w, file(argv[2], 'wb'))
1129
print 'added version %r %d' % (name, ver)
1132
if os.path.exists(fn):
1133
raise IOError("file exists")
1135
write_weave(w, file(fn, 'wb'))
1136
elif cmd == 'get': # get one version
1138
sys.stdout.writelines(w.get_iter(int(argv[3])))
1141
from difflib import unified_diff
1144
v1, v2 = map(int, argv[3:5])
1147
diff_gen = unified_diff(lines1, lines2,
1148
'%s version %d' % (fn, v1),
1149
'%s version %d' % (fn, v2))
1150
sys.stdout.writelines(diff_gen)
1152
elif cmd == 'annotate':
1154
# newline is added to all lines regardless; too hard to get
1155
# reasonable formatting otherwise
1157
for origin, text in w.annotate(int(argv[3])):
1158
text = text.rstrip('\r\n')
1160
print ' | %s' % (text)
1162
print '%5d | %s' % (origin, text)
1168
elif cmd == 'stats':
1169
weave_stats(argv[2], ProgressBar())
1171
elif cmd == 'check':
1176
print '%d versions ok' % w.num_versions()
1178
elif cmd == 'inclusions':
1180
print ' '.join(map(str, w.inclusions([int(argv[3])])))
1182
elif cmd == 'parents':
1184
print ' '.join(map(str, w._parents[int(argv[3])]))
1186
elif cmd == 'plan-merge':
1188
for state, line in w.plan_merge(int(argv[3]), int(argv[4])):
1190
print '%14s | %s' % (state, line),
1192
elif cmd == 'merge':
1194
p = w.plan_merge(int(argv[3]), int(argv[4]))
1195
sys.stdout.writelines(w.weave_merge(p))
1198
raise ValueError('unknown command %r' % cmd)
1202
def profile_main(argv):
1203
import tempfile, hotshot, hotshot.stats
1205
prof_f = tempfile.NamedTemporaryFile()
1207
prof = hotshot.Profile(prof_f.name)
1209
ret = prof.runcall(main, argv)
1212
stats = hotshot.stats.load(prof_f.name)
1214
stats.sort_stats('cumulative')
1215
## XXX: Might like to write to stderr or the trace file instead but
1216
## print_stats seems hardcoded to stdout
1217
stats.print_stats(20)
1222
def lsprofile_main(argv):
1223
from bzrlib.lsprof import profile
1224
ret,stats = profile(main, argv)
1230
if __name__ == '__main__':
1232
if '--profile' in sys.argv:
1234
args.remove('--profile')
1235
sys.exit(profile_main(args))
1236
elif '--lsprof' in sys.argv:
1238
args.remove('--lsprof')
1239
sys.exit(lsprofile_main(args))
1241
sys.exit(main(sys.argv))
1244
class InterWeave(InterVersionedFile):
1245
"""Optimised code paths for weave to weave operations."""
1247
_matching_file_factory = staticmethod(WeaveFile)
1250
def is_compatible(source, target):
1251
"""Be compatible with weaves."""
1253
return (isinstance(source, Weave) and
1254
isinstance(target, Weave))
1255
except AttributeError:
1258
def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
1259
"""See InterVersionedFile.join."""
1260
if self.target.versions() == []:
1262
self.target._copy_weave_content(self.source)
1265
self.target._join(self.source, pb, msg, version_ids, ignore_missing)
1266
except errors.WeaveParentMismatch:
1267
self.target._reweave(self.source, pb, msg)
1270
InterVersionedFile.register_optimiser(InterWeave)