57
61
# where the basis and destination are unchanged.
59
63
# FIXME: Sometimes we will be given a parents list for a revision
60
# that includes some redundant parents (i.e. already a parent of
61
# something in the list.) We should eliminate them. This can
64
# that includes some redundant parents (i.e. already a parent of
65
# something in the list.) We should eliminate them. This can
62
66
# be done fairly efficiently because the sequence numbers constrain
63
67
# the possible relationships.
65
69
# FIXME: the conflict markers should be *7* characters
67
71
from copy import copy
68
from io import BytesIO
72
from cStringIO import StringIO
72
from ..lazy_import import lazy_import
73
lazy_import(globals(), """
74
from breezy import tsort
80
from ..errors import (
81
RevisionAlreadyPresent,
84
from ..osutils import dirname, sha, sha_strings, split_lines
85
from ..revision import NULL_REVISION
86
from ..trace import mutter
87
from .versionedfile import (
93
UnavailableRepresentation,
96
from .weavefile import _read_weave_v5, write_weave_v5
99
class WeaveError(errors.BzrError):
101
_fmt = "Error in processing weave: %(msg)s"
103
def __init__(self, msg=None):
104
errors.BzrError.__init__(self)
108
class WeaveRevisionAlreadyPresent(WeaveError):
110
_fmt = "Revision {%(revision_id)s} already present in %(weave)s"
112
def __init__(self, revision_id, weave):
114
WeaveError.__init__(self)
115
self.revision_id = revision_id
119
class WeaveRevisionNotPresent(WeaveError):
121
_fmt = "Revision {%(revision_id)s} not present in %(weave)s"
123
def __init__(self, revision_id, weave):
124
WeaveError.__init__(self)
125
self.revision_id = revision_id
129
class WeaveFormatError(WeaveError):
131
_fmt = "Weave invariant violated: %(what)s"
133
def __init__(self, what):
134
WeaveError.__init__(self)
138
class WeaveParentMismatch(WeaveError):
140
_fmt = "Parents are mismatched between two revisions. %(msg)s"
143
class WeaveInvalidChecksum(WeaveError):
145
_fmt = "Text did not match its checksum: %(msg)s"
148
class WeaveTextDiffers(WeaveError):
150
_fmt = ("Weaves differ on text content. Revision:"
151
" {%(revision_id)s}, %(weave_a)s, %(weave_b)s")
153
def __init__(self, revision_id, weave_a, weave_b):
154
WeaveError.__init__(self)
155
self.revision_id = revision_id
156
self.weave_a = weave_a
157
self.weave_b = weave_b
160
class WeaveContentFactory(ContentFactory):
161
"""Content factory for streaming from weaves.
163
:seealso ContentFactory:
166
def __init__(self, version, weave):
167
"""Create a WeaveContentFactory for version from weave."""
168
ContentFactory.__init__(self)
169
self.sha1 = weave.get_sha1s([version])[version]
170
self.key = (version,)
171
parents = weave.get_parent_map([version])[version]
172
self.parents = tuple((parent,) for parent in parents)
173
self.storage_kind = 'fulltext'
176
def get_bytes_as(self, storage_kind):
177
if storage_kind == 'fulltext':
178
return self._weave.get_text(self.key[-1])
179
elif storage_kind in ('chunked', 'lines'):
180
return self._weave.get_lines(self.key[-1])
182
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
184
def iter_bytes_as(self, storage_kind):
185
if storage_kind in ('chunked', 'lines'):
186
return iter(self._weave.get_lines(self.key[-1]))
188
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
81
from bzrlib.trace import mutter
82
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
83
RevisionAlreadyPresent,
85
WeaveRevisionAlreadyPresent,
86
WeaveRevisionNotPresent,
88
import bzrlib.errors as errors
89
from bzrlib.osutils import sha_strings
90
import bzrlib.patiencediff
91
from bzrlib.symbol_versioning import (deprecated_method,
95
from bzrlib.tsort import topo_sort
96
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
97
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
191
100
class Weave(VersionedFile):
192
101
"""weave - versioned text file storage.
194
103
A Weave manages versions of line-based text files, keeping track
195
104
of the originating version for each line.
334
222
if not isinstance(other, Weave):
336
224
return self._parents == other._parents \
337
and self._weave == other._weave \
338
and self._sha1s == other._sha1s
225
and self._weave == other._weave \
226
and self._sha1s == other._sha1s
340
228
def __ne__(self, other):
341
229
return not self.__eq__(other)
231
@deprecated_method(zero_eight)
232
def idx_to_name(self, index):
233
"""Old public interface, the public interface is all names now."""
343
236
def _idx_to_name(self, version):
344
237
return self._names[version]
239
@deprecated_method(zero_eight)
240
def lookup(self, name):
241
"""Backwards compatibility thunk:
243
Return name, as name is valid in the api now, and spew deprecation
346
248
def _lookup(self, name):
347
249
"""Convert symbolic version name to index."""
348
if not self._allow_reserved:
349
self.check_not_reserved_id(name)
250
self.check_not_reserved_id(name)
351
252
return self._name_map[name]
353
254
raise RevisionNotPresent(name, self._weave_name)
256
@deprecated_method(zero_eight)
257
def iter_names(self):
258
"""Deprecated convenience function, please see VersionedFile.names()."""
259
return iter(self.names())
261
@deprecated_method(zero_eight)
263
"""See Weave.versions for the current api."""
264
return self.versions()
355
266
def versions(self):
356
267
"""See VersionedFile.versions."""
357
268
return self._names[:]
363
274
__contains__ = has_version
365
def get_record_stream(self, versions, ordering, include_delta_closure):
366
"""Get a stream of records for versions.
368
:param versions: The versions to include. Each version is a tuple
370
:param ordering: Either 'unordered' or 'topological'. A topologically
371
sorted stream has compression parents strictly before their
373
:param include_delta_closure: If True then the closure across any
374
compression parents will be included (in the opaque data).
375
:return: An iterator of ContentFactory objects, each of which is only
376
valid until the iterator is advanced.
378
versions = [version[-1] for version in versions]
379
if ordering == 'topological':
380
parents = self.get_parent_map(versions)
381
new_versions = tsort.topo_sort(parents)
382
new_versions.extend(set(versions).difference(set(parents)))
383
versions = new_versions
384
elif ordering == 'groupcompress':
385
parents = self.get_parent_map(versions)
386
new_versions = sort_groupcompress(parents)
387
new_versions.extend(set(versions).difference(set(parents)))
388
versions = new_versions
389
for version in versions:
391
yield WeaveContentFactory(version, self)
393
yield AbsentContentFactory((version,))
395
def get_parent_map(self, version_ids):
396
"""See VersionedFile.get_parent_map."""
276
def get_delta(self, version_id):
277
"""See VersionedFile.get_delta."""
278
return self.get_deltas([version_id])[version_id]
280
def get_deltas(self, version_ids):
281
"""See VersionedFile.get_deltas."""
282
version_ids = self.get_ancestry(version_ids)
398
283
for version_id in version_ids:
399
if version_id == NULL_REVISION:
404
map(self._idx_to_name,
405
self._parents[self._lookup(version_id)]))
406
except RevisionNotPresent:
284
if not self.has_version(version_id):
285
raise RevisionNotPresent(version_id, self)
286
# try extracting all versions; parallel extraction is used
287
nv = self.num_versions()
293
last_parent_lines = {}
295
parent_inclusions = {}
300
# its simplest to generate a full set of prepared variables.
302
name = self._names[i]
303
sha1s[name] = self.get_sha1(name)
304
parents_list = self.get_parents(name)
306
parent = parents_list[0]
307
parents[name] = parent
308
parent_inclusions[name] = inclusions[parent]
311
parent_inclusions[name] = set()
312
# we want to emit start, finish, replacement_length, replacement_lines tuples.
313
diff_hunks[name] = []
314
current_hunks[name] = [0, 0, 0, []] # #start, finish, repl_length, repl_tuples
315
parent_linenums[name] = 0
317
parent_noeols[name] = False
318
last_parent_lines[name] = None
319
new_inc = set([name])
320
for p in self._parents[i]:
321
new_inc.update(inclusions[self._idx_to_name(p)])
322
# debug only, known good so far.
323
#assert set(new_inc) == set(self.get_ancestry(name)), \
324
# 'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
325
inclusions[name] = new_inc
327
nlines = len(self._weave)
329
for lineno, inserted, deletes, line in self._walk_internal():
330
# a line is active in a version if:
331
# insert is in the versions inclusions
333
# deleteset & the versions inclusions is an empty set.
334
# so - if we have a included by mapping - version is included by
335
# children, we get a list of children to examine for deletes affect
336
# ing them, which is less than the entire set of children.
337
for version_id in version_ids:
338
# The active inclusion must be an ancestor,
339
# and no ancestors must have deleted this line,
340
# because we don't support resurrection.
341
parent_inclusion = parent_inclusions[version_id]
342
inclusion = inclusions[version_id]
343
parent_active = inserted in parent_inclusion and not (deletes & parent_inclusion)
344
version_active = inserted in inclusion and not (deletes & inclusion)
345
if not parent_active and not version_active:
346
# unrelated line of ancestry
408
result[version_id] = parents
348
elif parent_active and version_active:
350
parent_linenum = parent_linenums[version_id]
351
if current_hunks[version_id] != [parent_linenum, parent_linenum, 0, []]:
352
diff_hunks[version_id].append(tuple(current_hunks[version_id]))
354
current_hunks[version_id] = [parent_linenum, parent_linenum, 0, []]
355
parent_linenums[version_id] = parent_linenum
358
noeols[version_id] = True
361
elif parent_active and not version_active:
363
current_hunks[version_id][1] += 1
364
parent_linenums[version_id] += 1
365
last_parent_lines[version_id] = line
366
elif not parent_active and version_active:
368
# noeol only occurs at the end of a file because we
369
# diff linewise. We want to show noeol changes as a
370
# empty diff unless the actual eol-less content changed.
373
if last_parent_lines[version_id][-1] != '\n':
374
parent_noeols[version_id] = True
375
except (TypeError, IndexError):
378
if theline[-1] != '\n':
379
noeols[version_id] = True
383
parent_should_go = False
385
if parent_noeols[version_id] == noeols[version_id]:
386
# no noeol toggle, so trust the weaves statement
387
# that this line is changed.
389
if parent_noeols[version_id]:
390
theline = theline + '\n'
391
elif parent_noeols[version_id]:
392
# parent has no eol, we do:
393
# our line is new, report as such..
395
elif noeols[version_id]:
396
# append a eol so that it looks like
398
theline = theline + '\n'
399
if parents[version_id] is not None:
400
#if last_parent_lines[version_id] is not None:
401
parent_should_go = True
402
if last_parent_lines[version_id] != theline:
405
#parent_should_go = False
407
current_hunks[version_id][2] += 1
408
current_hunks[version_id][3].append((inserted, theline))
410
# last hunk last parent line is not eaten
411
current_hunks[version_id][1] -= 1
412
if current_hunks[version_id][1] < 0:
413
current_hunks[version_id][1] = 0
414
# import pdb;pdb.set_trace()
415
# assert current_hunks[version_id][1] >= 0
419
version = self._idx_to_name(i)
420
if current_hunks[version] != [0, 0, 0, []]:
421
diff_hunks[version].append(tuple(current_hunks[version]))
423
for version_id in version_ids:
424
result[version_id] = (
428
diff_hunks[version_id],
411
def get_parents_with_ghosts(self, version_id):
412
raise NotImplementedError(self.get_parents_with_ghosts)
414
def insert_record_stream(self, stream):
415
"""Insert a record stream into this versioned file.
417
:param stream: A stream of records to insert.
419
:seealso VersionedFile.get_record_stream:
422
for record in stream:
423
# Raise an error when a record is missing.
424
if record.storage_kind == 'absent':
425
raise RevisionNotPresent([record.key[0]], self)
426
# adapt to non-tuple interface
427
parents = [parent[0] for parent in record.parents]
428
if record.storage_kind in ('fulltext', 'chunked', 'lines'):
430
record.key[0], parents,
431
record.get_bytes_as('lines'))
433
adapter_key = record.storage_kind, 'lines'
435
adapter = adapters[adapter_key]
437
adapter_factory = adapter_registry.get(adapter_key)
438
adapter = adapter_factory(self)
439
adapters[adapter_key] = adapter
440
lines = adapter.get_bytes(record, 'lines')
442
self.add_lines(record.key[0], parents, lines)
443
except RevisionAlreadyPresent:
432
def get_parents(self, version_id):
433
"""See VersionedFile.get_parent."""
434
return map(self._idx_to_name, self._parents[self._lookup(version_id)])
446
436
def _check_repeated_add(self, name, parents, text, sha1):
447
437
"""Check that a duplicated add is OK.
451
441
idx = self._lookup(name)
452
442
if sorted(self._parents[idx]) != sorted(parents) \
453
or sha1 != self._sha1s[idx]:
443
or sha1 != self._sha1s[idx]:
454
444
raise RevisionAlreadyPresent(name, self._weave_name)
457
def _add_lines(self, version_id, parents, lines, parent_texts,
458
left_matching_blocks, nostore_sha, random_id,
447
@deprecated_method(zero_eight)
448
def add_identical(self, old_rev_id, new_rev_id, parents):
449
"""Please use Weave.clone_text now."""
450
return self.clone_text(new_rev_id, old_rev_id, parents)
452
def _add_lines(self, version_id, parents, lines, parent_texts):
460
453
"""See VersionedFile.add_lines."""
461
idx = self._add(version_id, lines, list(map(self._lookup, parents)),
462
nostore_sha=nostore_sha)
463
return sha_strings(lines), sum(map(len, lines)), idx
465
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
454
return self._add(version_id, lines, map(self._lookup, parents))
456
@deprecated_method(zero_eight)
457
def add(self, name, parents, text, sha1=None):
458
"""See VersionedFile.add_lines for the non deprecated api."""
459
return self._add(name, text, map(self._maybe_lookup, parents), sha1)
461
def _add(self, version_id, lines, parents, sha1=None):
466
462
"""Add a single text on top of the weave.
468
464
Returns the index number of the newly added version.
471
467
Symbolic name for this version.
472
468
(Typically the revision-id of the revision that added it.)
473
If None, a name will be allocated based on the hash. (sha1:SHAHASH)
476
471
List or set of direct parent version numbers.
479
474
Sequence of lines to be added in the new version.
481
:param nostore_sha: See VersionedFile.add_lines.
477
assert isinstance(version_id, basestring)
483
478
self._check_lines_not_unicode(lines)
484
479
self._check_lines_are_lines(lines)
486
481
sha1 = sha_strings(lines)
487
if sha1 == nostore_sha:
488
raise ExistingContent
489
if version_id is None:
490
version_id = b"sha1:" + sha1
491
482
if version_id in self._name_map:
492
483
return self._check_repeated_add(version_id, parents, lines, sha1)
494
485
self._check_versions(parents)
486
## self._check_lines(lines)
495
487
new_version = len(self._parents)
497
# if we abort after here the (in-memory) weave will be corrupt because
498
# only some fields are updated
489
# if we abort after here the (in-memory) weave will be corrupt because only
490
# some fields are updated
499
491
# XXX: FIXME implement a succeed-or-fail of the rest of this routine.
500
492
# - Robert Collins 20060226
501
493
self._parents.append(parents[:])
543
537
# matches the end of the file? the current code says it's the
544
538
# last line of the weave?
546
# print 'basis_lines:', basis_lines
547
# print 'new_lines: ', lines
540
#print 'basis_lines:', basis_lines
541
#print 'new_lines: ', lines
549
543
s = self._matcher(None, basis_lines, lines)
551
545
# offset gives the number of lines that have been inserted
552
# into the weave up to the current point; if the original edit
553
# instruction says to change line A then we actually change (A+offset)
546
# into the weave up to the current point; if the original edit instruction
547
# says to change line A then we actually change (A+offset)
556
550
for tag, i1, i2, j1, j2 in s.get_opcodes():
557
# i1,i2 are given in offsets within basis_lines; we need to map
558
# them back to offsets within the entire weave print 'raw match',
559
# tag, i1, i2, j1, j2
551
# i1,i2 are given in offsets within basis_lines; we need to map them
552
# back to offsets within the entire weave
553
#print 'raw match', tag, i1, i2, j1, j2
560
554
if tag == 'equal':
562
557
i1 = basis_lineno[i1]
563
558
i2 = basis_lineno[i2]
560
assert 0 <= j1 <= j2 <= len(lines)
562
#print tag, i1, i2, j1, j2
564
564
# the deletion and insertion are handled separately.
565
565
# first delete the region.
567
self._weave.insert(i1 + offset, (b'[', new_version))
568
self._weave.insert(i2 + offset + 1, (b']', new_version))
567
self._weave.insert(i1+offset, ('[', new_version))
568
self._weave.insert(i2+offset+1, (']', new_version))
573
573
# i2; we want to insert after this region to make sure
574
574
# we don't destroy ourselves
576
self._weave[i:i] = ([(b'{', new_version)] +
576
self._weave[i:i] = ([('{', new_version)]
579
579
offset += 2 + (j2 - j1)
580
580
return new_version
582
def _clone_text(self, new_version_id, old_version_id, parents):
583
"""See VersionedFile.clone_text."""
584
old_lines = self.get_text(old_version_id)
585
self.add_lines(new_version_id, parents, old_lines)
582
587
def _inclusions(self, versions):
583
588
"""Return set of all ancestors of given version(s)."""
584
589
if not len(versions):
586
591
i = set(versions)
587
for v in range(max(versions), 0, -1):
592
for v in xrange(max(versions), 0, -1):
589
594
# include all its parents
590
595
i.update(self._parents[v])
593
def get_ancestry(self, version_ids, topo_sorted=True):
597
## except IndexError:
598
## raise ValueError("version %d not present in weave" % v)
600
@deprecated_method(zero_eight)
601
def inclusions(self, version_ids):
602
"""Deprecated - see VersionedFile.get_ancestry for the replacement."""
605
if isinstance(version_ids[0], int):
606
return [self._idx_to_name(v) for v in self._inclusions(version_ids)]
608
return self.get_ancestry(version_ids)
610
def get_ancestry(self, version_ids):
594
611
"""See VersionedFile.get_ancestry."""
595
if isinstance(version_ids, bytes):
612
if isinstance(version_ids, basestring):
596
613
version_ids = [version_ids]
597
614
i = self._inclusions([self._lookup(v) for v in version_ids])
598
return set(self._idx_to_name(v) for v in i)
615
return [self._idx_to_name(v) for v in i]
617
def _check_lines(self, text):
618
if not isinstance(text, list):
619
raise ValueError("text should be a list, not %s" % type(text))
622
if not isinstance(l, basestring):
623
raise ValueError("text line should be a string or unicode, not %s"
600
628
def _check_versions(self, indexes):
601
629
"""Check everything in the sequence of indexes is valid"""
742
# 449 0 4474.6820 2356.5590 breezy.weave:556(_extract)
786
WFE = WeaveFormatError
789
# 449 0 4474.6820 2356.5590 bzrlib.weave:556(_extract)
743
790
# +285282 0 1676.8040 1676.8040 +<isinstance>
744
791
# 1.6 seconds in 'isinstance'.
745
792
# changing the first isinstance:
746
# 449 0 2814.2660 1577.1760 breezy.weave:556(_extract)
793
# 449 0 2814.2660 1577.1760 bzrlib.weave:556(_extract)
747
794
# +140414 0 762.8050 762.8050 +<isinstance>
748
795
# note that the inline time actually dropped (less function calls)
749
796
# and total processing time was halved.
750
797
# we're still spending ~1/4 of the method in isinstance though.
751
798
# so lets hard code the acceptable string classes we expect:
752
# 449 0 1202.9420 786.2930 breezy.weave:556(_extract)
753
# +71352 0 377.5560 377.5560 +<method 'append' of 'list'
799
# 449 0 1202.9420 786.2930 bzrlib.weave:556(_extract)
800
# +71352 0 377.5560 377.5560 +<method 'append' of 'list'
755
802
# yay, down to ~1/4 the initial extract time, and our inline time
756
803
# has shrunk again, with isinstance no longer dominating.
757
804
# tweaking the stack inclusion test to use a set gives:
758
# 449 0 1122.8030 713.0080 breezy.weave:556(_extract)
759
# +71352 0 354.9980 354.9980 +<method 'append' of 'list'
805
# 449 0 1122.8030 713.0080 bzrlib.weave:556(_extract)
806
# +71352 0 354.9980 354.9980 +<method 'append' of 'list'
761
808
# - a 5% win, or possibly just noise. However with large istacks that
762
# 'in' test could dominate, so I'm leaving this change in place - when
763
# its fast enough to consider profiling big datasets we can review.
809
# 'in' test could dominate, so I'm leaving this change in place -
810
# when its fast enough to consider profiling big datasets we can review.
765
815
for l in self._weave:
766
816
if l.__class__ == tuple:
773
824
iset.remove(istack.pop())
775
826
if v in included:
778
831
if v in included:
781
raise AssertionError()
835
assert l.__class__ in (str, unicode)
783
836
if isactive is None:
784
isactive = (not dset) and istack and (
785
istack[-1] in included)
837
isactive = (not dset) and istack and (istack[-1] in included)
787
839
result.append((istack[-1], lineno, l))
790
842
raise WeaveFormatError("unclosed insertion blocks "
791
"at end of weave: %s" % istack)
843
"at end of weave: %s" % istack)
793
raise WeaveFormatError(
794
"unclosed deletion blocks at end of weave: %s" % dset)
845
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
849
@deprecated_method(zero_eight)
850
def get_iter(self, name_or_index):
851
"""Deprecated, please do not use. Lookups are not not needed.
853
Please use get_lines now.
855
return iter(self.get_lines(self._maybe_lookup(name_or_index)))
857
@deprecated_method(zero_eight)
858
def maybe_lookup(self, name_or_index):
859
"""Deprecated, please do not use. Lookups are not not needed."""
860
return self._maybe_lookup(name_or_index)
797
862
def _maybe_lookup(self, name_or_index):
798
863
"""Convert possible symbolic name to index, or pass through indexes.
800
865
NOT FOR PUBLIC USE.
802
# GZ 2017-04-01: This used to check for long as well, but I don't think
803
# there are python implementations with sys.maxsize > sys.maxint
804
if isinstance(name_or_index, int):
867
if isinstance(name_or_index, (int, long)):
805
868
return name_or_index
807
870
return self._lookup(name_or_index)
872
@deprecated_method(zero_eight)
873
def get(self, version_id):
874
"""Please use either Weave.get_text or Weave.get_lines as desired."""
875
return self.get_lines(version_id)
809
877
def get_lines(self, version_id):
810
878
"""See VersionedFile.get_lines()."""
811
879
int_index = self._maybe_lookup(version_id)
812
result = [line for (origin, lineno, line)
813
in self._extract([int_index])]
880
result = [line for (origin, lineno, line) in self._extract([int_index])]
814
881
expected_sha1 = self._sha1s[int_index]
815
882
measured_sha1 = sha_strings(result)
816
883
if measured_sha1 != expected_sha1:
817
raise WeaveInvalidChecksum(
818
'file %s, revision %s, expected: %s, measured %s'
819
% (self._weave_name, version_id,
820
expected_sha1, measured_sha1))
823
def get_sha1s(self, version_ids):
824
"""See VersionedFile.get_sha1s()."""
826
for v in version_ids:
827
result[v] = self._sha1s[self._lookup(v)]
884
raise errors.WeaveInvalidChecksum(
885
'file %s, revision %s, expected: %s, measured %s'
886
% (self._weave_name, version_id,
887
expected_sha1, measured_sha1))
890
def get_sha1(self, version_id):
891
"""See VersionedFile.get_sha1()."""
892
return self._sha1s[self._lookup(version_id)]
894
@deprecated_method(zero_eight)
895
def numversions(self):
896
"""How many versions are in this weave?
898
Deprecated in favour of num_versions.
900
return self.num_versions()
830
902
def num_versions(self):
831
903
"""How many versions are in this weave?"""
832
return len(self._parents)
904
l = len(self._parents)
905
assert l == len(self._sha1s)
834
908
__len__ = num_versions
883
954
# The active inclusion must be an ancestor,
884
955
# and no ancestors must have deleted this line,
885
956
# because we don't support resurrection.
886
if ((insert in name_inclusions) and
887
not (deleteset & name_inclusions)):
957
if (insert in name_inclusions) and not (deleteset & name_inclusions):
888
958
sha1s[name].update(line)
890
960
for i in range(nv):
891
961
version = self._idx_to_name(i)
892
hd = sha1s[version].hexdigest().encode()
962
hd = sha1s[version].hexdigest()
893
963
expected = self._sha1s[i]
894
964
if hd != expected:
895
raise WeaveInvalidChecksum(
896
"mismatched sha1 for version %s: "
897
"got %s, expected %s"
898
% (version, hd, expected))
965
raise errors.WeaveInvalidChecksum(
966
"mismatched sha1 for version %s: "
967
"got %s, expected %s"
968
% (version, hd, expected))
900
970
# TODO: check insertions are properly nested, that there are
901
971
# no lines outside of insertion blocks, that deletions are
902
972
# properly paired, etc.
974
def _join(self, other, pb, msg, version_ids, ignore_missing):
975
"""Worker routine for join()."""
976
if not other.versions():
977
return # nothing to update, easy
980
# versions is never none, InterWeave checks this.
983
# two loops so that we do not change ourselves before verifying it
985
# work through in index order to make sure we get all dependencies
988
# get the selected versions only that are in other.versions.
989
version_ids = set(other.versions()).intersection(set(version_ids))
990
# pull in the referenced graph.
991
version_ids = other.get_ancestry(version_ids)
992
pending_graph = [(version, other.get_parents(version)) for
993
version in version_ids]
994
for name in topo_sort(pending_graph):
995
other_idx = other._name_map[name]
996
# returns True if we have it, False if we need it.
997
if not self._check_version_consistent(other, other_idx, name):
998
names_to_join.append((other_idx, name))
1007
for other_idx, name in names_to_join:
1008
# TODO: If all the parents of the other version are already
1009
# present then we can avoid some work by just taking the delta
1010
# and adjusting the offsets.
1011
new_parents = self._imported_parents(other, other_idx)
1012
sha1 = other._sha1s[other_idx]
1017
pb.update(msg, merged, len(names_to_join))
1019
lines = other.get_lines(other_idx)
1020
self._add(name, lines, new_parents, sha1)
1022
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
1023
merged, processed, self._weave_name, time.time()-time0))
904
1025
def _imported_parents(self, other, other_idx):
905
1026
"""Return list of parents in self corresponding to indexes in other."""
906
1027
new_parents = []
922
1043
* the same direct parents (by name, not index, and disregarding
925
1046
If present & correct return True;
926
if not present in self return False;
1047
if not present in self return False;
927
1048
if inconsistent raise error."""
928
1049
this_idx = self._name_map.get(name, -1)
929
1050
if this_idx != -1:
930
1051
if self._sha1s[this_idx] != other._sha1s[other_idx]:
931
raise WeaveTextDiffers(name, self, other)
1052
raise errors.WeaveTextDiffers(name, self, other)
932
1053
self_parents = self._parents[this_idx]
933
1054
other_parents = other._parents[other_idx]
934
n1 = {self._names[i] for i in self_parents}
935
n2 = {other._names[i] for i in other_parents}
1055
n1 = set([self._names[i] for i in self_parents])
1056
n2 = set([other._names[i] for i in other_parents])
936
1057
if not self._compatible_parents(n1, n2):
937
raise WeaveParentMismatch(
938
"inconsistent parents "
1058
raise WeaveParentMismatch("inconsistent parents "
939
1059
"for version {%s}: %s vs %s" % (name, n1, n2))
941
1061
return True # ok!
1065
@deprecated_method(zero_eight)
1066
def reweave(self, other, pb=None, msg=None):
1067
"""reweave has been superseded by plain use of join."""
1068
return self.join(other, pb, msg)
945
1070
def _reweave(self, other, pb, msg):
946
1071
"""Reweave self with other - internal helper for join().
963
1088
"""A WeaveFile represents a Weave on disk and writes on change."""
965
1090
WEAVE_SUFFIX = '.weave'
967
def __init__(self, name, transport, filemode=None, create=False,
968
access_mode='w', get_scope=None):
1092
def __init__(self, name, transport, filemode=None, create=False, access_mode='w'):
969
1093
"""Create a WeaveFile.
971
1095
:param create: If not True, only open an existing knit.
973
super(WeaveFile, self).__init__(name, access_mode, get_scope=get_scope,
974
allow_reserved=False)
1097
super(WeaveFile, self).__init__(name, access_mode)
975
1098
self._transport = transport
976
1099
self._filemode = filemode
978
f = self._transport.get(name + WeaveFile.WEAVE_SUFFIX)
979
_read_weave_v5(BytesIO(f.read()), self)
1101
_read_weave_v5(self._transport.get(name + WeaveFile.WEAVE_SUFFIX), self)
980
1102
except errors.NoSuchFile:
983
1105
# new file, save it
986
def _add_lines(self, version_id, parents, lines, parent_texts,
987
left_matching_blocks, nostore_sha, random_id,
1108
def _add_lines(self, version_id, parents, lines, parent_texts):
989
1109
"""Add a version and save the weave."""
990
1110
self.check_not_reserved_id(version_id)
991
result = super(WeaveFile, self)._add_lines(
992
version_id, parents, lines, parent_texts, left_matching_blocks,
993
nostore_sha, random_id, check_content)
1111
result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
1116
def _clone_text(self, new_version_id, old_version_id, parents):
1117
"""See VersionedFile.clone_text."""
1118
super(WeaveFile, self)._clone_text(new_version_id, old_version_id, parents)
997
1121
def copy_to(self, name, transport):
998
1122
"""See VersionedFile.copy_to()."""
999
1123
# as we are all in memory always, just serialise to the new place.
1001
1125
write_weave_v5(self, sio)
1003
1127
transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
1129
def create_empty(self, name, transport, filemode=None):
1130
return WeaveFile(name, transport, filemode, create=True)
1005
1132
def _save(self):
1006
1133
"""Save the weave."""
1007
1134
self._check_write_ok()
1009
1136
write_weave_v5(self, sio)
1011
bytes = sio.getvalue()
1012
path = self._weave_name + WeaveFile.WEAVE_SUFFIX
1014
self._transport.put_bytes(path, bytes, self._filemode)
1015
except errors.NoSuchFile:
1016
self._transport.mkdir(dirname(path))
1017
self._transport.put_bytes(path, bytes, self._filemode)
1138
self._transport.put_file(self._weave_name + WeaveFile.WEAVE_SUFFIX,
1020
1143
def get_suffixes():
1021
1144
"""See VersionedFile.get_suffixes()."""
1022
1145
return [WeaveFile.WEAVE_SUFFIX]
1024
def insert_record_stream(self, stream):
1025
super(WeaveFile, self).insert_record_stream(stream)
1147
def join(self, other, pb=None, msg=None, version_ids=None,
1148
ignore_missing=False):
1149
"""Join other into self and save."""
1150
super(WeaveFile, self).join(other, pb, msg, version_ids, ignore_missing)
1154
@deprecated_function(zero_eight)
1155
def reweave(wa, wb, pb=None, msg=None):
1156
"""reweaving is deprecation, please just use weave.join()."""
1157
_reweave(wa, wb, pb, msg)
1029
1159
def _reweave(wa, wb, pb=None, msg=None):
1030
1160
"""Combine two weaves and return the result.
1032
This works even if a revision R has different parents in
1162
This works even if a revision R has different parents in
1033
1163
wa and wb. In the resulting weave all the parents are given.
1035
This is done by just building up a new weave, maintaining ordering
1165
This is done by just building up a new weave, maintaining ordering
1036
1166
of the versions in the two inputs. More efficient approaches
1037
might be possible but it should only be necessary to do
1038
this operation rarely, when a new previously ghost version is
1167
might be possible but it should only be necessary to do
1168
this operation rarely, when a new previously ghost version is
1041
1171
:param pb: An optional progress bar, indicating how far done we are
1042
1172
:param msg: An optional message for the progress
1176
queue_a = range(wa.num_versions())
1177
queue_b = range(wb.num_versions())
1045
1178
# first determine combined parents of all versions
1046
1179
# map from version name -> all parent names
1047
1180
combined_parents = _reweave_parent_graphs(wa, wb)
1048
1181
mutter("combined parents: %r", combined_parents)
1049
order = tsort.topo_sort(combined_parents.items())
1182
order = topo_sort(combined_parents.iteritems())
1050
1183
mutter("order to reweave: %r", order)
1052
1185
if pb and not msg:
1083
1215
p = combined.setdefault(name, set())
1084
1216
p.update(map(weave._idx_to_name, weave._parents[idx]))
1085
1217
return combined
1221
"""Show the weave's table-of-contents"""
1222
print '%6s %50s %10s %10s' % ('ver', 'name', 'sha1', 'parents')
1223
for i in (6, 50, 10, 10):
1226
for i in range(w.num_versions()):
1229
parent_str = ' '.join(map(str, w._parents[i]))
1230
print '%6d %-50.50s %10.10s %s' % (i, name, sha1, parent_str)
1234
def weave_stats(weave_file, pb):
1235
from bzrlib.weavefile import read_weave
1237
wf = file(weave_file, 'rb')
1239
# FIXME: doesn't work on pipes
1240
weave_size = wf.tell()
1244
for i in range(vers):
1245
pb.update('checking sizes', i, vers)
1246
for origin, lineno, line in w._extract([i]):
1251
print 'versions %9d' % vers
1252
print 'weave file %9d bytes' % weave_size
1253
print 'total contents %9d bytes' % total
1254
print 'compression ratio %9.2fx' % (float(total) / float(weave_size))
1257
print 'average size %9d bytes' % avg
1258
print 'relative size %9.2fx' % (float(weave_size) / float(avg))
1262
print """bzr weave tool
1264
Experimental tool for weave algorithm.
1267
weave init WEAVEFILE
1268
Create an empty weave file
1269
weave get WEAVEFILE VERSION
1270
Write out specified version.
1271
weave check WEAVEFILE
1272
Check consistency of all versions.
1274
Display table of contents.
1275
weave add WEAVEFILE NAME [BASE...] < NEWTEXT
1276
Add NEWTEXT, with specified parent versions.
1277
weave annotate WEAVEFILE VERSION
1278
Display origin of each line.
1279
weave merge WEAVEFILE VERSION1 VERSION2 > OUT
1280
Auto-merge two versions and display conflicts.
1281
weave diff WEAVEFILE VERSION1 VERSION2
1282
Show differences between two versions.
1286
% weave init foo.weave
1288
% weave add foo.weave ver0 < foo.txt
1291
(create updated version)
1293
% weave get foo.weave 0 | diff -u - foo.txt
1294
% weave add foo.weave ver1 0 < foo.txt
1297
% weave get foo.weave 0 > foo.txt (create forked version)
1299
% weave add foo.weave ver2 0 < foo.txt
1302
% weave merge foo.weave 1 2 > foo.txt (merge them)
1303
% vi foo.txt (resolve conflicts)
1304
% weave add foo.weave merged 1 2 < foo.txt (commit merged version)
1316
# in case we're run directly from the subdirectory
1317
sys.path.append('..')
1319
from bzrlib.weavefile import write_weave, read_weave
1320
from bzrlib.progress import ProgressBar
1335
return read_weave(file(argv[2], 'rb'))
1341
# at the moment, based on everything in the file
1343
parents = map(int, argv[4:])
1344
lines = sys.stdin.readlines()
1345
ver = w.add(name, parents, lines)
1346
write_weave(w, file(argv[2], 'wb'))
1347
print 'added version %r %d' % (name, ver)
1350
if os.path.exists(fn):
1351
raise IOError("file exists")
1353
write_weave(w, file(fn, 'wb'))
1354
elif cmd == 'get': # get one version
1356
sys.stdout.writelines(w.get_iter(int(argv[3])))
1361
v1, v2 = map(int, argv[3:5])
1364
diff_gen = bzrlib.patiencediff.unified_diff(lines1, lines2,
1365
'%s version %d' % (fn, v1),
1366
'%s version %d' % (fn, v2))
1367
sys.stdout.writelines(diff_gen)
1369
elif cmd == 'annotate':
1371
# newline is added to all lines regardless; too hard to get
1372
# reasonable formatting otherwise
1374
for origin, text in w.annotate(int(argv[3])):
1375
text = text.rstrip('\r\n')
1377
print ' | %s' % (text)
1379
print '%5d | %s' % (origin, text)
1385
elif cmd == 'stats':
1386
weave_stats(argv[2], ProgressBar())
1388
elif cmd == 'check':
1393
print '%d versions ok' % w.num_versions()
1395
elif cmd == 'inclusions':
1397
print ' '.join(map(str, w.inclusions([int(argv[3])])))
1399
elif cmd == 'parents':
1401
print ' '.join(map(str, w._parents[int(argv[3])]))
1403
elif cmd == 'plan-merge':
1404
# replaced by 'bzr weave-plan-merge'
1406
for state, line in w.plan_merge(int(argv[3]), int(argv[4])):
1408
print '%14s | %s' % (state, line),
1409
elif cmd == 'merge':
1410
# replaced by 'bzr weave-merge-text'
1412
p = w.plan_merge(int(argv[3]), int(argv[4]))
1413
sys.stdout.writelines(w.weave_merge(p))
1415
raise ValueError('unknown command %r' % cmd)
1418
if __name__ == '__main__':
1420
sys.exit(main(sys.argv))
1423
class InterWeave(InterVersionedFile):
1424
"""Optimised code paths for weave to weave operations."""
1426
_matching_file_from_factory = staticmethod(WeaveFile)
1427
_matching_file_to_factory = staticmethod(WeaveFile)
1430
def is_compatible(source, target):
1431
"""Be compatible with weaves."""
1433
return (isinstance(source, Weave) and
1434
isinstance(target, Weave))
1435
except AttributeError:
1438
def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
1439
"""See InterVersionedFile.join."""
1440
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
1441
if self.target.versions() == [] and version_ids is None:
1442
self.target._copy_weave_content(self.source)
1445
self.target._join(self.source, pb, msg, version_ids, ignore_missing)
1446
except errors.WeaveParentMismatch:
1447
self.target._reweave(self.source, pb, msg)
1450
InterVersionedFile.register_optimiser(InterWeave)