1
# Copyright (C) 2005, 2009 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
# Author: Martin Pool <mbp@canonical.com>
19
"""Weave - storage of related text file versions"""
21
from __future__ import absolute_import
23
# XXX: If we do weaves this way, will a merge still behave the same
24
# way if it's done in a different order? That's a pretty desirable
27
# TODO: Nothing here so far assumes the lines are really \n newlines,
28
# rather than being split up in some other way. We could accommodate
29
# binaries, perhaps by naively splitting on \n or perhaps using
30
# something like a rolling checksum.
32
# TODO: End marker for each version so we can stop reading?
34
# TODO: Check that no insertion occurs inside a deletion that was
35
# active in the version of the insertion.
37
# TODO: In addition to the SHA-1 check, perhaps have some code that
38
# checks structural constraints of the weave: ie that insertions are
39
# properly nested, that there is no text outside of an insertion, that
40
# insertions or deletions are not repeated, etc.
42
# TODO: Parallel-extract that passes back each line along with a
43
# description of which revisions include it. Nice for checking all
44
# shas or calculating stats in parallel.
46
# TODO: Using a single _extract routine and then processing the output
47
# is probably inefficient. It's simple enough that we can afford to
48
# have slight specializations for different ways its used: annotate,
49
# basis for add, get, etc.
51
# TODO: Probably the API should work only in names to hide the integer
52
# indexes from the user.
54
# TODO: Is there any potential performance win by having an add()
55
# variant that is passed a pre-cooked version of the single basis
58
# TODO: Reweave can possibly be made faster by remembering diffs
59
# where the basis and destination are unchanged.
61
# FIXME: Sometimes we will be given a parents list for a revision
62
# that includes some redundant parents (i.e. already a parent of
63
# something in the list.) We should eliminate them. This can
64
# be done fairly efficiently because the sequence numbers constrain
65
# the possible relationships.
67
# FIXME: the conflict markers should be *7* characters
72
from ..lazy_import import lazy_import
73
lazy_import(globals(), """
74
from breezy import tsort
80
from ..errors import (
81
RevisionAlreadyPresent,
83
UnavailableRepresentation,
85
from ..osutils import dirname, sha, sha_strings, split_lines
86
from .. import patiencediff
87
from ..revision import NULL_REVISION
88
from ..sixish import (
91
from ..trace import mutter
92
from .versionedfile import (
99
from .weavefile import _read_weave_v5, write_weave_v5
102
class WeaveError(errors.BzrError):
104
_fmt = "Error in processing weave: %(msg)s"
106
def __init__(self, msg=None):
107
errors.BzrError.__init__(self)
111
class WeaveRevisionAlreadyPresent(WeaveError):
113
_fmt = "Revision {%(revision_id)s} already present in %(weave)s"
115
def __init__(self, revision_id, weave):
117
WeaveError.__init__(self)
118
self.revision_id = revision_id
122
class WeaveRevisionNotPresent(WeaveError):
124
_fmt = "Revision {%(revision_id)s} not present in %(weave)s"
126
def __init__(self, revision_id, weave):
127
WeaveError.__init__(self)
128
self.revision_id = revision_id
132
class WeaveFormatError(WeaveError):
134
_fmt = "Weave invariant violated: %(what)s"
136
def __init__(self, what):
137
WeaveError.__init__(self)
141
class WeaveParentMismatch(WeaveError):
143
_fmt = "Parents are mismatched between two revisions. %(msg)s"
146
class WeaveInvalidChecksum(WeaveError):
148
_fmt = "Text did not match its checksum: %(msg)s"
151
class WeaveTextDiffers(WeaveError):
153
_fmt = ("Weaves differ on text content. Revision:"
154
" {%(revision_id)s}, %(weave_a)s, %(weave_b)s")
156
def __init__(self, revision_id, weave_a, weave_b):
157
WeaveError.__init__(self)
158
self.revision_id = revision_id
159
self.weave_a = weave_a
160
self.weave_b = weave_b
163
class WeaveContentFactory(ContentFactory):
164
"""Content factory for streaming from weaves.
166
:seealso ContentFactory:
169
def __init__(self, version, weave):
170
"""Create a WeaveContentFactory for version from weave."""
171
ContentFactory.__init__(self)
172
self.sha1 = weave.get_sha1s([version])[version]
173
self.key = (version,)
174
parents = weave.get_parent_map([version])[version]
175
self.parents = tuple((parent,) for parent in parents)
176
self.storage_kind = 'fulltext'
179
def get_bytes_as(self, storage_kind):
180
if storage_kind == 'fulltext':
181
return self._weave.get_text(self.key[-1])
182
elif storage_kind == 'chunked':
183
return self._weave.get_lines(self.key[-1])
185
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
188
class Weave(VersionedFile):
189
"""weave - versioned text file storage.
191
A Weave manages versions of line-based text files, keeping track
192
of the originating version for each line.
194
To clients the "lines" of the file are represented as a list of strings.
195
These strings will typically have terminal newline characters, but
196
this is not required. In particular files commonly do not have a newline
197
at the end of the file.
199
Texts can be identified in either of two ways:
201
* a nonnegative index number.
203
* a version-id string.
205
Typically the index number will be valid only inside this weave and
206
the version-id is used to reference it in the larger world.
208
The weave is represented as a list mixing edit instructions and
209
literal text. Each entry in _weave can be either a string (or
210
unicode), or a tuple. If a string, it means that the given line
211
should be output in the currently active revisions.
213
If a tuple, it gives a processing instruction saying in which
214
revisions the enclosed lines are active. The tuple has the form
215
(instruction, version).
217
The instruction can be '{' or '}' for an insertion block, and '['
218
and ']' for a deletion block respectively. The version is the
219
integer version index. There is no replace operator, only deletes
220
and inserts. For '}', the end of an insertion, there is no
221
version parameter because it always closes the most recently
226
* A later version can delete lines that were introduced by any
227
number of ancestor versions; this implies that deletion
228
instructions can span insertion blocks without regard to the
229
insertion block's nesting.
231
* Similarly, deletions need not be properly nested with regard to
232
each other, because they might have been generated by
233
independent revisions.
235
* Insertions are always made by inserting a new bracketed block
236
into a single point in the previous weave. This implies they
237
can nest but not overlap, and the nesting must always have later
238
insertions on the inside.
240
* It doesn't seem very useful to have an active insertion
241
inside an inactive insertion, but it might happen.
243
* Therefore, all instructions are always"considered"; that
244
is passed onto and off the stack. An outer inactive block
245
doesn't disable an inner block.
247
* Lines are enabled if the most recent enclosing insertion is
248
active and none of the enclosing deletions are active.
250
* There is no point having a deletion directly inside its own
251
insertion; you might as well just not write it. And there
252
should be no way to get an earlier version deleting a later
256
Text of the weave; list of control instruction tuples and strings.
259
List of parents, indexed by version number.
260
It is only necessary to store the minimal set of parents for
261
each version; the parent's parents are implied.
264
List of hex SHA-1 of each version.
267
List of symbolic names for each version. Each should be unique.
270
For each name, the version number.
273
Descriptive name of this weave; typically the filename if known.
277
__slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map',
278
'_weave_name', '_matcher', '_allow_reserved']
280
def __init__(self, weave_name=None, access_mode='w', matcher=None,
281
get_scope=None, allow_reserved=False):
284
:param get_scope: A callable that returns an opaque object to be used
285
for detecting when this weave goes out of scope (should stop
286
answering requests or allowing mutation).
288
super(Weave, self).__init__()
294
self._weave_name = weave_name
296
self._matcher = patiencediff.PatienceSequenceMatcher
298
self._matcher = matcher
299
if get_scope is None:
302
self._get_scope = get_scope
303
self._scope = get_scope()
304
self._access_mode = access_mode
305
self._allow_reserved = allow_reserved
308
return "Weave(%r)" % self._weave_name
310
def _check_write_ok(self):
311
"""Is the versioned file marked as 'finished' ? Raise if it is."""
312
if self._get_scope() != self._scope:
313
raise errors.OutSideTransaction()
314
if self._access_mode != 'w':
315
raise errors.ReadOnlyObjectDirtiedError(self)
318
"""Return a deep copy of self.
320
The copy can be modified without affecting the original weave."""
322
other._weave = self._weave[:]
323
other._parents = self._parents[:]
324
other._sha1s = self._sha1s[:]
325
other._names = self._names[:]
326
other._name_map = self._name_map.copy()
327
other._weave_name = self._weave_name
330
def __eq__(self, other):
331
if not isinstance(other, Weave):
333
return self._parents == other._parents \
334
and self._weave == other._weave \
335
and self._sha1s == other._sha1s
337
def __ne__(self, other):
338
return not self.__eq__(other)
340
def _idx_to_name(self, version):
341
return self._names[version]
343
def _lookup(self, name):
344
"""Convert symbolic version name to index."""
345
if not self._allow_reserved:
346
self.check_not_reserved_id(name)
348
return self._name_map[name]
350
raise RevisionNotPresent(name, self._weave_name)
353
"""See VersionedFile.versions."""
354
return self._names[:]
356
def has_version(self, version_id):
357
"""See VersionedFile.has_version."""
358
return (version_id in self._name_map)
360
__contains__ = has_version
362
def get_record_stream(self, versions, ordering, include_delta_closure):
363
"""Get a stream of records for versions.
365
:param versions: The versions to include. Each version is a tuple
367
:param ordering: Either 'unordered' or 'topological'. A topologically
368
sorted stream has compression parents strictly before their
370
:param include_delta_closure: If True then the closure across any
371
compression parents will be included (in the opaque data).
372
:return: An iterator of ContentFactory objects, each of which is only
373
valid until the iterator is advanced.
375
versions = [version[-1] for version in versions]
376
if ordering == 'topological':
377
parents = self.get_parent_map(versions)
378
new_versions = tsort.topo_sort(parents)
379
new_versions.extend(set(versions).difference(set(parents)))
380
versions = new_versions
381
elif ordering == 'groupcompress':
382
parents = self.get_parent_map(versions)
383
new_versions = sort_groupcompress(parents)
384
new_versions.extend(set(versions).difference(set(parents)))
385
versions = new_versions
386
for version in versions:
388
yield WeaveContentFactory(version, self)
390
yield AbsentContentFactory((version,))
392
def get_parent_map(self, version_ids):
393
"""See VersionedFile.get_parent_map."""
395
for version_id in version_ids:
396
if version_id == NULL_REVISION:
401
map(self._idx_to_name,
402
self._parents[self._lookup(version_id)]))
403
except RevisionNotPresent:
405
result[version_id] = parents
408
def get_parents_with_ghosts(self, version_id):
409
raise NotImplementedError(self.get_parents_with_ghosts)
411
def insert_record_stream(self, stream):
412
"""Insert a record stream into this versioned file.
414
:param stream: A stream of records to insert.
416
:seealso VersionedFile.get_record_stream:
419
for record in stream:
420
# Raise an error when a record is missing.
421
if record.storage_kind == 'absent':
422
raise RevisionNotPresent([record.key[0]], self)
423
# adapt to non-tuple interface
424
parents = [parent[0] for parent in record.parents]
425
if (record.storage_kind == 'fulltext' or
426
record.storage_kind == 'chunked'):
428
record.key[0], parents,
429
osutils.chunks_to_lines(record.get_bytes_as('chunked')))
431
adapter_key = record.storage_kind, 'fulltext'
433
adapter = adapters[adapter_key]
435
adapter_factory = adapter_registry.get(adapter_key)
436
adapter = adapter_factory(self)
437
adapters[adapter_key] = adapter
438
lines = split_lines(adapter.get_bytes(record))
440
self.add_lines(record.key[0], parents, lines)
441
except RevisionAlreadyPresent:
444
def _check_repeated_add(self, name, parents, text, sha1):
445
"""Check that a duplicated add is OK.
447
If it is, return the (old) index; otherwise raise an exception.
449
idx = self._lookup(name)
450
if sorted(self._parents[idx]) != sorted(parents) \
451
or sha1 != self._sha1s[idx]:
452
raise RevisionAlreadyPresent(name, self._weave_name)
455
def _add_lines(self, version_id, parents, lines, parent_texts,
456
left_matching_blocks, nostore_sha, random_id,
458
"""See VersionedFile.add_lines."""
459
idx = self._add(version_id, lines, list(map(self._lookup, parents)),
460
nostore_sha=nostore_sha)
461
return sha_strings(lines), sum(map(len, lines)), idx
463
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
464
"""Add a single text on top of the weave.
466
Returns the index number of the newly added version.
469
Symbolic name for this version.
470
(Typically the revision-id of the revision that added it.)
471
If None, a name will be allocated based on the hash. (sha1:SHAHASH)
474
List or set of direct parent version numbers.
477
Sequence of lines to be added in the new version.
479
:param nostore_sha: See VersionedFile.add_lines.
481
self._check_lines_not_unicode(lines)
482
self._check_lines_are_lines(lines)
484
sha1 = sha_strings(lines)
485
if sha1 == nostore_sha:
486
raise errors.ExistingContent
487
if version_id is None:
488
version_id = b"sha1:" + sha1
489
if version_id in self._name_map:
490
return self._check_repeated_add(version_id, parents, lines, sha1)
492
self._check_versions(parents)
493
new_version = len(self._parents)
495
# if we abort after here the (in-memory) weave will be corrupt because
496
# only some fields are updated
497
# XXX: FIXME implement a succeed-or-fail of the rest of this routine.
498
# - Robert Collins 20060226
499
self._parents.append(parents[:])
500
self._sha1s.append(sha1)
501
self._names.append(version_id)
502
self._name_map[version_id] = new_version
505
# special case; adding with no parents revision; can do
506
# this more quickly by just appending unconditionally.
507
# even more specially, if we're adding an empty text we
508
# need do nothing at all.
510
self._weave.append((b'{', new_version))
511
self._weave.extend(lines)
512
self._weave.append((b'}', None))
515
if len(parents) == 1:
516
pv = list(parents)[0]
517
if sha1 == self._sha1s[pv]:
518
# special case: same as the single parent
521
ancestors = self._inclusions(parents)
525
# basis a list of (origin, lineno, line)
528
for origin, lineno, line in self._extract(ancestors):
529
basis_lineno.append(lineno)
530
basis_lines.append(line)
532
# another small special case: a merge, producing the same text
534
if lines == basis_lines:
537
# add a sentinel, because we can also match against the final line
538
basis_lineno.append(len(self._weave))
540
# XXX: which line of the weave should we really consider
541
# matches the end of the file? the current code says it's the
542
# last line of the weave?
544
# print 'basis_lines:', basis_lines
545
# print 'new_lines: ', lines
547
s = self._matcher(None, basis_lines, lines)
549
# offset gives the number of lines that have been inserted
550
# into the weave up to the current point; if the original edit
551
# instruction says to change line A then we actually change (A+offset)
554
for tag, i1, i2, j1, j2 in s.get_opcodes():
555
# i1,i2 are given in offsets within basis_lines; we need to map
556
# them back to offsets within the entire weave print 'raw match',
557
# tag, i1, i2, j1, j2
560
i1 = basis_lineno[i1]
561
i2 = basis_lineno[i2]
562
# the deletion and insertion are handled separately.
563
# first delete the region.
565
self._weave.insert(i1 + offset, (b'[', new_version))
566
self._weave.insert(i2 + offset + 1, (b']', new_version))
570
# there may have been a deletion spanning up to
571
# i2; we want to insert after this region to make sure
572
# we don't destroy ourselves
574
self._weave[i:i] = ([(b'{', new_version)] +
577
offset += 2 + (j2 - j1)
580
def _inclusions(self, versions):
581
"""Return set of all ancestors of given version(s)."""
582
if not len(versions):
585
for v in range(max(versions), 0, -1):
587
# include all its parents
588
i.update(self._parents[v])
591
def get_ancestry(self, version_ids, topo_sorted=True):
592
"""See VersionedFile.get_ancestry."""
593
if isinstance(version_ids, bytes):
594
version_ids = [version_ids]
595
i = self._inclusions([self._lookup(v) for v in version_ids])
596
return [self._idx_to_name(v) for v in i]
598
def _check_versions(self, indexes):
599
"""Check everything in the sequence of indexes is valid"""
604
raise IndexError("invalid version number %r" % i)
606
def _compatible_parents(self, my_parents, other_parents):
607
"""During join check that other_parents are joinable with my_parents.
609
Joinable is defined as 'is a subset of' - supersets may require
610
regeneration of diffs, but subsets do not.
612
return len(other_parents.difference(my_parents)) == 0
614
def annotate(self, version_id):
615
"""Return a list of (version-id, line) tuples for version_id.
617
The index indicates when the line originated in the weave."""
618
incls = [self._lookup(version_id)]
619
return [(self._idx_to_name(origin), text) for origin, lineno, text in
620
self._extract(incls)]
622
def iter_lines_added_or_present_in_versions(self, version_ids=None,
624
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
625
if version_ids is None:
626
version_ids = self.versions()
627
version_ids = set(version_ids)
628
for lineno, inserted, deletes, line in self._walk_internal(
630
if inserted not in version_ids:
632
if not line.endswith(b'\n'):
633
yield line + b'\n', inserted
637
def _walk_internal(self, version_ids=None):
638
"""Helper method for weave actions."""
643
lineno = 0 # line of weave, 0-based
645
for l in self._weave:
646
if l.__class__ == tuple:
649
istack.append(self._names[v])
653
dset.add(self._names[v])
655
dset.remove(self._names[v])
657
raise WeaveFormatError('unexpected instruction %r' % v)
659
yield lineno, istack[-1], frozenset(dset), l
663
raise WeaveFormatError("unclosed insertion blocks "
664
"at end of weave: %s" % istack)
666
raise WeaveFormatError(
667
"unclosed deletion blocks at end of weave: %s" % dset)
669
def plan_merge(self, ver_a, ver_b):
670
"""Return pseudo-annotation indicating how the two versions merge.
672
This is computed between versions a and b and their common
675
Weave lines present in none of them are skipped entirely.
677
inc_a = set(self.get_ancestry([ver_a]))
678
inc_b = set(self.get_ancestry([ver_b]))
679
inc_c = inc_a & inc_b
681
for lineno, insert, deleteset, line in self._walk_internal(
683
if deleteset & inc_c:
684
# killed in parent; can't be in either a or b
685
# not relevant to our work
686
yield 'killed-base', line
687
elif insert in inc_c:
688
# was inserted in base
689
killed_a = bool(deleteset & inc_a)
690
killed_b = bool(deleteset & inc_b)
691
if killed_a and killed_b:
692
yield 'killed-both', line
694
yield 'killed-a', line
696
yield 'killed-b', line
698
yield 'unchanged', line
699
elif insert in inc_a:
700
if deleteset & inc_a:
701
yield 'ghost-a', line
705
elif insert in inc_b:
706
if deleteset & inc_b:
707
yield 'ghost-b', line
711
# not in either revision
712
yield 'irrelevant', line
714
def _extract(self, versions):
715
"""Yield annotation of lines in included set.
717
Yields a sequence of tuples (origin, lineno, text), where
718
origin is the origin version, lineno the index in the weave,
719
and text the text of the line.
721
The set typically but not necessarily corresponds to a version.
724
if not isinstance(i, int):
727
included = self._inclusions(versions)
733
lineno = 0 # line of weave, 0-based
740
# 449 0 4474.6820 2356.5590 breezy.weave:556(_extract)
741
# +285282 0 1676.8040 1676.8040 +<isinstance>
742
# 1.6 seconds in 'isinstance'.
743
# changing the first isinstance:
744
# 449 0 2814.2660 1577.1760 breezy.weave:556(_extract)
745
# +140414 0 762.8050 762.8050 +<isinstance>
746
# note that the inline time actually dropped (less function calls)
747
# and total processing time was halved.
748
# we're still spending ~1/4 of the method in isinstance though.
749
# so lets hard code the acceptable string classes we expect:
750
# 449 0 1202.9420 786.2930 breezy.weave:556(_extract)
751
# +71352 0 377.5560 377.5560 +<method 'append' of 'list'
753
# yay, down to ~1/4 the initial extract time, and our inline time
754
# has shrunk again, with isinstance no longer dominating.
755
# tweaking the stack inclusion test to use a set gives:
756
# 449 0 1122.8030 713.0080 breezy.weave:556(_extract)
757
# +71352 0 354.9980 354.9980 +<method 'append' of 'list'
759
# - a 5% win, or possibly just noise. However with large istacks that
760
# 'in' test could dominate, so I'm leaving this change in place - when
761
# its fast enough to consider profiling big datasets we can review.
763
for l in self._weave:
764
if l.__class__ == tuple:
771
iset.remove(istack.pop())
779
raise AssertionError()
782
isactive = (not dset) and istack and (
783
istack[-1] in included)
785
result.append((istack[-1], lineno, l))
788
raise WeaveFormatError("unclosed insertion blocks "
789
"at end of weave: %s" % istack)
791
raise WeaveFormatError(
792
"unclosed deletion blocks at end of weave: %s" % dset)
795
def _maybe_lookup(self, name_or_index):
796
"""Convert possible symbolic name to index, or pass through indexes.
800
# GZ 2017-04-01: This used to check for long as well, but I don't think
801
# there are python implementations with sys.maxsize > sys.maxint
802
if isinstance(name_or_index, int):
805
return self._lookup(name_or_index)
807
def get_lines(self, version_id):
808
"""See VersionedFile.get_lines()."""
809
int_index = self._maybe_lookup(version_id)
810
result = [line for (origin, lineno, line)
811
in self._extract([int_index])]
812
expected_sha1 = self._sha1s[int_index]
813
measured_sha1 = sha_strings(result)
814
if measured_sha1 != expected_sha1:
815
raise WeaveInvalidChecksum(
816
'file %s, revision %s, expected: %s, measured %s'
817
% (self._weave_name, version_id,
818
expected_sha1, measured_sha1))
821
def get_sha1s(self, version_ids):
822
"""See VersionedFile.get_sha1s()."""
824
for v in version_ids:
825
result[v] = self._sha1s[self._lookup(v)]
828
def num_versions(self):
829
"""How many versions are in this weave?"""
830
return len(self._parents)
832
__len__ = num_versions
834
def check(self, progress_bar=None):
835
# TODO evaluate performance hit of using string sets in this routine.
836
# TODO: check no circular inclusions
837
# TODO: create a nested progress bar
838
for version in range(self.num_versions()):
839
inclusions = list(self._parents[version])
842
if inclusions[-1] >= version:
843
raise WeaveFormatError(
844
"invalid included version %d for index %d"
845
% (inclusions[-1], version))
847
# try extracting all versions; parallel extraction is used
848
nv = self.num_versions()
853
# For creating the ancestry, IntSet is much faster (3.7s vs 0.17s)
854
# The problem is that set membership is much more expensive
855
name = self._idx_to_name(i)
859
for p in self._parents[i]:
860
new_inc.update(inclusions[self._idx_to_name(p)])
862
if set(new_inc) != set(self.get_ancestry(name)):
863
raise AssertionError(
865
% (set(new_inc), set(self.get_ancestry(name))))
866
inclusions[name] = new_inc
868
nlines = len(self._weave)
870
update_text = 'checking weave'
872
short_name = os.path.basename(self._weave_name)
873
update_text = 'checking %s' % (short_name,)
874
update_text = update_text[:25]
876
for lineno, insert, deleteset, line in self._walk_internal():
878
progress_bar.update(update_text, lineno, nlines)
880
for name, name_inclusions in inclusions.items():
881
# The active inclusion must be an ancestor,
882
# and no ancestors must have deleted this line,
883
# because we don't support resurrection.
884
if ((insert in name_inclusions) and
885
not (deleteset & name_inclusions)):
886
sha1s[name].update(line)
889
version = self._idx_to_name(i)
890
hd = sha1s[version].hexdigest().encode()
891
expected = self._sha1s[i]
893
raise WeaveInvalidChecksum(
894
"mismatched sha1 for version %s: "
895
"got %s, expected %s"
896
% (version, hd, expected))
898
# TODO: check insertions are properly nested, that there are
899
# no lines outside of insertion blocks, that deletions are
900
# properly paired, etc.
902
def _imported_parents(self, other, other_idx):
903
"""Return list of parents in self corresponding to indexes in other."""
905
for parent_idx in other._parents[other_idx]:
906
parent_name = other._names[parent_idx]
907
if parent_name not in self._name_map:
908
# should not be possible
909
raise WeaveError("missing parent {%s} of {%s} in %r"
910
% (parent_name, other._name_map[other_idx], self))
911
new_parents.append(self._name_map[parent_name])
914
def _check_version_consistent(self, other, other_idx, name):
915
"""Check if a version in consistent in this and other.
917
To be consistent it must have:
920
* the same direct parents (by name, not index, and disregarding
923
If present & correct return True;
924
if not present in self return False;
925
if inconsistent raise error."""
926
this_idx = self._name_map.get(name, -1)
928
if self._sha1s[this_idx] != other._sha1s[other_idx]:
929
raise WeaveTextDiffers(name, self, other)
930
self_parents = self._parents[this_idx]
931
other_parents = other._parents[other_idx]
932
n1 = {self._names[i] for i in self_parents}
933
n2 = {other._names[i] for i in other_parents}
934
if not self._compatible_parents(n1, n2):
935
raise WeaveParentMismatch(
936
"inconsistent parents "
937
"for version {%s}: %s vs %s" % (name, n1, n2))
943
def _reweave(self, other, pb, msg):
944
"""Reweave self with other - internal helper for join().
946
:param other: The other weave to merge
947
:param pb: An optional progress bar, indicating how far done we are
948
:param msg: An optional message for the progress
950
new_weave = _reweave(self, other, pb=pb, msg=msg)
951
self._copy_weave_content(new_weave)
953
def _copy_weave_content(self, otherweave):
954
"""adsorb the content from otherweave."""
955
for attr in self.__slots__:
956
if attr != '_weave_name':
957
setattr(self, attr, copy(getattr(otherweave, attr)))
960
class WeaveFile(Weave):
961
"""A WeaveFile represents a Weave on disk and writes on change."""
963
WEAVE_SUFFIX = '.weave'
965
def __init__(self, name, transport, filemode=None, create=False,
966
access_mode='w', get_scope=None):
967
"""Create a WeaveFile.
969
:param create: If not True, only open an existing knit.
971
super(WeaveFile, self).__init__(name, access_mode, get_scope=get_scope,
972
allow_reserved=False)
973
self._transport = transport
974
self._filemode = filemode
976
f = self._transport.get(name + WeaveFile.WEAVE_SUFFIX)
977
_read_weave_v5(BytesIO(f.read()), self)
978
except errors.NoSuchFile:
984
def _add_lines(self, version_id, parents, lines, parent_texts,
985
left_matching_blocks, nostore_sha, random_id,
987
"""Add a version and save the weave."""
988
self.check_not_reserved_id(version_id)
989
result = super(WeaveFile, self)._add_lines(
990
version_id, parents, lines, parent_texts, left_matching_blocks,
991
nostore_sha, random_id, check_content)
995
def copy_to(self, name, transport):
996
"""See VersionedFile.copy_to()."""
997
# as we are all in memory always, just serialise to the new place.
999
write_weave_v5(self, sio)
1001
transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
1004
"""Save the weave."""
1005
self._check_write_ok()
1007
write_weave_v5(self, sio)
1009
bytes = sio.getvalue()
1010
path = self._weave_name + WeaveFile.WEAVE_SUFFIX
1012
self._transport.put_bytes(path, bytes, self._filemode)
1013
except errors.NoSuchFile:
1014
self._transport.mkdir(dirname(path))
1015
self._transport.put_bytes(path, bytes, self._filemode)
1019
"""See VersionedFile.get_suffixes()."""
1020
return [WeaveFile.WEAVE_SUFFIX]
1022
def insert_record_stream(self, stream):
1023
super(WeaveFile, self).insert_record_stream(stream)
1027
def _reweave(wa, wb, pb=None, msg=None):
1028
"""Combine two weaves and return the result.
1030
This works even if a revision R has different parents in
1031
wa and wb. In the resulting weave all the parents are given.
1033
This is done by just building up a new weave, maintaining ordering
1034
of the versions in the two inputs. More efficient approaches
1035
might be possible but it should only be necessary to do
1036
this operation rarely, when a new previously ghost version is
1039
:param pb: An optional progress bar, indicating how far done we are
1040
:param msg: An optional message for the progress
1043
# first determine combined parents of all versions
1044
# map from version name -> all parent names
1045
combined_parents = _reweave_parent_graphs(wa, wb)
1046
mutter("combined parents: %r", combined_parents)
1047
order = tsort.topo_sort(combined_parents.items())
1048
mutter("order to reweave: %r", order)
1053
for idx, name in enumerate(order):
1055
pb.update(msg, idx, len(order))
1056
if name in wa._name_map:
1057
lines = wa.get_lines(name)
1058
if name in wb._name_map:
1059
lines_b = wb.get_lines(name)
1060
if lines != lines_b:
1061
mutter('Weaves differ on content. rev_id {%s}', name)
1062
mutter('weaves: %s, %s', wa._weave_name, wb._weave_name)
1064
lines = list(difflib.unified_diff(lines, lines_b,
1065
wa._weave_name, wb._weave_name))
1066
mutter('lines:\n%s', ''.join(lines))
1067
raise WeaveTextDiffers(name, wa, wb)
1069
lines = wb.get_lines(name)
1070
wr._add(name, lines, [wr._lookup(i) for i in combined_parents[name]])
1074
def _reweave_parent_graphs(wa, wb):
1075
"""Return combined parent ancestry for two weaves.
1077
Returned as a list of (version_name, set(parent_names))"""
1079
for weave in [wa, wb]:
1080
for idx, name in enumerate(weave._names):
1081
p = combined.setdefault(name, set())
1082
p.update(map(weave._idx_to_name, weave._parents[idx]))