59
61
# where the basis and destination are unchanged.
61
63
# FIXME: Sometimes we will be given a parents list for a revision
62
# that includes some redundant parents (i.e. already a parent of
63
# something in the list.) We should eliminate them. This can
64
# that includes some redundant parents (i.e. already a parent of
65
# something in the list.) We should eliminate them. This can
64
66
# be done fairly efficiently because the sequence numbers constrain
65
67
# the possible relationships.
67
69
# FIXME: the conflict markers should be *7* characters
69
71
from copy import copy
72
from cStringIO import StringIO
73
from ..lazy_import import lazy_import
74
lazy_import(globals(), """
75
from breezy import tsort
81
from ..errors import (
82
RevisionAlreadyPresent,
84
UnavailableRepresentation,
86
from ..osutils import dirname, sha, sha_strings, split_lines
87
from ..revision import NULL_REVISION
88
from ..sixish import (
91
from ..trace import mutter
92
from .versionedfile import (
99
from .weavefile import _read_weave_v5, write_weave_v5
102
class WeaveError(errors.BzrError):
104
_fmt = "Error in processing weave: %(msg)s"
106
def __init__(self, msg=None):
107
errors.BzrError.__init__(self)
111
class WeaveRevisionAlreadyPresent(WeaveError):
113
_fmt = "Revision {%(revision_id)s} already present in %(weave)s"
115
def __init__(self, revision_id, weave):
117
WeaveError.__init__(self)
118
self.revision_id = revision_id
122
class WeaveRevisionNotPresent(WeaveError):
124
_fmt = "Revision {%(revision_id)s} not present in %(weave)s"
126
def __init__(self, revision_id, weave):
127
WeaveError.__init__(self)
128
self.revision_id = revision_id
132
class WeaveFormatError(WeaveError):
134
_fmt = "Weave invariant violated: %(what)s"
136
def __init__(self, what):
137
WeaveError.__init__(self)
141
class WeaveParentMismatch(WeaveError):
143
_fmt = "Parents are mismatched between two revisions. %(msg)s"
146
class WeaveInvalidChecksum(WeaveError):
148
_fmt = "Text did not match its checksum: %(msg)s"
151
class WeaveTextDiffers(WeaveError):
153
_fmt = ("Weaves differ on text content. Revision:"
154
" {%(revision_id)s}, %(weave_a)s, %(weave_b)s")
156
def __init__(self, revision_id, weave_a, weave_b):
157
WeaveError.__init__(self)
158
self.revision_id = revision_id
159
self.weave_a = weave_a
160
self.weave_b = weave_b
163
class WeaveContentFactory(ContentFactory):
164
"""Content factory for streaming from weaves.
166
:seealso ContentFactory:
169
def __init__(self, version, weave):
170
"""Create a WeaveContentFactory for version from weave."""
171
ContentFactory.__init__(self)
172
self.sha1 = weave.get_sha1s([version])[version]
173
self.key = (version,)
174
parents = weave.get_parent_map([version])[version]
175
self.parents = tuple((parent,) for parent in parents)
176
self.storage_kind = 'fulltext'
179
def get_bytes_as(self, storage_kind):
180
if storage_kind == 'fulltext':
181
return self._weave.get_text(self.key[-1])
182
elif storage_kind in ('chunked', 'lines'):
183
return self._weave.get_lines(self.key[-1])
185
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
187
def iter_bytes_as(self, storage_kind):
188
if storage_kind in ('chunked', 'lines'):
189
return iter(self._weave.get_lines(self.key[-1]))
191
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
78
from bzrlib.trace import mutter
79
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
80
RevisionAlreadyPresent,
82
WeaveRevisionAlreadyPresent,
83
WeaveRevisionNotPresent,
85
import bzrlib.errors as errors
86
from bzrlib.osutils import sha_strings
87
import bzrlib.patiencediff
88
from bzrlib.symbol_versioning import (deprecated_method,
92
from bzrlib.tsort import topo_sort
93
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
94
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
194
97
class Weave(VersionedFile):
195
98
"""weave - versioned text file storage.
197
100
A Weave manages versions of line-based text files, keeping track
198
101
of the originating version for each line.
337
219
if not isinstance(other, Weave):
339
221
return self._parents == other._parents \
340
and self._weave == other._weave \
341
and self._sha1s == other._sha1s
222
and self._weave == other._weave \
223
and self._sha1s == other._sha1s
343
225
def __ne__(self, other):
344
226
return not self.__eq__(other)
228
@deprecated_method(zero_eight)
229
def idx_to_name(self, index):
230
"""Old public interface, the public interface is all names now."""
346
233
def _idx_to_name(self, version):
347
234
return self._names[version]
236
@deprecated_method(zero_eight)
237
def lookup(self, name):
238
"""Backwards compatibility thunk:
240
Return name, as name is valid in the api now, and spew deprecation
349
245
def _lookup(self, name):
350
246
"""Convert symbolic version name to index."""
351
if not self._allow_reserved:
352
self.check_not_reserved_id(name)
354
248
return self._name_map[name]
356
250
raise RevisionNotPresent(name, self._weave_name)
252
@deprecated_method(zero_eight)
253
def iter_names(self):
254
"""Deprecated convenience function, please see VersionedFile.names()."""
255
return iter(self.names())
257
@deprecated_method(zero_eight)
259
"""See Weave.versions for the current api."""
260
return self.versions()
358
262
def versions(self):
359
263
"""See VersionedFile.versions."""
360
264
return self._names[:]
362
266
def has_version(self, version_id):
363
267
"""See VersionedFile.has_version."""
364
return (version_id in self._name_map)
268
return self._name_map.has_key(version_id)
366
270
__contains__ = has_version
368
def get_record_stream(self, versions, ordering, include_delta_closure):
369
"""Get a stream of records for versions.
371
:param versions: The versions to include. Each version is a tuple
373
:param ordering: Either 'unordered' or 'topological'. A topologically
374
sorted stream has compression parents strictly before their
376
:param include_delta_closure: If True then the closure across any
377
compression parents will be included (in the opaque data).
378
:return: An iterator of ContentFactory objects, each of which is only
379
valid until the iterator is advanced.
381
versions = [version[-1] for version in versions]
382
if ordering == 'topological':
383
parents = self.get_parent_map(versions)
384
new_versions = tsort.topo_sort(parents)
385
new_versions.extend(set(versions).difference(set(parents)))
386
versions = new_versions
387
elif ordering == 'groupcompress':
388
parents = self.get_parent_map(versions)
389
new_versions = sort_groupcompress(parents)
390
new_versions.extend(set(versions).difference(set(parents)))
391
versions = new_versions
392
for version in versions:
394
yield WeaveContentFactory(version, self)
396
yield AbsentContentFactory((version,))
398
def get_parent_map(self, version_ids):
399
"""See VersionedFile.get_parent_map."""
272
def get_delta(self, version_id):
273
"""See VersionedFile.get_delta."""
274
return self.get_deltas([version_id])[version_id]
276
def get_deltas(self, version_ids):
277
"""See VersionedFile.get_deltas."""
278
version_ids = self.get_ancestry(version_ids)
401
279
for version_id in version_ids:
402
if version_id == NULL_REVISION:
407
map(self._idx_to_name,
408
self._parents[self._lookup(version_id)]))
409
except RevisionNotPresent:
280
if not self.has_version(version_id):
281
raise RevisionNotPresent(version_id, self)
282
# try extracting all versions; parallel extraction is used
283
nv = self.num_versions()
289
last_parent_lines = {}
291
parent_inclusions = {}
296
# its simplest to generate a full set of prepared variables.
298
name = self._names[i]
299
sha1s[name] = self.get_sha1(name)
300
parents_list = self.get_parents(name)
302
parent = parents_list[0]
303
parents[name] = parent
304
parent_inclusions[name] = inclusions[parent]
307
parent_inclusions[name] = set()
308
# we want to emit start, finish, replacement_length, replacement_lines tuples.
309
diff_hunks[name] = []
310
current_hunks[name] = [0, 0, 0, []] # #start, finish, repl_length, repl_tuples
311
parent_linenums[name] = 0
313
parent_noeols[name] = False
314
last_parent_lines[name] = None
315
new_inc = set([name])
316
for p in self._parents[i]:
317
new_inc.update(inclusions[self._idx_to_name(p)])
318
# debug only, known good so far.
319
#assert set(new_inc) == set(self.get_ancestry(name)), \
320
# 'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
321
inclusions[name] = new_inc
323
nlines = len(self._weave)
325
for lineno, inserted, deletes, line in self._walk_internal():
326
# a line is active in a version if:
327
# insert is in the versions inclusions
329
# deleteset & the versions inclusions is an empty set.
330
# so - if we have a included by mapping - version is included by
331
# children, we get a list of children to examine for deletes affect
332
# ing them, which is less than the entire set of children.
333
for version_id in version_ids:
334
# The active inclusion must be an ancestor,
335
# and no ancestors must have deleted this line,
336
# because we don't support resurrection.
337
parent_inclusion = parent_inclusions[version_id]
338
inclusion = inclusions[version_id]
339
parent_active = inserted in parent_inclusion and not (deletes & parent_inclusion)
340
version_active = inserted in inclusion and not (deletes & inclusion)
341
if not parent_active and not version_active:
342
# unrelated line of ancestry
411
result[version_id] = parents
344
elif parent_active and version_active:
346
parent_linenum = parent_linenums[version_id]
347
if current_hunks[version_id] != [parent_linenum, parent_linenum, 0, []]:
348
diff_hunks[version_id].append(tuple(current_hunks[version_id]))
350
current_hunks[version_id] = [parent_linenum, parent_linenum, 0, []]
351
parent_linenums[version_id] = parent_linenum
354
noeols[version_id] = True
357
elif parent_active and not version_active:
359
current_hunks[version_id][1] += 1
360
parent_linenums[version_id] += 1
361
last_parent_lines[version_id] = line
362
elif not parent_active and version_active:
364
# noeol only occurs at the end of a file because we
365
# diff linewise. We want to show noeol changes as a
366
# empty diff unless the actual eol-less content changed.
369
if last_parent_lines[version_id][-1] != '\n':
370
parent_noeols[version_id] = True
371
except (TypeError, IndexError):
374
if theline[-1] != '\n':
375
noeols[version_id] = True
379
parent_should_go = False
381
if parent_noeols[version_id] == noeols[version_id]:
382
# no noeol toggle, so trust the weaves statement
383
# that this line is changed.
385
if parent_noeols[version_id]:
386
theline = theline + '\n'
387
elif parent_noeols[version_id]:
388
# parent has no eol, we do:
389
# our line is new, report as such..
391
elif noeols[version_id]:
392
# append a eol so that it looks like
394
theline = theline + '\n'
395
if parents[version_id] is not None:
396
#if last_parent_lines[version_id] is not None:
397
parent_should_go = True
398
if last_parent_lines[version_id] != theline:
401
#parent_should_go = False
403
current_hunks[version_id][2] += 1
404
current_hunks[version_id][3].append((inserted, theline))
406
# last hunk last parent line is not eaten
407
current_hunks[version_id][1] -= 1
408
if current_hunks[version_id][1] < 0:
409
current_hunks[version_id][1] = 0
410
# import pdb;pdb.set_trace()
411
# assert current_hunks[version_id][1] >= 0
415
version = self._idx_to_name(i)
416
if current_hunks[version] != [0, 0, 0, []]:
417
diff_hunks[version].append(tuple(current_hunks[version]))
419
for version_id in version_ids:
420
result[version_id] = (
424
diff_hunks[version_id],
414
def get_parents_with_ghosts(self, version_id):
415
raise NotImplementedError(self.get_parents_with_ghosts)
417
def insert_record_stream(self, stream):
418
"""Insert a record stream into this versioned file.
420
:param stream: A stream of records to insert.
422
:seealso VersionedFile.get_record_stream:
425
for record in stream:
426
# Raise an error when a record is missing.
427
if record.storage_kind == 'absent':
428
raise RevisionNotPresent([record.key[0]], self)
429
# adapt to non-tuple interface
430
parents = [parent[0] for parent in record.parents]
431
if record.storage_kind in ('fulltext', 'chunked', 'lines'):
433
record.key[0], parents,
434
record.get_bytes_as('lines'))
436
adapter_key = record.storage_kind, 'lines'
438
adapter = adapters[adapter_key]
440
adapter_factory = adapter_registry.get(adapter_key)
441
adapter = adapter_factory(self)
442
adapters[adapter_key] = adapter
443
lines = adapter.get_bytes(record, 'lines')
445
self.add_lines(record.key[0], parents, lines)
446
except RevisionAlreadyPresent:
428
def get_parents(self, version_id):
429
"""See VersionedFile.get_parent."""
430
return map(self._idx_to_name, self._parents[self._lookup(version_id)])
449
432
def _check_repeated_add(self, name, parents, text, sha1):
450
433
"""Check that a duplicated add is OK.
454
437
idx = self._lookup(name)
455
438
if sorted(self._parents[idx]) != sorted(parents) \
456
or sha1 != self._sha1s[idx]:
439
or sha1 != self._sha1s[idx]:
457
440
raise RevisionAlreadyPresent(name, self._weave_name)
460
def _add_lines(self, version_id, parents, lines, parent_texts,
461
left_matching_blocks, nostore_sha, random_id,
443
@deprecated_method(zero_eight)
444
def add_identical(self, old_rev_id, new_rev_id, parents):
445
"""Please use Weave.clone_text now."""
446
return self.clone_text(new_rev_id, old_rev_id, parents)
448
def _add_lines(self, version_id, parents, lines, parent_texts):
463
449
"""See VersionedFile.add_lines."""
464
idx = self._add(version_id, lines, list(map(self._lookup, parents)),
465
nostore_sha=nostore_sha)
466
return sha_strings(lines), sum(map(len, lines)), idx
468
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
450
return self._add(version_id, lines, map(self._lookup, parents))
452
@deprecated_method(zero_eight)
453
def add(self, name, parents, text, sha1=None):
454
"""See VersionedFile.add_lines for the non deprecated api."""
455
return self._add(name, text, map(self._maybe_lookup, parents), sha1)
457
def _add(self, version_id, lines, parents, sha1=None):
469
458
"""Add a single text on top of the weave.
471
460
Returns the index number of the newly added version.
474
463
Symbolic name for this version.
475
464
(Typically the revision-id of the revision that added it.)
476
If None, a name will be allocated based on the hash. (sha1:SHAHASH)
479
467
List or set of direct parent version numbers.
482
470
Sequence of lines to be added in the new version.
484
:param nostore_sha: See VersionedFile.add_lines.
473
assert isinstance(version_id, basestring)
486
474
self._check_lines_not_unicode(lines)
487
475
self._check_lines_are_lines(lines)
489
477
sha1 = sha_strings(lines)
490
if sha1 == nostore_sha:
491
raise errors.ExistingContent
492
if version_id is None:
493
version_id = b"sha1:" + sha1
494
478
if version_id in self._name_map:
495
479
return self._check_repeated_add(version_id, parents, lines, sha1)
497
481
self._check_versions(parents)
482
## self._check_lines(lines)
498
483
new_version = len(self._parents)
500
# if we abort after here the (in-memory) weave will be corrupt because
501
# only some fields are updated
485
# if we abort after here the (in-memory) weave will be corrupt because only
486
# some fields are updated
502
487
# XXX: FIXME implement a succeed-or-fail of the rest of this routine.
503
488
# - Robert Collins 20060226
504
489
self._parents.append(parents[:])
546
533
# matches the end of the file? the current code says it's the
547
534
# last line of the weave?
549
# print 'basis_lines:', basis_lines
550
# print 'new_lines: ', lines
536
#print 'basis_lines:', basis_lines
537
#print 'new_lines: ', lines
552
539
s = self._matcher(None, basis_lines, lines)
554
541
# offset gives the number of lines that have been inserted
555
# into the weave up to the current point; if the original edit
556
# instruction says to change line A then we actually change (A+offset)
542
# into the weave up to the current point; if the original edit instruction
543
# says to change line A then we actually change (A+offset)
559
546
for tag, i1, i2, j1, j2 in s.get_opcodes():
560
# i1,i2 are given in offsets within basis_lines; we need to map
561
# them back to offsets within the entire weave print 'raw match',
562
# tag, i1, i2, j1, j2
547
# i1,i2 are given in offsets within basis_lines; we need to map them
548
# back to offsets within the entire weave
549
#print 'raw match', tag, i1, i2, j1, j2
563
550
if tag == 'equal':
565
553
i1 = basis_lineno[i1]
566
554
i2 = basis_lineno[i2]
556
assert 0 <= j1 <= j2 <= len(lines)
558
#print tag, i1, i2, j1, j2
567
560
# the deletion and insertion are handled separately.
568
561
# first delete the region.
570
self._weave.insert(i1 + offset, (b'[', new_version))
571
self._weave.insert(i2 + offset + 1, (b']', new_version))
563
self._weave.insert(i1+offset, ('[', new_version))
564
self._weave.insert(i2+offset+1, (']', new_version))
576
569
# i2; we want to insert after this region to make sure
577
570
# we don't destroy ourselves
579
self._weave[i:i] = ([(b'{', new_version)] +
572
self._weave[i:i] = ([('{', new_version)]
582
575
offset += 2 + (j2 - j1)
583
576
return new_version
578
def _clone_text(self, new_version_id, old_version_id, parents):
579
"""See VersionedFile.clone_text."""
580
old_lines = self.get_text(old_version_id)
581
self.add_lines(new_version_id, parents, old_lines)
585
583
def _inclusions(self, versions):
586
584
"""Return set of all ancestors of given version(s)."""
587
585
if not len(versions):
589
587
i = set(versions)
590
for v in range(max(versions), 0, -1):
588
for v in xrange(max(versions), 0, -1):
592
590
# include all its parents
593
591
i.update(self._parents[v])
596
def get_ancestry(self, version_ids, topo_sorted=True):
593
## except IndexError:
594
## raise ValueError("version %d not present in weave" % v)
596
@deprecated_method(zero_eight)
597
def inclusions(self, version_ids):
598
"""Deprecated - see VersionedFile.get_ancestry for the replacement."""
601
if isinstance(version_ids[0], int):
602
return [self._idx_to_name(v) for v in self._inclusions(version_ids)]
604
return self.get_ancestry(version_ids)
606
def get_ancestry(self, version_ids):
597
607
"""See VersionedFile.get_ancestry."""
598
if isinstance(version_ids, bytes):
608
if isinstance(version_ids, basestring):
599
609
version_ids = [version_ids]
600
610
i = self._inclusions([self._lookup(v) for v in version_ids])
601
611
return [self._idx_to_name(v) for v in i]
613
def _check_lines(self, text):
614
if not isinstance(text, list):
615
raise ValueError("text should be a list, not %s" % type(text))
618
if not isinstance(l, basestring):
619
raise ValueError("text line should be a string or unicode, not %s"
603
624
def _check_versions(self, indexes):
604
625
"""Check everything in the sequence of indexes is valid"""
605
626
for i in indexes:
611
632
def _compatible_parents(self, my_parents, other_parents):
612
633
"""During join check that other_parents are joinable with my_parents.
614
Joinable is defined as 'is a subset of' - supersets may require
635
Joinable is defined as 'is a subset of' - supersets may require
615
636
regeneration of diffs, but subsets do not.
617
638
return len(other_parents.difference(my_parents)) == 0
619
640
def annotate(self, version_id):
620
"""Return a list of (version-id, line) tuples for version_id.
641
if isinstance(version_id, int):
642
warnings.warn('Weave.annotate(int) is deprecated. Please use version names'
643
' in all circumstances as of 0.8',
648
for origin, lineno, text in self._extract([version_id]):
649
result.append((origin, text))
652
return super(Weave, self).annotate(version_id)
654
def annotate_iter(self, version_id):
655
"""Yield list of (version-id, line) pairs for the specified version.
622
657
The index indicates when the line originated in the weave."""
623
658
incls = [self._lookup(version_id)]
624
return [(self._idx_to_name(origin), text) for origin, lineno, text in
625
self._extract(incls)]
627
def iter_lines_added_or_present_in_versions(self, version_ids=None,
659
for origin, lineno, text in self._extract(incls):
660
yield self._idx_to_name(origin), text
662
@deprecated_method(zero_eight)
664
"""_walk has become visit, a supported api."""
665
return self._walk_internal()
667
def iter_lines_added_or_present_in_versions(self, version_ids=None):
629
668
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
630
669
if version_ids is None:
631
670
version_ids = self.versions()
632
671
version_ids = set(version_ids)
633
for lineno, inserted, deletes, line in self._walk_internal(
635
if inserted not in version_ids:
637
if not line.endswith(b'\n'):
638
yield line + b'\n', inserted
672
for lineno, inserted, deletes, line in self._walk_internal(version_ids):
673
# if inserted not in version_ids then it was inserted before the
674
# versions we care about, but because weaves cannot represent ghosts
675
# properly, we do not filter down to that
676
# if inserted not in version_ids: continue
682
#@deprecated_method(zero_eight)
683
def walk(self, version_ids=None):
684
"""See VersionedFile.walk."""
685
return self._walk_internal(version_ids)
642
687
def _walk_internal(self, version_ids=None):
643
688
"""Helper method for weave actions."""
745
# 449 0 4474.6820 2356.5590 breezy.weave:556(_extract)
795
WFE = WeaveFormatError
798
# 449 0 4474.6820 2356.5590 bzrlib.weave:556(_extract)
746
799
# +285282 0 1676.8040 1676.8040 +<isinstance>
747
800
# 1.6 seconds in 'isinstance'.
748
801
# changing the first isinstance:
749
# 449 0 2814.2660 1577.1760 breezy.weave:556(_extract)
802
# 449 0 2814.2660 1577.1760 bzrlib.weave:556(_extract)
750
803
# +140414 0 762.8050 762.8050 +<isinstance>
751
804
# note that the inline time actually dropped (less function calls)
752
805
# and total processing time was halved.
753
806
# we're still spending ~1/4 of the method in isinstance though.
754
807
# so lets hard code the acceptable string classes we expect:
755
# 449 0 1202.9420 786.2930 breezy.weave:556(_extract)
756
# +71352 0 377.5560 377.5560 +<method 'append' of 'list'
808
# 449 0 1202.9420 786.2930 bzrlib.weave:556(_extract)
809
# +71352 0 377.5560 377.5560 +<method 'append' of 'list'
758
811
# yay, down to ~1/4 the initial extract time, and our inline time
759
812
# has shrunk again, with isinstance no longer dominating.
760
813
# tweaking the stack inclusion test to use a set gives:
761
# 449 0 1122.8030 713.0080 breezy.weave:556(_extract)
762
# +71352 0 354.9980 354.9980 +<method 'append' of 'list'
814
# 449 0 1122.8030 713.0080 bzrlib.weave:556(_extract)
815
# +71352 0 354.9980 354.9980 +<method 'append' of 'list'
764
817
# - a 5% win, or possibly just noise. However with large istacks that
765
# 'in' test could dominate, so I'm leaving this change in place - when
766
# its fast enough to consider profiling big datasets we can review.
818
# 'in' test could dominate, so I'm leaving this change in place -
819
# when its fast enough to consider profiling big datasets we can review.
768
824
for l in self._weave:
769
825
if l.__class__ == tuple:
776
833
iset.remove(istack.pop())
778
835
if v in included:
781
840
if v in included:
784
raise AssertionError()
844
assert l.__class__ in (str, unicode)
786
845
if isactive is None:
787
isactive = (not dset) and istack and (
788
istack[-1] in included)
846
isactive = (not dset) and istack and (istack[-1] in included)
790
848
result.append((istack[-1], lineno, l))
793
851
raise WeaveFormatError("unclosed insertion blocks "
794
"at end of weave: %s" % istack)
852
"at end of weave: %s" % istack)
796
raise WeaveFormatError(
797
"unclosed deletion blocks at end of weave: %s" % dset)
854
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
858
@deprecated_method(zero_eight)
859
def get_iter(self, name_or_index):
860
"""Deprecated, please do not use. Lookups are not not needed.
862
Please use get_lines now.
864
return iter(self.get_lines(self._maybe_lookup(name_or_index)))
866
@deprecated_method(zero_eight)
867
def maybe_lookup(self, name_or_index):
868
"""Deprecated, please do not use. Lookups are not not needed."""
869
return self._maybe_lookup(name_or_index)
800
871
def _maybe_lookup(self, name_or_index):
801
872
"""Convert possible symbolic name to index, or pass through indexes.
803
874
NOT FOR PUBLIC USE.
805
# GZ 2017-04-01: This used to check for long as well, but I don't think
806
# there are python implementations with sys.maxsize > sys.maxint
807
if isinstance(name_or_index, int):
876
if isinstance(name_or_index, (int, long)):
808
877
return name_or_index
810
879
return self._lookup(name_or_index)
881
@deprecated_method(zero_eight)
882
def get(self, version_id):
883
"""Please use either Weave.get_text or Weave.get_lines as desired."""
884
return self.get_lines(version_id)
812
886
def get_lines(self, version_id):
813
887
"""See VersionedFile.get_lines()."""
814
888
int_index = self._maybe_lookup(version_id)
815
result = [line for (origin, lineno, line)
816
in self._extract([int_index])]
889
result = [line for (origin, lineno, line) in self._extract([int_index])]
817
890
expected_sha1 = self._sha1s[int_index]
818
891
measured_sha1 = sha_strings(result)
819
892
if measured_sha1 != expected_sha1:
820
raise WeaveInvalidChecksum(
821
'file %s, revision %s, expected: %s, measured %s'
822
% (self._weave_name, version_id,
823
expected_sha1, measured_sha1))
826
def get_sha1s(self, version_ids):
827
"""See VersionedFile.get_sha1s()."""
829
for v in version_ids:
830
result[v] = self._sha1s[self._lookup(v)]
893
raise errors.WeaveInvalidChecksum(
894
'file %s, revision %s, expected: %s, measured %s'
895
% (self._weave_name, version_id,
896
expected_sha1, measured_sha1))
899
def get_sha1(self, version_id):
900
"""See VersionedFile.get_sha1()."""
901
return self._sha1s[self._lookup(version_id)]
903
@deprecated_method(zero_eight)
904
def numversions(self):
905
"""How many versions are in this weave?
907
Deprecated in favour of num_versions.
909
return self.num_versions()
833
911
def num_versions(self):
834
912
"""How many versions are in this weave?"""
835
return len(self._parents)
913
l = len(self._parents)
914
assert l == len(self._sha1s)
837
917
__len__ = num_versions
886
963
# The active inclusion must be an ancestor,
887
964
# and no ancestors must have deleted this line,
888
965
# because we don't support resurrection.
889
if ((insert in name_inclusions) and
890
not (deleteset & name_inclusions)):
966
if (insert in name_inclusions) and not (deleteset & name_inclusions):
891
967
sha1s[name].update(line)
893
969
for i in range(nv):
894
970
version = self._idx_to_name(i)
895
hd = sha1s[version].hexdigest().encode()
971
hd = sha1s[version].hexdigest()
896
972
expected = self._sha1s[i]
897
973
if hd != expected:
898
raise WeaveInvalidChecksum(
899
"mismatched sha1 for version %s: "
900
"got %s, expected %s"
901
% (version, hd, expected))
974
raise errors.WeaveInvalidChecksum(
975
"mismatched sha1 for version %s: "
976
"got %s, expected %s"
977
% (version, hd, expected))
903
979
# TODO: check insertions are properly nested, that there are
904
980
# no lines outside of insertion blocks, that deletions are
905
981
# properly paired, etc.
983
def _join(self, other, pb, msg, version_ids, ignore_missing):
984
"""Worker routine for join()."""
985
if not other.versions():
986
return # nothing to update, easy
989
# versions is never none, InterWeave checks this.
992
# two loops so that we do not change ourselves before verifying it
994
# work through in index order to make sure we get all dependencies
997
# get the selected versions only that are in other.versions.
998
version_ids = set(other.versions()).intersection(set(version_ids))
999
# pull in the referenced graph.
1000
version_ids = other.get_ancestry(version_ids)
1001
pending_graph = [(version, other.get_parents(version)) for
1002
version in version_ids]
1003
for name in topo_sort(pending_graph):
1004
other_idx = other._name_map[name]
1005
# returns True if we have it, False if we need it.
1006
if not self._check_version_consistent(other, other_idx, name):
1007
names_to_join.append((other_idx, name))
1016
for other_idx, name in names_to_join:
1017
# TODO: If all the parents of the other version are already
1018
# present then we can avoid some work by just taking the delta
1019
# and adjusting the offsets.
1020
new_parents = self._imported_parents(other, other_idx)
1021
sha1 = other._sha1s[other_idx]
1026
pb.update(msg, merged, len(names_to_join))
1028
lines = other.get_lines(other_idx)
1029
self._add(name, lines, new_parents, sha1)
1031
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
1032
merged, processed, self._weave_name, time.time()-time0))
907
1034
def _imported_parents(self, other, other_idx):
908
1035
"""Return list of parents in self corresponding to indexes in other."""
909
1036
new_parents = []
966
1097
"""A WeaveFile represents a Weave on disk and writes on change."""
968
1099
WEAVE_SUFFIX = '.weave'
970
def __init__(self, name, transport, filemode=None, create=False,
971
access_mode='w', get_scope=None):
1101
def __init__(self, name, transport, filemode=None, create=False, access_mode='w'):
972
1102
"""Create a WeaveFile.
974
1104
:param create: If not True, only open an existing knit.
976
super(WeaveFile, self).__init__(name, access_mode, get_scope=get_scope,
977
allow_reserved=False)
1106
super(WeaveFile, self).__init__(name, access_mode)
978
1107
self._transport = transport
979
1108
self._filemode = filemode
981
f = self._transport.get(name + WeaveFile.WEAVE_SUFFIX)
982
_read_weave_v5(BytesIO(f.read()), self)
1110
_read_weave_v5(self._transport.get(name + WeaveFile.WEAVE_SUFFIX), self)
983
1111
except errors.NoSuchFile:
986
1114
# new file, save it
989
def _add_lines(self, version_id, parents, lines, parent_texts,
990
left_matching_blocks, nostore_sha, random_id,
1117
def _add_lines(self, version_id, parents, lines, parent_texts):
992
1118
"""Add a version and save the weave."""
993
self.check_not_reserved_id(version_id)
994
result = super(WeaveFile, self)._add_lines(
995
version_id, parents, lines, parent_texts, left_matching_blocks,
996
nostore_sha, random_id, check_content)
1119
result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
1124
def _clone_text(self, new_version_id, old_version_id, parents):
1125
"""See VersionedFile.clone_text."""
1126
super(WeaveFile, self)._clone_text(new_version_id, old_version_id, parents)
1000
1129
def copy_to(self, name, transport):
1001
1130
"""See VersionedFile.copy_to()."""
1002
1131
# as we are all in memory always, just serialise to the new place.
1004
1133
write_weave_v5(self, sio)
1006
transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
1135
transport.put(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
1137
def create_empty(self, name, transport, filemode=None):
1138
return WeaveFile(name, transport, filemode, create=True)
1008
1140
def _save(self):
1009
1141
"""Save the weave."""
1010
1142
self._check_write_ok()
1012
1144
write_weave_v5(self, sio)
1014
bytes = sio.getvalue()
1015
path = self._weave_name + WeaveFile.WEAVE_SUFFIX
1017
self._transport.put_bytes(path, bytes, self._filemode)
1018
except errors.NoSuchFile:
1019
self._transport.mkdir(dirname(path))
1020
self._transport.put_bytes(path, bytes, self._filemode)
1146
self._transport.put(self._weave_name + WeaveFile.WEAVE_SUFFIX,
1023
1151
def get_suffixes():
1024
1152
"""See VersionedFile.get_suffixes()."""
1025
1153
return [WeaveFile.WEAVE_SUFFIX]
1027
def insert_record_stream(self, stream):
1028
super(WeaveFile, self).insert_record_stream(stream)
1155
def join(self, other, pb=None, msg=None, version_ids=None,
1156
ignore_missing=False):
1157
"""Join other into self and save."""
1158
super(WeaveFile, self).join(other, pb, msg, version_ids, ignore_missing)
1162
@deprecated_function(zero_eight)
1163
def reweave(wa, wb, pb=None, msg=None):
1164
"""reweaving is deprecation, please just use weave.join()."""
1165
_reweave(wa, wb, pb, msg)
1032
1167
def _reweave(wa, wb, pb=None, msg=None):
1033
1168
"""Combine two weaves and return the result.
1035
This works even if a revision R has different parents in
1170
This works even if a revision R has different parents in
1036
1171
wa and wb. In the resulting weave all the parents are given.
1038
This is done by just building up a new weave, maintaining ordering
1173
This is done by just building up a new weave, maintaining ordering
1039
1174
of the versions in the two inputs. More efficient approaches
1040
might be possible but it should only be necessary to do
1041
this operation rarely, when a new previously ghost version is
1175
might be possible but it should only be necessary to do
1176
this operation rarely, when a new previously ghost version is
1044
1179
:param pb: An optional progress bar, indicating how far done we are
1045
1180
:param msg: An optional message for the progress
1184
queue_a = range(wa.num_versions())
1185
queue_b = range(wb.num_versions())
1048
1186
# first determine combined parents of all versions
1049
1187
# map from version name -> all parent names
1050
1188
combined_parents = _reweave_parent_graphs(wa, wb)
1051
1189
mutter("combined parents: %r", combined_parents)
1052
order = tsort.topo_sort(combined_parents.items())
1190
order = topo_sort(combined_parents.iteritems())
1053
1191
mutter("order to reweave: %r", order)
1055
1193
if pb and not msg:
1086
1223
p = combined.setdefault(name, set())
1087
1224
p.update(map(weave._idx_to_name, weave._parents[idx]))
1088
1225
return combined
1229
"""Show the weave's table-of-contents"""
1230
print '%6s %50s %10s %10s' % ('ver', 'name', 'sha1', 'parents')
1231
for i in (6, 50, 10, 10):
1234
for i in range(w.num_versions()):
1237
parent_str = ' '.join(map(str, w._parents[i]))
1238
print '%6d %-50.50s %10.10s %s' % (i, name, sha1, parent_str)
1242
def weave_stats(weave_file, pb):
1243
from bzrlib.weavefile import read_weave
1245
wf = file(weave_file, 'rb')
1247
# FIXME: doesn't work on pipes
1248
weave_size = wf.tell()
1252
for i in range(vers):
1253
pb.update('checking sizes', i, vers)
1254
for origin, lineno, line in w._extract([i]):
1259
print 'versions %9d' % vers
1260
print 'weave file %9d bytes' % weave_size
1261
print 'total contents %9d bytes' % total
1262
print 'compression ratio %9.2fx' % (float(total) / float(weave_size))
1265
print 'average size %9d bytes' % avg
1266
print 'relative size %9.2fx' % (float(weave_size) / float(avg))
1270
print """bzr weave tool
1272
Experimental tool for weave algorithm.
1275
weave init WEAVEFILE
1276
Create an empty weave file
1277
weave get WEAVEFILE VERSION
1278
Write out specified version.
1279
weave check WEAVEFILE
1280
Check consistency of all versions.
1282
Display table of contents.
1283
weave add WEAVEFILE NAME [BASE...] < NEWTEXT
1284
Add NEWTEXT, with specified parent versions.
1285
weave annotate WEAVEFILE VERSION
1286
Display origin of each line.
1287
weave merge WEAVEFILE VERSION1 VERSION2 > OUT
1288
Auto-merge two versions and display conflicts.
1289
weave diff WEAVEFILE VERSION1 VERSION2
1290
Show differences between two versions.
1294
% weave init foo.weave
1296
% weave add foo.weave ver0 < foo.txt
1299
(create updated version)
1301
% weave get foo.weave 0 | diff -u - foo.txt
1302
% weave add foo.weave ver1 0 < foo.txt
1305
% weave get foo.weave 0 > foo.txt (create forked version)
1307
% weave add foo.weave ver2 0 < foo.txt
1310
% weave merge foo.weave 1 2 > foo.txt (merge them)
1311
% vi foo.txt (resolve conflicts)
1312
% weave add foo.weave merged 1 2 < foo.txt (commit merged version)
1324
# in case we're run directly from the subdirectory
1325
sys.path.append('..')
1327
from bzrlib.weavefile import write_weave, read_weave
1328
from bzrlib.progress import ProgressBar
1343
return read_weave(file(argv[2], 'rb'))
1349
# at the moment, based on everything in the file
1351
parents = map(int, argv[4:])
1352
lines = sys.stdin.readlines()
1353
ver = w.add(name, parents, lines)
1354
write_weave(w, file(argv[2], 'wb'))
1355
print 'added version %r %d' % (name, ver)
1358
if os.path.exists(fn):
1359
raise IOError("file exists")
1361
write_weave(w, file(fn, 'wb'))
1362
elif cmd == 'get': # get one version
1364
sys.stdout.writelines(w.get_iter(int(argv[3])))
1369
v1, v2 = map(int, argv[3:5])
1372
diff_gen = bzrlib.patiencediff.unified_diff(lines1, lines2,
1373
'%s version %d' % (fn, v1),
1374
'%s version %d' % (fn, v2))
1375
sys.stdout.writelines(diff_gen)
1377
elif cmd == 'annotate':
1379
# newline is added to all lines regardless; too hard to get
1380
# reasonable formatting otherwise
1382
for origin, text in w.annotate(int(argv[3])):
1383
text = text.rstrip('\r\n')
1385
print ' | %s' % (text)
1387
print '%5d | %s' % (origin, text)
1393
elif cmd == 'stats':
1394
weave_stats(argv[2], ProgressBar())
1396
elif cmd == 'check':
1401
print '%d versions ok' % w.num_versions()
1403
elif cmd == 'inclusions':
1405
print ' '.join(map(str, w.inclusions([int(argv[3])])))
1407
elif cmd == 'parents':
1409
print ' '.join(map(str, w._parents[int(argv[3])]))
1411
elif cmd == 'plan-merge':
1412
# replaced by 'bzr weave-plan-merge'
1414
for state, line in w.plan_merge(int(argv[3]), int(argv[4])):
1416
print '%14s | %s' % (state, line),
1417
elif cmd == 'merge':
1418
# replaced by 'bzr weave-merge-text'
1420
p = w.plan_merge(int(argv[3]), int(argv[4]))
1421
sys.stdout.writelines(w.weave_merge(p))
1423
raise ValueError('unknown command %r' % cmd)
1426
if __name__ == '__main__':
1428
sys.exit(main(sys.argv))
1431
class InterWeave(InterVersionedFile):
1432
"""Optimised code paths for weave to weave operations."""
1434
_matching_file_from_factory = staticmethod(WeaveFile)
1435
_matching_file_to_factory = staticmethod(WeaveFile)
1438
def is_compatible(source, target):
1439
"""Be compatible with weaves."""
1441
return (isinstance(source, Weave) and
1442
isinstance(target, Weave))
1443
except AttributeError:
1446
def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
1447
"""See InterVersionedFile.join."""
1448
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
1449
if self.target.versions() == [] and version_ids is None:
1450
self.target._copy_weave_content(self.source)
1453
self.target._join(self.source, pb, msg, version_ids, ignore_missing)
1454
except errors.WeaveParentMismatch:
1455
self.target._reweave(self.source, pb, msg)
1458
InterVersionedFile.register_optimiser(InterWeave)