59
61
# where the basis and destination are unchanged.
61
63
# FIXME: Sometimes we will be given a parents list for a revision
62
# that includes some redundant parents (i.e. already a parent of
63
# something in the list.) We should eliminate them. This can
64
# that includes some redundant parents (i.e. already a parent of
65
# something in the list.) We should eliminate them. This can
64
66
# be done fairly efficiently because the sequence numbers constrain
65
67
# the possible relationships.
67
69
# FIXME: the conflict markers should be *7* characters
69
71
from copy import copy
72
from cStringIO import StringIO
73
from difflib import SequenceMatcher
73
from ..lazy_import import lazy_import
74
lazy_import(globals(), """
75
from breezy import tsort
81
from ..errors import (
82
RevisionAlreadyPresent,
84
UnavailableRepresentation,
86
from ..osutils import dirname, sha, sha_strings, split_lines
87
from ..revision import NULL_REVISION
88
from ..sixish import (
91
from ..trace import mutter
92
from .versionedfile import (
99
from .weavefile import _read_weave_v5, write_weave_v5
102
class WeaveError(errors.BzrError):
104
_fmt = "Error in processing weave: %(msg)s"
106
def __init__(self, msg=None):
107
errors.BzrError.__init__(self)
111
class WeaveRevisionAlreadyPresent(WeaveError):
113
_fmt = "Revision {%(revision_id)s} already present in %(weave)s"
115
def __init__(self, revision_id, weave):
117
WeaveError.__init__(self)
118
self.revision_id = revision_id
122
class WeaveRevisionNotPresent(WeaveError):
124
_fmt = "Revision {%(revision_id)s} not present in %(weave)s"
126
def __init__(self, revision_id, weave):
127
WeaveError.__init__(self)
128
self.revision_id = revision_id
132
class WeaveFormatError(WeaveError):
134
_fmt = "Weave invariant violated: %(what)s"
136
def __init__(self, what):
137
WeaveError.__init__(self)
141
class WeaveParentMismatch(WeaveError):
143
_fmt = "Parents are mismatched between two revisions. %(msg)s"
146
class WeaveInvalidChecksum(WeaveError):
148
_fmt = "Text did not match its checksum: %(msg)s"
151
class WeaveTextDiffers(WeaveError):
153
_fmt = ("Weaves differ on text content. Revision:"
154
" {%(revision_id)s}, %(weave_a)s, %(weave_b)s")
156
def __init__(self, revision_id, weave_a, weave_b):
157
WeaveError.__init__(self)
158
self.revision_id = revision_id
159
self.weave_a = weave_a
160
self.weave_b = weave_b
163
class WeaveContentFactory(ContentFactory):
164
"""Content factory for streaming from weaves.
166
:seealso ContentFactory:
169
def __init__(self, version, weave):
170
"""Create a WeaveContentFactory for version from weave."""
171
ContentFactory.__init__(self)
172
self.sha1 = weave.get_sha1s([version])[version]
173
self.key = (version,)
174
parents = weave.get_parent_map([version])[version]
175
self.parents = tuple((parent,) for parent in parents)
176
self.storage_kind = 'fulltext'
179
def get_bytes_as(self, storage_kind):
180
if storage_kind == 'fulltext':
181
return self._weave.get_text(self.key[-1])
182
elif storage_kind in ('chunked', 'lines'):
183
return self._weave.get_lines(self.key[-1])
185
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
187
def iter_bytes_as(self, storage_kind):
188
if storage_kind in ('chunked', 'lines'):
189
return iter(self._weave.get_lines(self.key[-1]))
191
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
78
from bzrlib.trace import mutter
79
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
80
RevisionAlreadyPresent,
82
WeaveRevisionAlreadyPresent,
83
WeaveRevisionNotPresent,
85
import bzrlib.errors as errors
86
from bzrlib.osutils import sha_strings
87
from bzrlib.symbol_versioning import *
88
from bzrlib.tsort import topo_sort
89
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
90
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
194
93
class Weave(VersionedFile):
195
94
"""weave - versioned text file storage.
197
96
A Weave manages versions of line-based text files, keeping track
198
97
of the originating version for each line.
337
211
if not isinstance(other, Weave):
339
213
return self._parents == other._parents \
340
and self._weave == other._weave \
341
and self._sha1s == other._sha1s
214
and self._weave == other._weave \
215
and self._sha1s == other._sha1s
343
217
def __ne__(self, other):
344
218
return not self.__eq__(other)
220
@deprecated_method(zero_eight)
221
def idx_to_name(self, index):
222
"""Old public interface, the public interface is all names now."""
346
225
def _idx_to_name(self, version):
347
226
return self._names[version]
228
@deprecated_method(zero_eight)
229
def lookup(self, name):
230
"""Backwards compatability thunk:
232
Return name, as name is valid in the api now, and spew deprecation
349
237
def _lookup(self, name):
350
238
"""Convert symbolic version name to index."""
351
if not self._allow_reserved:
352
self.check_not_reserved_id(name)
354
240
return self._name_map[name]
356
242
raise RevisionNotPresent(name, self._weave_name)
244
@deprecated_method(zero_eight)
245
def iter_names(self):
246
"""Deprecated convenience function, please see VersionedFile.names()."""
247
return iter(self.names())
249
@deprecated_method(zero_eight)
251
"""See Weave.versions for the current api."""
252
return self.versions()
358
254
def versions(self):
359
255
"""See VersionedFile.versions."""
360
256
return self._names[:]
362
258
def has_version(self, version_id):
363
259
"""See VersionedFile.has_version."""
364
return (version_id in self._name_map)
260
return self._name_map.has_key(version_id)
366
262
__contains__ = has_version
368
def get_record_stream(self, versions, ordering, include_delta_closure):
369
"""Get a stream of records for versions.
371
:param versions: The versions to include. Each version is a tuple
373
:param ordering: Either 'unordered' or 'topological'. A topologically
374
sorted stream has compression parents strictly before their
376
:param include_delta_closure: If True then the closure across any
377
compression parents will be included (in the opaque data).
378
:return: An iterator of ContentFactory objects, each of which is only
379
valid until the iterator is advanced.
381
versions = [version[-1] for version in versions]
382
if ordering == 'topological':
383
parents = self.get_parent_map(versions)
384
new_versions = tsort.topo_sort(parents)
385
new_versions.extend(set(versions).difference(set(parents)))
386
versions = new_versions
387
elif ordering == 'groupcompress':
388
parents = self.get_parent_map(versions)
389
new_versions = sort_groupcompress(parents)
390
new_versions.extend(set(versions).difference(set(parents)))
391
versions = new_versions
392
for version in versions:
394
yield WeaveContentFactory(version, self)
396
yield AbsentContentFactory((version,))
398
def get_parent_map(self, version_ids):
399
"""See VersionedFile.get_parent_map."""
264
def get_delta(self, version_id):
265
"""See VersionedFile.get_delta."""
266
return self.get_deltas([version_id])[version_id]
268
def get_deltas(self, version_ids):
269
"""See VersionedFile.get_deltas."""
270
version_ids = self.get_ancestry(version_ids)
401
271
for version_id in version_ids:
402
if version_id == NULL_REVISION:
407
map(self._idx_to_name,
408
self._parents[self._lookup(version_id)]))
409
except RevisionNotPresent:
272
if not self.has_version(version_id):
273
raise RevisionNotPresent(version_id, self)
274
# try extracting all versions; parallel extraction is used
275
nv = self.num_versions()
281
last_parent_lines = {}
283
parent_inclusions = {}
288
# its simplest to generate a full set of prepared variables.
290
name = self._names[i]
291
sha1s[name] = self.get_sha1(name)
292
parents_list = self.get_parents(name)
294
parent = parents_list[0]
295
parents[name] = parent
296
parent_inclusions[name] = inclusions[parent]
299
parent_inclusions[name] = set()
300
# we want to emit start, finish, replacement_length, replacement_lines tuples.
301
diff_hunks[name] = []
302
current_hunks[name] = [0, 0, 0, []] # #start, finish, repl_length, repl_tuples
303
parent_linenums[name] = 0
305
parent_noeols[name] = False
306
last_parent_lines[name] = None
307
new_inc = set([name])
308
for p in self._parents[i]:
309
new_inc.update(inclusions[self._idx_to_name(p)])
310
# debug only, known good so far.
311
#assert set(new_inc) == set(self.get_ancestry(name)), \
312
# 'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
313
inclusions[name] = new_inc
315
nlines = len(self._weave)
317
for lineno, inserted, deletes, line in self._walk_internal():
318
# a line is active in a version if:
319
# insert is in the versions inclusions
321
# deleteset & the versions inclusions is an empty set.
322
# so - if we have a included by mapping - version is included by
323
# children, we get a list of children to examine for deletes affect
324
# ing them, which is less than the entire set of children.
325
for version_id in version_ids:
326
# The active inclusion must be an ancestor,
327
# and no ancestors must have deleted this line,
328
# because we don't support resurrection.
329
parent_inclusion = parent_inclusions[version_id]
330
inclusion = inclusions[version_id]
331
parent_active = inserted in parent_inclusion and not (deletes & parent_inclusion)
332
version_active = inserted in inclusion and not (deletes & inclusion)
333
if not parent_active and not version_active:
334
# unrelated line of ancestry
411
result[version_id] = parents
336
elif parent_active and version_active:
338
parent_linenum = parent_linenums[version_id]
339
if current_hunks[version_id] != [parent_linenum, parent_linenum, 0, []]:
340
diff_hunks[version_id].append(tuple(current_hunks[version_id]))
342
current_hunks[version_id] = [parent_linenum, parent_linenum, 0, []]
343
parent_linenums[version_id] = parent_linenum
346
noeols[version_id] = True
349
elif parent_active and not version_active:
351
current_hunks[version_id][1] += 1
352
parent_linenums[version_id] += 1
353
last_parent_lines[version_id] = line
354
elif not parent_active and version_active:
356
# noeol only occurs at the end of a file because we
357
# diff linewise. We want to show noeol changes as a
358
# empty diff unless the actual eol-less content changed.
361
if last_parent_lines[version_id][-1] != '\n':
362
parent_noeols[version_id] = True
363
except (TypeError, IndexError):
366
if theline[-1] != '\n':
367
noeols[version_id] = True
371
parent_should_go = False
373
if parent_noeols[version_id] == noeols[version_id]:
374
# no noeol toggle, so trust the weaves statement
375
# that this line is changed.
377
if parent_noeols[version_id]:
378
theline = theline + '\n'
379
elif parent_noeols[version_id]:
380
# parent has no eol, we do:
381
# our line is new, report as such..
383
elif noeols[version_id]:
384
# append a eol so that it looks like
386
theline = theline + '\n'
387
if parents[version_id] is not None:
388
#if last_parent_lines[version_id] is not None:
389
parent_should_go = True
390
if last_parent_lines[version_id] != theline:
393
#parent_should_go = False
395
current_hunks[version_id][2] += 1
396
current_hunks[version_id][3].append((inserted, theline))
398
# last hunk last parent line is not eaten
399
current_hunks[version_id][1] -= 1
400
if current_hunks[version_id][1] < 0:
401
current_hunks[version_id][1] = 0
402
# import pdb;pdb.set_trace()
403
# assert current_hunks[version_id][1] >= 0
407
version = self._idx_to_name(i)
408
if current_hunks[version] != [0, 0, 0, []]:
409
diff_hunks[version].append(tuple(current_hunks[version]))
411
for version_id in version_ids:
412
result[version_id] = (
416
diff_hunks[version_id],
414
def get_parents_with_ghosts(self, version_id):
415
raise NotImplementedError(self.get_parents_with_ghosts)
417
def insert_record_stream(self, stream):
418
"""Insert a record stream into this versioned file.
420
:param stream: A stream of records to insert.
422
:seealso VersionedFile.get_record_stream:
425
for record in stream:
426
# Raise an error when a record is missing.
427
if record.storage_kind == 'absent':
428
raise RevisionNotPresent([record.key[0]], self)
429
# adapt to non-tuple interface
430
parents = [parent[0] for parent in record.parents]
431
if record.storage_kind in ('fulltext', 'chunked', 'lines'):
433
record.key[0], parents,
434
record.get_bytes_as('lines'))
436
adapter_key = record.storage_kind, 'lines'
438
adapter = adapters[adapter_key]
440
adapter_factory = adapter_registry.get(adapter_key)
441
adapter = adapter_factory(self)
442
adapters[adapter_key] = adapter
443
lines = adapter.get_bytes(record, 'lines')
445
self.add_lines(record.key[0], parents, lines)
446
except RevisionAlreadyPresent:
420
def get_parents(self, version_id):
421
"""See VersionedFile.get_parent."""
422
return map(self._idx_to_name, self._parents[self._lookup(version_id)])
449
424
def _check_repeated_add(self, name, parents, text, sha1):
450
425
"""Check that a duplicated add is OK.
454
429
idx = self._lookup(name)
455
430
if sorted(self._parents[idx]) != sorted(parents) \
456
or sha1 != self._sha1s[idx]:
431
or sha1 != self._sha1s[idx]:
457
432
raise RevisionAlreadyPresent(name, self._weave_name)
460
def _add_lines(self, version_id, parents, lines, parent_texts,
461
left_matching_blocks, nostore_sha, random_id,
435
@deprecated_method(zero_eight)
436
def add_identical(self, old_rev_id, new_rev_id, parents):
437
"""Please use Weave.clone_text now."""
438
return self.clone_text(new_rev_id, old_rev_id, parents)
440
def _add_lines(self, version_id, parents, lines, parent_texts):
463
441
"""See VersionedFile.add_lines."""
464
idx = self._add(version_id, lines, list(map(self._lookup, parents)),
465
nostore_sha=nostore_sha)
466
return sha_strings(lines), sum(map(len, lines)), idx
468
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
442
return self._add(version_id, lines, map(self._lookup, parents))
444
@deprecated_method(zero_eight)
445
def add(self, name, parents, text, sha1=None):
446
"""See VersionedFile.add_lines for the non deprecated api."""
447
return self._add(name, text, map(self._maybe_lookup, parents), sha1)
449
def _add(self, version_id, lines, parents, sha1=None):
469
450
"""Add a single text on top of the weave.
471
452
Returns the index number of the newly added version.
474
455
Symbolic name for this version.
475
456
(Typically the revision-id of the revision that added it.)
476
If None, a name will be allocated based on the hash. (sha1:SHAHASH)
479
459
List or set of direct parent version numbers.
482
462
Sequence of lines to be added in the new version.
484
:param nostore_sha: See VersionedFile.add_lines.
465
assert isinstance(version_id, basestring)
486
466
self._check_lines_not_unicode(lines)
487
467
self._check_lines_are_lines(lines)
489
469
sha1 = sha_strings(lines)
490
if sha1 == nostore_sha:
491
raise errors.ExistingContent
492
if version_id is None:
493
version_id = b"sha1:" + sha1
494
470
if version_id in self._name_map:
495
471
return self._check_repeated_add(version_id, parents, lines, sha1)
497
473
self._check_versions(parents)
474
## self._check_lines(lines)
498
475
new_version = len(self._parents)
500
# if we abort after here the (in-memory) weave will be corrupt because
501
# only some fields are updated
477
# if we abort after here the (in-memory) weave will be corrupt because only
478
# some fields are updated
502
479
# XXX: FIXME implement a succeed-or-fail of the rest of this routine.
503
480
# - Robert Collins 20060226
504
481
self._parents.append(parents[:])
537
516
# another small special case: a merge, producing the same text
539
518
if lines == basis_lines:
542
# add a sentinel, because we can also match against the final line
521
# add a sentinal, because we can also match against the final line
543
522
basis_lineno.append(len(self._weave))
545
524
# XXX: which line of the weave should we really consider
546
525
# matches the end of the file? the current code says it's the
547
526
# last line of the weave?
549
# print 'basis_lines:', basis_lines
550
# print 'new_lines: ', lines
528
#print 'basis_lines:', basis_lines
529
#print 'new_lines: ', lines
552
s = self._matcher(None, basis_lines, lines)
531
s = SequenceMatcher(None, basis_lines, lines)
554
533
# offset gives the number of lines that have been inserted
555
# into the weave up to the current point; if the original edit
556
# instruction says to change line A then we actually change (A+offset)
534
# into the weave up to the current point; if the original edit instruction
535
# says to change line A then we actually change (A+offset)
559
538
for tag, i1, i2, j1, j2 in s.get_opcodes():
560
# i1,i2 are given in offsets within basis_lines; we need to map
561
# them back to offsets within the entire weave print 'raw match',
562
# tag, i1, i2, j1, j2
539
# i1,i2 are given in offsets within basis_lines; we need to map them
540
# back to offsets within the entire weave
541
#print 'raw match', tag, i1, i2, j1, j2
563
542
if tag == 'equal':
565
545
i1 = basis_lineno[i1]
566
546
i2 = basis_lineno[i2]
548
assert 0 <= j1 <= j2 <= len(lines)
550
#print tag, i1, i2, j1, j2
567
552
# the deletion and insertion are handled separately.
568
553
# first delete the region.
570
self._weave.insert(i1 + offset, (b'[', new_version))
571
self._weave.insert(i2 + offset + 1, (b']', new_version))
555
self._weave.insert(i1+offset, ('[', new_version))
556
self._weave.insert(i2+offset+1, (']', new_version))
576
561
# i2; we want to insert after this region to make sure
577
562
# we don't destroy ourselves
579
self._weave[i:i] = ([(b'{', new_version)] +
564
self._weave[i:i] = ([('{', new_version)]
582
567
offset += 2 + (j2 - j1)
583
568
return new_version
570
def _clone_text(self, new_version_id, old_version_id, parents):
571
"""See VersionedFile.clone_text."""
572
old_lines = self.get_text(old_version_id)
573
self.add_lines(new_version_id, parents, old_lines)
585
575
def _inclusions(self, versions):
586
576
"""Return set of all ancestors of given version(s)."""
587
577
if not len(versions):
589
579
i = set(versions)
590
for v in range(max(versions), 0, -1):
580
for v in xrange(max(versions), 0, -1):
592
582
# include all its parents
593
583
i.update(self._parents[v])
596
def get_ancestry(self, version_ids, topo_sorted=True):
585
## except IndexError:
586
## raise ValueError("version %d not present in weave" % v)
588
@deprecated_method(zero_eight)
589
def inclusions(self, version_ids):
590
"""Deprecated - see VersionedFile.get_ancestry for the replacement."""
593
if isinstance(version_ids[0], int):
594
return [self._idx_to_name(v) for v in self._inclusions(version_ids)]
596
return self.get_ancestry(version_ids)
598
def get_ancestry(self, version_ids):
597
599
"""See VersionedFile.get_ancestry."""
598
if isinstance(version_ids, bytes):
600
if isinstance(version_ids, basestring):
599
601
version_ids = [version_ids]
600
602
i = self._inclusions([self._lookup(v) for v in version_ids])
601
603
return [self._idx_to_name(v) for v in i]
605
def _check_lines(self, text):
606
if not isinstance(text, list):
607
raise ValueError("text should be a list, not %s" % type(text))
610
if not isinstance(l, basestring):
611
raise ValueError("text line should be a string or unicode, not %s"
603
616
def _check_versions(self, indexes):
604
617
"""Check everything in the sequence of indexes is valid"""
605
618
for i in indexes:
611
624
def _compatible_parents(self, my_parents, other_parents):
612
625
"""During join check that other_parents are joinable with my_parents.
614
Joinable is defined as 'is a subset of' - supersets may require
627
Joinable is defined as 'is a subset of' - supersets may require
615
628
regeneration of diffs, but subsets do not.
617
630
return len(other_parents.difference(my_parents)) == 0
619
632
def annotate(self, version_id):
620
"""Return a list of (version-id, line) tuples for version_id.
633
if isinstance(version_id, int):
634
warn('Weave.annotate(int) is deprecated. Please use version names'
635
' in all circumstances as of 0.8',
640
for origin, lineno, text in self._extract([version_id]):
641
result.append((origin, text))
644
return super(Weave, self).annotate(version_id)
646
def annotate_iter(self, version_id):
647
"""Yield list of (version-id, line) pairs for the specified version.
622
649
The index indicates when the line originated in the weave."""
623
650
incls = [self._lookup(version_id)]
624
return [(self._idx_to_name(origin), text) for origin, lineno, text in
625
self._extract(incls)]
627
def iter_lines_added_or_present_in_versions(self, version_ids=None,
651
for origin, lineno, text in self._extract(incls):
652
yield self._idx_to_name(origin), text
654
@deprecated_method(zero_eight)
656
"""_walk has become visit, a supported api."""
657
return self._walk_internal()
659
def iter_lines_added_or_present_in_versions(self, version_ids=None):
629
660
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
630
661
if version_ids is None:
631
662
version_ids = self.versions()
632
663
version_ids = set(version_ids)
633
for lineno, inserted, deletes, line in self._walk_internal(
635
if inserted not in version_ids:
637
if not line.endswith(b'\n'):
638
yield line + b'\n', inserted
664
for lineno, inserted, deletes, line in self._walk_internal(version_ids):
665
# if inserted not in version_ids then it was inserted before the
666
# versions we care about, but because weaves cannot represent ghosts
667
# properly, we do not filter down to that
668
# if inserted not in version_ids: continue
674
#@deprecated_method(zero_eight)
675
def walk(self, version_ids=None):
676
"""See VersionedFile.walk."""
677
return self._walk_internal(version_ids)
642
679
def _walk_internal(self, version_ids=None):
643
680
"""Helper method for weave actions."""
745
# 449 0 4474.6820 2356.5590 breezy.weave:556(_extract)
787
WFE = WeaveFormatError
790
# 449 0 4474.6820 2356.5590 bzrlib.weave:556(_extract)
746
791
# +285282 0 1676.8040 1676.8040 +<isinstance>
747
792
# 1.6 seconds in 'isinstance'.
748
793
# changing the first isinstance:
749
# 449 0 2814.2660 1577.1760 breezy.weave:556(_extract)
794
# 449 0 2814.2660 1577.1760 bzrlib.weave:556(_extract)
750
795
# +140414 0 762.8050 762.8050 +<isinstance>
751
796
# note that the inline time actually dropped (less function calls)
752
797
# and total processing time was halved.
753
798
# we're still spending ~1/4 of the method in isinstance though.
754
799
# so lets hard code the acceptable string classes we expect:
755
# 449 0 1202.9420 786.2930 breezy.weave:556(_extract)
756
# +71352 0 377.5560 377.5560 +<method 'append' of 'list'
800
# 449 0 1202.9420 786.2930 bzrlib.weave:556(_extract)
801
# +71352 0 377.5560 377.5560 +<method 'append' of 'list'
758
803
# yay, down to ~1/4 the initial extract time, and our inline time
759
804
# has shrunk again, with isinstance no longer dominating.
760
805
# tweaking the stack inclusion test to use a set gives:
761
# 449 0 1122.8030 713.0080 breezy.weave:556(_extract)
762
# +71352 0 354.9980 354.9980 +<method 'append' of 'list'
806
# 449 0 1122.8030 713.0080 bzrlib.weave:556(_extract)
807
# +71352 0 354.9980 354.9980 +<method 'append' of 'list'
764
809
# - a 5% win, or possibly just noise. However with large istacks that
765
# 'in' test could dominate, so I'm leaving this change in place - when
766
# its fast enough to consider profiling big datasets we can review.
810
# 'in' test could dominate, so I'm leaving this change in place -
811
# when its fast enough to consider profiling big datasets we can review.
768
816
for l in self._weave:
769
817
if l.__class__ == tuple:
776
825
iset.remove(istack.pop())
778
827
if v in included:
781
832
if v in included:
784
raise AssertionError()
836
assert l.__class__ in (str, unicode)
786
837
if isactive is None:
787
isactive = (not dset) and istack and (
788
istack[-1] in included)
838
isactive = (not dset) and istack and (istack[-1] in included)
790
840
result.append((istack[-1], lineno, l))
793
843
raise WeaveFormatError("unclosed insertion blocks "
794
"at end of weave: %s" % istack)
844
"at end of weave: %s" % istack)
796
raise WeaveFormatError(
797
"unclosed deletion blocks at end of weave: %s" % dset)
846
raise WeaveFormatError("unclosed deletion blocks at end of weave: %s"
850
@deprecated_method(zero_eight)
851
def get_iter(self, name_or_index):
852
"""Deprecated, please do not use. Lookups are not not needed.
854
Please use get_lines now.
856
return iter(self.get_lines(self._maybe_lookup(name_or_index)))
858
@deprecated_method(zero_eight)
859
def maybe_lookup(self, name_or_index):
860
"""Deprecated, please do not use. Lookups are not not needed."""
861
return self._maybe_lookup(name_or_index)
800
863
def _maybe_lookup(self, name_or_index):
801
864
"""Convert possible symbolic name to index, or pass through indexes.
803
866
NOT FOR PUBLIC USE.
805
# GZ 2017-04-01: This used to check for long as well, but I don't think
806
# there are python implementations with sys.maxsize > sys.maxint
807
if isinstance(name_or_index, int):
868
if isinstance(name_or_index, (int, long)):
808
869
return name_or_index
810
871
return self._lookup(name_or_index)
873
@deprecated_method(zero_eight)
874
def get(self, version_id):
875
"""Please use either Weave.get_text or Weave.get_lines as desired."""
876
return self.get_lines(version_id)
812
878
def get_lines(self, version_id):
813
879
"""See VersionedFile.get_lines()."""
814
880
int_index = self._maybe_lookup(version_id)
815
result = [line for (origin, lineno, line)
816
in self._extract([int_index])]
881
result = [line for (origin, lineno, line) in self._extract([int_index])]
817
882
expected_sha1 = self._sha1s[int_index]
818
883
measured_sha1 = sha_strings(result)
819
884
if measured_sha1 != expected_sha1:
820
raise WeaveInvalidChecksum(
821
'file %s, revision %s, expected: %s, measured %s'
822
% (self._weave_name, version_id,
823
expected_sha1, measured_sha1))
826
def get_sha1s(self, version_ids):
827
"""See VersionedFile.get_sha1s()."""
829
for v in version_ids:
830
result[v] = self._sha1s[self._lookup(v)]
885
raise errors.WeaveInvalidChecksum(
886
'file %s, revision %s, expected: %s, measured %s'
887
% (self._weave_name, version_id,
888
expected_sha1, measured_sha1))
891
def get_sha1(self, version_id):
892
"""See VersionedFile.get_sha1()."""
893
return self._sha1s[self._lookup(version_id)]
895
@deprecated_method(zero_eight)
896
def numversions(self):
897
"""How many versions are in this weave?
899
Deprecated in favour of num_versions.
901
return self.num_versions()
833
903
def num_versions(self):
834
904
"""How many versions are in this weave?"""
835
return len(self._parents)
905
l = len(self._parents)
906
assert l == len(self._sha1s)
837
909
__len__ = num_versions
886
955
# The active inclusion must be an ancestor,
887
956
# and no ancestors must have deleted this line,
888
957
# because we don't support resurrection.
889
if ((insert in name_inclusions) and
890
not (deleteset & name_inclusions)):
958
if (insert in name_inclusions) and not (deleteset & name_inclusions):
891
959
sha1s[name].update(line)
893
961
for i in range(nv):
894
962
version = self._idx_to_name(i)
895
hd = sha1s[version].hexdigest().encode()
963
hd = sha1s[version].hexdigest()
896
964
expected = self._sha1s[i]
897
965
if hd != expected:
898
raise WeaveInvalidChecksum(
899
"mismatched sha1 for version %s: "
900
"got %s, expected %s"
901
% (version, hd, expected))
966
raise errors.WeaveInvalidChecksum(
967
"mismatched sha1 for version %s: "
968
"got %s, expected %s"
969
% (version, hd, expected))
903
971
# TODO: check insertions are properly nested, that there are
904
972
# no lines outside of insertion blocks, that deletions are
905
973
# properly paired, etc.
975
def _join(self, other, pb, msg, version_ids, ignore_missing):
976
"""Worker routine for join()."""
977
if not other.versions():
978
return # nothing to update, easy
981
for version_id in version_ids:
982
if not other.has_version(version_id) and not ignore_missing:
983
raise RevisionNotPresent(version_id, self._weave_name)
985
version_ids = other.versions()
987
# two loops so that we do not change ourselves before verifying it
989
# work through in index order to make sure we get all dependencies
992
# get the selected versions only that are in other.versions.
993
version_ids = set(other.versions()).intersection(set(version_ids))
994
# pull in the referenced graph.
995
version_ids = other.get_ancestry(version_ids)
996
pending_graph = [(version, other.get_parents(version)) for
997
version in version_ids]
998
for name in topo_sort(pending_graph):
999
other_idx = other._name_map[name]
1000
# returns True if we have it, False if we need it.
1001
if not self._check_version_consistent(other, other_idx, name):
1002
names_to_join.append((other_idx, name))
1011
for other_idx, name in names_to_join:
1012
# TODO: If all the parents of the other version are already
1013
# present then we can avoid some work by just taking the delta
1014
# and adjusting the offsets.
1015
new_parents = self._imported_parents(other, other_idx)
1016
sha1 = other._sha1s[other_idx]
1021
pb.update(msg, merged, len(names_to_join))
1023
lines = other.get_lines(other_idx)
1024
self._add(name, lines, new_parents, sha1)
1026
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
1027
merged, processed, self._weave_name, time.time()-time0))
907
1029
def _imported_parents(self, other, other_idx):
908
1030
"""Return list of parents in self corresponding to indexes in other."""
909
1031
new_parents = []
966
1092
"""A WeaveFile represents a Weave on disk and writes on change."""
968
1094
WEAVE_SUFFIX = '.weave'
970
def __init__(self, name, transport, filemode=None, create=False,
971
access_mode='w', get_scope=None):
1096
def __init__(self, name, transport, filemode=None, create=False, access_mode='w'):
972
1097
"""Create a WeaveFile.
974
1099
:param create: If not True, only open an existing knit.
976
super(WeaveFile, self).__init__(name, access_mode, get_scope=get_scope,
977
allow_reserved=False)
1101
super(WeaveFile, self).__init__(name, access_mode)
978
1102
self._transport = transport
979
1103
self._filemode = filemode
981
f = self._transport.get(name + WeaveFile.WEAVE_SUFFIX)
982
_read_weave_v5(BytesIO(f.read()), self)
1105
_read_weave_v5(self._transport.get(name + WeaveFile.WEAVE_SUFFIX), self)
983
1106
except errors.NoSuchFile:
986
1109
# new file, save it
989
def _add_lines(self, version_id, parents, lines, parent_texts,
990
left_matching_blocks, nostore_sha, random_id,
1112
def _add_lines(self, version_id, parents, lines, parent_texts):
992
1113
"""Add a version and save the weave."""
993
self.check_not_reserved_id(version_id)
994
result = super(WeaveFile, self)._add_lines(
995
version_id, parents, lines, parent_texts, left_matching_blocks,
996
nostore_sha, random_id, check_content)
1114
result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
1119
def _clone_text(self, new_version_id, old_version_id, parents):
1120
"""See VersionedFile.clone_text."""
1121
super(WeaveFile, self)._clone_text(new_version_id, old_version_id, parents)
1000
1124
def copy_to(self, name, transport):
1001
1125
"""See VersionedFile.copy_to()."""
1002
1126
# as we are all in memory always, just serialise to the new place.
1004
1128
write_weave_v5(self, sio)
1006
transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
1130
transport.put(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
1132
def create_empty(self, name, transport, filemode=None):
1133
return WeaveFile(name, transport, filemode, create=True)
1008
1135
def _save(self):
1009
1136
"""Save the weave."""
1010
1137
self._check_write_ok()
1012
1139
write_weave_v5(self, sio)
1014
bytes = sio.getvalue()
1015
path = self._weave_name + WeaveFile.WEAVE_SUFFIX
1017
self._transport.put_bytes(path, bytes, self._filemode)
1018
except errors.NoSuchFile:
1019
self._transport.mkdir(dirname(path))
1020
self._transport.put_bytes(path, bytes, self._filemode)
1141
self._transport.put(self._weave_name + WeaveFile.WEAVE_SUFFIX,
1023
1146
def get_suffixes():
1024
1147
"""See VersionedFile.get_suffixes()."""
1025
1148
return [WeaveFile.WEAVE_SUFFIX]
1027
def insert_record_stream(self, stream):
1028
super(WeaveFile, self).insert_record_stream(stream)
1150
def join(self, other, pb=None, msg=None, version_ids=None,
1151
ignore_missing=False):
1152
"""Join other into self and save."""
1153
super(WeaveFile, self).join(other, pb, msg, version_ids, ignore_missing)
1157
@deprecated_function(zero_eight)
1158
def reweave(wa, wb, pb=None, msg=None):
1159
"""reweaving is deprecation, please just use weave.join()."""
1160
_reweave(wa, wb, pb, msg)
1032
1162
def _reweave(wa, wb, pb=None, msg=None):
1033
1163
"""Combine two weaves and return the result.
1035
This works even if a revision R has different parents in
1165
This works even if a revision R has different parents in
1036
1166
wa and wb. In the resulting weave all the parents are given.
1038
This is done by just building up a new weave, maintaining ordering
1168
This is done by just building up a new weave, maintaining ordering
1039
1169
of the versions in the two inputs. More efficient approaches
1040
might be possible but it should only be necessary to do
1041
this operation rarely, when a new previously ghost version is
1170
might be possible but it should only be necessary to do
1171
this operation rarely, when a new previously ghost version is
1044
1174
:param pb: An optional progress bar, indicating how far done we are
1045
1175
:param msg: An optional message for the progress
1179
queue_a = range(wa.num_versions())
1180
queue_b = range(wb.num_versions())
1048
1181
# first determine combined parents of all versions
1049
1182
# map from version name -> all parent names
1050
1183
combined_parents = _reweave_parent_graphs(wa, wb)
1051
1184
mutter("combined parents: %r", combined_parents)
1052
order = tsort.topo_sort(combined_parents.items())
1185
order = topo_sort(combined_parents.iteritems())
1053
1186
mutter("order to reweave: %r", order)
1055
1188
if pb and not msg:
1086
1218
p = combined.setdefault(name, set())
1087
1219
p.update(map(weave._idx_to_name, weave._parents[idx]))
1088
1220
return combined
1224
"""Show the weave's table-of-contents"""
1225
print '%6s %50s %10s %10s' % ('ver', 'name', 'sha1', 'parents')
1226
for i in (6, 50, 10, 10):
1229
for i in range(w.num_versions()):
1232
parent_str = ' '.join(map(str, w._parents[i]))
1233
print '%6d %-50.50s %10.10s %s' % (i, name, sha1, parent_str)
1237
def weave_stats(weave_file, pb):
1238
from bzrlib.weavefile import read_weave
1240
wf = file(weave_file, 'rb')
1241
w = read_weave(wf, WeaveVersionedFile)
1242
# FIXME: doesn't work on pipes
1243
weave_size = wf.tell()
1247
for i in range(vers):
1248
pb.update('checking sizes', i, vers)
1249
for origin, lineno, line in w._extract([i]):
1254
print 'versions %9d' % vers
1255
print 'weave file %9d bytes' % weave_size
1256
print 'total contents %9d bytes' % total
1257
print 'compression ratio %9.2fx' % (float(total) / float(weave_size))
1260
print 'average size %9d bytes' % avg
1261
print 'relative size %9.2fx' % (float(weave_size) / float(avg))
1265
print """bzr weave tool
1267
Experimental tool for weave algorithm.
1270
weave init WEAVEFILE
1271
Create an empty weave file
1272
weave get WEAVEFILE VERSION
1273
Write out specified version.
1274
weave check WEAVEFILE
1275
Check consistency of all versions.
1277
Display table of contents.
1278
weave add WEAVEFILE NAME [BASE...] < NEWTEXT
1279
Add NEWTEXT, with specified parent versions.
1280
weave annotate WEAVEFILE VERSION
1281
Display origin of each line.
1282
weave merge WEAVEFILE VERSION1 VERSION2 > OUT
1283
Auto-merge two versions and display conflicts.
1284
weave diff WEAVEFILE VERSION1 VERSION2
1285
Show differences between two versions.
1289
% weave init foo.weave
1291
% weave add foo.weave ver0 < foo.txt
1294
(create updated version)
1296
% weave get foo.weave 0 | diff -u - foo.txt
1297
% weave add foo.weave ver1 0 < foo.txt
1300
% weave get foo.weave 0 > foo.txt (create forked version)
1302
% weave add foo.weave ver2 0 < foo.txt
1305
% weave merge foo.weave 1 2 > foo.txt (merge them)
1306
% vi foo.txt (resolve conflicts)
1307
% weave add foo.weave merged 1 2 < foo.txt (commit merged version)
1319
# in case we're run directly from the subdirectory
1320
sys.path.append('..')
1322
from bzrlib.weavefile import write_weave, read_weave
1323
from bzrlib.progress import ProgressBar
1338
return read_weave(file(argv[2], 'rb'))
1344
# at the moment, based on everything in the file
1346
parents = map(int, argv[4:])
1347
lines = sys.stdin.readlines()
1348
ver = w.add(name, parents, lines)
1349
write_weave(w, file(argv[2], 'wb'))
1350
print 'added version %r %d' % (name, ver)
1353
if os.path.exists(fn):
1354
raise IOError("file exists")
1356
write_weave(w, file(fn, 'wb'))
1357
elif cmd == 'get': # get one version
1359
sys.stdout.writelines(w.get_iter(int(argv[3])))
1362
from difflib import unified_diff
1365
v1, v2 = map(int, argv[3:5])
1368
diff_gen = unified_diff(lines1, lines2,
1369
'%s version %d' % (fn, v1),
1370
'%s version %d' % (fn, v2))
1371
sys.stdout.writelines(diff_gen)
1373
elif cmd == 'annotate':
1375
# newline is added to all lines regardless; too hard to get
1376
# reasonable formatting otherwise
1378
for origin, text in w.annotate(int(argv[3])):
1379
text = text.rstrip('\r\n')
1381
print ' | %s' % (text)
1383
print '%5d | %s' % (origin, text)
1389
elif cmd == 'stats':
1390
weave_stats(argv[2], ProgressBar())
1392
elif cmd == 'check':
1397
print '%d versions ok' % w.num_versions()
1399
elif cmd == 'inclusions':
1401
print ' '.join(map(str, w.inclusions([int(argv[3])])))
1403
elif cmd == 'parents':
1405
print ' '.join(map(str, w._parents[int(argv[3])]))
1407
elif cmd == 'plan-merge':
1408
# replaced by 'bzr weave-plan-merge'
1410
for state, line in w.plan_merge(int(argv[3]), int(argv[4])):
1412
print '%14s | %s' % (state, line),
1413
elif cmd == 'merge':
1414
# replaced by 'bzr weave-merge-text'
1416
p = w.plan_merge(int(argv[3]), int(argv[4]))
1417
sys.stdout.writelines(w.weave_merge(p))
1419
raise ValueError('unknown command %r' % cmd)
1423
def profile_main(argv):
1424
import tempfile, hotshot, hotshot.stats
1426
prof_f = tempfile.NamedTemporaryFile()
1428
prof = hotshot.Profile(prof_f.name)
1430
ret = prof.runcall(main, argv)
1433
stats = hotshot.stats.load(prof_f.name)
1435
stats.sort_stats('cumulative')
1436
## XXX: Might like to write to stderr or the trace file instead but
1437
## print_stats seems hardcoded to stdout
1438
stats.print_stats(20)
1443
def lsprofile_main(argv):
1444
from bzrlib.lsprof import profile
1445
ret,stats = profile(main, argv)
1451
if __name__ == '__main__':
1453
if '--profile' in sys.argv:
1455
args.remove('--profile')
1456
sys.exit(profile_main(args))
1457
elif '--lsprof' in sys.argv:
1459
args.remove('--lsprof')
1460
sys.exit(lsprofile_main(args))
1462
sys.exit(main(sys.argv))
1465
class InterWeave(InterVersionedFile):
1466
"""Optimised code paths for weave to weave operations."""
1468
_matching_file_factory = staticmethod(WeaveFile)
1471
def is_compatible(source, target):
1472
"""Be compatible with weaves."""
1474
return (isinstance(source, Weave) and
1475
isinstance(target, Weave))
1476
except AttributeError:
1479
def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
1480
"""See InterVersionedFile.join."""
1481
if self.target.versions() == []:
1483
self.target._copy_weave_content(self.source)
1486
self.target._join(self.source, pb, msg, version_ids, ignore_missing)
1487
except errors.WeaveParentMismatch:
1488
self.target._reweave(self.source, pb, msg)
1491
InterVersionedFile.register_optimiser(InterWeave)