/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/weave.py

  • Committer: Andrew Bennetts
  • Date: 2008-09-08 12:59:00 UTC
  • Merged from: (3695 +trunk)
  • Merged to: This revision was merged to the branch mainline in revision 3756.
  • Revision ID: andrew.bennetts@canonical.com-20080908125900-8ywtsr7jqyyatjz0
Merge from bzr.dev.

Show diffs side-by-side

added: lines added in this revision

removed: lines removed in this revision

Lines of Context:
82
82
from bzrlib import (
83
83
    progress,
84
84
    )
85
 
from bzrlib.trace import mutter
86
85
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
87
86
        RevisionAlreadyPresent,
88
87
        RevisionNotPresent,
 
88
        UnavailableRepresentation,
89
89
        WeaveRevisionAlreadyPresent,
90
90
        WeaveRevisionNotPresent,
91
91
        )
92
92
import bzrlib.errors as errors
93
 
from bzrlib.osutils import sha_strings
 
93
from bzrlib.osutils import dirname, sha_strings, split_lines
94
94
import bzrlib.patiencediff
95
 
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
 
95
from bzrlib.revision import NULL_REVISION
 
96
from bzrlib.symbol_versioning import *
 
97
from bzrlib.trace import mutter
 
98
from bzrlib.tsort import topo_sort
 
99
from bzrlib.versionedfile import (
 
100
    AbsentContentFactory,
 
101
    adapter_registry,
 
102
    ContentFactory,
 
103
    VersionedFile,
 
104
    )
96
105
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
97
106
 
98
107
 
 
108
class WeaveContentFactory(ContentFactory):
 
109
    """Content factory for streaming from weaves.
 
110
 
 
111
    :seealso ContentFactory:
 
112
    """
 
113
 
 
114
    def __init__(self, version, weave):
 
115
        """Create a WeaveContentFactory for version from weave."""
 
116
        ContentFactory.__init__(self)
 
117
        self.sha1 = weave.get_sha1s([version])[version]
 
118
        self.key = (version,)
 
119
        parents = weave.get_parent_map([version])[version]
 
120
        self.parents = tuple((parent,) for parent in parents)
 
121
        self.storage_kind = 'fulltext'
 
122
        self._weave = weave
 
123
 
 
124
    def get_bytes_as(self, storage_kind):
 
125
        if storage_kind == 'fulltext':
 
126
            return self._weave.get_text(self.key[-1])
 
127
        else:
 
128
            raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
 
129
 
 
130
 
99
131
class Weave(VersionedFile):
100
132
    """weave - versioned text file storage.
101
133
    
186
218
    """
187
219
 
188
220
    __slots__ = ['_weave', '_parents', '_sha1s', '_names', '_name_map',
189
 
                 '_weave_name', '_matcher']
 
221
                 '_weave_name', '_matcher', '_allow_reserved']
190
222
    
191
 
    def __init__(self, weave_name=None, access_mode='w', matcher=None):
 
223
    def __init__(self, weave_name=None, access_mode='w', matcher=None,
 
224
                 get_scope=None, allow_reserved=False):
 
225
        """Create a weave.
 
226
 
 
227
        :param get_scope: A callable that returns an opaque object to be used
 
228
            for detecting when this weave goes out of scope (should stop
 
229
            answering requests or allowing mutation).
 
230
        """
192
231
        super(Weave, self).__init__(access_mode)
193
232
        self._weave = []
194
233
        self._parents = []
200
239
            self._matcher = bzrlib.patiencediff.PatienceSequenceMatcher
201
240
        else:
202
241
            self._matcher = matcher
 
242
        if get_scope is None:
 
243
            get_scope = lambda:None
 
244
        self._get_scope = get_scope
 
245
        self._scope = get_scope()
 
246
        self._access_mode = access_mode
 
247
        self._allow_reserved = allow_reserved
203
248
 
204
249
    def __repr__(self):
205
250
        return "Weave(%r)" % self._weave_name
206
251
 
 
252
    def _check_write_ok(self):
 
253
        """Is the versioned file marked as 'finished' ? Raise if it is."""
 
254
        if self._get_scope() != self._scope:
 
255
            raise errors.OutSideTransaction()
 
256
        if self._access_mode != 'w':
 
257
            raise errors.ReadOnlyObjectDirtiedError(self)
 
258
 
207
259
    def copy(self):
208
260
        """Return a deep copy of self.
209
261
        
232
284
 
233
285
    def _lookup(self, name):
234
286
        """Convert symbolic version name to index."""
235
 
        self.check_not_reserved_id(name)
 
287
        if not self._allow_reserved:
 
288
            self.check_not_reserved_id(name)
236
289
        try:
237
290
            return self._name_map[name]
238
291
        except KeyError:
248
301
 
249
302
    __contains__ = has_version
250
303
 
 
304
    def get_record_stream(self, versions, ordering, include_delta_closure):
 
305
        """Get a stream of records for versions.
 
306
 
 
307
        :param versions: The versions to include. Each version is a tuple
 
308
            (version,).
 
309
        :param ordering: Either 'unordered' or 'topological'. A topologically
 
310
            sorted stream has compression parents strictly before their
 
311
            children.
 
312
        :param include_delta_closure: If True then the closure across any
 
313
            compression parents will be included (in the opaque data).
 
314
        :return: An iterator of ContentFactory objects, each of which is only
 
315
            valid until the iterator is advanced.
 
316
        """
 
317
        versions = [version[-1] for version in versions]
 
318
        if ordering == 'topological':
 
319
            parents = self.get_parent_map(versions)
 
320
            new_versions = topo_sort(parents)
 
321
            new_versions.extend(set(versions).difference(set(parents)))
 
322
            versions = new_versions
 
323
        for version in versions:
 
324
            if version in self:
 
325
                yield WeaveContentFactory(version, self)
 
326
            else:
 
327
                yield AbsentContentFactory((version,))
 
328
 
251
329
    def get_parent_map(self, version_ids):
252
330
        """See VersionedFile.get_parent_map."""
253
331
        result = {}
254
332
        for version_id in version_ids:
255
 
            try:
256
 
                result[version_id] = tuple(
257
 
                    map(self._idx_to_name, self._parents[self._lookup(version_id)]))
258
 
            except RevisionNotPresent:
259
 
                pass
 
333
            if version_id == NULL_REVISION:
 
334
                parents = ()
 
335
            else:
 
336
                try:
 
337
                    parents = tuple(
 
338
                        map(self._idx_to_name,
 
339
                            self._parents[self._lookup(version_id)]))
 
340
                except RevisionNotPresent:
 
341
                    continue
 
342
            result[version_id] = parents
260
343
        return result
261
344
 
262
345
    def get_parents_with_ghosts(self, version_id):
263
346
        raise NotImplementedError(self.get_parents_with_ghosts)
264
347
 
 
348
    def insert_record_stream(self, stream):
 
349
        """Insert a record stream into this versioned file.
 
350
 
 
351
        :param stream: A stream of records to insert. 
 
352
        :return: None
 
353
        :seealso VersionedFile.get_record_stream:
 
354
        """
 
355
        adapters = {}
 
356
        for record in stream:
 
357
            # Raise an error when a record is missing.
 
358
            if record.storage_kind == 'absent':
 
359
                raise RevisionNotPresent([record.key[0]], self)
 
360
            # adapt to non-tuple interface
 
361
            parents = [parent[0] for parent in record.parents]
 
362
            if record.storage_kind == 'fulltext':
 
363
                self.add_lines(record.key[0], parents,
 
364
                    split_lines(record.get_bytes_as('fulltext')))
 
365
            else:
 
366
                adapter_key = record.storage_kind, 'fulltext'
 
367
                try:
 
368
                    adapter = adapters[adapter_key]
 
369
                except KeyError:
 
370
                    adapter_factory = adapter_registry.get(adapter_key)
 
371
                    adapter = adapter_factory(self)
 
372
                    adapters[adapter_key] = adapter
 
373
                lines = split_lines(adapter.get_bytes(
 
374
                    record, record.get_bytes_as(record.storage_kind)))
 
375
                try:
 
376
                    self.add_lines(record.key[0], parents, lines)
 
377
                except RevisionAlreadyPresent:
 
378
                    pass
 
379
 
265
380
    def _check_repeated_add(self, name, parents, text, sha1):
266
381
        """Check that a duplicated add is OK.
267
382
 
297
412
 
298
413
        :param nostore_sha: See VersionedFile.add_lines.
299
414
        """
300
 
        assert isinstance(version_id, basestring)
301
415
        self._check_lines_not_unicode(lines)
302
416
        self._check_lines_are_lines(lines)
303
417
        if not sha1:
378
492
            #print 'raw match', tag, i1, i2, j1, j2
379
493
            if tag == 'equal':
380
494
                continue
381
 
 
382
495
            i1 = basis_lineno[i1]
383
496
            i2 = basis_lineno[i2]
384
 
 
385
 
            assert 0 <= j1 <= j2 <= len(lines)
386
 
 
387
 
            #print tag, i1, i2, j1, j2
388
 
 
389
497
            # the deletion and insertion are handled separately.
390
498
            # first delete the region.
391
499
            if i1 != i2:
404
512
                offset += 2 + (j2 - j1)
405
513
        return new_version
406
514
 
407
 
    def _clone_text(self, new_version_id, old_version_id, parents):
408
 
        """See VersionedFile.clone_text."""
409
 
        old_lines = self.get_text(old_version_id)
410
 
        self.add_lines(new_version_id, parents, old_lines)
411
 
 
412
515
    def _inclusions(self, versions):
413
516
        """Return set of all ancestors of given version(s)."""
414
517
        if not len(versions):
456
559
        """
457
560
        return len(other_parents.difference(my_parents)) == 0
458
561
 
459
 
    def annotate_iter(self, version_id):
460
 
        """Yield list of (version-id, line) pairs for the specified version.
 
562
    def annotate(self, version_id):
 
563
        """Return a list of (version-id, line) tuples for version_id.
461
564
 
462
565
        The index indicates when the line originated in the weave."""
463
566
        incls = [self._lookup(version_id)]
464
 
        for origin, lineno, text in self._extract(incls):
465
 
            yield self._idx_to_name(origin), text
 
567
        return [(self._idx_to_name(origin), text) for origin, lineno, text in
 
568
            self._extract(incls)]
466
569
 
467
570
    def iter_lines_added_or_present_in_versions(self, version_ids=None,
468
571
                                                pb=None):
497
600
                elif c == '}':
498
601
                    istack.pop()
499
602
                elif c == '[':
500
 
                    assert self._names[v] not in dset
501
603
                    dset.add(self._names[v])
502
604
                elif c == ']':
503
605
                    dset.remove(self._names[v])
504
606
                else:
505
607
                    raise WeaveFormatError('unexpected instruction %r' % v)
506
608
            else:
507
 
                assert l.__class__ in (str, unicode)
508
 
                assert istack
509
609
                yield lineno, istack[-1], frozenset(dset), l
510
610
            lineno += 1
511
611
 
560
660
                # not in either revision
561
661
                yield 'irrelevant', line
562
662
 
563
 
        yield 'unchanged', ''           # terminator
564
 
 
565
663
    def _extract(self, versions):
566
664
        """Yield annotation of lines in included set.
567
665
 
621
719
                c, v = l
622
720
                isactive = None
623
721
                if c == '{':
624
 
                    assert v not in iset
625
722
                    istack.append(v)
626
723
                    iset.add(v)
627
724
                elif c == '}':
628
725
                    iset.remove(istack.pop())
629
726
                elif c == '[':
630
727
                    if v in included:
631
 
                        assert v not in dset
632
728
                        dset.add(v)
633
 
                else:
634
 
                    assert c == ']'
 
729
                elif c == ']':
635
730
                    if v in included:
636
 
                        assert v in dset
637
731
                        dset.remove(v)
 
732
                else:
 
733
                    raise AssertionError()
638
734
            else:
639
 
                assert l.__class__ in (str, unicode)
640
735
                if isactive is None:
641
736
                    isactive = (not dset) and istack and (istack[-1] in included)
642
737
                if isactive:
673
768
                       expected_sha1, measured_sha1))
674
769
        return result
675
770
 
676
 
    def get_sha1(self, version_id):
677
 
        """See VersionedFile.get_sha1()."""
678
 
        return self._sha1s[self._lookup(version_id)]
679
 
 
680
771
    def get_sha1s(self, version_ids):
681
772
        """See VersionedFile.get_sha1s()."""
682
 
        return [self._sha1s[self._lookup(v)] for v in version_ids]
 
773
        result = {}
 
774
        for v in version_ids:
 
775
            result[v] = self._sha1s[self._lookup(v)]
 
776
        return result
683
777
 
684
778
    def num_versions(self):
685
779
        """How many versions are in this weave?"""
686
780
        l = len(self._parents)
687
 
        assert l == len(self._sha1s)
688
781
        return l
689
782
 
690
783
    __len__ = num_versions
716
809
            for p in self._parents[i]:
717
810
                new_inc.update(inclusions[self._idx_to_name(p)])
718
811
 
719
 
            assert set(new_inc) == set(self.get_ancestry(name)), \
720
 
                'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
 
812
            if set(new_inc) != set(self.get_ancestry(name)):
 
813
                raise AssertionError(
 
814
                    'failed %s != %s' 
 
815
                    % (set(new_inc), set(self.get_ancestry(name))))
721
816
            inclusions[name] = new_inc
722
817
 
723
818
        nlines = len(self._weave)
753
848
        # no lines outside of insertion blocks, that deletions are
754
849
        # properly paired, etc.
755
850
 
756
 
    def _join(self, other, pb, msg, version_ids, ignore_missing):
757
 
        """Worker routine for join()."""
758
 
        if not other.versions():
759
 
            return          # nothing to update, easy
760
 
 
761
 
        if not version_ids:
762
 
            # versions is never none, InterWeave checks this.
763
 
            return 0
764
 
 
765
 
        # two loops so that we do not change ourselves before verifying it
766
 
        # will be ok
767
 
        # work through in index order to make sure we get all dependencies
768
 
        names_to_join = []
769
 
        processed = 0
770
 
        # get the selected versions only that are in other.versions.
771
 
        version_ids = set(other.versions()).intersection(set(version_ids))
772
 
        # pull in the referenced graph.
773
 
        version_ids = other.get_ancestry(version_ids)
774
 
        pending_parents = other.get_parent_map(version_ids)
775
 
        pending_graph = pending_parents.items()
776
 
        if len(pending_graph) != len(version_ids):
777
 
            raise RevisionNotPresent(
778
 
                set(version_ids) - set(pending_parents.keys()), self)
779
 
        for name in tsort.topo_sort(pending_graph):
780
 
            other_idx = other._name_map[name]
781
 
            # returns True if we have it, False if we need it.
782
 
            if not self._check_version_consistent(other, other_idx, name):
783
 
                names_to_join.append((other_idx, name))
784
 
            processed += 1
785
 
 
786
 
        if pb and not msg:
787
 
            msg = 'weave join'
788
 
 
789
 
        merged = 0
790
 
        time0 = time.time()
791
 
        for other_idx, name in names_to_join:
792
 
            # TODO: If all the parents of the other version are already
793
 
            # present then we can avoid some work by just taking the delta
794
 
            # and adjusting the offsets.
795
 
            new_parents = self._imported_parents(other, other_idx)
796
 
            sha1 = other._sha1s[other_idx]
797
 
 
798
 
            merged += 1
799
 
 
800
 
            if pb:
801
 
                pb.update(msg, merged, len(names_to_join))
802
 
           
803
 
            lines = other.get_lines(other_idx)
804
 
            self._add(name, lines, new_parents, sha1)
805
 
 
806
 
        mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
807
 
                merged, processed, self._weave_name, time.time()-time0))
808
 
 
809
851
    def _imported_parents(self, other, other_idx):
810
852
        """Return list of parents in self corresponding to indexes in other."""
811
853
        new_parents = []
868
910
 
869
911
    WEAVE_SUFFIX = '.weave'
870
912
    
871
 
    def __init__(self, name, transport, filemode=None, create=False, access_mode='w'):
 
913
    def __init__(self, name, transport, filemode=None, create=False, access_mode='w', get_scope=None):
872
914
        """Create a WeaveFile.
873
915
        
874
916
        :param create: If not True, only open an existing knit.
875
917
        """
876
 
        super(WeaveFile, self).__init__(name, access_mode)
 
918
        super(WeaveFile, self).__init__(name, access_mode, get_scope=get_scope,
 
919
            allow_reserved=False)
877
920
        self._transport = transport
878
921
        self._filemode = filemode
879
922
        try:
894
937
        self._save()
895
938
        return result
896
939
 
897
 
    def _clone_text(self, new_version_id, old_version_id, parents):
898
 
        """See VersionedFile.clone_text."""
899
 
        super(WeaveFile, self)._clone_text(new_version_id, old_version_id, parents)
900
 
        self._save
901
 
 
902
940
    def copy_to(self, name, transport):
903
941
        """See VersionedFile.copy_to()."""
904
942
        # as we are all in memory always, just serialise to the new place.
913
951
        sio = StringIO()
914
952
        write_weave_v5(self, sio)
915
953
        sio.seek(0)
916
 
        self._transport.put_file(self._weave_name + WeaveFile.WEAVE_SUFFIX,
917
 
                                 sio,
918
 
                                 self._filemode)
 
954
        bytes = sio.getvalue()
 
955
        path = self._weave_name + WeaveFile.WEAVE_SUFFIX
 
956
        try:
 
957
            self._transport.put_bytes(path, bytes, self._filemode)
 
958
        except errors.NoSuchFile:
 
959
            self._transport.mkdir(dirname(path))
 
960
            self._transport.put_bytes(path, bytes, self._filemode)
919
961
 
920
962
    @staticmethod
921
963
    def get_suffixes():
922
964
        """See VersionedFile.get_suffixes()."""
923
965
        return [WeaveFile.WEAVE_SUFFIX]
924
966
 
 
967
    def insert_record_stream(self, stream):
 
968
        super(WeaveFile, self).insert_record_stream(stream)
 
969
        self._save()
 
970
 
 
971
    @deprecated_method(one_five)
925
972
    def join(self, other, pb=None, msg=None, version_ids=None,
926
973
             ignore_missing=False):
927
974
        """Join other into self and save."""
1191
1238
if __name__ == '__main__':
1192
1239
    import sys
1193
1240
    sys.exit(main(sys.argv))
1194
 
 
1195
 
 
1196
 
class InterWeave(InterVersionedFile):
1197
 
    """Optimised code paths for weave to weave operations."""
1198
 
    
1199
 
    _matching_file_from_factory = staticmethod(WeaveFile)
1200
 
    _matching_file_to_factory = staticmethod(WeaveFile)
1201
 
    
1202
 
    @staticmethod
1203
 
    def is_compatible(source, target):
1204
 
        """Be compatible with weaves."""
1205
 
        try:
1206
 
            return (isinstance(source, Weave) and
1207
 
                    isinstance(target, Weave))
1208
 
        except AttributeError:
1209
 
            return False
1210
 
 
1211
 
    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
1212
 
        """See InterVersionedFile.join."""
1213
 
        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
1214
 
        if self.target.versions() == [] and version_ids is None:
1215
 
            self.target._copy_weave_content(self.source)
1216
 
            return
1217
 
        self.target._join(self.source, pb, msg, version_ids, ignore_missing)
1218
 
 
1219
 
 
1220
 
InterVersionedFile.register_optimiser(InterWeave)