/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.5 by Ian Clatworthy
first cut at generic processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Parameterised loading of revisions into a repository."""
18
19
0.64.49 by Ian Clatworthy
skip check re fulltext storage better than delta for inventories when in experimental mode
20
from bzrlib import errors, knit, lru_cache, osutils
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
21
from bzrlib import revision as _mod_revision
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
22
23
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
class RevisionLoader(object):
    """Load revisions, their inventories and file texts into a repository."""

    # NOTE: This is effectively bzrlib.repository._install_revision
    # refactored to be a class. When importing, we want more flexibility
    # in how previous revisions are cached, data is fed in, etc.

    def __init__(self, repo):
        """An object responsible for loading revisions into a repository.

        NOTE: Repository locking is not managed by this class. Clients
        should take a write lock, call load() multiple times, then release
        the lock.

        :param repo: the target repository
        """
        self.repo = repo

    def load(self, rev, inv, signature, text_provider,
        inventories_provider=None):
        """Load a revision into a repository.

        The texts are added first, then the inventory, then the optional
        signature and finally the revision itself.

        :param rev: the Revision
        :param inv: the inventory
        :param signature: signing information, or None to add no signature
        :param text_provider: a callable expecting a file_id parameter
            that returns the lines of text for that file-id
        :param inventories_provider: a callable expecting a list of
            revision-ids, that returns:
              * the list of revision-ids present in the repository
              * the list of inventories for the revision-id's,
                including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        """
        if inventories_provider is None:
            inventories_provider = self._default_inventories_provider
        present_parents, parent_invs = inventories_provider(rev.parent_ids)
        self._load_texts(rev.revision_id, inv.iter_entries(), parent_invs,
            text_provider)
        try:
            rev.inventory_sha1 = self._add_inventory(rev.revision_id,
                inv, present_parents)
        except errors.RevisionAlreadyPresent:
            # Deliberate best effort: the inventory is already stored, so
            # carry on. rev.inventory_sha1 is left untouched in this case.
            pass
        if signature is not None:
            self.repo.add_signature_text(rev.revision_id, signature)
        self.repo.add_revision(rev.revision_id, rev, inv)

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """Load texts to a repository for inventory entries.

        This method is provided for subclasses to use or override.

        :param revision_id: the revision identifier
        :param entries: iterable of (path, inventory entry) pairs
        :param parent_invs: the parent inventories
        :param text_provider: a callable expecting a file_id parameter
            that returns the lines of text for that file-id
        :raises errors.IncompatibleRevision: if the repository does not
            support rich roots and the root entry was not last changed
            in revision_id
        """
        # Accept any iterable (not just an iterator) and use the next()
        # builtin so this works with both .next() and .__next__() iterators.
        entries = iter(entries)

        # Backwards compatibility hack: skip the root id.
        if not self.repo.supports_rich_root():
            path, root = next(entries)
            if root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self.repo))
        # Add the texts that are not already present
        tx = self.repo.get_transaction()
        for path, ie in entries:
            # Asking the weave whether ie.revision is already stored is
            # *really* slow: over 50% of import time. So we try another
            # way, realising that this assumes that the version is not
            # already there. In the general case, a shared repository
            # might already have the revision but we arguably don't need
            # that check when importing from a foreign system.
            if ie.revision != revision_id:
                continue
            # Collect, in parent order and without duplicates, the
            # revision in which each parent last changed this file.
            text_parents = []
            for parent_inv in parent_invs:
                if ie.file_id not in parent_inv:
                    continue
                parent_id = parent_inv[ie.file_id].revision
                if parent_id in text_parents:
                    continue
                text_parents.append(parent_id)
            vfile = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx)
            lines = text_provider(ie.file_id)
            vfile.add_lines(revision_id, text_parents, lines)

    def _add_inventory(self, revision_id, inv, parents):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
                        is known to have and are in the repository already.

        :returns: The validator(which is a sha1 digest, though what is sha'd is
            repository format specific) of the serialized inventory.
        """
        return self.repo.add_inventory(revision_id, inv, parents)

    def _default_inventories_provider(self, revision_ids):
        """An inventories provider that queries the repository.

        :param revision_ids: the revision-ids to look up
        :returns: a (present, inventories) tuple where present lists the
            revision-ids the repository has, and inventories holds one
            inventory per requested revision-id, in order. For a revision
            the repository lacks, the inventory of revision_tree(None)
            is used.
        """
        present = []
        inventories = []
        for revision_id in revision_ids:
            if self.repo.has_revision(revision_id):
                present.append(revision_id)
                rev_tree = self.repo.revision_tree(revision_id)
            else:
                rev_tree = self.repo.revision_tree(None)
            inventories.append(rev_tree.inventory)
        return present, inventories
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
136
137
138
class ImportRevisionLoader(RevisionLoader):
    """A RevisionLoader optimised for importing.

    This implementation caches serialised inventory texts.
    """

    def __init__(self, repo, parent_texts_to_cache=1, random_ids=True):
        """See RevisionLoader.__init__.

        :param repo: the target repository
        :param parent_texts_to_cache: the number of parent texts to cache
        :param random_ids: passed as random_id when inventory lines are
            added to the inventory versioned file
        """
        RevisionLoader.__init__(self, repo)
        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
        self.random_ids = random_ids

    def _add_inventory(self, revision_id, inv, parents):
        """See RevisionLoader._add_inventory.

        The content object returned when the inventory lines are added is
        remembered in the parent-text cache under revision_id so later
        inventories can reuse it.

        :raises AssertionError: if the repository is not in a write group,
            the inventory's revision id disagrees with revision_id, or the
            inventory has no root.
        """
        # Code taken from bzrlib.repository.add_inventory
        # The preconditions are raised explicitly (rather than asserted)
        # so they are still enforced when Python runs with -O.
        if not self.repo.is_in_write_group():
            raise AssertionError()
        _mod_revision.check_not_reserved_id(revision_id)
        if not (inv.revision_id is None or inv.revision_id == revision_id):
            raise AssertionError(
                "Mismatch between inventory revision"
                " id and insertion revid (%r, %r)"
                % (inv.revision_id, revision_id))
        if inv.root is None:
            raise AssertionError()
        inv_lines = self.repo._serialise_inventory_to_lines(inv)
        inv_vf = self.repo.get_inventory_weave()

        sha1, num_bytes, parent_text = self._inventory_add_lines(inv_vf,
            revision_id, parents, inv_lines, self.inv_parent_texts)
        self.inv_parent_texts[revision_id] = parent_text
        return sha1

    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
            parent_texts):
        """See Repository._inventory_add_lines().

        :param inv_vf: the inventory versioned file to add to
        :param version_id: the version (revision id) being added
        :param parents: candidate parent ids; only those already in inv_vf
            are passed on
        :param lines: the serialised inventory lines
        :param parent_texts: cache of parent texts handed to add_lines
        :returns: whatever inv_vf.add_lines returns; the caller in this
            class unpacks it as (sha1, number of bytes, parent text)
        """
        final_parents = [parent for parent in parents if parent in inv_vf]
        return inv_vf.add_lines(version_id, final_parents, lines, parent_texts,
            random_id=self.random_ids, check_content=False)
180
181
182
class ExperimentalRevisionLoader(ImportRevisionLoader):
    """A RevisionLoader over-optimised for importing.

    WARNING: This implementation uses undocumented bzrlib internals.
    It may not work in the future. In fact, it may not work now as
    it is an incubator for experimental code.
    """

    def __init__(self, repo, parent_texts_to_cache=1, fulltext_every=200,
            random_ids=True):
        """See ImportRevisionLoader.__init__.

        :param fulltext_every: how often to store an inventory fulltext:
            each time the count of inventories added through this loader
            is a multiple of this value, the inventory is stored without
            delta compression. Must be non-zero as it is used as a modulus.
        :param random_ids: forwarded to ImportRevisionLoader.__init__
        """
        ImportRevisionLoader.__init__(self, repo, parent_texts_to_cache,
            random_ids)
        self.revision_count = 0
        self.fulltext_every = fulltext_every

    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
            parent_texts):
        """See Repository._inventory_add_lines().

        Adds the serialised inventory straight into the knit's data and
        index, skipping the check of whether a fulltext would be better
        than a delta.

        :param inv_vf: the inventory knit to add to
        :param version_id: the version (revision id) being added
        :param parents: the parent ids recorded in the index for version_id
        :param lines: the serialised inventory lines
        :param parent_texts: cache of parent contents, or None
        :returns: a (sha1 digest, text length, content) tuple; the length
            is measured before any end-of-line padding is appended
        """
        # setup parameters used in original code but not this API
        self.revision_count += 1
        if self.revision_count % self.fulltext_every == 0:
            # Periodically force a fulltext.
            delta = False
        else:
            delta = inv_vf.delta
        left_matching_blocks = None
        random_id = self.random_ids
        check_content = False

        # bzrlib.knit.add_lines() but error checking optimised
        inv_vf._check_add(version_id, lines, random_id, check_content)

        ####################################################################
        # bzrlib.knit._add() but skip checking if fulltext better than delta
        ####################################################################

        line_bytes = ''.join(lines)
        digest = osutils.sha_string(line_bytes)
        present_parents = [parent for parent in parents
            if inv_vf.has_version(parent)]
        if parent_texts is None:
            parent_texts = {}

        # can only compress against the left most present parent.
        if (delta and
            (len(present_parents) == 0 or
             present_parents[0] != parents[0])):
            delta = False

        text_length = len(line_bytes)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                # copy the contents of lines.
                lines = lines[:]
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'
                line_bytes += '\n'

        # The original knit code limits the delta chain length at this
        # point via inv_vf._check_should_delta(present_parents); that
        # check is intentionally skipped here.

        assert isinstance(version_id, str)
        content = inv_vf.factory.make(lines, version_id)
        if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed.
            delta_hunks = inv_vf._merge_annotations(content, present_parents,
                parent_texts, delta, inv_vf.factory.annotated,
                left_matching_blocks)

        if delta:
            # delta_hunks is always bound here: the branch above runs
            # whenever delta is true.
            options.append('line-delta')
            store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
            size, data = inv_vf._data._record_to_data(version_id, digest,
                store_lines)
        else:
            options.append('fulltext')
            # isinstance is slower and we have no hierarchy.
            if inv_vf.factory.__class__ == knit.KnitPlainFactory:
                # Use the already joined bytes saving iteration time in
                # _record_to_data.
                size, data = inv_vf._data._record_to_data(version_id, digest,
                    lines, [line_bytes])
            else:
                # get mixed annotation + content and feed it into the
                # serialiser.
                store_lines = inv_vf.factory.lower_fulltext(content)
                size, data = inv_vf._data._record_to_data(version_id, digest,
                    store_lines)

        access_memo = inv_vf._data.add_raw_records([size], data)[0]
        inv_vf._index.add_versions(
            ((version_id, options, access_memo, parents),),
            random_id=random_id)
        return digest, text_length, content