/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.5 by Ian Clatworthy
first cut at generic processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Parameterised loading of revisions into a repository."""
18
19
0.64.49 by Ian Clatworthy
skip check re fulltext storage better than delta for inventories when in experimental mode
20
from bzrlib import errors, knit, lru_cache, osutils
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
21
from bzrlib import revision as _mod_revision
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
22
23
0.64.5 by Ian Clatworthy
first cut at generic processing method
24
class RevisionLoader(object):
    """Loads revisions (texts, inventory, signature) into a repository."""

    # NOTE: This is effectively bzrlib.repository._install_revision
    # refactored to be a class. When importing, we want more flexibility
    # in how previous revisions are cached, data is fed in, etc.

    def __init__(self, repo):
        """An object responsible for loading revisions into a repository.

        NOTE: Repository locking is not managed by this class. Clients
        should take a write lock, call load() multiple times, then release
        the lock.

        :param repo: the target repository
        """
        self.repo = repo

    def load(self, rev, inv, signature, text_provider,
        inventories_provider=None):
        """Load a revision into a repository.

        :param rev: the Revision
        :param inv: the inventory
        :param signature: signing information; if None, no signature
            text is added
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param inventories_provider: a callable expecting a list of
            revision-ids, that returns:
              * the list of revision-ids present in the repository
              * the list of inventories for the revision-id's,
                including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        """
        if inventories_provider is None:
            inventories_provider = self._default_inventories_provider
        present_parents, parent_invs = inventories_provider(rev.parent_ids)
        self._load_texts(rev.revision_id, inv.iter_entries(), parent_invs,
            text_provider)
        try:
            rev.inventory_sha1 = self._add_inventory(rev.revision_id,
                inv, present_parents)
        except errors.RevisionAlreadyPresent:
            # Deliberate best effort: the inventory is already stored, so
            # carry on. Note that rev.inventory_sha1 is left as the caller
            # supplied it in this case.
            pass
        repo = self.repo
        if signature is not None:
            repo.add_signature_text(rev.revision_id, signature)
        # repo.add_revision(rev.revision_id, rev, inv) is not used here:
        # there's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        repo._revision_store.add_revision(rev, repo.get_transaction())

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """Load texts to a repository for inventory entries.

        This method is provided for subclasses to use or override.

        :param revision_id: the revision identifier
        :param entries: iterator over the (path, inventory entry) pairs
        :param parent_invs: the parent inventories
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :raises errors.IncompatibleRevision: if the repository does not
            support rich roots and the root entry was not introduced by
            revision_id
        """
        # Backwards compatibility hack: skip the root id.
        if not self.repo.supports_rich_root():
            # The builtin next() works with any iterator on Python 2.6+
            # and Python 3, unlike the Python 2 only entries.next().
            _path, root = next(entries)
            if root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self.repo))
        # Add the texts that are not already present
        tx = self.repo.get_transaction()
        for _path, ie in entries:
            # Asking the weave whether ie.revision is already present is
            # *really* slow (over 50% of import time), so it is not done.
            # Instead, only texts introduced by this very revision are
            # added. This assumes that the version is not already there.
            # In the general case, a shared repository might already have
            # the revision but we arguably don't need that check when
            # importing from a foreign system.
            if ie.revision != revision_id:
                continue
            # Collect the distinct revisions of this file in the parent
            # inventories, preserving parent order.
            text_parents = []
            for parent_inv in parent_invs:
                if ie.file_id not in parent_inv:
                    continue
                parent_id = parent_inv[ie.file_id].revision
                if parent_id in text_parents:
                    continue
                text_parents.append(parent_id)
            vfile = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx)
            lines = text_provider(ie.file_id)
            vfile.add_lines(revision_id, text_parents, lines)

    def _add_inventory(self, revision_id, inv, parents):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
                        is known to have and are in the repository already.

        :returns: The validator(which is a sha1 digest, though what is sha'd is
            repository format specific) of the serialized inventory.
        """
        return self.repo.add_inventory(revision_id, inv, parents)

    def _default_inventories_provider(self, revision_ids):
        """An inventories provider that queries the repository.

        :param revision_ids: the revision-ids to look up
        :returns: a (present, inventories) tuple: the revision-ids the
            repository has, and one inventory per requested revision-id.
            Revisions the repository lacks get the inventory of
            revision_tree(None).
        """
        present = []
        inventories = []
        for revision_id in revision_ids:
            if self.repo.has_revision(revision_id):
                present.append(revision_id)
                rev_tree = self.repo.revision_tree(revision_id)
            else:
                rev_tree = self.repo.revision_tree(None)
            inventories.append(rev_tree.inventory)
        return present, inventories
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
142
143
144
class ImportRevisionLoader(RevisionLoader):
    """A RevisionLoader optimised for importing.

    This implementation keeps recently added inventory texts in an LRU
    cache and hands that cache to the inventory versioned file as the
    parent texts when the next inventory is added.
    """

    def __init__(self, repo, parent_texts_to_cache=1, random_ids=True):
        """See RevisionLoader.__init__.

        :param repo: the target repository
        :param parent_texts_to_cache: the number of parent texts to cache
        :param random_ids: passed as random_id when adding inventory lines
        """
        RevisionLoader.__init__(self, repo)
        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
        self.random_ids = random_ids

    def _add_inventory(self, revision_id, inv, parents):
        """See RevisionLoader._add_inventory."""
        # Sanity checks, as in bzrlib.repository.add_inventory.
        assert self.repo.is_in_write_group()
        _mod_revision.check_not_reserved_id(revision_id)
        assert inv.revision_id is None or inv.revision_id == revision_id, (
            "Mismatch between inventory revision id and insertion revid"
            " (%r, %r)" % (inv.revision_id, revision_id))
        assert inv.root is not None

        serialised_lines = self.repo._serialise_inventory_to_lines(inv)
        weave = self.repo.get_inventory_weave()
        digest, _num_bytes, added_text = self._inventory_add_lines(
            weave, revision_id, parents, serialised_lines,
            self.inv_parent_texts)
        # Remember the text just added so it can serve as a parent text
        # for a later inventory.
        self.inv_parent_texts[revision_id] = added_text
        return digest

    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
            parent_texts):
        """See Repository._inventory_add_lines()."""
        # Only parents already in the versioned file are passed on.
        known_parents = [parent for parent in parents if parent in inv_vf]
        return inv_vf.add_lines(
            version_id, known_parents, lines, parent_texts,
            random_id=self.random_ids, check_content=False)
186
187
188
class ExperimentalRevisionLoader(ImportRevisionLoader):
    """A RevisionLoader over optimised for importing.

    WARNING: This implementation uses undocumented bzrlib internals.
    It may not work in the future. In fact, it may not work now as
    it is an incubator for experimental code.
    """

    def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
            random_ids=True):
        """See ImportRevisionLoader.__init__.

        :param fulltext_when: if non None, a function to call to decide
          whether to fulltext the inventory or not. The revision count
          is passed as a parameter and the result is treated as a boolean.
        :param random_ids: see ImportRevisionLoader.__init__
        """
        ImportRevisionLoader.__init__(self, repo, parent_texts_to_cache,
            random_ids)
        # Number of inventories added so far; fed to fulltext_when.
        self.revision_count = 0
        self.fulltext_when = fulltext_when

    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
            parent_texts):
        """See Repository._inventory_add_lines().

        :returns: a (sha1 digest, text length, knit content) tuple
        """
        # setup parameters used in original code but not this API
        self.revision_count += 1
        if self.fulltext_when is not None:
            delta = not self.fulltext_when(self.revision_count)
        else:
            delta = inv_vf.delta
        left_matching_blocks = None
        random_id = self.random_ids
        check_content = False

        # bzrlib.knit.add_lines() but error checking optimised
        inv_vf._check_add(version_id, lines, random_id, check_content)

        ####################################################################
        # bzrlib.knit._add() but skip checking if fulltext better than delta
        ####################################################################

        line_bytes = ''.join(lines)
        digest = osutils.sha_string(line_bytes)
        present_parents = []
        for parent in parents:
            if inv_vf.has_version(parent):
                present_parents.append(parent)
        if parent_texts is None:
            parent_texts = {}

        # can only compress against the left most present parent.
        if (delta and
            (len(present_parents) == 0 or
             present_parents[0] != parents[0])):
            delta = False

        # The reported length is measured before any trailing newline is
        # appended below.
        text_length = len(line_bytes)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                # copy the contents of lines.
                lines = lines[:]
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'
                line_bytes += '\n'

        # NOTE: bzrlib.knit._add() would call
        # inv_vf._check_should_delta(present_parents) at this point to
        # limit the delta chain to a fixed number of deltas. That check is
        # deliberately skipped here.

        assert isinstance(version_id, str)
        content = inv_vf.factory.make(lines, version_id)
        if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed.
            delta_hunks = inv_vf._merge_annotations(content, present_parents,
                parent_texts, delta, inv_vf.factory.annotated,
                left_matching_blocks)

        if delta:
            options.append('line-delta')
            store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
            size, record_data = inv_vf._data._record_to_data(version_id,
                digest, store_lines)
        else:
            options.append('fulltext')
            # isinstance is slower and we have no hierarchy.
            if inv_vf.factory.__class__ == knit.KnitPlainFactory:
                # Use the already joined bytes saving iteration time in
                # _record_to_data.
                size, record_data = inv_vf._data._record_to_data(version_id,
                    digest, lines, [line_bytes])
            else:
                # get mixed annotation + content and feed it into the
                # serialiser.
                store_lines = inv_vf.factory.lower_fulltext(content)
                size, record_data = inv_vf._data._record_to_data(version_id,
                    digest, store_lines)

        access_memo = inv_vf._data.add_raw_records([size], record_data)[0]
        # The index records all the parents passed in, not only the ones
        # present in the versioned file.
        inv_vf._index.add_versions(
            ((version_id, options, access_memo, parents),),
            random_id=random_id)
        return digest, text_length, content