/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.5 by Ian Clatworthy
first cut at generic processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Parameterised loading of revisions into a repository."""
18
19
0.64.49 by Ian Clatworthy
skip check re fulltext storage better than delta for inventories when in experimental mode
20
from bzrlib import errors, knit, lru_cache, osutils
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
21
from bzrlib import revision as _mod_revision
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
22
23
0.64.79 by Ian Clatworthy
support new Repository API
24
class AbstractRevisionLoader(object):
    """Base class for objects that load revisions into a repository.

    Subclasses must implement _load_texts() and may override the other
    hooks (_add_inventory, _add_revision).
    """
    # NOTE: This is effectively bzrlib.repository._install_revision
    # refactored to be a class. When importing, we want more flexibility
    # in how previous revisions are cached, data is fed in, etc.

    def __init__(self, repo):
        """An object responsible for loading revisions into a repository.

        NOTE: Repository locking is not managed by this class. Clients
        should take a write lock, call load() multiple times, then release
        the lock.

        :param repo: the target repository
        """
        self.repo = repo

    def load(self, rev, inv, signature, text_provider,
        inventories_provider=None):
        """Load a revision into a repository.

        The texts are loaded first, then the inventory, then the optional
        signature and finally the revision itself.

        :param rev: the Revision
        :param inv: the inventory
        :param signature: signing information, or None if there is none
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param inventories_provider: a callable expecting a list of
            revision-ids, that returns:
              * the list of revision-ids present in the repository
              * the list of inventories for the revision-id's,
                including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        """
        if inventories_provider is None:
            inventories_provider = self._default_inventories_provider
        present_parents, parent_invs = inventories_provider(rev.parent_ids)
        self._load_texts(rev.revision_id, inv.iter_entries(), parent_invs,
            text_provider)
        try:
            rev.inventory_sha1 = self._add_inventory(rev.revision_id,
                inv, present_parents)
        except errors.RevisionAlreadyPresent:
            # The inventory is already stored; carry on and record the
            # signature and revision.
            pass
        if signature is not None:
            self.repo.add_signature_text(rev.revision_id, signature)
        self._add_revision(rev, inv)

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """Load texts to a repository for inventory entries.

        This method is provided for subclasses to use or override.

        :param revision_id: the revision identifier
        :param entries: iterator over the inventory entries
        :param parent_invs: the parent inventories
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        """
        raise NotImplementedError(self._load_texts)

    def _add_inventory(self, revision_id, inv, parents):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
                        is known to have and are in the repository already.

        :returns: The validator(which is a sha1 digest, though what is sha'd is
            repository format specific) of the serialized inventory.
        """
        return self.repo.add_inventory(revision_id, inv, parents)

    def _add_revision(self, rev, inv):
        """Add a revision and its inventory to a repository.

        :param rev: the Revision
        :param inv: the inventory
        """
        # The repository lives on the instance; a bare ``repo`` is not
        # defined in this scope.
        self.repo.add_revision(rev.revision_id, rev, inv)

    def _default_inventories_provider(self, revision_ids):
        """An inventories provider that queries the repository.

        :param revision_ids: the revision-ids to look up
        :returns: a (present, inventories) tuple: the revision-ids the
            repository has, and one inventory per requested revision-id.
            For a revision the repository lacks, the inventory of
            revision_tree(None) is used.
        """
        present = []
        inventories = []
        for revision_id in revision_ids:
            if self.repo.has_revision(revision_id):
                present.append(revision_id)
                rev_tree = self.repo.revision_tree(revision_id)
            else:
                rev_tree = self.repo.revision_tree(None)
            inventories.append(rev_tree.inventory)
        return present, inventories
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
114
115
0.64.79 by Ian Clatworthy
support new Repository API
116
class RevisionLoader1(AbstractRevisionLoader):
    """Revision loader built on the old bzrlib Repository API.

    That API was available up to bzr.dev rev 3510.
    """

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """See AbstractRevisionLoader._load_texts()."""
        repo = self.repo
        if not repo.supports_rich_root():
            # Backwards compatibility hack: skip the root entry.
            _root_path, root_entry = entries.next()
            if root_entry.revision != revision_id:
                raise errors.IncompatibleRevision(repr(repo))
        transaction = repo.get_transaction()
        for _path, entry in entries:
            # Asking the weave whether it already holds entry.revision is
            # *really* slow (over 50% of import time), so that check is not
            # done. Instead only texts last changed by the revision being
            # loaded are added. This assumes the version is not already
            # there: a shared repository might already have it, but we
            # arguably don't need that check when importing from a foreign
            # system.
            if entry.revision == revision_id:
                file_id = entry.file_id
                # Collect the distinct parent text revisions, in parent order.
                parent_revisions = []
                for parent_inv in parent_invs:
                    if file_id in parent_inv:
                        candidate = parent_inv[file_id].revision
                        if candidate not in parent_revisions:
                            parent_revisions.append(candidate)
                text_lines = text_provider(file_id)
                weave = repo.weave_store.get_weave_or_empty(file_id,
                    transaction)
                weave.add_lines(revision_id, parent_revisions, text_lines)

    def _add_revision(self, rev, inv):
        """Store the revision, doing only the essential work.

        repo.add_revision does more than is needed here and (since
        bzr.dev 3392) can be pretty slow for long delta chains on
        inventories, so the revision store is written to directly.

        :param rev: the Revision
        :param inv: the inventory (not used by this implementation)
        """
        _mod_revision.check_not_reserved_id(rev.revision_id)
        revision_store = self.repo._revision_store
        revision_store.add_revision(rev, self.repo.get_transaction())
161
162
163
class RevisionLoader2(AbstractRevisionLoader):
    """A RevisionLoader that uses the new bzrlib Repository API."""

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """See AbstractRevisionLoader._load_texts().

        Texts are keyed by (file_id, revision) tuples. Only keys that
        repo.texts.get_parent_map() does not report are added.
        """
        # Backwards compatibility hack: skip the root id.
        if not self.repo.supports_rich_root():
            path, root = entries.next()
            if root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self.repo))
        # Map each text key to the inventory entry it came from.
        text_keys = {}
        for path, ie in entries:
            text_keys[(ie.file_id, ie.revision)] = ie
        text_parent_map = self.repo.texts.get_parent_map(text_keys)
        missing_texts = set(text_keys) - set(text_parent_map)
        # Add the texts that are not already present
        for text_key in missing_texts:
            ie = text_keys[text_key]
            text_parents = []
            for parent_inv in parent_invs:
                if ie.file_id not in parent_inv:
                    continue
                parent_key = (ie.file_id, parent_inv[ie.file_id].revision)
                # text_parents holds (file_id, revision) keys, so the
                # duplicate check must compare whole keys; testing the bare
                # revision id never matches and lets duplicates through.
                if parent_key in text_parents:
                    continue
                text_parents.append(parent_key)
            lines = text_provider(ie.file_id)
            self.repo.texts.add_lines(text_key, text_parents, lines)

    def _add_revision(self, rev, inv):
        """Store the revision, doing only the essential work.

        :param rev: the Revision
        :param inv: the inventory (not used by this implementation)
        """
        # There's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        self.repo._add_revision(rev)
198
 
199
200
class ImportRevisionLoader1(RevisionLoader1):
    """A RevisionLoader (old Repository API) optimised for importing.

    This implementation caches serialised inventory texts and provides
    fine-grained control over when inventories are stored as fulltexts.
    """

    def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
        random_ids=True):
        """See AbstractRevisionLoader.__init__.

        :param repo: the target repository
        :param parent_texts_to_cache: the number of parent texts to cache
        :param fulltext_when: if non None, a function to call to decide
          whether to fulltext the inventory or not. The revision count
          is passed as a parameter and the result is treated as a boolean.
        :param random_ids: passed on as the random_id argument when each
          inventory version is checked and indexed
        """
        RevisionLoader1.__init__(self, repo)
        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
        self.fulltext_when = fulltext_when
        self.random_ids = random_ids
        self.revision_count = 0

    def _add_inventory(self, revision_id, inv, parents):
        """See AbstractRevisionLoader._add_inventory.

        :raises AssertionError: if the repository is not in a write group,
            if inv.revision_id is set and differs from revision_id, or if
            the inventory has no root.
        """
        # Code taken from bzrlib.repository.add_inventory
        # The checks raise explicitly (rather than using ``assert``) so they
        # still run under ``python -O``.
        repo = self.repo
        if not repo.is_in_write_group():
            raise AssertionError("%r not in write group" % (repo,))
        _mod_revision.check_not_reserved_id(revision_id)
        if not (inv.revision_id is None or inv.revision_id == revision_id):
            raise AssertionError(
                "Mismatch between inventory revision"
                " id and insertion revid (%r, %r)"
                % (inv.revision_id, revision_id))
        if inv.root is None:
            raise AssertionError()
        inv_lines = repo._serialise_inventory_to_lines(inv)
        inv_vf = repo.get_inventory_weave()
        sha1, num_bytes, parent_text = self._inventory_add_lines(inv_vf,
            revision_id, parents, inv_lines, self.inv_parent_texts)
        # Remember the content so later revisions can delta against it.
        self.inv_parent_texts[revision_id] = parent_text
        return sha1

    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
            parent_texts):
        """See Repository._inventory_add_lines().

        :param inv_vf: the inventory versioned file to add the version to
        :param version_id: the id of the version being added
        :param parents: the parent version ids
        :param lines: the serialised inventory, as a list of lines
        :param parent_texts: a mapping of cached parent contents, or None
        :returns: a (digest, text_length, content) tuple where digest is
            the sha of the joined lines, text_length is the length of the
            joined lines before any end-of-line fix-up and content is the
            object built by inv_vf.factory.make()
        :raises AssertionError: if version_id is not a str
        """
        # setup parameters used in original code but not this API
        self.revision_count += 1
        if self.fulltext_when is not None:
            delta = not self.fulltext_when(self.revision_count)
        else:
            delta = inv_vf.delta
        left_matching_blocks = None
        random_id = self.random_ids
        check_content = False

        # bzrlib.knit.add_lines() but error checking optimised
        inv_vf._check_add(version_id, lines, random_id, check_content)

        ####################################################################
        # bzrlib.knit._add() but skip checking if fulltext better than delta
        ####################################################################

        line_bytes = ''.join(lines)
        digest = osutils.sha_string(line_bytes)
        present_parents = [parent for parent in parents
            if inv_vf.has_version(parent)]
        if parent_texts is None:
            parent_texts = {}

        # can only compress against the left most present parent.
        if (delta and
            (not present_parents or
             present_parents[0] != parents[0])):
            delta = False

        text_length = len(line_bytes)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                # copy the contents of lines.
                lines = lines[:]
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'
                line_bytes += '\n'

        # NOTE: bzrlib.knit._add() would call
        # inv_vf._check_should_delta(present_parents) here to limit the
        # length of the delta chain; that check is deliberately skipped.

        if not isinstance(version_id, str):
            raise AssertionError(
                "version_id must be a str, not %r" % (version_id,))
        content = inv_vf.factory.make(lines, version_id)
        if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed.
            delta_hunks = inv_vf._merge_annotations(content, present_parents,
                parent_texts, delta, inv_vf.factory.annotated,
                left_matching_blocks)

        if delta:
            options.append('line-delta')
            store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
            size, data = inv_vf._data._record_to_data(version_id, digest,
                store_lines)
        else:
            options.append('fulltext')
            # isinstance is slower and we have no hierarchy.
            if inv_vf.factory.__class__ == knit.KnitPlainFactory:
                # Use the already joined bytes saving iteration time in
                # _record_to_data.
                size, data = inv_vf._data._record_to_data(version_id, digest,
                    lines, [line_bytes])
            else:
                # get mixed annotation + content and feed it into the
                # serialiser.
                store_lines = inv_vf.factory.lower_fulltext(content)
                size, data = inv_vf._data._record_to_data(version_id, digest,
                    store_lines)

        access_memo = inv_vf._data.add_raw_records([size], data)[0]
        inv_vf._index.add_versions(
            ((version_id, options, access_memo, parents),),
            random_id=random_id)
        return digest, text_length, content
0.64.79 by Ian Clatworthy
support new Repository API
323
324
325
class ImportRevisionLoader2(RevisionLoader2):
    """A RevisionLoader (new Repository API) optimised for importing.

    Serialised inventory texts are cached as they are added.
    Fine-grained control over when inventories are stored as fulltexts
    IS PLANNED LATER.
    """

    def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
        random_ids=True):
        """See AbstractRevisionLoader.__init__.

        :param repo: the target repository
        :param parent_texts_to_cache: the number of parent texts to cache
        :param fulltext_when: if non None, a function to call to decide
          whether to fulltext the inventory or not. The revision count
          is passed as a parameter and the result is treated as a boolean.
          It is only stored by this class.
        :param random_ids: only stored by this class
        """
        RevisionLoader2.__init__(self, repo)
        self.revision_count = 0
        self.random_ids = random_ids
        self.fulltext_when = fulltext_when
        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)

    def _add_inventory(self, revision_id, inv, parents):
        """See AbstractRevisionLoader._add_inventory."""
        # Code taken from bzrlib.repository.add_inventory
        target = self.repo
        if not target.is_in_write_group():
            raise AssertionError("%r not in write group" % (target,))
        _mod_revision.check_not_reserved_id(revision_id)
        if inv.revision_id is not None and inv.revision_id != revision_id:
            raise AssertionError(
                "Mismatch between inventory revision"
                " id and insertion revid (%r, %r)"
                % (inv.revision_id, revision_id))
        if inv.root is None:
            raise AssertionError()
        serialised_lines = target._serialise_inventory_to_lines(inv)
        # The inventories store is keyed by 1-tuples of revision ids.
        parent_keys = [(parent_id,) for parent_id in parents]
        digest, _num_bytes, content = target.inventories.add_lines(
            (revision_id,), parent_keys, serialised_lines,
            check_content=False)
        self.inv_parent_texts[revision_id] = content
        return digest