/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.5 by Ian Clatworthy
first cut at generic processing method
# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Parameterised loading of revisions into a repository."""

0.64.49 by Ian Clatworthy
skip check re fulltext storage better than delta for inventories when in experimental mode
20
from bzrlib import errors, knit, lru_cache, osutils
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
21
from bzrlib import revision as _mod_revision
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
22
23
0.64.79 by Ian Clatworthy
support new Repository API
24
class AbstractRevisionLoader(object):
    """Base class for objects that load revisions into a repository.

    Subclasses must implement _load_texts(); the other hooks
    (_add_inventory, _add_revision, _default_inventories_provider) have
    default implementations that delegate to the repository in self.repo.
    """
    # NOTE: This is effectively bzrlib.repository._install_revision
    # refactored to be a class. When importing, we want more flexibility
    # in how previous revisions are cached, data is fed in, etc.

    def __init__(self, repo):
        """An object responsible for loading revisions into a repository.

        NOTE: Repository locking is not managed by this class. Clients
        should take a write lock, call load() multiple times, then release
        the lock.

        :param repo: the target repository
        """
        self.repo = repo

    def load(self, rev, inv, signature, text_provider,
        inventories_provider=None):
        """Load a revision into a repository.

        The texts are loaded first, then the inventory, then the
        signature (if any) and finally the revision itself.

        :param rev: the Revision
        :param inv: the inventory
        :param signature: signing information, or None if there is none
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param inventories_provider: a callable expecting a list of
            revision-ids, that returns:
              * the list of revision-ids present in the repository
              * the list of inventories for the revision-id's,
                including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        """
        if inventories_provider is None:
            inventories_provider = self._default_inventories_provider
        present_parents, parent_invs = inventories_provider(rev.parent_ids)
        self._load_texts(rev.revision_id, inv.iter_entries(), parent_invs,
            text_provider)
        try:
            rev.inventory_sha1 = self._add_inventory(rev.revision_id,
                inv, present_parents)
        except errors.RevisionAlreadyPresent:
            # Deliberately ignored: the inventory is already stored, so
            # rev.inventory_sha1 is left as the caller supplied it.
            pass
        if signature is not None:
            self.repo.add_signature_text(rev.revision_id, signature)
        self._add_revision(rev, inv)

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """Load texts to a repository for inventory entries.

        This method must be implemented by subclasses.

        :param revision_id: the revision identifier
        :param entries: iterator over the inventory entries
        :param parent_invs: the parent inventories
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(self._load_texts)

    def _add_inventory(self, revision_id, inv, parents):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
                        is known to have and are in the repository already.

        :returns: The validator(which is a sha1 digest, though what is sha'd is
            repository format specific) of the serialized inventory.
        """
        return self.repo.add_inventory(revision_id, inv, parents)

    def _add_revision(self, rev, inv):
        """Add a revision and its inventory to a repository.

        :param rev: the Revision
        :param inv: the inventory
        """
        # The repository is an attribute of the loader, not a global.
        self.repo.add_revision(rev.revision_id, rev, inv)

    def _default_inventories_provider(self, revision_ids):
        """An inventories provider that queries the repository.

        :param revision_ids: the revision-ids to look up
        :returns: a (present, inventories) tuple where present lists the
            revision-ids the repository has and inventories holds one
            inventory per requested revision-id, in order. For a missing
            revision the inventory of revision_tree(None) is used.
        """
        present = []
        inventories = []
        for revision_id in revision_ids:
            if self.repo.has_revision(revision_id):
                present.append(revision_id)
                rev_tree = self.repo.revision_tree(revision_id)
            else:
                rev_tree = self.repo.revision_tree(None)
            inventories.append(rev_tree.inventory)
        return present, inventories
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
114
115
0.64.79 by Ian Clatworthy
support new Repository API
116
class RevisionLoader1(AbstractRevisionLoader):
    """A RevisionLoader that uses the old bzrlib Repository API.

    The old API was present until bzr.dev rev 3510.
    """

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """See AbstractRevisionLoader._load_texts().

        Only entries whose revision equals revision_id are stored; each
        is added to the weave for its file-id, with the revisions of that
        file-id in the parent inventories as text parents.

        :raises errors.IncompatibleRevision: if the repository does not
            support rich roots and the root entry's revision is not
            revision_id
        """
        # Backwards compatibility hack: skip the root id.
        if not self.repo.supports_rich_root():
            path, root = entries.next()
            if root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self.repo))
        # Add the texts that are not already present
        tx = self.repo.get_transaction()
        for path, ie in entries:
            # This test is *really* slow: over 50% of import time
            #w = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx)
            #if ie.revision in w:
            #    continue
            # Try another way, realising that this assumes that the
            # version is not already there. In the general case,
            # a shared repository might already have the revision but
            # we arguably don't need that check when importing from
            # a foreign system.
            if ie.revision != revision_id:
                continue
            # Collect the distinct parent revisions of this file-id,
            # preserving the order of the parent inventories.
            text_parents = []
            for parent_inv in parent_invs:
                if ie.file_id not in parent_inv:
                    continue
                parent_id = parent_inv[ie.file_id].revision
                if parent_id in text_parents:
                    continue
                text_parents.append(parent_id)
            lines = text_provider(ie.file_id)
            vfile = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx)
            vfile.add_lines(revision_id, text_parents, lines)

    def _get_lines(self, file_id, revision_id):
        """Return the lines stored for file_id at revision_id.

        :param file_id: the file identifier whose weave is read
        :param revision_id: the version to extract from that weave
        """
        tx = self.repo.get_transaction()
        # Look the weave up by the file_id argument; there is no
        # inventory entry in scope in this method.
        w = self.repo.weave_store.get_weave(file_id, tx)
        return w.get_lines(revision_id)

    def _add_revision(self, rev, inv):
        """Add the revision to the repository's revision store.

        :param rev: the Revision
        :param inv: the inventory (unused here)
        """
        # There's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        self.repo._revision_store.add_revision(rev, self.repo.get_transaction())
166
167
168
class RevisionLoader2(AbstractRevisionLoader):
    """A RevisionLoader that uses the new bzrlib Repository API."""

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """See AbstractRevisionLoader._load_texts().

        Texts are keyed by (file_id, revision); only keys that
        self.repo.texts.get_parent_map() does not report are added.

        :raises errors.IncompatibleRevision: if the repository does not
            support rich roots and the root entry's revision is not
            revision_id
        """
        # Backwards compatibility hack: skip the root id.
        if not self.repo.supports_rich_root():
            path, root = entries.next()
            if root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self.repo))
        text_keys = {}
        for path, ie in entries:
            text_keys[(ie.file_id, ie.revision)] = ie
        text_parent_map = self.repo.texts.get_parent_map(text_keys)
        missing_texts = set(text_keys) - set(text_parent_map)
        # Add the texts that are not already present
        for text_key in missing_texts:
            ie = text_keys[text_key]
            text_parents = []
            for parent_inv in parent_invs:
                if ie.file_id not in parent_inv:
                    continue
                # text_parents holds (file_id, revision) keys, so the
                # duplicate check must compare keys, not bare revisions.
                parent_key = (ie.file_id, parent_inv[ie.file_id].revision)
                if parent_key in text_parents:
                    continue
                text_parents.append(parent_key)
            lines = text_provider(ie.file_id)
            self.repo.texts.add_lines(text_key, text_parents, lines)

    def _get_lines(self, file_id, revision_id):
        """Return the lines stored for file_id at revision_id.

        :param file_id: the file identifier
        :param revision_id: the revision of the text to fetch
        :raises errors.RevisionNotPresent: if the record stream reports
            the text as absent
        """
        record = self.repo.texts.get_record_stream([(file_id, revision_id)],
            'unordered', True).next()
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self.repo)
        return osutils.split_lines(record.get_bytes_as('fulltext'))

    def _add_revision(self, rev, inv):
        """Add the revision to the repository.

        :param rev: the Revision
        :param inv: the inventory (unused here)
        """
        # There's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        self.repo._add_revision(rev)
210
 
211
212
class ImportRevisionLoader1(RevisionLoader1):
    """A RevisionLoader (old Repository API) optimised for importing.

    This implementation caches serialised inventory texts and provides
    fine-grained control over when inventories are stored as fulltexts.
    """

    def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
        random_ids=True):
        """See AbstractRevisionLoader.__init__.

        :param repo: the target repository
        :param parent_texts_to_cache: the number of parent texts to cache
        :param fulltext_when: if not None, a function to call to decide
          whether to fulltext the inventory or not. The revision count
          is passed as a parameter and the result is treated as a boolean.
        :param random_ids: passed as random_id when checking and indexing
          each added inventory version
        """
        RevisionLoader1.__init__(self, repo)
        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
        self.fulltext_when = fulltext_when
        self.random_ids = random_ids
        self.revision_count = 0

    def _add_inventory(self, revision_id, inv, parents):
        """See AbstractRevisionLoader._add_inventory.

        The content object returned for the added inventory is kept in
        self.inv_parent_texts, keyed by revision_id.
        """
        # Code taken from bzrlib.repository.add_inventory
        assert self.repo.is_in_write_group()
        _mod_revision.check_not_reserved_id(revision_id)
        assert inv.revision_id is None or inv.revision_id == revision_id, \
            "Mismatch between inventory revision" \
            " id and insertion revid (%r, %r)" % (inv.revision_id, revision_id)
        assert inv.root is not None
        inv_lines = self.repo._serialise_inventory_to_lines(inv)
        inv_vf = self.repo.get_inventory_weave()
        sha1, num_bytes, parent_text = self._inventory_add_lines(inv_vf,
            revision_id, parents, inv_lines, self.inv_parent_texts)
        self.inv_parent_texts[revision_id] = parent_text
        return sha1

    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
            parent_texts):
        """See Repository._inventory_add_lines().

        :param inv_vf: the inventory versioned file to add to
        :param version_id: the id of the version being added
        :param parents: the parent version ids
        :param lines: the serialised inventory, as a list of lines
        :param parent_texts: a mapping of cached parent content objects,
            or None
        :returns: a (digest, text_length, content) tuple: the sha of the
            joined lines, the length of the joined lines before any
            trailing newline is appended, and the content object built
            by inv_vf.factory
        """
        # setup parameters used in original code but not this API
        self.revision_count += 1
        if self.fulltext_when is not None:
            delta = not self.fulltext_when(self.revision_count)
        else:
            delta = inv_vf.delta
        left_matching_blocks = None
        random_id = self.random_ids
        check_content = False

        # bzrlib.knit.add_lines() but error checking optimised
        inv_vf._check_add(version_id, lines, random_id, check_content)

        ####################################################################
        # bzrlib.knit._add() but skip checking if fulltext better than delta
        ####################################################################

        line_bytes = ''.join(lines)
        digest = osutils.sha_string(line_bytes)
        present_parents = []
        for parent in parents:
            if inv_vf.has_version(parent):
                present_parents.append(parent)
        if parent_texts is None:
            parent_texts = {}

        # can only compress against the left most present parent.
        if (delta and
            (len(present_parents) == 0 or
             present_parents[0] != parents[0])):
            delta = False

        # Measured before the no-eol fix-up below adds a newline.
        text_length = len(line_bytes)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                # copy the contents of lines.
                lines = lines[:]
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'
                line_bytes += '\n'

        #if delta:
        #    # To speed the extract of texts the delta chain is limited
        #    # to a fixed number of deltas.  This should minimize both
        #    # I/O and the time spend applying deltas.
        #    delta = inv_vf._check_should_delta(present_parents)

        assert isinstance(version_id, str)
        content = inv_vf.factory.make(lines, version_id)
        if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed.
            delta_hunks = inv_vf._merge_annotations(content, present_parents,
                parent_texts, delta, inv_vf.factory.annotated,
                left_matching_blocks)

        if delta:
            options.append('line-delta')
            store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
            size, raw_bytes = inv_vf._data._record_to_data(version_id, digest,
                store_lines)
        else:
            options.append('fulltext')
            # isinstance is slower and we have no hierarchy.
            if inv_vf.factory.__class__ == knit.KnitPlainFactory:
                # Use the already joined bytes saving iteration time in
                # _record_to_data.
                size, raw_bytes = inv_vf._data._record_to_data(version_id,
                    digest, lines, [line_bytes])
            else:
                # get mixed annotation + content and feed it into the
                # serialiser.
                store_lines = inv_vf.factory.lower_fulltext(content)
                size, raw_bytes = inv_vf._data._record_to_data(version_id,
                    digest, store_lines)

        access_memo = inv_vf._data.add_raw_records([size], raw_bytes)[0]
        inv_vf._index.add_versions(
            ((version_id, options, access_memo, parents),),
            random_id=random_id)
        return digest, text_length, content