/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.64.5 by Ian Clatworthy
first cut at generic processing method
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
17
"""An abstraction of a repository providing just the bits importing needs."""
0.64.5 by Ian Clatworthy
first cut at generic processing method
18
19
0.64.49 by Ian Clatworthy
skip check re fulltext storage better than delta for inventories when in experimental mode
20
from bzrlib import errors, knit, lru_cache, osutils
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
21
from bzrlib import revision as _mod_revision
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
22
23
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
24
class AbstractRevisionStore(object):
    """Base class for objects that load revisions into a repository.

    Subclasses must provide _load_texts(); the remaining hooks
    (_add_inventory, _add_revision) have default implementations that
    delegate to the wrapped repository.
    """

    def __init__(self, repo):
        """An object responsible for loading revisions into a repository.

        NOTE: Repository locking is not managed by this class. Clients
        should take a write lock, call load() multiple times, then release
        the lock.

        :param repo: the target repository
        """
        self.repo = repo
        # Only formats that advertise _commit_inv_deltas get inventories
        # added as deltas; every other format falls back to full inventories.
        self.try_inv_deltas = getattr(self.repo._format, '_commit_inv_deltas',
            False)

    def expects_rich_root(self):
        """Does this store expect inventories with rich roots?"""
        return self.repo.supports_rich_root()

    def get_inventory(self, revision_id):
        """Get a stored inventory."""
        return self.repo.get_inventory(revision_id)

    def get_file_text(self, revision_id, file_id):
        """Get the text stored for a file in a given revision."""
        revtree = self.repo.revision_tree(revision_id)
        return revtree.get_file_text(file_id)

    def get_file_lines(self, revision_id, file_id):
        """Get the lines stored for a file in a given revision."""
        # Reuse get_file_text() so the revision tree lookup lives in one
        # place (this method previously read an undefined local).
        return osutils.split_lines(self.get_file_text(revision_id, file_id))

    def load(self, rev, inv, signature, text_provider,
        inventories_provider=None):
        """Load a revision.

        :param rev: the Revision
        :param inv: the inventory
        :param signature: signing information
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param inventories_provider: a callable expecting a list of
            revision-ids, that returns:
              * the list of revision-ids present in the repository
              * the list of inventories for the revision-id's,
                including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        """
        # NOTE: This is bzrlib.repository._install_revision refactored
        # to provide more flexibility in how previous revisions are cached,
        # data is fed in, etc.
        if inventories_provider is None:
            inventories_provider = self._default_inventories_provider
        present_parents, parent_invs = inventories_provider(rev.parent_ids)
        self._load_texts(rev.revision_id, inv.iter_entries(), parent_invs,
            text_provider)
        try:
            rev.inventory_sha1 = self._add_inventory(rev.revision_id,
                inv, present_parents, parent_invs)
        except errors.RevisionAlreadyPresent:
            # The inventory is already stored; carry on and record the
            # signature and revision.
            pass
        if signature is not None:
            self.repo.add_signature_text(rev.revision_id, signature)
        self._add_revision(rev, inv)

    def load_using_delta(self, rev, basis_inv, inv_delta, signature,
        text_provider, inventories_provider=None):
        """Load a revision described as a delta against a basis inventory.

        :param rev: the Revision
        :param basis_inv: the basis inventory
        :param inv_delta: the inventory delta
        :param signature: signing information
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param inventories_provider: a callable expecting a list of
            revision-ids, that returns:
              * the list of revision-ids present in the repository
              * the list of inventories for the revision-id's,
                including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        :returns: the inventory built by applying inv_delta to a copy of
            basis_inv
        """
        # Work on a copy so the caller's basis inventory is left untouched.
        inv = basis_inv.copy()
        inv.apply_delta(inv_delta)
        inv.root.revision = rev.revision_id
        self.load(rev, inv, signature, text_provider, inventories_provider)
        return inv

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """Load texts to a repository for inventory entries.

        This method is provided for subclasses to use or override.

        :param revision_id: the revision identifier
        :param entries: iterator over the inventory entries
        :param parent_invs: the parent inventories
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        """
        raise NotImplementedError(self._load_texts)

    def _add_inventory(self, revision_id, inv, parents, parent_invs):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
                        is known to have and are in the repository already.
        :param parent_invs: the parent inventories

        :returns: The validator(which is a sha1 digest, though what is sha'd is
            repository format specific) of the serialized inventory.
        """
        if self.try_inv_deltas and len(parents):
            # Do we need to search for the first non-empty inventory?
            # parent_invs can be a longer list than parents if there
            # are ghosts????
            basis_inv = parent_invs[0]
            delta = inv._make_delta(basis_inv)
            return self.repo.add_inventory_by_delta(parents[0], delta,
                revision_id, parents)
        else:
            return self.repo.add_inventory(revision_id, inv, parents)

    def _add_revision(self, rev, inv):
        """Add a revision and its inventory to a repository.

        :param rev: the Revision
        :param inv: the inventory
        """
        self.repo.add_revision(rev.revision_id, rev, inv)

    def _default_inventories_provider(self, revision_ids):
        """An inventories provider that queries the repository.

        :param revision_ids: the revision-ids to look up
        :returns: a (present, inventories) tuple; inventories has one
            entry per requested id, using the inventory of
            revision_tree(None) for ids the repository does not have
        """
        present = []
        inventories = []
        for revision_id in revision_ids:
            if self.repo.has_revision(revision_id):
                present.append(revision_id)
                rev_tree = self.repo.revision_tree(revision_id)
            else:
                rev_tree = self.repo.revision_tree(None)
            inventories.append(rev_tree.inventory)
        return present, inventories
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
168
169
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
170
class RevisionStore1(AbstractRevisionStore):
    """A RevisionStore that uses the old bzrlib Repository API.

    The old API was present until bzr.dev rev 3510.
    """

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """See AbstractRevisionStore._load_texts()."""
        # Backwards compatibility hack: skip the root id.
        if not self.repo.supports_rich_root():
            path, root = entries.next()
            if root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self.repo))
        # Add the texts that are not already present
        tx = self.repo.get_transaction()
        for path, ie in entries:
            # Checking the weave for ie.revision is *really* slow (over 50%
            # of import time), so instead only entries last changed in this
            # revision are stored. This assumes that the version is not
            # already there. In the general case, a shared repository might
            # already have the revision but we arguably don't need that
            # check when importing from a foreign system.
            if ie.revision != revision_id:
                continue
            # Collect the distinct per-file parent revisions, in parent order.
            text_parents = []
            for parent_inv in parent_invs:
                if ie.file_id not in parent_inv:
                    continue
                parent_id = parent_inv[ie.file_id].revision
                if parent_id in text_parents:
                    continue
                text_parents.append(parent_id)
            lines = text_provider(ie.file_id)
            vfile = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx)
            vfile.add_lines(revision_id, text_parents, lines)

    def get_file_lines(self, revision_id, file_id):
        """Get the lines stored for a file in a given revision.

        Reads the lines straight from the file's weave in the repository's
        weave store.
        """
        tx = self.repo.get_transaction()
        # Look the weave up by the file_id argument (this previously read
        # an undefined local and raised NameError).
        w = self.repo.weave_store.get_weave(file_id, tx)
        return w.get_lines(revision_id)

    def _add_revision(self, rev, inv):
        """See AbstractRevisionStore._add_revision()."""
        # There's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        self.repo._revision_store.add_revision(rev, self.repo.get_transaction())
220
221
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
222
class RevisionStore2(AbstractRevisionStore):
    """A RevisionStore that uses the new bzrlib Repository API."""

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """See AbstractRevisionStore._load_texts()."""
        # Backwards compatibility hack: skip the root id.
        if not self.repo.supports_rich_root():
            path, root = entries.next()
            if root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self.repo))
        # Map each (file_id, revision) text key to its inventory entry.
        text_keys = {}
        for path, ie in entries:
            text_keys[(ie.file_id, ie.revision)] = ie
        text_parent_map = self.repo.texts.get_parent_map(text_keys)
        missing_texts = set(text_keys) - set(text_parent_map)
        # Add the texts that are not already present
        for text_key in missing_texts:
            ie = text_keys[text_key]
            text_parents = []
            for parent_inv in parent_invs:
                if ie.file_id not in parent_inv:
                    continue
                # text_parents holds (file_id, revision) keys, so the
                # duplicate check must compare whole keys; comparing the
                # bare revision id never matched.
                parent_key = (ie.file_id, parent_inv[ie.file_id].revision)
                if parent_key in text_parents:
                    continue
                text_parents.append(parent_key)
            lines = text_provider(ie.file_id)
            self.repo.texts.add_lines(text_key, text_parents, lines)

    def get_file_lines(self, revision_id, file_id):
        """Get the lines stored for a file in a given revision.

        :raises errors.RevisionNotPresent: if the repository's texts store
            reports the (file_id, revision_id) record as absent
        """
        record = self.repo.texts.get_record_stream([(file_id, revision_id)],
            'unordered', True).next()
        if record.storage_kind == 'absent':
            raise errors.RevisionNotPresent(record.key, self.repo)
        return osutils.split_lines(record.get_bytes_as('fulltext'))

    # This is breaking imports into brisbane-core currently
    #def _add_revision(self, rev, inv):
    #    # There's no need to do everything repo.add_revision does and
    #    # doing so (since bzr.dev 3392) can be pretty slow for long
    #    # delta chains on inventories. Just do the essentials here ...
    #    _mod_revision.check_not_reserved_id(rev.revision_id)
    #    self.repo._add_revision(rev)
0.64.79 by Ian Clatworthy
support new Repository API
265
 
266
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
267
class ImportRevisionStore1(RevisionStore1):
    """A RevisionStore (old Repository API) optimised for importing.

    This implementation caches serialised inventory texts and provides
    fine-grained control over when inventories are stored as fulltexts.
    """

    def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
        random_ids=True):
        """See AbstractRevisionStore.__init__.

        :param repo: the target repository
        :param parent_texts_to_cache: the number of parent texts to cache
        :param fulltext_when: if non None, a function to call to decide
          whether to fulltext the inventory or not. The revision count
          is passed as a parameter and the result is treated as a boolean.
        :param random_ids: passed through as random_id when checking and
          indexing each new inventory version
        """
        RevisionStore1.__init__(self, repo)
        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
        self.fulltext_when = fulltext_when
        self.random_ids = random_ids
        self.revision_count = 0

    def _add_inventory(self, revision_id, inv, parents, parent_invs):
        """See AbstractRevisionStore._add_inventory."""
        # Code taken from bzrlib.repository.add_inventory
        assert self.repo.is_in_write_group()
        _mod_revision.check_not_reserved_id(revision_id)
        assert inv.revision_id is None or inv.revision_id == revision_id, \
            "Mismatch between inventory revision" \
            " id and insertion revid (%r, %r)" % (inv.revision_id, revision_id)
        assert inv.root is not None
        inv_lines = self.repo._serialise_inventory_to_lines(inv)
        inv_vf = self.repo.get_inventory_weave()
        sha1, num_bytes, parent_text = self._inventory_add_lines(inv_vf,
            revision_id, parents, inv_lines, self.inv_parent_texts)
        # Remember the content just stored so it can be passed back in as
        # a parent text for later inventories.
        self.inv_parent_texts[revision_id] = parent_text
        return sha1

    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
            parent_texts):
        """See Repository._inventory_add_lines().

        :param inv_vf: the inventory versioned file to add to
        :param version_id: the id of the new inventory version
        :param parents: the parent version ids
        :param lines: the serialised inventory, as a list of lines
        :param parent_texts: a mapping of cached parent contents, or None
        :returns: a (digest, text_length, content) tuple where text_length
            is measured before any missing final newline is added
        """
        # setup parameters used in original code but not this API
        self.revision_count += 1
        if self.fulltext_when is not None:
            delta = not self.fulltext_when(self.revision_count)
        else:
            delta = inv_vf.delta
        left_matching_blocks = None
        random_id = self.random_ids
        check_content = False

        # bzrlib.knit.add_lines() but error checking optimised
        inv_vf._check_add(version_id, lines, random_id, check_content)

        ####################################################################
        # bzrlib.knit._add() but skip checking if fulltext better than delta
        ####################################################################

        line_bytes = ''.join(lines)
        digest = osutils.sha_string(line_bytes)
        present_parents = []
        for parent in parents:
            if inv_vf.has_version(parent):
                present_parents.append(parent)
        if parent_texts is None:
            parent_texts = {}

        # can only compress against the left most present parent.
        if (delta and
            (len(present_parents) == 0 or
             present_parents[0] != parents[0])):
            delta = False

        text_length = len(line_bytes)
        options = []
        if lines:
            # endswith() rather than lines[-1][-1] so an empty final line
            # cannot raise IndexError.
            if not lines[-1].endswith('\n'):
                # copy the contents of lines.
                lines = lines[:]
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'
                line_bytes += '\n'

        # Deliberately disabled relative to bzrlib.knit._add():
        #if delta:
        #    # To speed the extract of texts the delta chain is limited
        #    # to a fixed number of deltas.  This should minimize both
        #    # I/O and the time spend applying deltas.
        #    delta = inv_vf._check_should_delta(present_parents)

        assert isinstance(version_id, str)
        content = inv_vf.factory.make(lines, version_id)
        if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed.
            delta_hunks = inv_vf._merge_annotations(content, present_parents,
                parent_texts, delta, inv_vf.factory.annotated,
                left_matching_blocks)

        # NOTE: the serialised record is held in 'data' to avoid shadowing
        # the 'bytes' builtin.
        if delta:
            options.append('line-delta')
            store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
            size, data = inv_vf._data._record_to_data(version_id, digest,
                store_lines)
        else:
            options.append('fulltext')
            # isinstance is slower and we have no hierarchy.
            if inv_vf.factory.__class__ == knit.KnitPlainFactory:
                # Use the already joined bytes saving iteration time in
                # _record_to_data.
                size, data = inv_vf._data._record_to_data(version_id, digest,
                    lines, [line_bytes])
            else:
                # get mixed annotation + content and feed it into the
                # serialiser.
                store_lines = inv_vf.factory.lower_fulltext(content)
                size, data = inv_vf._data._record_to_data(version_id, digest,
                    store_lines)

        access_memo = inv_vf._data.add_raw_records([size], data)[0]
        inv_vf._index.add_versions(
            ((version_id, options, access_memo, parents),),
            random_id=random_id)
        return digest, text_length, content