/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.115.12 by John Arbash Meinel
Add a bunch of direct tests for the _TreeShim interface.
1
# Copyright (C) 2008, 2009 Canonical Ltd
0.64.5 by Ian Clatworthy
first cut at generic processing method
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
17
"""An abstraction of a repository providing just the bits importing needs."""
0.64.5 by Ian Clatworthy
first cut at generic processing method
18
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
19
import cStringIO
0.64.5 by Ian Clatworthy
first cut at generic processing method
20
0.116.1 by John Arbash Meinel
Use the new KnownGraph.add_node() functionality.
21
from bzrlib import (
22
    errors,
23
    graph as _mod_graph,
24
    inventory,
25
    knit,
26
    lru_cache,
27
    osutils,
28
    revision as _mod_revision,
29
    trace,
30
    )
0.64.6 by Ian Clatworthy
generic processing method working for one revision in one branch
31
32
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
33
class _TreeShim(object):
    """Fake a Tree implementation.

    This implements just enough of the tree api to make commit builder happy.
    The tree is described by a basis inventory plus an inventory delta; file
    content is obtained from a content provider callable, falling back to the
    texts already stored in the repository.
    """

    def __init__(self, repo, basis_inv, inv_delta, content_provider):
        """Create a shim over a basis inventory and an inventory delta.

        :param repo: the repository used as a fallback source of file texts
        :param basis_inv: the inventory the delta is relative to
        :param inv_delta: an iterable of
            (old_path, new_path, file_id, inventory_entry) tuples
        :param content_provider: a callable taking a file_id and returning
            the text for it; it signals "not known" by raising KeyError
        """
        self._repo = repo
        self._content_provider = content_provider
        self._basis_inv = basis_inv
        self._inv_delta = inv_delta
        # file_id -> (new_path, new inventory entry) for everything in the
        # delta.
        self._new_info_by_id = dict((file_id, (new_path, ie))
                                    for _, new_path, file_id, ie in inv_delta)

    def id2path(self, file_id):
        """Return the path of file_id, preferring the delta over the basis.

        :raises errors.NoSuchId: if the delta records a new path of None
            for file_id
        """
        if file_id in self._new_info_by_id:
            new_path = self._new_info_by_id[file_id][0]
            if new_path is None:
                raise errors.NoSuchId(self, file_id)
            return new_path
        return self._basis_inv.id2path(file_id)

    def path2id(self, path):
        """Return the file id of the root; only path '' is supported.

        :raises NotImplementedError: for any path other than ''
        """
        # CommitBuilder currently only requires access to the root id. We don't
        # build a map of renamed files, etc. One possibility if we ever *do*
        # need more than just root, is to defer to basis_inv.path2id() and then
        # check if the file_id is in our _new_info_by_id dict. And in that
        # case, return _new_info_by_id[file_id][0]
        if path != '':
            raise NotImplementedError(_TreeShim.path2id)
        # TODO: Handle root renames?
        return self._basis_inv.root.file_id

    def get_file_with_stat(self, file_id, path=None):
        """Return (file-like object over the text, None) for file_id.

        No stat information is available, so the second item is always None.
        """
        content = self.get_file_text(file_id, path)
        sio = cStringIO.StringIO(content)
        return sio, None

    def get_file_text(self, file_id, path=None):
        """Return the text of file_id.

        The content provider is asked first. If it raises KeyError the text
        is read from the repository, using the revision recorded for file_id
        in the basis inventory.

        :raises AssertionError: if the content provider has no text for a
            file_id that is listed in the inventory delta
        """
        try:
            return self._content_provider(file_id)
        except KeyError:
            # The content wasn't shown as 'new'. Just validate this fact.
            # An explicit raise (rather than an assert statement) keeps the
            # check active under python -O.
            if file_id in self._new_info_by_id:
                raise AssertionError(
                    'no content provided for %r, which is in the delta'
                    % (file_id,))
            old_ie = self._basis_inv[file_id]
            old_text_key = (file_id, old_ie.revision)
            stream = self._repo.texts.get_record_stream([old_text_key],
                                                        'unordered', True)
            # next() works for any iterator; .next() is Python 2 only.
            return next(stream).get_bytes_as('fulltext')

    def get_symlink_target(self, file_id):
        """Return the symlink target of file_id, preferring the delta."""
        if file_id in self._new_info_by_id:
            ie = self._new_info_by_id[file_id][1]
            return ie.symlink_target
        return self._basis_inv[file_id].symlink_target

    def get_reference_revision(self, file_id, path=None):
        """Not supported by this shim; always raises NotImplementedError."""
        raise NotImplementedError(_TreeShim.get_reference_revision)

    def _delta_to_iter_changes(self):
        """Convert the inv_delta into an iter_changes repr.

        This is a generator yielding one tuple per delta item.

        :raises AssertionError: if a delta item has no new entry and its
            file_id is not in the basis inventory either
        """
        # iter_changes is:
        #   (file_id,
        #    (old_path, new_path),
        #    content_changed,
        #    (old_versioned, new_versioned),
        #    (old_parent_id, new_parent_id),
        #    (old_name, new_name),
        #    (old_kind, new_kind),
        #    (old_exec, new_exec),
        #   )
        basis_inv = self._basis_inv
        for old_path, new_path, file_id, ie in self._inv_delta:
            # Perf: Would this be faster if we did 'if file_id in basis_inv'?
            # Since the *very* common case is that the file already exists, it
            # probably is better to optimize for that
            try:
                old_ie = basis_inv[file_id]
            except errors.NoSuchId:
                # Not in the basis, so this must be an addition.
                if ie is None:
                    raise AssertionError('How is both old and new None?')
                change = (file_id,
                    (old_path, new_path),
                    True,
                    (False, True),
                    (None, ie.parent_id),
                    (None, ie.name),
                    (None, ie.kind),
                    (None, ie.executable),
                    )
            else:
                if ie is None:
                    # Present in the basis, no new entry: a deletion.
                    change = (file_id,
                        (old_path, new_path),
                        True,
                        (True, False),
                        (old_ie.parent_id, None),
                        (old_ie.name, None),
                        (old_ie.kind, None),
                        (old_ie.executable, None),
                        )
                else:
                    content_modified = (ie.text_sha1 != old_ie.text_sha1
                                        or ie.text_size != old_ie.text_size)
                    # TODO: ie.kind != old_ie.kind
                    # TODO: symlinks changing targets, content_modified?
                    change = (file_id,
                        (old_path, new_path),
                        content_modified,
                        (True, True),
                        (old_ie.parent_id, ie.parent_id),
                        (old_ie.name, ie.name),
                        (old_ie.kind, ie.kind),
                        (old_ie.executable, ie.executable),
                        )
            yield change
159
160
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
161
class AbstractRevisionStore(object):
0.64.5 by Ian Clatworthy
first cut at generic processing method
162
0.64.48 by Ian Clatworthy
one revision loader instance
163
    def __init__(self, repo):
0.64.5 by Ian Clatworthy
first cut at generic processing method
164
        """An object responsible for loading revisions into a repository.
165
166
        NOTE: Repository locking is not managed by this class. Clients
167
        should take a write lock, call load() multiple times, then release
168
        the lock.
169
170
        :param repository: the target repository
0.64.48 by Ian Clatworthy
one revision loader instance
171
        """
172
        self.repo = repo
0.116.1 by John Arbash Meinel
Use the new KnownGraph.add_node() functionality.
173
        self._graph = None
174
        self._use_known_graph = True
0.84.8 by Ian Clatworthy
ensure the chk stuff is only used on formats actually supporting it
175
        self._supports_chks = getattr(repo._format, 'supports_chks', False)
0.81.3 by Ian Clatworthy
enhance RevisionLoader to try inventory deltas & decide on rich-roots
176
177
    def expects_rich_root(self):
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
178
        """Does this store expect inventories with rich roots?"""
0.81.3 by Ian Clatworthy
enhance RevisionLoader to try inventory deltas & decide on rich-roots
179
        return self.repo.supports_rich_root()
0.64.48 by Ian Clatworthy
one revision loader instance
180
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
181
    def init_inventory(self, revision_id):
        """Build the starting inventory for a revision that has no parents.

        :param revision_id: the revision the inventory is created for
        :return: the result of _init_chk_inventory() when the repository
            format supports CHKs, otherwise a plain inventory.Inventory
        """
        if self._supports_chks:
            return self._init_chk_inventory(revision_id, inventory.ROOT_ID)
        new_inv = inventory.Inventory(revision_id=revision_id)
        if self.expects_rich_root():
            # The very first root needs to have the right revision
            new_inv.root.revision = revision_id
        return new_inv
191
192
    def _init_chk_inventory(self, revision_id, root_id):
        """Build an empty CHKInventory for a revision that has no parents.

        :param revision_id: stored as the inventory's revision_id
        :param root_id: stored as the inventory's root_id
        :return: the new inventory.CHKInventory
        """
        from bzrlib import chk_map
        # Creation parameters come from the repository and its serializer.
        store = self.repo.chk_bytes
        fmt_serializer = self.repo._format._serializer
        key_name = fmt_serializer.search_key_name
        node_limit = fmt_serializer.maximum_size

        # Maybe the rest of this ought to be part of the CHKInventory API?
        chk_inv = inventory.CHKInventory(key_name)
        chk_inv.revision_id = revision_id
        chk_inv.root_id = root_id
        key_func = chk_map.search_key_registry.get(key_name)

        def _empty_map():
            # A fresh CHKMap whose root node has the serializer's size limit.
            new_map = chk_map.CHKMap(store, None, key_func)
            new_map._root_node.set_maximum_size(node_limit)
            return new_map

        chk_inv.id_to_entry = _empty_map()
        chk_inv.parent_id_basename_to_file_id = _empty_map()
        # This map gets a key width of 2 on its root node.
        chk_inv.parent_id_basename_to_file_id._root_node._key_width = 2
        return chk_inv
214
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
215
    def get_inventory(self, revision_id):
216
        """Get a stored inventory."""
217
        return self.repo.get_inventory(revision_id)
218
219
    def get_file_text(self, revision_id, file_id):
220
        """Get the text stored for a file in a given revision."""
221
        revtree = self.repo.revision_tree(revision_id)
222
        return revtree.get_file_text(file_id)
223
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
224
    def get_file_lines(self, revision_id, file_id):
        """Return the text of file_id in revision_id, split into lines.

        The text is read through the repository's revision tree and split
        with osutils.split_lines().
        """
        text = self.repo.revision_tree(revision_id).get_file_text(file_id)
        return osutils.split_lines(text)
228
0.85.2 by Ian Clatworthy
improve per-file graph generation
229
    def start_new_revision(self, revision, parents, parent_invs):
230
        """Init the metadata needed for get_parents_and_revision_for_entry().
231
232
        :param revision: a Revision object
233
        """
234
        self._current_rev_id = revision.revision_id
235
        self._rev_parents = parents
236
        self._rev_parent_invs = parent_invs
237
        # We don't know what the branch will be so there's no real BranchConfig.
238
        # That means we won't be triggering any hooks and that's a good thing.
239
        # Without a config though, we must pass in the committer below so that
240
        # the commit builder doesn't try to look up the config.
241
        config = None
242
        # We can't use self.repo.get_commit_builder() here because it starts a
243
        # new write group. We want one write group around a batch of imports
244
        # where the default batch size is currently 10000. IGC 20090312
245
        self._commit_builder = self.repo._commit_builder_class(self.repo,
246
            parents, config, timestamp=revision.timestamp,
247
            timezone=revision.timezone, committer=revision.committer,
248
            revprops=revision.properties, revision_id=revision.revision_id)
249
250
    def get_parents_and_revision_for_entry(self, ie):
251
        """Get the parents and revision for an inventory entry.
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
252
 
0.85.2 by Ian Clatworthy
improve per-file graph generation
253
        :param ie: the inventory entry
254
        :return parents, revision_id where
0.64.160 by Ian Clatworthy
make per-file parents tuples and fix text loading in chk formats
255
            parents is the tuple of parent revision_ids for the per-file graph
0.85.2 by Ian Clatworthy
improve per-file graph generation
256
            revision_id is the revision_id to use for this entry
257
        """
258
        # Check for correct API usage
259
        if self._current_rev_id is None:
260
            raise AssertionError("start_new_revision() must be called"
261
                " before get_parents_and_revision_for_entry()")
262
        if ie.revision != self._current_rev_id:
263
            raise AssertionError("start_new_revision() registered a different"
264
                " revision (%s) to that in the inventory entry (%s)" %
265
                (self._current_rev_id, ie.revision))
266
267
        # Find the heads. This code is lifted from
268
        # repository.CommitBuilder.record_entry_contents().
269
        parent_candidate_entries = ie.parent_candidates(self._rev_parent_invs)
270
        head_set = self._commit_builder._heads(ie.file_id,
271
            parent_candidate_entries.keys())
272
        heads = []
273
        for inv in self._rev_parent_invs:
274
            if ie.file_id in inv:
275
                old_rev = inv[ie.file_id].revision
276
                if old_rev in head_set:
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
277
                    rev_id = inv[ie.file_id].revision
278
                    heads.append(rev_id)
279
                    head_set.remove(rev_id)
0.85.2 by Ian Clatworthy
improve per-file graph generation
280
281
        # Find the revision to use. If the content has not changed
282
        # since the parent, record the parent's revision.
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
283
        if len(heads) == 0:
284
            return (), ie.revision
0.85.2 by Ian Clatworthy
improve per-file graph generation
285
        parent_entry = parent_candidate_entries[heads[0]]
286
        changed = False
287
        if len(heads) > 1:
288
            changed = True
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
289
        elif (parent_entry.name != ie.name or parent_entry.kind != ie.kind or
0.85.2 by Ian Clatworthy
improve per-file graph generation
290
            parent_entry.parent_id != ie.parent_id): 
291
            changed = True
292
        elif ie.kind == 'file':
293
            if (parent_entry.text_sha1 != ie.text_sha1 or
294
                parent_entry.executable != ie.executable):
295
                changed = True
296
        elif ie.kind == 'symlink':
297
            if parent_entry.symlink_target != ie.symlink_target:
298
                changed = True
299
        if changed:
300
            rev_id = ie.revision
301
        else:
302
            rev_id = parent_entry.revision
0.64.160 by Ian Clatworthy
make per-file parents tuples and fix text loading in chk formats
303
        return tuple(heads), rev_id
0.85.2 by Ian Clatworthy
improve per-file graph generation
304
305
    def load(self, rev, inv, signature, text_provider, parents_provider,
0.64.48 by Ian Clatworthy
one revision loader instance
306
        inventories_provider=None):
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
307
        """Load a revision.
0.64.48 by Ian Clatworthy
one revision loader instance
308
309
        :param rev: the Revision
310
        :param inv: the inventory
311
        :param signature: signing information
312
        :param text_provider: a callable expecting a file_id parameter
313
            that returns the text for that file-id
0.85.2 by Ian Clatworthy
improve per-file graph generation
314
        :param parents_provider: a callable expecting a file_id parameter
315
            that return the list of parent-ids for that file-id
0.64.5 by Ian Clatworthy
first cut at generic processing method
316
        :param inventories_provider: a callable expecting a repository and
317
            a list of revision-ids, that returns:
318
              * the list of revision-ids present in the repository
319
              * the list of inventories for the revision-id's,
320
                including an empty inventory for the missing revisions
321
            If None, a default implementation is provided.
322
        """
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
323
        # NOTE: This is bzrlib.repository._install_revision refactored to
324
        # to provide more flexibility in how previous revisions are cached,
325
        # data is feed in, etc.
0.84.13 by Ian Clatworthy
smarter RevisionStore.chk_load()
326
327
        # Get the non-ghost parents and their inventories
328
        if inventories_provider is None:
329
            inventories_provider = self._default_inventories_provider
330
        present_parents, parent_invs = inventories_provider(rev.parent_ids)
331
332
        # Load the inventory
333
        try:
334
            rev.inventory_sha1 = self._add_inventory(rev.revision_id,
335
                inv, present_parents, parent_invs)
336
        except errors.RevisionAlreadyPresent:
337
            pass
338
339
        # Load the texts, signature and revision
340
        entries = self._non_root_entries_iter(inv, rev.revision_id)
0.85.2 by Ian Clatworthy
improve per-file graph generation
341
        self._load_texts(rev.revision_id, entries, text_provider,
342
            parents_provider)
0.84.13 by Ian Clatworthy
smarter RevisionStore.chk_load()
343
        if signature is not None:
344
            self.repo.add_signature_text(rev.revision_id, signature)
345
        self._add_revision(rev, inv)
346
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
347
    def load_using_delta(self, rev, basis_inv, inv_delta, signature,
        text_provider, parents_provider, inventories_provider=None):
        """Load a revision by applying a delta to a (CHK)Inventory.

        The work is driven through the repository's commit builder class,
        fed from a _TreeShim over basis_inv and inv_delta. No write group is
        started or committed here.

        :param rev: the Revision
        :param basis_inv: the basis Inventory or CHKInventory
        :param inv_delta: the inventory delta
        :param signature: signing information; must currently be None
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: not used by this implementation
        :param inventories_provider: not used by this implementation
        :return: the inventory of the commit builder's revision tree
        :raises AssertionError: if signature is not None; note this is only
            raised after the revision has been added
        """
        builder = self.repo._commit_builder_class(self.repo,
            parents=rev.parent_ids, config=None, timestamp=rev.timestamp,
            timezone=rev.timezone, committer=rev.committer,
            revprops=rev.properties, revision_id=rev.revision_id)
        if self._graph is None and self._use_known_graph:
            # Only use a known graph when the installed bzrlib offers
            # add_node() on it and the repository can build one; otherwise
            # remember not to try again.
            if (getattr(_mod_graph, 'GraphThunkIdsToKeys', None) and
                getattr(_mod_graph.GraphThunkIdsToKeys, "add_node", None) and
                getattr(self.repo, "get_known_graph_ancestry", None)):
                self._graph = self.repo.get_known_graph_ancestry(
                    rev.parent_ids)
            else:
                self._use_known_graph = False
        if self._graph is not None:
            def thunked_heads(file_id, revision_ids):
                # Answer heads queries from the cached graph; file_id is not
                # consulted. Fewer than two candidates are all heads.
                if len(revision_ids) < 2:
                    res = set(revision_ids)
                else:
                    res = set(self._graph.heads(revision_ids))
                return res
            builder._heads = thunked_heads

        if rev.parent_ids:
            basis_rev_id = rev.parent_ids[0]
        else:
            basis_rev_id = _mod_revision.NULL_REVISION
        tree = _TreeShim(self.repo, basis_inv, inv_delta, text_provider)
        changes = tree._delta_to_iter_changes()
        for (file_id, path, fs_hash) in builder.record_iter_changes(
                tree, basis_rev_id, changes):
            # So far, we don't *do* anything with the result; the generator
            # still has to be consumed for the changes to be recorded.
            pass
        builder.finish_inventory()
        # TODO: This is working around a bug in the bzrlib code base.
        # 'builder.finish_inventory()' ends up doing:
        # self.inv_sha1 = self.repository.add_inventory_by_delta(...)
        # However, add_inventory_by_delta returns (sha1, inv)
        # And we *want* to keep a handle on both of those objects
        if isinstance(builder.inv_sha1, tuple):
            builder.inv_sha1, builder.new_inventory = builder.inv_sha1
        # This is a duplicate of Builder.commit() since we already have the
        # Revision object, and we *don't* want to call commit_write_group()
        rev.inv_sha1 = builder.inv_sha1
        # load() records the inventory validator as rev.inventory_sha1, so
        # set that too; inv_sha1 above is kept for backwards compatibility.
        rev.inventory_sha1 = builder.inv_sha1
        builder.repository.add_revision(builder._new_revision_id, rev,
            builder.new_inventory, builder._config)
        if self._graph is not None:
            # TODO: Use StaticTuple and .intern() for these things
            self._graph.add_node(builder._new_revision_id, rev.parent_ids)

        if signature is not None:
            # Storing the signature (self.repo.add_signature_text) is
            # deliberately not done yet.
            raise AssertionError('signatures not guaranteed yet')
        return builder.revision_tree().inventory
0.84.13 by Ian Clatworthy
smarter RevisionStore.chk_load()
426
427
    def _non_root_entries_iter(self, inv, revision_id):
0.84.11 by Ian Clatworthy
use iter_non_root_entries if it exists
428
        if hasattr(inv, 'iter_non_root_entries'):
429
            entries = inv.iter_non_root_entries()
430
        else:
431
            path_entries = inv.iter_entries()
432
            # Backwards compatibility hack: skip the root id.
433
            if not self.repo.supports_rich_root():
434
                path, root = path_entries.next()
0.84.13 by Ian Clatworthy
smarter RevisionStore.chk_load()
435
                if root.revision != revision_id:
0.84.11 by Ian Clatworthy
use iter_non_root_entries if it exists
436
                    raise errors.IncompatibleRevision(repr(self.repo))
437
            entries = iter([ie for path, ie in path_entries])
0.84.13 by Ian Clatworthy
smarter RevisionStore.chk_load()
438
        return entries
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
439
0.85.2 by Ian Clatworthy
improve per-file graph generation
440
    def _load_texts(self, revision_id, entries, text_provider,
441
        parents_provider):
0.64.5 by Ian Clatworthy
first cut at generic processing method
442
        """Load texts to a repository for inventory entries.
443
        
444
        This method is provided for subclasses to use or override.
445
446
        :param revision_id: the revision identifier
447
        :param entries: iterator over the inventory entries
448
        :param text_provider: a callable expecting a file_id parameter
449
            that returns the text for that file-id
0.85.2 by Ian Clatworthy
improve per-file graph generation
450
        :param parents_provider: a callable expecting a file_id parameter
451
            that return the list of parent-ids for that file-id
0.64.5 by Ian Clatworthy
first cut at generic processing method
452
        """
0.64.79 by Ian Clatworthy
support new Repository API
453
        raise NotImplementedError(self._load_texts)
0.64.5 by Ian Clatworthy
first cut at generic processing method
454
0.81.3 by Ian Clatworthy
enhance RevisionLoader to try inventory deltas & decide on rich-roots
455
    def _add_inventory(self, revision_id, inv, parents, parent_invs):
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
456
        """Add the inventory inv to the repository as revision_id.
457
        
458
        :param parents: The revision ids of the parents that revision_id
459
                        is known to have and are in the repository already.
0.81.3 by Ian Clatworthy
enhance RevisionLoader to try inventory deltas & decide on rich-roots
460
        :param parent_invs: the parent inventories
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
461
462
        :returns: The validator(which is a sha1 digest, though what is sha'd is
463
            repository format specific) of the serialized inventory.
464
        """
0.64.156 by Ian Clatworthy
minor revision_store clean-ups
465
        return self.repo.add_inventory(revision_id, inv, parents)
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
466
0.84.13 by Ian Clatworthy
smarter RevisionStore.chk_load()
467
    def _add_inventory_by_delta(self, revision_id, basis_inv, inv_delta,
        parents, parent_invs):
        """Store the inventory for revision_id, described as a delta.

        When there is at least one parent the delta is handed to the
        repository, using ``parents[0]`` as the basis revision. With no
        parents the new inventory is built here from the delta and then
        stored whole. ``parent_invs`` is accepted but not used here.

        :param revision_id: the revision id the inventory is stored as
        :param basis_inv: the basis Inventory or CHKInventory
        :param inv_delta: the inventory delta
        :param parents: The revision ids of the parents that revision_id
                        is known to have and are in the repository already.
        :param parent_invs: the parent inventories

        :returns: (validator, inv) where validator is the validator
          (which is a sha1 digest, though what is sha'd is repository format
          specific) of the serialized inventory;
          inv is the generated inventory
        """
        if len(parents) == 0:
            # No parent to apply the delta against in the repository, so
            # materialise the inventory locally and add it in full.
            if isinstance(basis_inv, inventory.CHKInventory):
                new_inv = basis_inv.create_by_apply_delta(inv_delta,
                    revision_id)
            else:
                new_inv = inventory.Inventory(revision_id=revision_id)
                # The delta sets the root itself; remove the existing
                # entry first to prevent a duplicate.
                del new_inv[inventory.ROOT_ID]
                new_inv.apply_delta(inv_delta)
            validator = self.repo.add_inventory(revision_id, new_inv, parents)
            return validator, new_inv

        if not self._supports_chks:
            validator, new_inv = self.repo.add_inventory_by_delta(
                parents[0], inv_delta, revision_id, parents)
            return validator, new_inv

        try:
            validator, new_inv = self.repo.add_inventory_by_delta(
                parents[0], inv_delta, revision_id, parents,
                basis_inv=basis_inv, propagate_caches=False)
        except errors.InconsistentDelta:
            # Dump the offending delta to the log to help diagnosis, then
            # let the original error propagate.
            delta_dump = "\n".join([str(item) for item in inv_delta])
            trace.mutter("INCONSISTENT DELTA IS:\n%s\n" % delta_dump)
            raise
        return validator, new_inv
505
0.64.79 by Ian Clatworthy
support new Repository API
506
    def _add_revision(self, rev, inv):
        """Record a revision, together with its inventory, in the repository.

        :param rev: the Revision; it is stored under ``rev.revision_id``
        :param inv: the inventory
        """
        revision_id = rev.revision_id
        self.repo.add_revision(revision_id, rev, inv)
0.64.79 by Ian Clatworthy
support new Repository API
513
0.64.5 by Ian Clatworthy
first cut at generic processing method
514
    def _default_inventories_provider(self, revision_ids):
        """An inventories provider that queries the repository.

        :param revision_ids: the revision ids to look up
        :returns: (present, inventories) where present lists, in the order
            given, the revision ids the repository has, and inventories
            holds one inventory per requested id. For an id the repository
            does not have, the inventory of ``revision_tree(None)`` is
            used instead.
        """
        repo = self.repo
        present = []
        inventories = []
        for revision_id in revision_ids:
            if not repo.has_revision(revision_id):
                tree_id = None
            else:
                present.append(revision_id)
                tree_id = revision_id
            inventories.append(repo.revision_tree(tree_id).inventory)
        return present, inventories
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
526
527
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
528
class RevisionStore1(AbstractRevisionStore):
    """A RevisionStore that uses the old bzrlib Repository API.

    The old API was present until bzr.dev rev 3510.
    """

    def _load_texts(self, revision_id, entries, text_provider, parents_provider):
        """See RevisionStore._load_texts().

        Only entries whose ``revision`` equals ``revision_id`` are stored;
        each one is added to the weave for its file-id.
        """
        # Add the texts that are not already present
        tx = self.repo.get_transaction()
        for ie in entries:
            # Asking the weave whether ie.revision is already present is
            # *really* slow: over 50% of import time. Instead, assume the
            # version is not already there. In the general case, a shared
            # repository might already have the revision but we arguably
            # don't need that check when importing from a foreign system.
            if ie.revision != revision_id:
                continue
            file_id = ie.file_id
            # Versions inside a single file's weave are identified by plain
            # revision ids (see add_lines/get_lines in this class), so the
            # parents must be plain revision ids too, not the
            # (file_id, revision_id) keys used by the newer texts API.
            text_parents = list(parents_provider(file_id))
            lines = text_provider(file_id)
            vfile = self.repo.weave_store.get_weave_or_empty(file_id, tx)
            vfile.add_lines(revision_id, text_parents, lines)

    def get_file_lines(self, revision_id, file_id):
        """Return the lines of file_id as stored for revision_id.

        :param revision_id: the revision of the text wanted
        :param file_id: the file whose weave is consulted
        """
        tx = self.repo.get_transaction()
        w = self.repo.weave_store.get_weave(file_id, tx)
        return w.get_lines(revision_id)

    def _add_revision(self, rev, inv):
        """Add a revision to the repository; inv is not used here.

        :param rev: the Revision
        :param inv: the inventory
        """
        # There's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        self.repo._revision_store.add_revision(rev, self.repo.get_transaction())
567
568
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
569
class RevisionStore2(AbstractRevisionStore):
    """A RevisionStore that uses the new bzrlib Repository API."""

    def _load_texts(self, revision_id, entries, text_provider, parents_provider):
        """See RevisionStore._load_texts()."""
        # Key every entry by (file_id, revision) and store only those keys
        # the repository's texts do not already report.
        wanted = dict(((ie.file_id, ie.revision), ie) for ie in entries)
        already_stored = self.repo.texts.get_parent_map(wanted)
        missing_texts = set(wanted) - set(already_stored)
        self._load_texts_for_file_rev_ids(missing_texts, text_provider,
            parents_provider)

    def _load_texts_for_file_rev_ids(self, file_rev_ids, text_provider,
        parents_provider):
        """Load texts to a repository for file-ids, revision-id tuples.

        :param file_rev_ids: iterator over the (file_id, revision_id) tuples
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param parents_provider: a callable expecting a file_id parameter
            that returns the list of parent-ids for that file-id
        """
        for file_id, revision_id in file_rev_ids:
            parent_keys = []
            for parent_id in parents_provider(file_id):
                parent_keys.append((file_id, parent_id))
            content = text_provider(file_id)
            self.repo.texts.add_lines((file_id, revision_id), parent_keys,
                content)

    def get_file_lines(self, revision_id, file_id):
        """Return the lines of file_id as stored for revision_id.

        :raises errors.RevisionNotPresent: if the record stream reports
            the text as absent
        """
        stream = self.repo.texts.get_record_stream([(file_id, revision_id)],
            'unordered', True)
        record = stream.next()
        if record.storage_kind != 'absent':
            return osutils.split_lines(record.get_bytes_as('fulltext'))
        raise errors.RevisionNotPresent(record.key, self.repo)

    # This is breaking imports into brisbane-core currently
    #def _add_revision(self, rev, inv):
    #    # There's no need to do everything repo.add_revision does and
    #    # doing so (since bzr.dev 3392) can be pretty slow for long
    #    # delta chains on inventories. Just do the essentials here ...
    #    _mod_revision.check_not_reserved_id(rev.revision_id)
    #    self.repo._add_revision(rev)
0.64.79 by Ian Clatworthy
support new Repository API
613
 
614
0.81.4 by Ian Clatworthy
generalise RevisionLoader to RevisionStore as a repo abstraction
615
class ImportRevisionStore1(RevisionStore1):
616
    """A RevisionStore (old Repository API) optimised for importing.
0.64.79 by Ian Clatworthy
support new Repository API
617
618
    This implementation caches serialised inventory texts and provides
619
    fine-grained control over when inventories are stored as fulltexts.
620
    """
621
622
    def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
        random_ids=True):
        """See AbstractRevisionStore.__init__.

        :param repo: the target repository
        :param parent_texts_to_cache: the number of parent texts to cache
        :param fulltext_when: if non None, a function to call to decide
          whether to fulltext the inventory or not. The revision count
          is passed as a parameter and the result is treated as a boolean.
        :param random_ids: kept as ``self.random_ids`` and passed on as the
          ``random_id`` flag when inventory lines are added
        """
        RevisionStore1.__init__(self, repo)
        self.revision_count = 0
        self.random_ids = random_ids
        self.fulltext_when = fulltext_when
        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
0.64.44 by Ian Clatworthy
smart caching of serialised inventories
637
0.81.3 by Ian Clatworthy
enhance RevisionLoader to try inventory deltas & decide on rich-roots
638
    def _add_inventory(self, revision_id, inv, parents, parent_invs):
        """See RevisionStore._add_inventory.

        The serialised inventory is added via ``_inventory_add_lines()`` and
        the content it returns is remembered in ``self.inv_parent_texts``
        under revision_id. ``parent_invs`` is not used here.

        :returns: the sha1 reported by ``_inventory_add_lines()``
        """
        # Code taken from bzrlib.repository.add_inventory
        repo = self.repo
        assert repo.is_in_write_group()
        _mod_revision.check_not_reserved_id(revision_id)
        inv_revid = inv.revision_id
        assert inv_revid is None or inv_revid == revision_id, (
            "Mismatch between inventory revision"
            " id and insertion revid (%r, %r)" % (inv_revid, revision_id))
        assert inv.root is not None
        serialised = repo._serialise_inventory_to_lines(inv)
        weave = repo.get_inventory_weave()
        result = self._inventory_add_lines(weave, revision_id, parents,
            serialised, self.inv_parent_texts)
        sha1, _num_bytes, parent_text = result
        self.inv_parent_texts[revision_id] = parent_text
        return sha1
653
654
    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
            parent_texts):
        """See Repository._inventory_add_lines().

        Adds ``lines`` to the inventory versioned file ``inv_vf`` as
        ``version_id``, choosing between a line-delta and a fulltext record
        without checking whether a fulltext would be better than a delta.

        :param inv_vf: the inventory versioned file to add to
        :param version_id: the version (revision id) being added
        :param parents: the parent version ids
        :param lines: the serialised inventory lines
        :param parent_texts: mapping of cached parent contents, or None
        :returns: (digest, text_length, content) where digest and
            text_length describe the joined ``lines`` as passed in and
            content is the object made by ``inv_vf.factory``
        """
        # setup parameters used in original code but not this API
        self.revision_count += 1
        if self.fulltext_when is None:
            delta = inv_vf.delta
        else:
            delta = not self.fulltext_when(self.revision_count)
        random_id = self.random_ids

        # bzrlib.knit.add_lines() but error checking optimised
        # (content checking is switched off)
        inv_vf._check_add(version_id, lines, random_id, False)

        ####################################################################
        # bzrlib.knit._add() but skip checking if fulltext better than delta
        ####################################################################

        line_bytes = ''.join(lines)
        digest = osutils.sha_string(line_bytes)
        text_length = len(line_bytes)
        present_parents = [p for p in parents if inv_vf.has_version(p)]
        if parent_texts is None:
            parent_texts = {}

        # can only compress against the left most present parent.
        if delta and (not present_parents or
                present_parents[0] != parents[0]):
            delta = False

        options = []
        if lines and lines[-1][-1] != '\n':
            # Work on a copy so the caller's list is left alone.
            lines = lines[:]
            options.append('no-eol')
            lines[-1] = lines[-1] + '\n'
            line_bytes += '\n'

        # Note: unlike bzrlib.knit._add(), the delta chain length is not
        # checked here (no inv_vf._check_should_delta(present_parents)).

        assert isinstance(version_id, str)
        content = inv_vf.factory.make(lines, version_id)
        if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed; no left
            # matching blocks are supplied.
            delta_hunks = inv_vf._merge_annotations(content, present_parents,
                parent_texts, delta, inv_vf.factory.annotated, None)

        if delta:
            options.append('line-delta')
            record_args = (inv_vf.factory.lower_line_delta(delta_hunks),)
        else:
            options.append('fulltext')
            # isinstance is slower and we have no hierarchy.
            if inv_vf.factory.__class__ == knit.KnitPlainFactory:
                # Use the already joined bytes saving iteration time in
                # _record_to_data.
                record_args = (lines, [line_bytes])
            else:
                # get mixed annotation + content and feed it into the
                # serialiser.
                record_args = (inv_vf.factory.lower_fulltext(content),)
        size, record_bytes = inv_vf._data._record_to_data(version_id, digest,
            *record_args)

        access_memo = inv_vf._data.add_raw_records([size], record_bytes)[0]
        inv_vf._index.add_versions(
            ((version_id, options, access_memo, parents),),
            random_id=random_id)
        return digest, text_length, content