/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
2220.2.1 by Martin Pool
Start adding space for tags stored in the repository
1
# Copyright (C) 2005, 2006, 2007 Canonical Ltd
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
2
#
1185.65.1 by Aaron Bentley
Refactored out ControlFiles and RevisionStore from _Branch
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
7
#
1185.65.1 by Aaron Bentley
Refactored out ControlFiles and RevisionStore from _Branch
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
12
#
1185.65.1 by Aaron Bentley
Refactored out ControlFiles and RevisionStore from _Branch
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
1185.65.10 by Robert Collins
Rename Controlfiles to LockableFiles.
16
1996.3.4 by John Arbash Meinel
lazy_import bzrlib/repository.py
17
from cStringIO import StringIO
18
19
from bzrlib.lazy_import import lazy_import
20
lazy_import(globals(), """
1740.3.7 by Jelmer Vernooij
Move committer, log, revprops, timestamp and timezone to CommitBuilder.
21
import re
22
import time
1996.3.4 by John Arbash Meinel
lazy_import bzrlib/repository.py
23
1910.2.22 by Aaron Bentley
Make commits preserve root entry data
24
from bzrlib import (
1996.3.4 by John Arbash Meinel
lazy_import bzrlib/repository.py
25
    bzrdir,
26
    check,
2745.1.1 by Robert Collins
Add a number of -Devil checkpoints.
27
    debug,
2490.2.21 by Aaron Bentley
Rename graph to deprecated_graph
28
    deprecated_graph,
1996.3.4 by John Arbash Meinel
lazy_import bzrlib/repository.py
29
    errors,
2116.4.1 by John Arbash Meinel
Update file and revision id generators.
30
    generate_ids,
1996.3.4 by John Arbash Meinel
lazy_import bzrlib/repository.py
31
    gpg,
32
    graph,
2163.2.1 by John Arbash Meinel
Speed up the fileids_altered_by_revision_ids processing
33
    lazy_regex,
1996.3.4 by John Arbash Meinel
lazy_import bzrlib/repository.py
34
    lockable_files,
35
    lockdir,
2988.1.5 by Robert Collins
Use a LRU cache when generating the text index to reduce inventory deserialisations.
36
    lru_cache,
1910.2.22 by Aaron Bentley
Make commits preserve root entry data
37
    osutils,
2241.1.1 by Martin Pool
Change RepositoryFormat to use a Registry rather than ad-hoc dictionary
38
    registry,
2018.5.163 by Andrew Bennetts
Deal with various review comments from Robert.
39
    remote,
1996.3.4 by John Arbash Meinel
lazy_import bzrlib/repository.py
40
    revision as _mod_revision,
41
    symbol_versioning,
1910.2.22 by Aaron Bentley
Make commits preserve root entry data
42
    transactions,
2988.1.3 by Robert Collins
Add a new repositoy method _generate_text_key_index for use by reconcile/check.
43
    tsort,
1996.3.4 by John Arbash Meinel
lazy_import bzrlib/repository.py
44
    ui,
45
    )
2520.4.54 by Aaron Bentley
Hang a create_bundle method off repository
46
from bzrlib.bundle import serializer
1996.3.4 by John Arbash Meinel
lazy_import bzrlib/repository.py
47
from bzrlib.revisiontree import RevisionTree
48
from bzrlib.store.versioned import VersionedFileStore
49
from bzrlib.store.text import TextStore
50
from bzrlib.testament import Testament
2535.3.40 by Andrew Bennetts
Tidy up more XXXs.
51
from bzrlib.util import bencode
1996.3.4 by John Arbash Meinel
lazy_import bzrlib/repository.py
52
""")
53
1534.4.28 by Robert Collins
first cut at merge from integration.
54
from bzrlib.decorators import needs_read_lock, needs_write_lock
1563.2.12 by Robert Collins
Checkpointing: created InterObject to factor out common inter object worker code, added InterVersionedFile and tests to allow making join work between any versionedfile.
55
from bzrlib.inter import InterObject
1910.2.3 by Aaron Bentley
All tests pass
56
from bzrlib.inventory import Inventory, InventoryDirectory, ROOT_ID
1996.3.4 by John Arbash Meinel
lazy_import bzrlib/repository.py
57
from bzrlib.symbol_versioning import (
58
        deprecated_method,
3341.2.2 by Alexander Belchenko
Tree.print_file and Repository.print_file are deprecated.
59
        one_one,
60
        one_two,
61
        one_three,
62
        one_four,
1773.4.1 by Martin Pool
Add pyflakes makefile target; fix many warnings
63
        )
2745.1.1 by Robert Collins
Add a number of -Devil checkpoints.
64
from bzrlib.trace import mutter, mutter_callsite, note, warning
1185.70.3 by Martin Pool
Various updates to make storage branch mergeable:
65
1185.65.1 by Aaron Bentley
Refactored out ControlFiles and RevisionStore from _Branch
66
1904.2.5 by Martin Pool
Fix format warning inside test suite and add test
67
# Old formats display a warning, but only once
68
_deprecation_warning_done = False
69
70
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
71
class CommitBuilder(object):
72
    """Provides an interface to build up a commit.
73
74
    This allows describing a tree to be committed without needing to 
75
    know the internals of the format of the repository.
76
    """
77
    
78
    # all clients should supply tree roots.
79
    record_root_entry = True
2825.5.2 by Robert Collins
Review feedback, and fix pointless commits with nested trees to raise PointlessCommit appropriately.
80
    # the default CommitBuilder does not manage trees whose root is versioned.
81
    _versioned_root = False
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
82
2979.2.2 by Robert Collins
Per-file graph heads detection during commit for pack repositories.
83
    def __init__(self, repository, parents, config, timestamp=None,
                 timezone=None, committer=None, revprops=None,
                 revision_id=None):
        """Set up the state needed to build one commit.

        :param repository: Repository to commit to.
        :param parents: Revision ids of the parents of the new revision.
        :param config: Configuration to use; its username() supplies the
            committer when ``committer`` is not given.
        :param timestamp: Optional timestamp recorded for commit; defaults
            to time.time().
        :param timezone: Optional timezone for timestamp; defaults to
            osutils.local_time_offset().
        :param committer: Optional committer to set for commit.
        :param revprops: Optional dictionary of revision properties; its
            items are copied into a fresh dictionary.
        :param revision_id: Optional revision id; when None one is generated
            by _generate_revision_if_needed().
        """
        self._config = config

        if committer is not None:
            assert isinstance(committer, basestring), type(committer)
            self._committer = committer
        else:
            self._committer = self._config.username()

        self.new_inventory = Inventory(None)
        self._new_revision_id = revision_id
        self.parents = parents
        self.repository = repository

        # Keep a private copy so the caller's dictionary is never shared.
        self._revprops = properties = {}
        if revprops is not None:
            properties.update(revprops)

        if timestamp is None:
            timestamp = time.time()
        # Restrict resolution to 1ms
        self._timestamp = round(timestamp, 3)

        if timezone is not None:
            self._timezone = int(timezone)
        else:
            self._timezone = osutils.local_time_offset()

        # Needs _config, _timestamp and _new_revision_id to be set already.
        self._generate_revision_if_needed()
        heads_cache = graph.HeadsCache(repository.get_graph())
        self.__heads = heads_cache.heads
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
126
127
    def commit(self, message):
        """Record the revision and commit the repository write group.

        :param message: The commit message stored on the new revision.
        :return: The revision id of the recorded revision.
        """
        new_id = self._new_revision_id
        revision_fields = dict(
            timestamp=self._timestamp,
            timezone=self._timezone,
            committer=self._committer,
            message=message,
            inventory_sha1=self.inv_sha1,
            revision_id=new_id,
            properties=self._revprops,
            )
        rev = _mod_revision.Revision(**revision_fields)
        rev.parent_ids = self.parents

        repo = self.repository
        repo.add_revision(new_id, rev, self.new_inventory, self._config)
        repo.commit_write_group()
        return new_id
145
146
    def abort(self):
        """Abort the commit being built by aborting the write group."""
        repo = self.repository
        repo.abort_write_group()
150
151
    def revision_tree(self):
        """Return a RevisionTree for the tree that was just committed.

        Call this after commit(). It is preferred to
        Repository.revision_tree() because that may require deserializing
        the inventory, whereas the builder still holds the inventory in
        memory.
        """
        committed_inventory = self.new_inventory
        return RevisionTree(self.repository, committed_inventory,
                            self._new_revision_id)
162
163
    def finish_inventory(self):
        """Tell the builder that the inventory is finished.

        Stamps the new revision id on the inventory, adds the inventory to
        the repository and stores the value returned by add_inventory in
        ``self.inv_sha1``.

        :raises AssertionError: if no root entry has been recorded. The root
            must be supplied to record_entry_contents, as of bzr 0.10.
        """
        if self.new_inventory.root is None:
            # Only the message is passed: AssertionError accepts no keyword
            # arguments, so the former warn()-style arguments
            # (DeprecationWarning, stacklevel=2) raised TypeError instead.
            raise AssertionError('Root entry should be supplied to'
                ' record_entry_contents, as of bzr 0.10.')
        self.new_inventory.revision_id = self._new_revision_id
        self.inv_sha1 = self.repository.add_inventory(
            self._new_revision_id,
            self.new_inventory,
            self.parents
            )
176
177
    def _gen_revision_id(self):
        """Return a new revision id.

        The id is produced by generate_ids.gen_revision_id from the
        configured username and this commit's timestamp.
        """
        username = self._config.username()
        return generate_ids.gen_revision_id(username, self._timestamp)
181
182
    def _generate_revision_if_needed(self):
183
        """Create a revision id if None was supplied.
184
        
185
        If the repository can not support user-specified revision ids
186
        they should override this function and raise CannotSetRevisionId
187
        if _new_revision_id is not None.
188
189
        :raises: CannotSetRevisionId
190
        """
191
        if self._new_revision_id is None:
192
            self._new_revision_id = self._gen_revision_id()
193
            self.random_revid = True
194
        else:
195
            self.random_revid = False
196
2979.2.5 by Robert Collins
Make CommitBuilder.heads be _heads as its internal to CommitBuilder only.
197
    def _heads(self, file_id, revision_ids):
2979.2.1 by Robert Collins
Make it possible for different commit builders to override heads().
198
        """Calculate the graph heads for revision_ids in the graph of file_id.
199
200
        This can use either a per-file graph or a global revision graph as we
201
        have an identity relationship between the two graphs.
202
        """
2979.2.5 by Robert Collins
Make CommitBuilder.heads be _heads as its internal to CommitBuilder only.
203
        return self.__heads(revision_ids)
2979.2.1 by Robert Collins
Make it possible for different commit builders to override heads().
204
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
205
    def _check_root(self, ie, parent_invs, tree):
206
        """Helper for record_entry_contents.
207
208
        :param ie: An entry being added.
209
        :param parent_invs: The inventories of the parent revisions of the
210
            commit.
211
        :param tree: The tree that is being committed.
212
        """
2871.1.2 by Robert Collins
* ``CommitBuilder.record_entry_contents`` now requires the root entry of a
213
        # In this revision format, root entries have no knit or weave When
214
        # serializing out to disk and back in root.revision is always
215
        # _new_revision_id
216
        ie.revision = self._new_revision_id
2818.3.1 by Robert Collins
Change CommitBuilder factory delegation to allow simple declaration.
217
2871.1.4 by Robert Collins
Merge bzr.dev.
218
    def _get_delta(self, ie, basis_inv, path):
219
        """Get a delta against the basis inventory for ie."""
220
        if ie.file_id not in basis_inv:
221
            # add
222
            return (None, path, ie.file_id, ie)
223
        elif ie != basis_inv[ie.file_id]:
224
            # common but altered
225
            # TODO: avoid tis id2path call.
226
            return (basis_inv.id2path(ie.file_id), path, ie.file_id, ie)
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
227
        else:
2871.1.4 by Robert Collins
Merge bzr.dev.
228
            # common, unaltered
229
            return None
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
230
231
    def record_entry_contents(self, ie, parent_invs, path, tree,
        content_summary):
        """Record the content of ie from tree into the commit if needed.

        Side effect: sets ie.revision when unchanged

        :param ie: An inventory entry present in the commit.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param path: The path the entry is at in the tree.
        :param tree: The tree which contains this entry and should be used to
            obtain content.
        :param content_summary: Summary data from the tree about the paths
            content - stat, length, exec, sha/link target. This is only
            accessed when the entry has a revision of None - that is when it is
            a candidate to commit.
        :return: A tuple (change_delta, version_recorded). change_delta is
            an inventory_delta change for this entry against the basis tree of
            the commit, or None if no change occurred against the basis tree.
            version_recorded is True if a new version of the entry has been
            recorded. For instance, committing a merge where a file was only
            changed on the other side will return (delta, False).
        :raises errors.RootMissing: if no root has been recorded yet and ie
            has a parent_id (i.e. it is not itself a root entry).
        :raises NotImplementedError: for a tree-reference when the repository
            format does not support them, or for an unknown kind.
        :raises AssertionError: if a file's executable flag in
            content_summary is None.
        """
        if self.new_inventory.root is None:
            if ie.parent_id is not None:
                raise errors.RootMissing()
            self._check_root(ie, parent_invs, tree)
        if ie.revision is None:
            kind = content_summary[0]
        else:
            # ie is carried over from a prior commit
            kind = ie.kind
        # XXX: repository specific check for nested tree support goes here - if
        # the repo doesn't want nested trees we skip it ?
        if (kind == 'tree-reference' and
            not self.repository._format.supports_tree_reference):
            # mismatch between commit builder logic and repository:
            # this needs the entry creation pushed down into the builder.
            raise NotImplementedError('Missing repository subtree support.')
        self.new_inventory.add(ie)

        # TODO: slow, take it out of the inner loop.
        try:
            basis_inv = parent_invs[0]
        except IndexError:
            basis_inv = Inventory(root_id=None)

        # ie.revision is always None if the InventoryEntry is considered
        # for committing. We may record the previous parents revision if the
        # content is actually unchanged against a sole head.
        if ie.revision is not None:
            if not self._versioned_root and path == '':
                # repositories that do not version the root set the root's
                # revision to the new commit even when no change occurs, and
                # this masks when a change may have occurred against the basis,
                # so calculate if one happened.
                if ie.file_id in basis_inv:
                    delta = (basis_inv.id2path(ie.file_id), path,
                        ie.file_id, ie)
                else:
                    # add
                    delta = (None, path, ie.file_id, ie)
                return delta, False
            else:
                # we don't need to commit this, because the caller already
                # determined that an existing revision of this file is
                # appropriate.
                return None, (ie.revision == self._new_revision_id)
        # XXX: Friction: parent_candidates should return a list not a dict
        #      so that we don't have to walk the inventories again.
        parent_candidate_entries = ie.parent_candidates(parent_invs)
        head_set = self._heads(ie.file_id, parent_candidate_entries.keys())
        # Collect the heads in parent-inventory order, each one only once.
        heads = []
        for inv in parent_invs:
            if ie.file_id in inv:
                old_rev = inv[ie.file_id].revision
                if old_rev in head_set:
                    heads.append(old_rev)
                    head_set.remove(old_rev)

        store = False
        # now we check to see if we need to write a new record to the
        # file-graph.
        # We write a new entry unless there is one head to the ancestors, and
        # the kind-derived content is unchanged.

        # Cheapest check first: no ancestors, or more than one head in the
        # ancestors, we write a new node.
        if len(heads) != 1:
            store = True
        if not store:
            # There is a single head, look it up for comparison
            parent_entry = parent_candidate_entries[heads[0]]
            # if the non-content specific data has changed, we'll be writing a
            # node:
            if (parent_entry.parent_id != ie.parent_id or
                parent_entry.name != ie.name):
                store = True
        # now we need to do content specific checks:
        if not store:
            # if the kind changed the content obviously has
            if kind != parent_entry.kind:
                store = True
        if kind == 'file':
            # Raised explicitly rather than with an assert statement so the
            # check is not stripped when Python runs with -O.
            if content_summary[2] is None:
                raise AssertionError("Files must not have executable = None")
            if not store:
                if (# if the file length changed we have to store:
                    parent_entry.text_size != content_summary[1] or
                    # if the exec bit has changed we have to store:
                    parent_entry.executable != content_summary[2]):
                    store = True
                elif parent_entry.text_sha1 == content_summary[3]:
                    # all meta and content is unchanged (using a hash cache
                    # hit to check the sha)
                    ie.revision = parent_entry.revision
                    ie.text_size = parent_entry.text_size
                    ie.text_sha1 = parent_entry.text_sha1
                    ie.executable = parent_entry.executable
                    return self._get_delta(ie, basis_inv, path), False
                else:
                    # Either there is only a hash change(no hash cache entry,
                    # or same size content change), or there is no change on
                    # this file at all.
                    # Provide the parent's hash to the store layer, so that if
                    # the content is unchanged we will not store a new node.
                    nostore_sha = parent_entry.text_sha1
            if store:
                # We want to record a new node regardless of the presence or
                # absence of a content change in the file.
                nostore_sha = None
            ie.executable = content_summary[2]
            lines = tree.get_file(ie.file_id, path).readlines()
            try:
                ie.text_sha1, ie.text_size = self._add_text_to_weave(
                    ie.file_id, lines, heads, nostore_sha)
            except errors.ExistingContent:
                # Turns out that the file content was unchanged, and we were
                # only going to store a new node if it was changed. Carry over
                # the entry.
                ie.revision = parent_entry.revision
                ie.text_size = parent_entry.text_size
                ie.text_sha1 = parent_entry.text_sha1
                ie.executable = parent_entry.executable
                return self._get_delta(ie, basis_inv, path), False
        elif kind == 'directory':
            if not store:
                # all data is meta here, nothing specific to directory, so
                # carry over:
                ie.revision = parent_entry.revision
                return self._get_delta(ie, basis_inv, path), False
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        elif kind == 'symlink':
            current_link_target = content_summary[3]
            if not store:
                # symlink target is not generic metadata, check if it has
                # changed.
                if current_link_target != parent_entry.symlink_target:
                    store = True
            if not store:
                # unchanged, carry over.
                ie.revision = parent_entry.revision
                ie.symlink_target = parent_entry.symlink_target
                return self._get_delta(ie, basis_inv, path), False
            ie.symlink_target = current_link_target
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        elif kind == 'tree-reference':
            if not store:
                if content_summary[3] != parent_entry.reference_revision:
                    store = True
            if not store:
                # unchanged, carry over.
                ie.reference_revision = parent_entry.reference_revision
                ie.revision = parent_entry.revision
                return self._get_delta(ie, basis_inv, path), False
            ie.reference_revision = content_summary[3]
            lines = []
            self._add_text_to_weave(ie.file_id, lines, heads, None)
        else:
            raise NotImplementedError('unknown kind')
        ie.revision = self._new_revision_id
        return self._get_delta(ie, basis_inv, path), True
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
415
416
    def _add_text_to_weave(self, file_id, new_lines, parents, nostore_sha):
417
        versionedfile = self.repository.weave_store.get_weave_or_empty(
418
            file_id, self.repository.get_transaction())
419
        # Don't change this to add_lines - add_lines_with_ghosts is cheaper
420
        # than add_lines, and allows committing when a parent is ghosted for
421
        # some reason.
2592.3.136 by Robert Collins
Merge bzr.dev.
422
        # Note: as we read the content directly from the tree, we know its not
423
        # been turned into unicode or badly split - but a broken tree
424
        # implementation could give us bad output from readlines() so this is
425
        # not a guarantee of safety. What would be better is always checking
426
        # the content during test suite execution. RBC 20070912
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
427
        try:
428
            return versionedfile.add_lines_with_ghosts(
429
                self._new_revision_id, parents, new_lines,
2592.3.136 by Robert Collins
Merge bzr.dev.
430
                nostore_sha=nostore_sha, random_id=self.random_revid,
431
                check_content=False)[0:2]
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
432
        finally:
433
            versionedfile.clear_cache()
434
435
436
class RootCommitBuilder(CommitBuilder):
    """CommitBuilder variant that actually records the root id."""

    # This builder versions the root entry properly, so
    # record_entry_contents treats the root like any other entry.
    _versioned_root = True

    def _check_root(self, ie, parent_invs, tree):
        """Helper for record_entry_contents; deliberately does nothing.

        Unlike CommitBuilder._check_root, the root entry's revision is left
        untouched here.

        :param ie: An entry being added.
        :param parent_invs: The inventories of the parent revisions of the
            commit.
        :param tree: The tree that is being committed.
        """
450
451
2220.2.3 by Martin Pool
Add tag: revision namespace.
452
######################################################################
453
# Repositories
454
1185.66.5 by Aaron Bentley
Renamed RevisionStorage to Repository
455
class Repository(object):
1185.70.3 by Martin Pool
Various updates to make storage branch mergeable:
456
    """Repository holding history for one or more branches.
457
458
    The repository holds and retrieves historical information including
459
    revisions and file history.  It's normally accessed only by the Branch,
460
    which views a particular line of development through that history.
461
462
    The Repository builds on top of Stores and a Transport, which respectively 
463
    describe the disk data format and the way of accessing the (possibly 
464
    remote) disk.
465
    """
1185.65.17 by Robert Collins
Merge from integration, mode-changes are broken.
466
2592.3.135 by Robert Collins
Do not create many transient knit objects, saving 4% on commit.
467
    # What class to use for a CommitBuilder. Often it's simpler to change this
468
    # in a Repository class subclass rather than to override
469
    # get_commit_builder.
470
    _commit_builder_class = CommitBuilder
471
    # The search regex used by xml based repositories to determine what things
472
    # were changed in a single commit.
2163.2.1 by John Arbash Meinel
Speed up the fileids_altered_by_revision_ids processing
473
    _file_ids_altered_regex = lazy_regex.lazy_compile(
474
        r'file_id="(?P<file_id>[^"]+)"'
2776.4.6 by Robert Collins
Fixup various commit test failures falling out from the other commit changes.
475
        r'.* revision="(?P<revision_id>[^"]+)"'
2163.2.1 by John Arbash Meinel
Speed up the fileids_altered_by_revision_ids processing
476
        )
477
2617.6.2 by Robert Collins
Add abort_write_group and wire write_groups into fetch and commit.
478
    def abort_write_group(self):
        """Abort the write group that is currently open.

        Format-specific cleanup is delegated to _abort_write_group(); once
        that returns, this repository no longer records an open write group.

        :raises errors.BzrError: if the recorded write group is not the
            object returned by get_transaction().
        :seealso: start_write_group.
        """
        if self._write_group is not self.get_transaction():
            # has an unlock or relock occurred ?
            raise errors.BzrError('mismatched lock context and write group.')
        self._abort_write_group()
        self._write_group = None
488
489
    def _abort_write_group(self):
        """Hook for per-repository cleanup when a write group is aborted.

        abort_write_group() calls this before it treats the write group as
        finished; implementations should discard any internal state that
        built up during the write group.  This is not a rollback: data
        already handed to the repository does not have to be withdrawn, but
        no effort should be made to ensure it is fully committed either.
        Abort happens after an error has occurred, so further disk or network
        operations may be impossible or may fail, and should be avoided where
        possible.

        This implementation does nothing.
        """
501
2249.5.12 by John Arbash Meinel
Change the APIs for VersionedFile, Store, and some of Repository into utf-8
502
    def add_inventory(self, revision_id, inv, parents):
        """Add the inventory inv to the repository as revision_id.

        A write group must be open when this is called.

        :param revision_id: The revision id to store the inventory under; it
            is checked with check_not_reserved_id.
        :param inv: The inventory to add.  Its revision_id must be None or
            equal to revision_id, and its root must not be None.
        :param parents: The revision ids of the parents that revision_id
                        is known to have and are in the repository already.

        :returns: The validator(which is a sha1 digest, though what is sha'd is
            repository format specific) of the serialized inventory.
        :raises AssertionError: if no write group is open, if inv carries a
            different revision id, or if inv has no root.
        """
        # These checks are explicit raises rather than assert statements so
        # that they are still enforced when Python runs with -O.
        if not self.is_in_write_group():
            raise AssertionError("not in a write group")
        _mod_revision.check_not_reserved_id(revision_id)
        if not (inv.revision_id is None or inv.revision_id == revision_id):
            raise AssertionError(
                "Mismatch between inventory revision"
                " id and insertion revid (%r, %r)"
                % (inv.revision_id, revision_id))
        if inv.root is None:
            raise AssertionError("inventory has no root entry")
        inv_lines = self._serialise_inventory_to_lines(inv)
        inv_vf = self.get_inventory_weave()
        return self._inventory_add_lines(inv_vf, revision_id, parents,
            inv_lines, check_content=False)
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
521
2805.6.7 by Robert Collins
Review feedback.
522
    def _inventory_add_lines(self, inv_vf, revision_id, parents, lines,
        check_content=True):
        """Add lines to inv_vf as revision_id; return the inventory sha1.

        Any parent that is not present in inv_vf is left out of the parent
        list passed to inv_vf.add_lines().
        """
        present_parents = [p for p in parents if p in inv_vf]
        add_result = inv_vf.add_lines(revision_id, present_parents, lines,
            check_content=check_content)
        return add_result[0]
1740.3.6 by Jelmer Vernooij
Move inventory writing to the commit builder.
531
2249.5.12 by John Arbash Meinel
Change the APIs for VersionedFile, Store, and some of Repository into utf-8
532
    def add_revision(self, revision_id, rev, inv=None, config=None):
        """Store rev in the revision store under revision_id.

        :param revision_id: the revision id to use.
        :param rev: The revision object.
        :param inv: The inventory for the revision. If None, it is looked up
                    in the inventory store when a signature has to be made.
        :param config: If None no digital signature will be created.
                       If supplied its signature_needed method will be used
                       to determine if a signature should be made.
        :raises errors.WeaveRevisionNotPresent: if the inventory weave has no
            entry for revision_id and no inv was supplied.
        """
        # TODO: jam 20070210 Shouldn't we check rev.revision_id and
        #       rev.parent_ids?
        _mod_revision.check_not_reserved_id(revision_id)
        wants_signature = config is not None and config.signature_needed()
        if wants_signature:
            if inv is None:
                inv = self.get_inventory(revision_id)
            testament_text = Testament(rev, inv).as_short_text()
            self.store_revision_signature(
                gpg.GPGStrategy(config), testament_text, revision_id)
        if revision_id not in self.get_inventory_weave():
            if inv is None:
                raise errors.WeaveRevisionNotPresent(
                    revision_id, self.get_inventory_weave())
            # yes, this is not suitable for adding with ghosts.
            inventory_sha1 = self.add_inventory(revision_id, inv,
                                                rev.parent_ids)
            rev.inventory_sha1 = inventory_sha1
        self._revision_store.add_revision(rev, self.get_transaction())
1570.1.2 by Robert Collins
Import bzrtools' 'fix' command as 'bzr reconcile.'
561
2520.4.10 by Aaron Bentley
Enable installation of revisions
562
    def _add_revision_text(self, revision_id, text):
        """Parse serialised revision text and add it to the revision store.

        :param revision_id: Not used by this implementation.
        :param text: The serialised revision as a string; it is parsed with
            the revision store's serializer and also passed on wrapped in a
            StringIO.
        """
        parsed = self._revision_store._serializer.read_revision_from_string(
            text)
        text_file = StringIO(text)
        self._revision_store._add_revision(parsed, text_file,
                                           self.get_transaction())
567
1732.2.4 by Martin Pool
Split check into Branch.check and Repository.check
568
    def all_revision_ids(self):
        """Return a list of every revision id in the repository.

        This is deprecated because code should generally work on the graph
        reachable from a particular revision and ignore any other revisions
        that happen to be present.  There is no direct replacement method.

        When the 'evil' debug flag is set, the call site is logged.
        """
        evil_enabled = 'evil' in debug.debug_flags
        if evil_enabled:
            mutter_callsite(2, "all_revision_ids is linear with history.")
        return self._all_revision_ids()
578
579
    def _all_revision_ids(self):
        """Return a list of every revision id in the repository.

        The ids are in as much topological order as the underlying store
        can present.

        :raises NotImplementedError: always, in this implementation.
        """
        raise NotImplementedError(self._all_revision_ids)
1534.4.50 by Robert Collins
Got the bzrdir api straightened out, plenty of refactoring to use it pending, but the api is up and running.
586
1687.1.7 by Robert Collins
Teach Repository about break_lock.
587
    def break_lock(self):
        """Break a lock held by another instance, if there is one.

        The ui factory is used to ask for confirmation if the lock may
        belong to an active process.  The work is done by
        control_files.break_lock().
        """
        lock_owner = self.control_files
        lock_owner.break_lock()
594
1534.4.50 by Robert Collins
Got the bzrdir api straightened out, plenty of refactoring to use it pending, but the api is up and running.
595
    @needs_read_lock
    def _eliminate_revisions_not_present(self, revision_ids):
        """Filter revision_ids down to the ones this repository has.

        :param revision_ids: An iterable of revision ids to check.
        :return: A list (not a set) of the ids for which has_revision() is
            true, in the order they were given; duplicates are kept.
        """
        return [revision_id for revision_id in revision_ids
                if self.has_revision(revision_id)]
606
1534.4.40 by Robert Collins
Add RepositoryFormats and allow bzrdir.open or create _repository to be used.
607
    @staticmethod
    def create(a_bzrdir):
        """Initialize a repository of the current default format in a_bzrdir.

        :param a_bzrdir: Passed to the default format's initialize().
        :return: Whatever the default format's initialize() returns.
        """
        default_format = RepositoryFormat.get_default_format()
        return default_format.initialize(a_bzrdir)
611
1563.2.29 by Robert Collins
Remove all but fetch references to repository.revision_store.
612
    def __init__(self, _format, a_bzrdir, control_files, _revision_store,
                 control_store, text_store):
        """Instantiate a Repository.

        :param _format: The format of the repository on disk.
        :param a_bzrdir: The BzrDir of the repository.
        :param control_files: Stored as self.control_files; the base of its
            _transport becomes self.base.
        :param _revision_store: Stored as self._revision_store.
        :param control_store: Stored as both self.control_store and
            self.control_weaves.
        :param text_store: Stored as self.weave_store.

        In the future we will have a single api for all stores for
        getting file texts, inventories and revisions, then
        this construct will accept instances of those things.
        """
        super(Repository, self).__init__()
        self._format = _format
        # the following are part of the public API for Repository:
        self.bzrdir = a_bzrdir
        self.control_files = control_files
        self._revision_store = _revision_store
        # backwards compatibility
        self.weave_store = text_store
        # not right yet - should be more semantically clear ?
        self.control_store = control_store
        self.control_weaves = control_store
        # for tests
        self._reconcile_backsup_inventory = True
        self._reconcile_fixes_text_parents = False
        self._reconcile_does_inventory_gc = True
        # TODO: make sure to construct the right store classes, etc, depending
        # on whether escaping is required.
        self._warn_if_deprecated()
        # No write group is open on a freshly constructed repository.
        self._write_group = None
        self.base = control_files._transport.base
1185.65.1 by Aaron Bentley
Refactored out ControlFiles and RevisionStore from _Branch
643
1668.1.3 by Martin Pool
[patch] use the correct transaction when committing snapshot (Malone: #43959)
644
    def __repr__(self):
        """Return 'ClassName(base)', where base is the repr of self.base."""
        class_name = self.__class__.__name__
        return '%s(%r)' % (class_name, self.base)
1668.1.3 by Martin Pool
[patch] use the correct transaction when committing snapshot (Malone: #43959)
647
2671.1.4 by Andrew Bennetts
Rename is_same_repository to has_same_location, thanks Aaron!
648
    def has_same_location(self, other):
        """Tell whether other is a repository at the same location as this.

        Two repositories match when they are of exactly the same class and
        their control_files transports have equal bases.

        This might return False even when two repository objects are accessing
        the same physical repository via different URLs.
        """
        if self.__class__ is other.__class__:
            own_base = self.control_files._transport.base
            other_base = other.control_files._transport.base
            return own_base == other_base
        return False
2671.1.1 by Andrew Bennetts
Add support for comparing Repositories with == and != operators.
659
2617.6.1 by Robert Collins
* New method on Repository - ``start_write_group``, ``end_write_group``
660
    def is_in_write_group(self):
        """Report whether a write group is currently open.

        :return: True if self._write_group is not None, otherwise False.
        :seealso: start_write_group.
        """
        group_is_open = self._write_group is not None
        return group_is_open
666
1694.2.6 by Martin Pool
[merge] bzr.dev
667
    def is_locked(self):
        """Return the lock state reported by control_files.is_locked()."""
        lock_state = self.control_files.is_locked()
        return lock_state
669
2592.3.188 by Robert Collins
Allow pack repositories to have multiple writers active at one time, for greater concurrency.
670
    def is_write_locked(self):
        """Return True if this object is write locked.

        That is: it is locked and the control files' lock mode is 'w'.
        """
        locked = self.is_locked()
        return locked and self.control_files._lock_mode == 'w'
673
2018.5.75 by Andrew Bennetts
Add Repository.{dont_,}leave_lock_in_place.
674
    def lock_write(self, token=None):
        """Take a write lock on this repository.

        While write locked, caching within the repository object starts
        accumulating data during reads, and a 'write_group' can be obtained.
        Write groups must be used for actual data insertion.

        A token should be passed in if you know that you have locked the object
        some other way, and need to synchronise this object's state with that
        fact.

        :param token: if this is already locked, then lock_write will fail
            unless the token matches the existing lock.
        :returns: a token if this instance supports tokens, otherwise None.
        :raises TokenLockingNotSupported: when a token is given but this
            instance doesn't support using token locks.
        :raises MismatchedToken: if the specified token doesn't match the token
            of the existing lock.
        :seealso: start_write_group.

        XXX: this docstring is duplicated in many places, e.g. lockable_files.py
        """
        lock_result = self.control_files.lock_write(token=token)
        # Refresh only after the lock has been taken.
        self._refresh_data()
        return lock_result
1185.65.1 by Aaron Bentley
Refactored out ControlFiles and RevisionStore from _Branch
699
700
    def lock_read(self):
        """Take a read lock via control_files, then call _refresh_data()."""
        lock_owner = self.control_files
        lock_owner.lock_read()
        self._refresh_data()
1185.65.1 by Aaron Bentley
Refactored out ControlFiles and RevisionStore from _Branch
703
1694.2.6 by Martin Pool
[merge] bzr.dev
704
    def get_physical_lock_status(self):
        """Return control_files.get_physical_lock_status()."""
        physical_status = self.control_files.get_physical_lock_status()
        return physical_status
1624.3.36 by Olaf Conradi
Rename is_transport_locked() to get_physical_lock_status() as the
706
2018.5.75 by Andrew Bennetts
Add Repository.{dont_,}leave_lock_in_place.
707
    def leave_lock_in_place(self):
        """Ask that the physical lock be kept when this object is unlocked.

        If lock_write doesn't return a token, then this method is not supported.
        """
        lock_owner = self.control_files
        lock_owner.leave_in_place()
714
715
    def dont_leave_lock_in_place(self):
        """Ask that the physical lock be released when this object is unlocked.

        This applies even if this object didn't originally acquire the lock.

        If lock_write doesn't return a token, then this method is not supported.
        """
        lock_owner = self.control_files
        lock_owner.dont_leave_in_place()
722
1534.4.50 by Robert Collins
Got the bzrdir api straightened out, plenty of refactoring to use it pending, but the api is up and running.
723
    @needs_read_lock
    def gather_stats(self, revid=None, committers=None):
        """Collect statistics about a revision and about the repository.

        :param revid: The revision id to gather statistics from, if None, then
            no revision specific statistics are gathered.
        :param committers: Optional parameter controlling whether to grab
            a count of committers from the revision specific statistics.
        :return: A dictionary of statistics. Currently this contains:
            committers: The number of distinct committers, if requested.
            firstrev: A (timestamp, timezone) tuple for the first revision
                in the ancestry of revid, if revid is not the NULL_REVISION.
            latestrev: A (timestamp, timezone) tuple for the last revision
                in the ancestry of revid, if revid is not the NULL_REVISION.
            revisions: The total revision count in the repository.
            size: An estimate disk size of the repository in bytes.
            The last two are only present when the bzrdir's root transport
            is listable.
        """
        stats = {}
        if revid and committers:
            stats['committers'] = 0
        if revid and revid != _mod_revision.NULL_REVISION:
            if committers:
                committer_names = set()
            ancestry = self.get_ancestry(revid)
            # pop the leading None
            ancestry.pop(0)
            oldest = None
            if committers:
                wanted = ancestry
            else:
                # ignore the revisions in the middle - just grab first and last
                wanted = ancestry[0], ancestry[-1]
            for rev in self.get_revisions(wanted):
                if not oldest:
                    oldest = rev
                if committers:
                    committer_names.add(rev.committer)
            # The loop variable still holds the final revision yielded.
            newest = rev
            if committers:
                stats['committers'] = len(committer_names)
            stats['firstrev'] = (oldest.timestamp, oldest.timezone)
            stats['latestrev'] = (newest.timestamp, newest.timezone)

        # now gather global repository information
        if self.bzrdir.root_transport.listable():
            revision_count, total_bytes = self._revision_store.total_size(
                self.get_transaction())
            stats['revisions'] = revision_count
            stats['size'] = total_bytes
        return stats
772
3140.1.2 by Aaron Bentley
Add ability to find branches inside repositories
773
    def find_branches(self, using=False):
        """Find branches underneath this repository.

        This will include branches inside other branches.

        :param using: If True, list only branches using this repository.
        :return: A list of branch objects.
        """
        if using and not self.is_shared():
            # An unshared repository: only the branch in its own bzrdir,
            # if there is one, is considered.
            try:
                own_branch = self.bzrdir.open_branch()
            except errors.NotBranchError:
                return []
            return [own_branch]

        class Evaluator(object):
            """Callable handed to find_bzrdirs as its evaluate argument.

            Each call returns a (flag, (branch, repository)) pair.
            """

            def __init__(self):
                self.first_call = True

            def __call__(self, bzrdir):
                # On the first call, the parameter is always the bzrdir
                # containing the current repo.
                if self.first_call:
                    self.first_call = False
                else:
                    try:
                        nested_repo = bzrdir.open_repository()
                    except errors.NoRepositoryPresent:
                        pass
                    else:
                        return False, (None, nested_repo)
                try:
                    found_branch = bzrdir.open_branch()
                except errors.NotBranchError:
                    found_branch = None
                return True, (found_branch, None)

        found = []
        for branch, repository in bzrdir.BzrDir.find_bzrdirs(
                self.bzrdir.root_transport, evaluate=Evaluator()):
            if branch is not None:
                found.append(branch)
            if repository is not None and not using:
                # Another repository was found: collect its branches too.
                found.extend(repository.find_branches())
        return found
815
2535.3.12 by Andrew Bennetts
Add a first cut of a get_data_stream method to Repository.
816
    def get_data_stream(self, revision_ids):
        """Not implemented here.

        :raises NotImplementedError: always, in this implementation.
        """
        raise NotImplementedError(self.get_data_stream)
818
3184.1.9 by Robert Collins
* ``Repository.get_data_stream`` is now deprecated in favour of
819
    def get_data_stream_for_search(self, search_result):
        """Get a data stream that can be inserted to a repository.

        :param search_result: A bzrlib.graph.SearchResult selecting the
            revisions to get.
        :return: A data stream that can be inserted into a repository using
            insert_data_stream.
        :raises NotImplementedError: always, in this implementation.
        """
        raise NotImplementedError(self.get_data_stream_for_search)
828
2535.3.17 by Andrew Bennetts
[broken] Closer to a working Repository.fetch_revisions smart request.
829
    def insert_data_stream(self, stream):
        """Insert each item of a data stream into the matching versioned file.

        XXX: Is it a substitute for fetch?
        Should it manage its own write group ?

        :param stream: An iterable of (item_key, bytes) pairs.  item_key is
            a tuple: ('file', file_id), ('inventory',), ('revisions',) or
            ('signatures',).  bytes is a bencoded list whose first element
            is the format and whose remaining elements are
            (version, options, parents, bytes) records.
        :raises errors.RepositoryDataStreamError: if an item_key is not one
            of the recognised forms.
        """
        for item_key, encoded in stream:
            # Pick the versioned file the records belong in.
            if item_key[0] == 'file':
                (file_id,) = item_key[1:]
                knit = self.weave_store.get_weave_or_empty(
                    file_id, self.get_transaction())
            elif item_key == ('inventory',):
                knit = self.get_inventory_weave()
            elif item_key == ('revisions',):
                knit = self._revision_store.get_revision_file(
                    self.get_transaction())
            elif item_key == ('signatures',):
                knit = self._revision_store.get_signature_file(
                    self.get_transaction())
            else:
                raise errors.RepositoryDataStreamError(
                    "Unrecognised data stream key '%s'" % (item_key,))
            decoded_list = bencode.bdecode(encoded)
            stream_format = decoded_list.pop(0)
            data_list = []
            chunks = []
            for version, options, parents, some_bytes in decoded_list:
                data_list.append((version, options, len(some_bytes), parents))
                chunks.append(some_bytes)
            # Join once rather than growing a string with += per record.
            knit_data = StringIO(''.join(chunks))
            def reader_func(count):
                # None means "read everything that is left".
                if count is None:
                    return knit_data.read()
                else:
                    return knit_data.read(count)
            knit.insert_data_stream(
                (stream_format, data_list, reader_func))
2535.3.17 by Andrew Bennetts
[broken] Closer to a working Repository.fetch_revisions smart request.
866
2258.1.1 by Robert Collins
Move info branch statistics gathering into the repository to allow smart server optimisation (Robert Collins).
867
    @needs_read_lock
    def search_missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
        """Search for revisions that other has and this repository lacks.

        The work is delegated to the InterRepository obtained for the
        (other, self) pair; its result is returned unchanged.

        :param other: The repository to compare against.
        :param revision_id: only consider revision ids included by
            revision_id.
        :param find_ghosts: Passed straight through to the InterRepository.
        """
        inter = InterRepository.get(other, self)
        return inter.search_missing_revision_ids(revision_id, find_ghosts)
877
3341.2.2 by Alexander Belchenko
Tree.print_file and Repository.print_file are deprecated.
878
    @deprecated_method(one_two)
    @needs_read_lock
    def missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
        """Return the revision ids that other has that this does not.

        Deprecated wrapper around search_missing_revision_ids. The keys of
        the search result are looked up in other's graph and returned in
        topological order.

        :param other: The repository to compare against.
        :param revision_id: only return revision ids included by revision_id.
        :param find_ghosts: Passed through to search_missing_revision_ids.
        """
        search_result = self.search_missing_revision_ids(
            other, revision_id, find_ghosts)
        missing_keys = search_result.get_keys()
        # other is read-locked only for the duration of the graph query.
        other.lock_read()
        try:
            parent_map = other.get_graph().get_parent_map(missing_keys)
        finally:
            other.unlock()
        return tsort.topo_sort(parent_map)
1534.4.50 by Robert Collins
Got the bzrdir api straightened out, plenty of refactoring to use it pending, but the api is up and running.
895
1534.4.40 by Robert Collins
Add RepositoryFormats and allow bzrdir.open or create _repository to be used.
896
    @staticmethod
    def open(base):
        """Open the repository rooted at base.

        The control directory at base is opened first and then asked for
        its repository. For instance, if the repository is at
        URL/.bzr/repository, Repository.open(URL) -> a Repository instance.
        """
        return bzrdir.BzrDir.open(base).open_repository()
905
2387.1.1 by Robert Collins
Remove the --basis parameter to clone etc. (Robert Collins)
906
    def copy_content_into(self, destination, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This is a destructive operation! Do not use it on existing
        repositories.

        :param destination: The repository to copy into.
        :param revision_id: Passed to the InterRepository's copy_content.
        :return: Whatever the InterRepository's copy_content returns.
        """
        inter = InterRepository.get(self, destination)
        return inter.copy_content(revision_id)
1534.4.50 by Robert Collins
Got the bzrdir api straightened out, plenty of refactoring to use it pending, but the api is up and running.
913
2617.6.2 by Robert Collins
Add abort_write_group and wire write_groups into fetch and commit.
914
    def commit_write_group(self):
        """Commit the contents accrued within the current write group.

        The write group must have been started under the transaction that
        is current now; otherwise a BzrError is raised and nothing is
        committed.

        :seealso: start_write_group.
        """
        if self._write_group is self.get_transaction():
            self._commit_write_group()
            self._write_group = None
            return
        # The transaction changed underneath us: has an unlock or relock
        # occurred since the write group was started?
        message = 'mismatched lock context %r and write group %r.' % (
            self.get_transaction(), self._write_group)
        raise errors.BzrError(message)
926
2617.6.2 by Robert Collins
Add abort_write_group and wire write_groups into fetch and commit.
927
    def _commit_write_group(self):
928
        """Template method for per-repository write group cleanup.
929
        
930
        This is called before the write group is considered to be 
931
        finished and should ensure that all data handed to the repository
932
        for writing during the write group is safely committed (to the 
933
        extent possible considering file system caching etc).
934
        """
935
2949.1.1 by Robert Collins
Change Repository.fetch to provide a find_ghosts parameter which triggers ghost filling.
936
    def fetch(self, source, revision_id=None, pb=None, find_ghosts=False):
        """Fetch the content required to construct revision_id from source.

        If revision_id is None all content is copied.

        :param source: The repository to fetch from.
        :param revision_id: The revision to fetch, or None for everything.
        :param pb: Passed through to the InterRepository's fetch.
        :param find_ghosts: Find and copy revisions in the source that are
            ghosts in the target (and not reachable directly by walking out to
            the first-present revision in target from revision_id).
        :return: (0, []) when source is at the same location as self,
            otherwise whatever the InterRepository's fetch returns.
        :raises errors.IncompatibleRepositories: if the InterRepository's
            fetch raises NotImplementedError.
        """
        if self.has_same_location(source):
            # Fast path for same-url fetches: nothing needs copying, but a
            # specific revision is still looked up so that a missing one is
            # reported rather than silently ignored.
            wants_specific_revision = (revision_id is not None and
                not _mod_revision.is_null(revision_id))
            if wants_specific_revision:
                self.get_revision(revision_id)
            return 0, []
        inter = InterRepository.get(source, self)
        try:
            return inter.fetch(revision_id=revision_id, pb=pb, find_ghosts=find_ghosts)
        except NotImplementedError:
            raise errors.IncompatibleRepositories(source, self)
1534.4.41 by Robert Collins
Branch now uses BzrDir reasonably sanely.
957
2520.4.54 by Aaron Bentley
Hang a create_bundle method off repository
958
    def create_bundle(self, target, base, fileobj, format=None):
        """Write a bundle for this repository via serializer.write_bundle.

        All arguments are forwarded unchanged, with this repository as the
        first argument.

        :return: Whatever serializer.write_bundle returns.
        """
        bundle_result = serializer.write_bundle(
            self, target, base, fileobj, format)
        return bundle_result
960
2803.2.1 by Robert Collins
* CommitBuilder now advertises itself as requiring the root entry to be
961
    def get_commit_builder(self, branch, parents, config, timestamp=None,
                           timezone=None, committer=None, revprops=None,
                           revision_id=None):
        """Obtain a CommitBuilder for this repository.

        The builder is constructed first and a write group is then started
        on this repository before the builder is handed back.

        :param branch: Branch to commit to (not used by this implementation).
        :param parents: Revision ids of the parents of the new revision.
        :param config: Configuration to use.
        :param timestamp: Optional timestamp recorded for commit.
        :param timezone: Optional timezone for timestamp.
        :param committer: Optional committer to set for commit.
        :param revprops: Optional dictionary of revision properties.
        :param revision_id: Optional revision id.
        :return: An instance of self._commit_builder_class.
        """
        builder = self._commit_builder_class(
            self, parents, config, timestamp, timezone, committer, revprops,
            revision_id)
        self.start_write_group()
        return builder
1740.3.1 by Jelmer Vernooij
Introduce and use CommitBuilder objects.
979
1185.65.1 by Aaron Bentley
Refactored out ControlFiles and RevisionStore from _Branch
980
    def unlock(self):
        """Release one lock held on this repository's control files.

        If this would release the last write lock while a write group is
        still open, the write group is aborted, the lock is released and a
        BzrError is raised.
        """
        if (self.control_files._lock_count == 1 and
            self.control_files._lock_mode == 'w' and
            self._write_group is not None):
            self.abort_write_group()
            self.control_files.unlock()
            raise errors.BzrError(
                'Must end write groups before releasing write locks.')
        self.control_files.unlock()
989
1185.65.27 by Robert Collins
Tweak storage towards mergability.
990
    @needs_read_lock
    def clone(self, a_bzrdir, revision_id=None):
        """Clone this repository into a_bzrdir using the current format.

        The new repository keeps this repository's shared setting and
        receives its content through copy_content_into.

        Currently no check is made that the format of this repository and
        the bzrdir format are compatible. FIXME RBC 20060201.

        :return: The newly created destination repository.
        """
        # TODO: deprecate after 0.16; cloning this with all its settings is
        # probably not very useful -- mbp 20070423
        shared = self.is_shared()
        new_repo = self._create_sprouting_repo(a_bzrdir, shared=shared)
        self.copy_content_into(new_repo, revision_id)
        return new_repo
1004
2617.6.1 by Robert Collins
* New method on Repository - ``start_write_group``, ``end_write_group``
1005
    def start_write_group(self):
        """Start a write group in the repository.

        Write groups are used by repositories which do not have a 1:1 mapping
        between file ids and backend store to manage the insertion of data from
        both fetch and commit operations.

        A write lock is required around the start_write_group/commit_write_group
        for the support of lock-requiring repository formats.

        One can only insert data into a repository inside a write group.

        :raises errors.NotWriteLocked: if the repository is not write locked.
        :raises errors.BzrError: if a write group is already active.
        :return: None.
        """
        write_locked = self.is_write_locked()
        if not write_locked:
            raise errors.NotWriteLocked(self)
        if self._write_group:
            raise errors.BzrError('already in a write group')
        self._start_write_group()
        # Remember the transaction the group was started under so that a
        # later unlock/relock can be detected.
        self._write_group = self.get_transaction()
1026
2617.6.2 by Robert Collins
Add abort_write_group and wire write_groups into fetch and commit.
1027
    def _start_write_group(self):
1028
        """Template method for per-repository write group startup.
1029
        
1030
        This is called before the write group is considered to be 
1031
        entered.
1032
        """
1033
2440.1.1 by Martin Pool
Add new Repository.sprout,
1034
    @needs_read_lock
    def sprout(self, to_bzrdir, revision_id=None):
        """Create a descendent repository for new development.

        Unlike clone, this does not copy the settings of the repository:
        the new repository is created unshared and is filled by fetching
        from this one.

        :return: The newly created repository.
        """
        new_repo = self._create_sprouting_repo(to_bzrdir, shared=False)
        new_repo.fetch(self, revision_id=revision_id)
        return new_repo
1043
1044
    def _create_sprouting_repo(self, a_bzrdir, shared):
1534.4.50 by Robert Collins
Got the bzrdir api straightened out, plenty of refactoring to use it pending, but the api is up and running.
1045
        if not isinstance(a_bzrdir._format, self.bzrdir._format.__class__):
1046
            # use target default format.
2241.1.4 by Martin Pool
Moved old weave-based repository formats into bzrlib.repofmt.weaverepo.
1047
            dest_repo = a_bzrdir.create_repository()
1534.4.50 by Robert Collins
Got the bzrdir api straightened out, plenty of refactoring to use it pending, but the api is up and running.
1048
        else:
2241.1.4 by Martin Pool
Moved old weave-based repository formats into bzrlib.repofmt.weaverepo.
1049
            # Most control formats need the repository to be specifically
1050
            # created, but on some old all-in-one formats it's not needed
1051
            try:
2440.1.1 by Martin Pool
Add new Repository.sprout,
1052
                dest_repo = self._format.initialize(a_bzrdir, shared=shared)
2241.1.4 by Martin Pool
Moved old weave-based repository formats into bzrlib.repofmt.weaverepo.
1053
            except errors.UninitializableFormat:
1054
                dest_repo = a_bzrdir.open_repository()
1055
        return dest_repo
1534.4.41 by Robert Collins
Branch now uses BzrDir reasonably sanely.
1056
1563.2.22 by Robert Collins
Move responsibility for repository.has_revision into RevisionStore
1057
    @needs_read_lock
    def has_revision(self, revision_id):
        """True if this repository has a copy of the revision.

        Implemented as a single-element has_revisions probe.
        """
        present = self.has_revisions((revision_id,))
        return revision_id in present
1061
1062
    def has_revisions(self, revision_ids):
        """Probe to find out the presence of multiple revisions.

        This base implementation always raises; the lookup has to be
        provided by an override.

        :param revision_ids: An iterable of revision_ids.
        :return: A set of the revision_ids that were present.
        :raises NotImplementedError: always, in this implementation.
        """
        raise NotImplementedError(self.has_revisions)
1185.65.1 by Aaron Bentley
Refactored out ControlFiles and RevisionStore from _Branch
1072
1185.65.27 by Robert Collins
Tweak storage towards mergability.
1073
    @needs_read_lock
    def get_revision(self, revision_id):
        """Return the Revision object for a named revision.

        This is a single-element get_revisions call.
        """
        revisions = self.get_revisions([revision_id])
        return revisions[0]
1077
1078
    @needs_read_lock
    def get_revision_reconcile(self, revision_id):
        """'reconcile' helper routine that allows access to a revision always.

        This variant of get_revision goes straight to _get_revisions, so it
        does not cross check the weave graph against the revision one as
        get_revision may: it should only be used by reconcile, or
        reconcile-alike commands that are correcting or testing the
        revision graph.
        """
        revisions = self._get_revisions([revision_id])
        return revisions[0]
2249.5.13 by John Arbash Meinel
Finish auditing Repository, and fix generate_ids to always generate utf8 ids.
1088
1756.1.2 by Aaron Bentley
Show logs using get_revisions
1089
    @needs_read_lock
    def get_revisions(self, revision_ids):
        """Get many revisions at once.

        :param revision_ids: The ids of the revisions to load.
        :return: The result of _get_revisions for those ids.
        """
        revisions = self._get_revisions(revision_ids)
        return revisions
1093
1094
    @needs_read_lock
    def _get_revisions(self, revision_ids):
        """Core work logic to get many revisions without sanity checks.

        :param revision_ids: Revision ids to load; each must be a non-empty
            string. It is iterated once for validation and then handed to
            the revision store.
        :return: The revisions returned by the revision store.
        :raises errors.InvalidRevisionId: if an id is empty or not a string.
        """
        for candidate_id in revision_ids:
            if not (candidate_id and isinstance(candidate_id, basestring)):
                raise errors.InvalidRevisionId(revision_id=candidate_id,
                                               branch=self)
        loaded = self._revision_store.get_revisions(revision_ids,
                                                    self.get_transaction())
        # Ids coming back from the store are expected to be byte strings.
        for loaded_rev in loaded:
            assert not isinstance(loaded_rev.revision_id, unicode)
            for parent_id in loaded_rev.parent_ids:
                assert not isinstance(parent_id, unicode)
        return loaded
1185.65.1 by Aaron Bentley
Refactored out ControlFiles and RevisionStore from _Branch
1107
1185.65.27 by Robert Collins
Tweak storage towards mergability.
1108
    @needs_read_lock
    def get_revision_xml(self, revision_id):
        """Return the named revision as written by the current serializer.

        :param revision_id: The id of the revision to serialise.
        :return: The complete contents of the buffer the revision store's
            serializer wrote the revision into.
        """
        # TODO: jam 20070210 This shouldn't be necessary since get_revision
        #       would have already done it.
        # TODO: jam 20070210 Just use _serializer.write_revision_to_string()
        revision = self.get_revision(revision_id)
        xml_buffer = StringIO()
        # the current serializer..
        self._revision_store._serializer.write_revision(revision, xml_buffer)
        xml_buffer.seek(0)
        return xml_buffer.getvalue()
1119
1120
    @needs_read_lock
    def get_deltas_for_revisions(self, revisions):
        """Produce a generator of revision deltas.

        Note that the input is a sequence of REVISIONS, not revision_ids;
        it is iterated twice.
        Trees will be held in memory until the generator exits.
        Each delta is relative to the revision's lefthand predecessor.
        """
        # NOTE(review): this is a generator function, so the body runs when
        # the result is iterated, not when the method is called - confirm
        # that the read-lock decorator covers that.
        wanted_tree_ids = set()
        for rev in revisions:
            wanted_tree_ids.add(rev.revision_id)
            # Only the left-hand parent (if any) is needed as a delta basis.
            wanted_tree_ids.update(rev.parent_ids[:1])
        trees_by_id = {}
        for tree in self.revision_trees(wanted_tree_ids):
            trees_by_id[tree.get_revision_id()] = tree
        for rev in revisions:
            if rev.parent_ids:
                basis_tree = trees_by_id[rev.parent_ids[0]]
            else:
                # No parents: compare against the tree for revision None.
                basis_tree = self.revision_tree(None)
            yield trees_by_id[rev.revision_id].changes_from(basis_tree)
1756.3.3 by Aaron Bentley
More refactoring, introduce revision_trees.
1140
1756.3.19 by Aaron Bentley
Documentation and cleanups
1141
    @needs_read_lock
    def get_revision_delta(self, revision_id):
        """Return the delta for one revision.

        The delta is relative to the left-hand predecessor of the
        revision.
        """
        revision = self.get_revision(revision_id)
        deltas = list(self.get_deltas_for_revisions([revision]))
        return deltas[0]
1744.2.2 by Johan Rydberg
Add get_revision_delta to Repository; and make Branch.get_revision_delta use it.
1150
1185.65.1 by Aaron Bentley
Refactored out ControlFiles and RevisionStore from _Branch
1151
    @needs_write_lock
    def store_revision_signature(self, gpg_strategy, plaintext, revision_id):
        """Sign plaintext and store the result as revision_id's signature.

        :param gpg_strategy: Object whose sign() method produces the signature.
        :param plaintext: The text to sign.
        :param revision_id: The revision the signature is recorded against.
        """
        signed_text = gpg_strategy.sign(plaintext)
        self.add_signature_text(revision_id, signed_text)
2996.2.3 by Aaron Bentley
Add tests for install_revisions and add_signature
1155
1156
    @needs_write_lock
    def add_signature_text(self, revision_id, signature):
        """Record signature for revision_id in the revision store.

        :param revision_id: The revision the signature belongs to.
        :param signature: The signature text to store.
        """
        store = self._revision_store
        store.add_revision_signature_text(
            revision_id, signature, self.get_transaction())
1185.65.1 by Aaron Bentley
Refactored out ControlFiles and RevisionStore from _Branch
1161
2988.1.2 by Robert Collins
New Repository API find_text_key_references for use by reconcile and check.
1162
    def find_text_key_references(self):
        """Find the text key references within the repository.

        The inventory texts from all present revision ids are assessed to
        generate this report.

        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain.
        """
        revision_ids = self.all_revision_ids()
        w = self.get_inventory_weave()
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._find_text_key_references_from_xml_inventory_lines(
                w.iter_lines_added_or_present_in_versions(revision_ids, pb=pb))
        finally:
            # Always finish the progress bar, even if the scan fails.
            pb.finished()
1181
2988.1.1 by Robert Collins
Refactor fetch's xml inventory parsing into a core routine that extracts the data and a separate one that filters for fetch.
1182
    def _find_text_key_references_from_xml_inventory_lines(self,
        line_iterator):
        """Core routine for extracting references to texts from inventories.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of (line, origin_version_id) pairs.
        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. Note that if that revision_id was
            not part of the line_iterator's output then False will be given -
            even though it may actually refer to that key.
        :raises AssertionError: if the serializer does not set
            support_altered_by_hack.
        """
        if not self._serializer.support_altered_by_hack:
            raise AssertionError(
                "_find_text_key_references_from_xml_inventory_lines only "
                "supported for branches which store inventory as unnested xml"
                ", not on %r" % self)
        result = {}

        # this code needs to read every new line in every inventory for the
        # inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
        # not present in one of those inventories is unnecessary but not
        # harmful because we are filtering by the revision id marker in the
        # inventory lines : we only select file ids altered in one of those
        # revisions. We don't need to see all lines in the inventory because
        # only those added in an inventory in rev X can contain a revision=X
        # line.
        unescape_revid_cache = {}
        unescape_fileid_cache = {}

        # jam 20061218 In a big fetch, this handles hundreds of thousands
        # of lines, so it has had a lot of inlining and optimizing done.
        # Sorry that it is a little bit messy.
        # Move several functions to be local variables, since this is a long
        # running loop.
        search = self._file_ids_altered_regex.search
        unescape = _unescape_xml
        setdefault = result.setdefault
        for line, version_id in line_iterator:
            match = search(line)
            if match is None:
                # Lines that do not match the regex carry no reference.
                continue
            # One call to match.group() returning multiple items is quite a
            # bit faster than 2 calls to match.group() each returning 1
            file_id, revision_id = match.group('file_id', 'revision_id')

            # Inlining the cache lookups helps a lot when you make 170,000
            # lines and 350k ids, versus 8.4 unique ids.
            # Using a cache helps in 2 ways:
            #   1) Avoids unnecessary decoding calls
            #   2) Re-uses cached strings, which helps in future set and
            #      equality checks.
            # (2) is enough that removing encoding entirely along with
            # the cache (so we are using plain strings) results in no
            # performance improvement.
            try:
                revision_id = unescape_revid_cache[revision_id]
            except KeyError:
                unescaped = unescape(revision_id)
                unescape_revid_cache[revision_id] = unescaped
                revision_id = unescaped

            # Note that unconditionally unescaping means that we deserialise
            # every fileid, which for general 'pull' is not great, but we don't
            # really want to have so many fulltexts that this matters anyway.
            # RBC 20071114.
            try:
                file_id = unescape_fileid_cache[file_id]
            except KeyError:
                unescaped = unescape(file_id)
                unescape_fileid_cache[file_id] = unescaped
                file_id = unescaped

            key = (file_id, revision_id)
            # Record the key as seen; it is only upgraded to True when the
            # line came from the inventory of the revision it names.
            setdefault(key, False)
            if revision_id == version_id:
                result[key] = True
        return result
1261
1262
    def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
1263
        revision_ids):
1264
        """Helper routine for fileids_altered_by_revision_ids.
1265
1266
        This performs the translation of xml lines to revision ids.
1267
1268
        :param line_iterator: An iterator of lines, origin_version_id
1269
        :param revision_ids: The revision ids to filter for. This should be a
1270
            set or other type which supports efficient __contains__ lookups, as
1271
            the revision id from each parsed line will be looked up in the
1272
            revision_ids filter.
1273
        :return: a dictionary mapping altered file-ids to an iterable of
1274
        revision_ids. Each altered file-ids has the exact revision_ids that
1275
        altered it listed explicitly.
1276
        """
1277
        result = {}
1278
        setdefault = result.setdefault
1279
        for file_id, revision_id in \
1280
            self._find_text_key_references_from_xml_inventory_lines(
1281
                line_iterator).iterkeys():
2975.3.1 by Robert Collins
Change (without backwards compatibility) the
1282
            # once data is all ensured-consistent; then this is
1283
            # if revision_id == version_id
2592.3.110 by Robert Collins
Filter out texts and signatures not referenced by the revisions being copied during pack to pack fetching.
1284
            if revision_id in revision_ids:
1285
                setdefault(file_id, set()).add(revision_id)
1286
        return result
1287
1288
    def fileids_altered_by_revision_ids(self, revision_ids):
        """Find the file ids and versions affected by revisions.

        The inventory weave lines added or present in the given revisions are
        scanned, with a nested progress bar that is always finished on exit.

        :param revision_ids: an iterable containing revision ids.
        :return: a dictionary mapping altered file-ids to an iterable of
            revision_ids. Each altered file-id has the exact revision_ids that
            altered it listed explicitly.
        """
        # A set gives the line filter cheap membership tests.
        wanted = set(revision_ids)
        inventory_weave = self.get_inventory_weave()
        pb = ui.ui_factory.nested_progress_bar()
        try:
            inventory_lines = \
                inventory_weave.iter_lines_added_or_present_in_versions(
                    wanted, pb=pb)
            return self._find_file_ids_from_xml_inventory_lines(
                inventory_lines, wanted)
        finally:
            pb.finished()
1534.4.41 by Robert Collins
Branch now uses BzrDir reasonably sanely.
1306
2708.1.7 by Aaron Bentley
Rename extract_files_bytes to iter_files_bytes
1307
    def iter_files_bytes(self, desired_files):
        """Iterate through file versions.

        Yields (identifier, bytes_iterator) pairs.  identifier is the opaque
        value the caller supplied in desired_files; it should uniquely
        identify the file version in the caller's context (for example an
        index number or a TreeTransform trans_id).

        bytes_iterator is an iterable of bytestrings for the file.  The kind
        of iterable and the length of the bytestrings are unspecified; in this
        implementation it is whatever get_lines() returns for the file's
        weave.

        No specific ordering of the results is guaranteed to callers.

        :param desired_files: a list of (file_id, revision_id, identifier)
            triples
        :raises errors.NoSuchIdInRepository: when the weave store raises
            NoSuchFile for a file_id.
        """
        transaction = self.get_transaction()
        weave_store = self.weave_store
        for file_id, revision_id, identifier in desired_files:
            try:
                file_weave = weave_store.get_weave(file_id, transaction)
            except errors.NoSuchFile:
                raise errors.NoSuchIdInRepository(self, file_id)
            else:
                yield identifier, file_weave.get_lines(revision_id)
2708.1.3 by Aaron Bentley
Implement extract_files_bytes on Repository
1333
3063.2.1 by Robert Collins
Solve reconciling erroring when multiple portions of a single delta chain are being reinserted.
1334
    def _generate_text_key_index(self, text_key_references=None,
        ancestors=None):
        """Generate a new text key index for the repository.

        This is an expensive function that will take considerable time to run.

        :param text_key_references: Optional precomputed result of
            find_text_key_references(); computed here when None.
        :param ancestors: Optional precomputed parent map for the revisions;
            when None it is built from all_revision_ids() via the repository
            graph.
        :return: A dict mapping text keys ((file_id, revision_id) tuples) to a
            list of parents, also text keys. When a given key has no parents,
            the parents list will be [NULL_REVISION].
        """
        if ancestors is None:
            # All revisions, to find inventory parents.
            revision_graph = self.get_graph()
            ancestors = revision_graph.get_parent_map(self.all_revision_ids())
        if text_key_references is None:
            text_key_references = self.find_text_key_references()
        progress = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_generate_text_key_index(ancestors,
                text_key_references, progress)
        finally:
            progress.finished()
1356
1357
    def _do_generate_text_key_index(self, ancestors, text_key_references, pb):
        """Helper for _generate_text_key_index to avoid deep nesting.

        :param ancestors: A mapping of revision id to its parent revision ids;
            it is topologically sorted here and indexed by revision id.
        :param text_key_references: A mapping of text key ((file_id,
            revision_id) tuple) to a flag. Keys whose flag is false are
            recorded with [NULL_REVISION] as their parents.
        :param pb: Progress bar, updated once per text key processed.
        :return: A dict mapping each text key to a list of parent text keys,
            or [NULL_REVISION] when there are none.
        """
        revision_order = tsort.topo_sort(ancestors)
        # Valid text keys grouped by the revision they belong to.
        revision_keys = dict((rev_id, set()) for rev_id in revision_order)
        text_count = len(text_key_references)
        invalid_keys = set()
        # a cache of the text keys to allow reuse; costs a dict of all the
        # keys, but saves a 2-tuple for every child of a given key.
        text_key_cache = {}
        for text_key, valid in text_key_references.iteritems():
            if valid:
                revision_keys[text_key[1]].add(text_key)
            else:
                invalid_keys.add(text_key)
            text_key_cache[text_key] = text_key
        del text_key_references
        text_index = {}
        # The per-file graph reads from text_index, which is filled in below
        # as each text key gets its parents.
        text_graph = graph.Graph(graph.DictParentsProvider(text_index))
        NULL_REVISION = _mod_revision.NULL_REVISION
        # Set a cache with a size of 10 - this suffices for bzr.dev but may be
        # too small for large or very branchy trees. However, for 55K path
        # trees, it would be easy to use too much memory trivially. Ideally we
        # could gauge this by looking at available real memory etc, but this is
        # always a tricky proposition.
        inventory_cache = lru_cache.LRUCache(10)
        batch_size = 10 # should be ~150MB on a 55K path tree
        processed_texts = 0
        pb.update("Calculating text parents.", processed_texts, text_count)
        for start in xrange(0, len(revision_order), batch_size):
            to_query = revision_order[start:start + batch_size]
            for rev_tree in self.revision_trees(to_query):
                revision_id = rev_tree.get_revision_id()
                parent_ids = ancestors[revision_id]
                for text_key in revision_keys[revision_id]:
                    pb.update("Calculating text parents.", processed_texts)
                    processed_texts += 1
                    file_id = text_key[0]
                    candidate_parents = []
                    for parent_id in parent_ids:
                        try:
                            parent_keys = revision_keys[parent_id]
                        except KeyError:
                            # the parent parent_id is a ghost: truncate the
                            # derived graph against this ghost.
                            continue
                        parent_text_key = (file_id, parent_id)
                        if parent_text_key not in parent_keys:
                            # The parent revision did not itself introduce
                            # this text: look at the parent's inventory to
                            # determine the candidate in the per file graph.
                            # Inventories are kept in a small LRU cache.
                            try:
                                inv = inventory_cache[parent_id]
                            except KeyError:
                                inv = self.revision_tree(parent_id).inventory
                                inventory_cache[parent_id] = inv
                            parent_entry = inv._byid.get(file_id, None)
                            if parent_entry is None:
                                # The file is not in the parent inventory.
                                continue
                            parent_text_key = (file_id, parent_entry.revision)
                        candidate_parents.append(
                            text_key_cache[parent_text_key])
                    parent_heads = text_graph.heads(candidate_parents)
                    # Keep the heads in the order the candidates were found.
                    new_parents = sorted(parent_heads,
                        key=candidate_parents.index)
                    if not new_parents:
                        new_parents = [NULL_REVISION]
                    text_index[text_key] = new_parents

        for text_key in invalid_keys:
            text_index[text_key] = [NULL_REVISION]
        return text_index
1438
2668.2.8 by Andrew Bennetts
Rename get_data_to_fetch_for_revision_ids as item_keys_introduced_by.
1439
    def item_keys_introduced_by(self, revision_ids, _files_pb=None):
        """Get an iterable listing the keys of all the data introduced by a set
        of revision IDs.

        The keys will be ordered so that the corresponding items can be safely
        fetched and inserted in that order.

        :param revision_ids: The revision ids to report on. It is iterated
            more than once here.
        :param _files_pb: Optional progress bar, updated once per file id. It
            is created and finished by the caller.
        :returns: An iterable producing tuples of (knit-kind, file-id,
            versions).  knit-kind is one of 'file', 'inventory', 'signatures',
            'revisions'.  file-id is None unless knit-kind is 'file'.
        """
        # XXX: it's a bit weird to control the inventory weave caching in this
        # generator.  Ideally the caching would be done in fetch.py I think.  Or
        # maybe this generator should explicitly have the contract that it
        # should not be iterated until the previously yielded item has been
        # processed?
        # NOTE(review): the read lock taken here is only released once the
        # signatures have been collected; a consumer that stops iterating
        # earlier presumably leaves it held - confirm.
        self.lock_read()
        inventory_weave = self.get_inventory_weave()
        inventory_weave.enable_cache()

        # file ids that changed
        altered = self.fileids_altered_by_revision_ids(revision_ids)
        total = len(altered)
        for index, (file_id, versions) in enumerate(altered.iteritems()):
            if _files_pb is not None:
                _files_pb.update("fetch texts", index, total)
            yield ("file", file_id, versions)
        # We're done with the files_pb.  Note that it is finished by the
        # caller, just as it was created by the caller.
        del _files_pb

        # inventory
        yield ("inventory", None, revision_ids)
        inventory_weave.clear_cache()

        # signatures
        signed_revisions = set()
        for rev_id in revision_ids:
            try:
                self.get_signature_text(rev_id)
            except errors.NoSuchRevision:
                # not signed.
                continue
            signed_revisions.add(rev_id)
        self.unlock()
        yield ("signatures", None, signed_revisions)

        # revisions
        yield ("revisions", None, revision_ids)
1491
1185.65.27 by Robert Collins
Tweak storage towards mergability.
1492
    @needs_read_lock
    def get_inventory_weave(self):
        """Return the 'inventory' weave from the control weaves.

        The weave is looked up using the current transaction.
        """
        transaction = self.get_transaction()
        return self.control_weaves.get_weave('inventory', transaction)
1496
1185.65.27 by Robert Collins
Tweak storage towards mergability.
1497
    @needs_read_lock
    def get_inventory(self, revision_id):
        """Get Inventory object by revision id.

        This is the first item produced by iter_inventories() for a
        one-element list.
        """
        inventories = self.iter_inventories([revision_id])
        return inventories.next()
1501
1502
    def iter_inventories(self, revision_ids):
        """Get many inventories by revision_ids.

        This will buffer some or all of the texts used in constructing the
        inventories in memory, but will only parse a single inventory at a
        time.

        :param revision_ids: The revision ids to fetch inventories for.
            Neither None nor NULL_REVISION may be among them.
        :return: An iterator of inventories.
        :raises AssertionError: if revision_ids contains None or
            NULL_REVISION.
        """
        # Use an if, not an assert, so the check still runs under python -O
        # (deserialise_inventory does the same).
        if None in revision_ids:
            raise AssertionError('None is not a valid revision id')
        if _mod_revision.NULL_REVISION in revision_ids:
            raise AssertionError(
                'NULL_REVISION has no inventory to iterate')
        return self._iter_inventories(revision_ids)
1514
1515
    def _iter_inventories(self, revision_ids):
1516
        """single-document based inventory iteration."""
1517
        texts = self.get_inventory_weave().get_texts(revision_ids)
1518
        for text, revision_id in zip(texts, revision_ids):
1519
            yield self.deserialise_inventory(revision_id, text)
1740.2.3 by Aaron Bentley
Only reserialize the working tree basis inventory when needed.
1520
1521
    def deserialise_inventory(self, revision_id, xml):
        """Transform the xml into an inventory object.

        :param revision_id: The expected revision id of the inventory.
        :param xml: A serialised inventory.
        :return: The inventory read by the repository's serializer.
        :raises AssertionError: if the inventory's revision_id differs from
            the expected revision_id.
        """
        inventory = self._serializer.read_inventory_from_string(
            xml, revision_id)
        # An explicit if rather than an assert, so the check survives -O.
        if inventory.revision_id == revision_id:
            return inventory
        raise AssertionError('revision id mismatch %s != %s' % (
            inventory.revision_id, revision_id))
1185.65.1 by Aaron Bentley
Refactored out ControlFiles and RevisionStore from _Branch
1532
1910.2.22 by Aaron Bentley
Make commits preserve root entry data
1533
    def serialise_inventory(self, inv):
        """Return inv serialised to a string by the repository's serializer."""
        serializer = self._serializer
        return serializer.write_inventory_to_string(inv)
1910.2.22 by Aaron Bentley
Make commits preserve root entry data
1535
2817.2.1 by Robert Collins
* Inventory serialisation no longer double-sha's the content.
1536
    def _serialise_inventory_to_lines(self, inv):
1537
        return self._serializer.write_inventory_to_lines(inv)
1538
2520.4.113 by Aaron Bentley
Avoid peeking at Repository._serializer
1539
    def get_serializer_format(self):
        """Return the format_num of the repository's serializer."""
        serializer = self._serializer
        return serializer.format_num
1541
1185.65.27 by Robert Collins
Tweak storage towards mergability.
1542
    @needs_read_lock
    def get_inventory_xml(self, revision_id):
        """Get the inventory XML stored for revision_id.

        The value is whatever the inventory weave's get_text() returns.
        NOTE(review): this was previously documented as "a file object";
        get_text() presumably returns the text itself - confirm.

        :param revision_id: The revision id, which must be a str.
        :raises errors.HistoryMissing: if the weave lookup raises IndexError.
        """
        assert isinstance(revision_id, str), type(revision_id)
        try:
            inventory_weave = self.get_inventory_weave()
            return inventory_weave.get_text(revision_id)
        except IndexError:
            raise errors.HistoryMissing(self, 'inventory', revision_id)
1185.65.1 by Aaron Bentley
Refactored out ControlFiles and RevisionStore from _Branch
1551
1185.65.27 by Robert Collins
Tweak storage towards mergability.
1552
    @needs_read_lock
    def get_inventory_sha1(self, revision_id):
        """Return the sha1 hash of the inventory entry.

        The value is read from the inventory_sha1 attribute of the revision
        object for revision_id.
        """
        revision = self.get_revision(revision_id)
        return revision.inventory_sha1
1557
1185.65.27 by Robert Collins
Tweak storage towards mergability.
1558
    @needs_read_lock
    @deprecated_method(one_four)
    def get_revision_graph(self, revision_id=None):
        """Return a dictionary containing the revision graph.

        NB: Avoid this method: it accesses the entire graph all at once,
        which is much more data than most operations should require.

        This implementation always raises NotImplementedError.

        :param revision_id: The revision_id to get a graph from. If None, then
            the entire revision graph is returned. This is a deprecated mode
            of operation and will be removed in the future.
        :return: a dictionary of revision_id->revision_parents_list.
        """
        unimplemented = self.get_revision_graph
        raise NotImplementedError(unimplemented)
1572
1573
    @needs_read_lock
    @deprecated_method(one_three)
    def get_revision_graph_with_ghosts(self, revision_ids=None):
        """Return a graph of the revisions with ghosts marked as applicable.

        :param revision_ids: an iterable of revisions to graph or None for all.
            An empty iterable is treated the same as None.
        :return: a Graph object with the graph reachable from revision_ids.
        :raises errors.NoSuchRevision: if one of the explicitly requested
            revisions is missing; missing ancestors are recorded as ghosts
            instead.
        """
        if 'evil' in debug.debug_flags:
            mutter_callsite(3,
                "get_revision_graph_with_ghosts scales with size of history.")
        result = deprecated_graph.Graph()
        if revision_ids:
            pending = set(revision_ids)
            # special case NULL_REVISION
            pending.discard(_mod_revision.NULL_REVISION)
            required = set(pending)
        else:
            pending = set(self.all_revision_ids())
            required = set()
        done = set()
        while pending:
            revision_id = pending.pop()
            try:
                rev = self.get_revision(revision_id)
            except errors.NoSuchRevision:
                if revision_id in required:
                    raise
                # a ghost
                result.add_ghost(revision_id)
                continue
            for parent_id in rev.parent_ids:
                # queue the parent unless it is already queued or done.
                if not (parent_id in pending or parent_id in done):
                    pending.add(parent_id)
            result.add_node(revision_id, rev.parent_ids)
            done.add(revision_id)
        return result
1614
2230.3.54 by Aaron Bentley
Move reverse history iteration to repository
1615
    def iter_reverse_revision_history(self, revision_id):
        """Iterate backwards through revision ids in the lefthand history

        Iteration stops at None, at NULL_REVISION, or at a revision with no
        parents.

        :param revision_id: The revision id to start with.  All its lefthand
            ancestors will be traversed.
        """
        graph = self.get_graph()
        stop_ids = (None, _mod_revision.NULL_REVISION)
        current_id = revision_id
        while current_id not in stop_ids:
            yield current_id
            # Note: The following line may raise KeyError in the event of
            # truncated history. We decided not to have a try:except:raise
            # RevisionNotPresent here until we see a use for it, because of the
            # cost in an inner loop that is by its very nature O(history).
            # Robert Collins 20080326
            parents = graph.get_parent_map([current_id])[current_id]
            if len(parents) == 0:
                return
            # Follow the first (lefthand) parent.
            current_id = parents[0]
1637
1594.2.3 by Robert Collins
bugfix revision.MultipleRevisionSources.get_revision_graph to integrate ghosts between sources. [slow on weaves, fast on knits.
1638
    @needs_read_lock
    def get_revision_inventory(self, revision_id):
        """Return inventory of a past revision.

        :raises NotImplementedError: if revision_id is None.
        """
        # TODO: Unify this with get_inventory()
        # bzr 0.0.6 and later imposes the constraint that the inventory_id
        # must be the same as its revision, so this is trivial.
        if revision_id is not None:
            return self.get_inventory(revision_id)
        # This does not make sense: if there is no revision,
        # then it is the current tree inventory surely ?!
        # and thus get_root_id() is something that looks at the last
        # commit on the branch, and the get_root_id is an inventory check.
        # (An Inventory(self.get_root_id()) was once returned here.)
        raise NotImplementedError
1653
1185.65.27 by Robert Collins
Tweak storage towards mergability.
1654
    @needs_read_lock
    def is_shared(self):
        """Return True if this repository is flagged as a shared repository.

        This implementation always raises NotImplementedError.
        """
        unimplemented = self.is_shared
        raise NotImplementedError(unimplemented)
1534.6.3 by Robert Collins
find_repository sufficiently robust.
1658
1594.2.7 by Robert Collins
Add versionedfile.fix_parents api for correcting data post hoc.
1659
    @needs_write_lock
    def reconcile(self, other=None, thorough=False):
        """Reconcile this repository.

        :param other: Not used by this implementation.
        :param thorough: Passed through to RepoReconciler.
        :return: The RepoReconciler, after its reconcile() has run.
        """
        from bzrlib.reconcile import RepoReconciler
        repo_reconciler = RepoReconciler(self, thorough=thorough)
        repo_reconciler.reconcile()
        return repo_reconciler
2440.1.1 by Martin Pool
Add new Repository.sprout,
1666
2617.6.2 by Robert Collins
Add abort_write_group and wire write_groups into fetch and commit.
1667
    def _refresh_data(self):
1668
        """Helper called from lock_* to ensure coherency with disk.
1669
1670
        The default implementation does nothing; it is however possible
1671
        for repositories to maintain loaded indices across multiple locks
1672
        by checking inside their implementation of this method to see
1673
        whether their indices are still valid. This depends of course on
1674
        the disk format being validatable in this manner.
1675
        """
1676
1534.6.3 by Robert Collins
find_repository sufficiently robust.
1677
    @needs_read_lock
    def revision_tree(self, revision_id):
        """Return Tree for a revision on this branch.

        `revision_id` may be None for the empty tree revision; NULL_REVISION
        is treated the same way.
        """
        # TODO: refactor this to use an existing revision object
        # so we don't need to read it in twice.
        is_empty_tree = (revision_id is None
            or revision_id == _mod_revision.NULL_REVISION)
        if is_empty_tree:
            empty_inventory = Inventory(root_id=None)
            return RevisionTree(self, empty_inventory,
                                _mod_revision.NULL_REVISION)
        inv = self.get_revision_inventory(revision_id)
        return RevisionTree(self, inv, revision_id)
1185.65.1 by Aaron Bentley
Refactored out ControlFiles and RevisionStore from _Branch
1691
1185.65.27 by Robert Collins
Tweak storage towards mergability.
1692
    @needs_read_lock
    def revision_trees(self, revision_ids):
        """Yield a RevisionTree for each revision in revision_ids.

        Entries of `revision_ids` may not be None or 'null:'.

        NOTE(review): this is a generator, so its body only runs when it is
        iterated - presumably after the @needs_read_lock wrapper has already
        returned. Confirm that callers hold their own lock.
        """
        for inv in self.iter_inventories(revision_ids):
            yield RevisionTree(self, inv, inv.revision_id)
1756.3.3 by Aaron Bentley
More refactoring, introduce revision_trees.
1700
1701
    @needs_read_lock
    def get_ancestry(self, revision_id, topo_sorted=True):
        """Return a list of revision-ids integrated by a revision.

        The first element of the list is always None, indicating the origin
        revision.  This might change when we have history horizons, or
        perhaps we should have a new API.

        :param revision_id: The revision whose ancestry is wanted. A null
            revision gives [None].
        :param topo_sorted: Passed to the inventory weave's get_ancestry().
        :raises errors.NoSuchRevision: if the revision is not present.
        """
        if _mod_revision.is_null(revision_id):
            return [None]
        if not self.has_revision(revision_id):
            raise errors.NoSuchRevision(self, revision_id)
        inventory_weave = self.get_inventory_weave()
        # Revisions not present are deliberately not filtered out of the
        # weave's answer (self._eliminate_revisions_not_present is not used).
        return [None] + inventory_weave.get_ancestry(revision_id, topo_sorted)
1185.66.2 by Aaron Bentley
Moved get_ancestry to RevisionStorage
1718
2604.2.1 by Robert Collins
(robertc) Introduce a pack command.
1719
    def pack(self):
        """Compress the data within the repository.

        Packing is only meaningful for some repository types; everywhere
        else it should be a no-op that simply returns, which is what this
        stub does.

        No lock is needed here.  Subclasses that do real work should
        decorate their override with @needs_write_lock: the call is long
        running, so it is reasonable to lock implicitly for the user.
        """
1729
1185.65.4 by Aaron Bentley
Fixed cat command
1730
    @needs_read_lock
    @deprecated_method(one_four)
    def print_file(self, file, revision_id):
        """Print `file` to stdout.

        FIXME RBC 20060125 as John Meinel points out this is a bad api
        - it writes to stdout, it assumes that that is valid etc. Fix
        by creating a new more flexible convenience function.

        :param file: Path of the file, resolved against the inventory of
            the requested revision.
        :param revision_id: The revision whose tree is consulted.
        :raises errors.BzrError: If the path has no file id in that
            revision.
        """
        tree = self.revision_tree(revision_id)
        # Resolve the path against the inventory as it was in that revision.
        file_id = tree.inventory.path2id(file)
        if not file_id:
            # TODO: jam 20060427 Write a test for this code path
            #       it had a bug in it, and was raising the wrong
            #       exception.
            message = "%r is not present in revision %s" % (file, revision_id)
            raise errors.BzrError(message)
        tree.print_file(file_id)
1748
1185.65.1 by Aaron Bentley
Refactored out ControlFiles and RevisionStore from _Branch
1749
    def get_transaction(self):
        """Return the transaction reported by this repository's control files."""
        control_files = self.control_files
        return control_files.get_transaction()
1751
2249.5.13 by John Arbash Meinel
Finish auditing Repository, and fix generate_ids to always generate utf8 ids.
1752
    def revision_parents(self, revision_id):
        """Return the inventory weave's parent names for `revision_id`."""
        inventory_weave = self.get_inventory_weave()
        return inventory_weave.parent_names(revision_id)
1590.1.1 by Robert Collins
Improve common_ancestor performance.
1754
3341.2.2 by Alexander Belchenko
Tree.print_file and Repository.print_file are deprecated.
1755
    @deprecated_method(one_one)
    def get_parents(self, revision_ids):
        """See StackedParentsProvider.get_parents

        :param revision_ids: Iterable of revision ids to look up.
        :return: A list with one entry per requested id, in request order:
            the parents get_parent_map reports for it, or None when
            get_parent_map has no entry for that id.
        """
        # Materialise once: the ids are walked twice below, so a single-pass
        # iterator would be exhausted by get_parent_map and the result would
        # silently come back empty.
        revision_ids = list(revision_ids)
        parent_map = self.get_parent_map(revision_ids)
        return [parent_map.get(r, None) for r in revision_ids]
1760
1761
    def get_parent_map(self, keys):
        """See graph._StackedParentsProvider.get_parent_map

        :param keys: Iterable of revision ids.
        :return: A dict mapping each known revision id to a tuple of its
            parent ids.  The null revision maps to an empty tuple, a
            revision with no recorded parents maps to a tuple holding just
            the null revision, and ids for which get_revision raises
            NoSuchRevision are left out.
        """
        found = {}
        for key in keys:
            if key == _mod_revision.NULL_REVISION:
                found[key] = ()
                continue
            try:
                recorded = self.get_revision(key).parent_ids
            except errors.NoSuchRevision:
                # Unknown revisions are simply absent from the result.
                continue
            if len(recorded) != 0:
                found[key] = tuple(recorded)
            else:
                found[key] = (_mod_revision.NULL_REVISION,)
        return found
2490.2.13 by Aaron Bentley
Update distinct -> lowest, refactor, add ParentsProvider concept
1779
1780
    def _make_parents_provider(self):
1781
        return self
1782
2490.2.21 by Aaron Bentley
Rename graph to deprecated_graph
1783
    def get_graph(self, other_repository=None):
        """Return the graph walker for this repository format

        :param other_repository: Optional second repository.  When given
            and it is not at the same location as this one, the graph is
            built over both repositories' parents providers, this one
            first.
        """
        own_provider = self._make_parents_provider()
        if (other_repository is None or
            self.has_same_location(other_repository)):
            return graph.Graph(own_provider)
        stacked_provider = graph._StackedParentsProvider(
            [own_provider, other_repository._make_parents_provider()])
        return graph.Graph(stacked_provider)
2490.2.13 by Aaron Bentley
Update distinct -> lowest, refactor, add ParentsProvider concept
1791
3036.1.3 by Robert Collins
Privatise VersionedFileChecker.
1792
    def _get_versioned_file_checker(self):
        """Return an object suitable for checking versioned files."""
        checker = _VersionedFileChecker(self)
        return checker
2745.6.47 by Andrew Bennetts
Move check_parents out of VersionedFile.
1795
3184.1.9 by Robert Collins
* ``Repository.get_data_stream`` is now deprecated in favour of
1796
    def revision_ids_to_search_result(self, result_set):
        """Convert a set of revision ids to a graph SearchResult.

        :param result_set: A set of revision ids (set methods are called
            on it directly).
        :return: A graph.SearchResult built from the start keys, the
            exclude keys, the size of `result_set` and `result_set` itself.
        """
        parent_map = self.get_graph().get_parent_map(result_set)
        all_parents = set()
        for parents in parent_map.itervalues():
            all_parents.update(parents)
        # Members that some other member names as a parent.
        included_keys = result_set.intersection(all_parents)
        # Start from the members nobody in the map names as a parent.
        start_keys = result_set.difference(included_keys)
        # Parents lying outside the set are where the search stops.
        exclude_keys = all_parents.difference(result_set)
        return graph.SearchResult(start_keys, exclude_keys,
            len(result_set), result_set)
1808
1185.65.27 by Robert Collins
Tweak storage towards mergability.
1809
    @needs_write_lock
    def set_make_working_trees(self, new_value):
        """Set the policy flag for making working trees when creating branches.

        The flag only affects branches that use this repository, and its
        default is 'True'.  This base implementation always raises
        NotImplementedError.

        :param new_value: True to restore the default, False to disable making
                          working trees.
        """
        unimplemented = self.set_make_working_trees
        raise NotImplementedError(unimplemented)
1534.6.5 by Robert Collins
Cloning of repos preserves shared and make-working-tree attributes.
1820
    
1821
    def make_working_trees(self):
        """Returns the policy for making working trees on new branches.

        This base implementation always raises NotImplementedError.
        """
        unimplemented = self.make_working_trees
        raise NotImplementedError(unimplemented)
1534.6.5 by Robert Collins
Cloning of repos preserves shared and make-working-tree attributes.
1824
1825
    @needs_write_lock
    def sign_revision(self, revision_id, gpg_strategy):
        """Store a signature over the short testament text of a revision.

        The text comes from Testament.from_revision(...).as_short_text()
        and is handed, with `gpg_strategy`, to store_revision_signature.
        """
        testament = Testament.from_revision(self, revision_id)
        plaintext = testament.as_short_text()
        self.store_revision_signature(gpg_strategy, plaintext, revision_id)
1534.4.40 by Robert Collins
Add RepositoryFormats and allow bzrdir.open or create _repository to be used.
1829
1563.2.29 by Robert Collins
Remove all but fetch references to repository.revision_store.
1830
    @needs_read_lock
    def has_signature_for_revision_id(self, revision_id):
        """Query for a revision signature for revision_id in the repository."""
        revision_store = self._revision_store
        transaction = self.get_transaction()
        return revision_store.has_signature(revision_id, transaction)
1835
1563.2.31 by Robert Collins
Convert Knit repositories to use knits.
1836
    @needs_read_lock
    def get_signature_text(self, revision_id):
        """Return the text for a signature."""
        revision_store = self._revision_store
        transaction = self.get_transaction()
        return revision_store.get_signature_text(revision_id, transaction)
1841
1732.2.4 by Martin Pool
Split check into Branch.check and Repository.check
1842
    @needs_read_lock
    def check(self, revision_ids=None):
        """Check consistency of all history of given revision_ids.

        This is a thin wrapper that delegates to _check(); different
        repository implementations should override _check().

        :param revision_ids: A non-empty list of revision_ids whose ancestry
             will be checked.  Typically the last revision_id of a branch.
        :return: Whatever _check() returns.
        """
        result = self._check(revision_ids)
        return result
1852
1853
    def _check(self, revision_ids):
        """Run a check.Check over this repository and return it.

        `revision_ids` is accepted but not consulted by this
        implementation.
        """
        checker = check.Check(self)
        checker.check()
        return checker
1857
1904.2.3 by Martin Pool
Give a warning on access to old repository formats
1858
    def _warn_if_deprecated(self):
        """Emit the deprecated-format warning unless it was already given.

        The module-level flag _deprecation_warning_done records that the
        warning has been issued; it is set before the warning is emitted.
        """
        global _deprecation_warning_done
        if not _deprecation_warning_done:
            _deprecation_warning_done = True
            warning("Format %s for %s is deprecated - please use 'bzr upgrade' to get better performance"
                    % (self._format, self.bzrdir.transport.base))
1865
1910.2.63 by Aaron Bentley
Add supports_rich_root member to repository
1866
    def supports_rich_root(self):
        """Return the rich_root_data flag of this repository's format."""
        repository_format = self._format
        return repository_format.rich_root_data
1868
2150.2.2 by Robert Collins
Change the commit builder selected-revision-id test to use a unicode revision id where possible, leading to stricter testing of the hypothetical unicode revision id support in bzr.
1869
    def _check_ascii_revisionid(self, revision_id, method):
1870
        """Private helper for ascii-only repositories."""
1871
        # weave repositories refuse to store revisionids that are non-ascii.
1872
        if revision_id is not None:
1873
            # weaves require ascii revision ids.
1874
            if isinstance(revision_id, unicode):
1875
                try:
1876
                    revision_id.encode('ascii')
1877
                except UnicodeEncodeError:
1878
                    raise errors.NonAsciiRevisionId(method, self)
2249.5.12 by John Arbash Meinel
Change the APIs for VersionedFile, Store, and some of Repository into utf-8
1879
            else:
1880
                try:
1881
                    revision_id.decode('ascii')
1882
                except UnicodeDecodeError:
1883
                    raise errors.NonAsciiRevisionId(method, self)
2819.2.4 by Andrew Bennetts
Add a 'revision_graph_can_have_wrong_parents' method to repository.
1884
    
1885
    def revision_graph_can_have_wrong_parents(self):
        """Is it possible for this repository to have a revision graph with
        incorrect parents?

        A repository answering True must also implement
        _find_inconsistent_revision_parents, so that check and reconcile can
        look for inconsistencies before running other checks that may rely
        on the revision index being consistent.

        This base implementation always raises NotImplementedError.
        """
        unimplemented = self.revision_graph_can_have_wrong_parents
        raise NotImplementedError(unimplemented)
3184.1.9 by Robert Collins
* ``Repository.get_data_stream`` is now deprecated in favour of
1895
1896
2241.1.18 by mbp at sourcefrog
Restore use of deprecating delegator for old formats in bzrlib.repository.
1897
# remove these delegates a while after bzr 0.15
def __make_delegated(name, from_module):
    """Install a module-level forwarder called `name`.

    The forwarder takes no arguments.  When called it issues a
    DeprecationWarning saying that `name` moved to `from_module`, imports
    that module and returns its attribute `name`.
    """
    def _deprecated_repository_forwarder():
        symbol_versioning.warn(
            '%s moved to %s in bzr 0.15' % (name, from_module),
            DeprecationWarning,
            stacklevel=2)
        module = __import__(from_module, globals(), locals(), [name])
        try:
            return getattr(module, name)
        except AttributeError:
            raise AttributeError('module %s has no name %s'
                    % (module, name))
    globals()[name] = _deprecated_repository_forwarder


# Names that now live in bzrlib.repofmt.weaverepo.
for _name in (
        'AllInOneRepository',
        'WeaveMetaDirRepository',
        'PreSplitOutRepositoryFormat',
        'RepositoryFormat4',
        'RepositoryFormat5',
        'RepositoryFormat6',
        'RepositoryFormat7',
        ):
    __make_delegated(_name, 'bzrlib.repofmt.weaverepo')


# Names that now live in bzrlib.repofmt.knitrepo.
for _name in (
        'KnitRepository',
        'RepositoryFormatKnit',
        'RepositoryFormatKnit1',
        ):
    __make_delegated(_name, 'bzrlib.repofmt.knitrepo')
1929
1930
2996.2.2 by Aaron Bentley
Create install_revisions function
1931
def install_revision(repository, rev, revision_tree):
    """Install all revision data into a repository.

    Convenience wrapper around install_revisions for a single revision
    that carries no signature.
    """
    unsigned_entry = (rev, revision_tree, None)
    install_revisions(repository, [unsigned_entry])
1934
1935
3146.6.1 by Aaron Bentley
InterDifferingSerializer shows a progress bar
1936
def install_revisions(repository, iterable, num_revisions=None, pb=None):
    """Install all revision data into a repository.

    Everything is installed inside one write group, which is committed
    when all items were installed and aborted if anything raises.

    :param repository: The repository to install into.
    :param iterable: An iterable of (revision, tree, signature) tuples.
        The signature may be None.
    :param num_revisions: Total passed to `pb.update` along with the
        running count; may be None.
    :param pb: Optional progress bar, updated after each revision.
    """
    repository.start_write_group()
    try:
        installed = 0
        for revision, revision_tree, signature in iterable:
            _install_revision(repository, revision, revision_tree, signature)
            installed += 1
            if pb is not None:
                pb.update('Transferring revisions', installed, num_revisions)
    except:
        # Deliberately catches everything: the write group must be aborted
        # whatever went wrong, and the error is always re-raised.
        repository.abort_write_group()
        raise
    repository.commit_write_group()
1953
1954
2996.2.1 by Aaron Bentley
Add KnitRepositoryFormat4
1955
def _install_revision(repository, rev, revision_tree, signature):
    """Install all revision data into a repository.

    In order: adds the file texts whose revision is not yet in the
    matching versioned file, then the inventory, then the signature text
    (when one is given) and finally the revision itself.

    :param repository: The repository to install into.
    :param rev: The revision object being installed.
    :param revision_tree: Tree supplying the inventory and file contents.
    :param signature: Signature text for the revision, or None.
    :raises errors.IncompatibleRevision: If the repository does not
        support rich roots and the root entry's revision differs from
        `rev.revision_id`.
    """
    present_parents = []
    parent_trees = {}
    for parent_id in rev.parent_ids:
        if repository.has_revision(parent_id):
            present_parents.append(parent_id)
            parent_tree = repository.revision_tree(parent_id)
        else:
            # Parents missing from the repository get revision_tree(None).
            parent_tree = repository.revision_tree(None)
        parent_trees[parent_id] = parent_tree

    inv = revision_tree.inventory
    entries = inv.iter_entries()
    # backwards compatibility hack: skip the root id.
    if not repository.supports_rich_root():
        root_path, root_entry = entries.next()
        if root_entry.revision != rev.revision_id:
            raise errors.IncompatibleRevision(repr(repository))
    # Add the texts that are not already present
    for path, entry in entries:
        file_id = entry.file_id
        weave = repository.weave_store.get_weave_or_empty(
            file_id, repository.get_transaction())
        if entry.revision in weave:
            continue
        text_parents = []
        # FIXME: TODO: The following loop *may* be overlapping/duplicate
        # with InventoryEntry.find_previous_heads(). if it is, then there
        # is a latent bug here where the parents may have ancestors of each
        # other. RBC, AB
        for tree in parent_trees.itervalues():
            if file_id not in tree:
                continue
            text_parent = tree.inventory[file_id].revision
            if text_parent not in text_parents:
                text_parents.append(text_parent)

        vfile = repository.weave_store.get_weave_or_empty(
            file_id, repository.get_transaction())
        lines = revision_tree.get_file(file_id).readlines()
        vfile.add_lines(rev.revision_id, text_parents, lines)
    try:
        # install the inventory
        repository.add_inventory(rev.revision_id, inv, present_parents)
    except errors.RevisionAlreadyPresent:
        # An inventory that is already stored is not an error here.
        pass
    if signature is not None:
        repository.add_signature_text(rev.revision_id, signature)
    repository.add_revision(rev.revision_id, rev, inv)
2003
2004
1556.1.3 by Robert Collins
Rearrangment of Repository logic to be less type code driven, and bugfix InterRepository.missing_revision_ids
2005
class MetaDirRepository(Repository):
    """Repositories in the new meta-dir layout.

    The shared flag and the working-tree policy are represented by the
    presence of the 'shared-storage' and 'no-working-trees' files on the
    control files.
    """

    def __init__(self, _format, a_bzrdir, control_files, _revision_store, control_store, text_store):
        """Pass every argument through to Repository.__init__ unchanged."""
        super(MetaDirRepository, self).__init__(_format,
                                                a_bzrdir,
                                                control_files,
                                                _revision_store,
                                                control_store,
                                                text_store)

    @needs_read_lock
    def is_shared(self):
        """Return True if this repository is flagged as a shared repository."""
        return self.control_files._transport.has('shared-storage')

    @needs_write_lock
    def set_make_working_trees(self, new_value):
        """Set the policy flag for making working trees when creating branches.

        This only applies to branches that use this repository.

        The default is 'True'.
        :param new_value: True to restore the default, False to disable making
                          working trees.
        """
        if new_value:
            # Restoring the default means removing the marker file; it is
            # fine if the file was never there.
            try:
                self.control_files._transport.delete('no-working-trees')
            except errors.NoSuchFile:
                pass
        else:
            # An empty marker file disables working trees.
            self.control_files.put_utf8('no-working-trees', '')

    def make_working_trees(self):
        """Returns the policy for making working trees on new branches."""
        return not self.control_files._transport.has('no-working-trees')
2044
1556.1.3 by Robert Collins
Rearrangment of Repository logic to be less type code driven, and bugfix InterRepository.missing_revision_ids
2045
2241.1.2 by Martin Pool
change to using external Repository format registry
2046
class RepositoryFormatRegistry(registry.Registry):
    """Registry of RepositoryFormats."""

    def get(self, format_string):
        """Return the format registered under `format_string`.

        When the registered object is callable (a class or factory) it is
        called with no arguments and the result is returned instead.
        """
        registered = registry.Registry.get(self, format_string)
        if not callable(registered):
            return registered
        return registered()
2241.1.2 by Martin Pool
change to using external Repository format registry
2054
    
2055
2056
format_registry = RepositoryFormatRegistry()
"""Registry of formats, indexed by their identifying format string.

Entries may be format instances themselves, or classes/factories that are
called to obtain one.
"""
2241.1.2 by Martin Pool
change to using external Repository format registry
2062
2220.2.3 by Martin Pool
Add tag: revision namespace.
2063
2064
#####################################################################
2065
# Repository Formats
1910.2.46 by Aaron Bentley
Whitespace fix
2066
1534.4.40 by Robert Collins
Add RepositoryFormats and allow bzrdir.open or create _repository to be used.
2067
class RepositoryFormat(object):
    """A repository format.

    Formats provide three things:
     * An initialization routine to construct repository data on disk.
     * a format string which is used when the BzrDir supports versioned
       children.
     * an open routine which returns a Repository instance.

    There is one and only one Format subclass for each on-disk format. But
    there can be one Repository subclass that is used for several different
    formats. The _format attribute on a Repository instance can be used to
    determine the disk format.

    Formats are placed in a dict by their format string for reference
    during opening. These should be subclasses of RepositoryFormat
    for consistency.

    Once a format is deprecated, just deprecate the initialize and open
    methods on the format class. Do not deprecate the object, as the
    object will be created every system load.

    Common instance attributes:
    _matchingbzrdir - the bzrdir format that the repository format was
    originally written to work with. This can be used if manually
    constructing a bzrdir and repository, or more commonly for test suite
    parameterization.
    """

    # Set to True or False in derived classes. True indicates that the format
    # supports ghosts gracefully.
    supports_ghosts = None
    # Can this repository be given external locations to lookup additional
    # data. Set to True or False in derived classes.
    supports_external_lookups = None

    def __str__(self):
        """Return the class name wrapped in angle brackets."""
        return "<%s>" % self.__class__.__name__

    def __eq__(self, other):
        """Formats compare equal when other is an instance of our class."""
        # format objects are generally stateless
        return isinstance(other, self.__class__)

    def __ne__(self, other):
        """Inverse of __eq__."""
        return not self == other

    @classmethod
    def find_format(klass, a_bzrdir):
        """Return the format for the repository object in a_bzrdir.

        This is used by bzr native formats that have a "format" file in
        the repository.  Other methods may be used by different types of
        control directory.

        :param a_bzrdir: The control directory whose repository transport is
            read for a "format" file.
        :return: The entry registered in format_registry for the format
            string found in that file.
        :raises errors.NoRepositoryPresent: if reading the "format" file
            raises errors.NoSuchFile.
        :raises errors.UnknownFormatError: if the format string is not
            present in format_registry.
        """
        # Read the format string first, in its own try block, so that the
        # KeyError handler below can only run once format_string is bound.
        try:
            transport = a_bzrdir.get_repository_transport(None)
            format_string = transport.get("format").read()
        except errors.NoSuchFile:
            raise errors.NoRepositoryPresent(a_bzrdir)
        try:
            return format_registry.get(format_string)
        except KeyError:
            raise errors.UnknownFormatError(format=format_string,
                                            kind='repository')

    @classmethod
    def register_format(klass, format):
        """Register format in format_registry under its format string."""
        format_registry.register(format.get_format_string(), format)

    @classmethod
    def unregister_format(klass, format):
        """Remove the entry for format's format string from format_registry."""
        format_registry.remove(format.get_format_string())

    @classmethod
    def get_default_format(klass):
        """Return the current default format."""
        from bzrlib import bzrdir
        return bzrdir.format_registry.make_bzrdir('default').repository_format

    def _get_control_store(self, repo_transport, control_files):
        """Return the control store for this repository.

        Must be implemented by subclasses; this base version always raises
        NotImplementedError.
        """
        raise NotImplementedError(self._get_control_store)

    def get_format_string(self):
        """Return the ASCII format string that identifies this format.

        Note that in pre format ?? repositories the format string is
        not permitted nor written to disk.

        Must be implemented by subclasses; this base version always raises
        NotImplementedError.
        """
        raise NotImplementedError(self.get_format_string)

    def get_format_description(self):
        """Return the short description for this format.

        Must be implemented by subclasses; this base version always raises
        NotImplementedError.
        """
        raise NotImplementedError(self.get_format_description)

    def _get_revision_store(self, repo_transport, control_files):
        """Return the revision store object for this a_bzrdir.

        Must be implemented by subclasses; this base version always raises
        NotImplementedError.
        """
        raise NotImplementedError(self._get_revision_store)

    def _get_text_rev_store(self,
                            transport,
                            control_files,
                            name,
                            compressed=True,
                            prefixed=False,
                            serializer=None):
        """Common logic for getting a revision store for a repository.

        see self._get_revision_store for the subclass-overridable method to
        get the store for a repository.

        :param transport: Transport that is cloned at name to root the
            underlying TextStore.
        :param control_files: Object supplying the _dir_mode and _file_mode
            passed to the TextStore.
        :param name: Relative location, under transport, of the store.
        :param compressed: Passed through to TextStore.
        :param prefixed: Passed through to TextStore.
        :param serializer: Passed through to TextRevisionStore.
        :return: A TextRevisionStore wrapping the new TextStore.
        """
        from bzrlib.store.revision.text import TextRevisionStore
        dir_mode = control_files._dir_mode
        file_mode = control_files._file_mode
        text_store = TextStore(transport.clone(name),
                              prefixed=prefixed,
                              compressed=compressed,
                              dir_mode=dir_mode,
                              file_mode=file_mode)
        _revision_store = TextRevisionStore(text_store, serializer)
        return _revision_store

    # TODO: this shouldn't be in the base class, it's specific to things that
    # use weaves or knits -- mbp 20070207
    def _get_versioned_file_store(self,
                                  name,
                                  transport,
                                  control_files,
                                  prefixed=True,
                                  versionedfile_class=None,
                                  versionedfile_kwargs=None,
                                  escaped=False):
        """Return a VersionedFileStore located at name under control_files.

        :param name: Relative location of the store; control_files._transport
            is cloned at this name.
        :param transport: Not used by this implementation; the store is
            rooted at control_files._transport.
        :param control_files: Object supplying the transport and the
            _dir_mode and _file_mode used for the store.
        :param prefixed: Passed through to VersionedFileStore.
        :param versionedfile_class: Class passed through to
            VersionedFileStore; defaults to self._versionedfile_class.
        :param versionedfile_kwargs: Dict passed through to
            VersionedFileStore; defaults to a new empty dict.
        :param escaped: Passed through to VersionedFileStore.
        :return: The new VersionedFileStore.
        """
        if versionedfile_class is None:
            versionedfile_class = self._versionedfile_class
        # A fresh dict per call: a shared default dict would be handed to
        # every store created without explicit kwargs.
        if versionedfile_kwargs is None:
            versionedfile_kwargs = {}
        weave_transport = control_files._transport.clone(name)
        dir_mode = control_files._dir_mode
        file_mode = control_files._file_mode
        return VersionedFileStore(weave_transport, prefixed=prefixed,
                                  dir_mode=dir_mode,
                                  file_mode=file_mode,
                                  versionedfile_class=versionedfile_class,
                                  versionedfile_kwargs=versionedfile_kwargs,
                                  escaped=escaped)

    def initialize(self, a_bzrdir, shared=False):
        """Initialize a repository of this format in a_bzrdir.

        :param a_bzrdir: The bzrdir to put the new repository in it.
        :param shared: The repository should be initialized as a sharable one.
        :returns: The new repository object.

        This may raise UninitializableFormat if shared repository are not
        compatible with the a_bzrdir.

        Must be implemented by subclasses; this base version always raises
        NotImplementedError.
        """
        raise NotImplementedError(self.initialize)

    def is_supported(self):
        """Is this format supported?

        Supported formats must be initializable and openable.
        Unsupported formats may not support initialization or committing or
        some other features depending on the reason for not being supported.
        """
        return True

    def check_conversion_target(self, target_format):
        """Must be implemented by subclasses; always raises here."""
        raise NotImplementedError(self.check_conversion_target)

    def open(self, a_bzrdir, _found=False):
        """Return an instance of this format for the bzrdir a_bzrdir.

        _found is a private parameter, do not use it.

        Must be implemented by subclasses; this base version always raises
        NotImplementedError.
        """
        raise NotImplementedError(self.open)
1534.4.40 by Robert Collins
Add RepositoryFormats and allow bzrdir.open or create _repository to be used.
2240
1556.1.3 by Robert Collins
Rearrangment of Repository logic to be less type code driven, and bugfix InterRepository.missing_revision_ids
2241
2242
class MetaDirRepositoryFormat(RepositoryFormat):
    """Common base class for the new repositories using the metadir layout."""

    rich_root_data = False
    supports_tree_reference = False
    supports_external_lookups = False
    _matchingbzrdir = bzrdir.BzrDirMetaFormat1()

    def __init__(self):
        super(MetaDirRepositoryFormat, self).__init__()

    def _create_control_files(self, a_bzrdir):
        """Create the required files and the initial control_files object.

        :param a_bzrdir: The bzrdir whose repository transport will hold the
            control files.
        :return: A LockableFiles on that transport, using a LockDir named
            'lock' which has been created (but not taken).
        """
        # FIXME: RBC 20060125 don't peek under the covers
        # NB: no need to escape relative paths that are url safe.
        repository_transport = a_bzrdir.get_repository_transport(self)
        control_files = lockable_files.LockableFiles(repository_transport,
                                'lock', lockdir.LockDir)
        control_files.create_lock()
        return control_files

    def _upload_blank_content(self, a_bzrdir, dirs, files, utf8_files, shared):
        """Upload the initial blank content.

        :param a_bzrdir: The bzrdir to create the control files in.
        :param dirs: Directory names to create on the control transport.
        :param files: Iterable of (name, content) pairs written with
            control_files.put.
        :param utf8_files: Iterable of (name, content) pairs written with
            control_files.put_utf8.
        :param shared: Boolean; when true an empty 'shared-storage' file is
            also written.
        """
        control_files = self._create_control_files(a_bzrdir)
        control_files.lock_write()
        try:
            control_files._transport.mkdir_multi(dirs,
                    mode=control_files._dir_mode)
            # 'name' rather than 'file', so the builtin is not shadowed.
            for name, content in files:
                control_files.put(name, content)
            for name, content in utf8_files:
                control_files.put_utf8(name, content)
            if shared:
                control_files.put_utf8('shared-storage', '')
        finally:
            # Always release the write lock, even if an upload failed.
            control_files.unlock()
1556.1.3 by Robert Collins
Rearrangment of Repository logic to be less type code driven, and bugfix InterRepository.missing_revision_ids
2278
2279
1534.4.40 by Robert Collins
Add RepositoryFormats and allow bzrdir.open or create _repository to be used.
2280
# Formats which have no format string are not discoverable and not
# independently creatable, so are not registered.  They're all in
# bzrlib.repofmt.weaverepo now.  When an instance of one of these is
# needed, it's constructed directly by the BzrDir.  Non-native formats where
# the repository is not separately opened are similar.
#
# Each call below passes the identifying format string, then a module path
# and the name of the format class inside that module.
# NOTE(review): presumably the module is only imported when the entry is
# first looked up -- confirm against registry.Registry.register_lazy.

format_registry.register_lazy(
    'Bazaar-NG Repository format 7',
    'bzrlib.repofmt.weaverepo',
    'RepositoryFormat7'
    )

format_registry.register_lazy(
    'Bazaar-NG Knit Repository Format 1',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit1',
    )

format_registry.register_lazy(
    'Bazaar Knit Repository Format 3 (bzr 0.15)\n',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit3',
    )

format_registry.register_lazy(
    'Bazaar Knit Repository Format 4 (bzr 1.0)\n',
    'bzrlib.repofmt.knitrepo',
    'RepositoryFormatKnit4',
    )

# Pack-based formats. There is one format for pre-subtrees, and one for
# post-subtrees to allow ease of testing.
# NOTE: These are experimental in 0.92. Stable in 1.0 and above
format_registry.register_lazy(
    'Bazaar pack repository format 1 (needs bzr 0.92)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack1',
    )
format_registry.register_lazy(
    'Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack3',
    )
format_registry.register_lazy(
    'Bazaar pack repository format 1 with rich root (needs bzr 1.0)\n',
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatKnitPack4',
    )
# Development formats.
# 1.2->1.3
# development 0 - stub to introduce development versioning scheme.
format_registry.register_lazy(
    "Bazaar development format 0 (needs bzr.dev from before 1.3)\n",
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatPackDevelopment0',
    )
format_registry.register_lazy(
    ("Bazaar development format 0 with subtree support "
        "(needs bzr.dev from before 1.3)\n"),
    'bzrlib.repofmt.pack_repo',
    'RepositoryFormatPackDevelopment0Subtree',
    )
# 1.3->1.4 go below here
2592.3.22 by Robert Collins
Add new experimental repository formats.
2343
1534.4.40 by Robert Collins
Add RepositoryFormats and allow bzrdir.open or create _repository to be used.
2344
1563.2.12 by Robert Collins
Checkpointing: created InterObject to factor out common inter object worker code, added InterVersionedFile and tests to allow making join work between any versionedfile.
2345
class InterRepository(InterObject):
    """This class represents operations taking place between two repositories.

    Its instances have methods like copy_content and fetch, and contain
    references to the source and target repositories these operations can be
    carried out on.

    Often we will provide convenience methods on 'repository' which carry out
    operations with another repository - they will always forward to
    InterRepository.get(other).method_name(parameters).
    """

    _optimisers = []
    """The available optimised InterRepository types."""

    def copy_content(self, revision_id=None):
        """Must be implemented by subclasses; always raises here."""
        raise NotImplementedError(self.copy_content)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """Fetch the content required to construct revision_id.

        The content is copied from self.source to self.target.

        :param revision_id: if None all content is copied, if NULL_REVISION no
                            content is copied.
        :param pb: optional progress bar to use for progress reports. If not
                   provided a default one will be created.
        :param find_ghosts: not interpreted by this base implementation,
                   which always raises NotImplementedError.

        Returns the copied revision count and the failed revisions in a tuple:
        (copied, failures).
        """
        raise NotImplementedError(self.fetch)

    def _walk_to_common_revisions(self, revision_ids):
        """Walk out from revision_ids in source to revisions target has.

        :param revision_ids: The start point for the search.
        :return: The result object of the breadth-first search, as returned
            by the searcher's get_result().
        :raises errors.NoSuchRevision: if one of revision_ids is reported as
            a ghost by the source graph and target does not have it either.
        """
        graph = self.source.get_graph()
        # ensure we don't pay silly lookup costs.
        revision_ids = frozenset(revision_ids)
        searcher = graph._make_breadth_first_searcher(revision_ids)
        null_set = frozenset([_mod_revision.NULL_REVISION])
        while True:
            try:
                next_revs, ghosts = searcher.next_with_ghosts()
            except StopIteration:
                break
            # Requested revisions that the source reports as ghosts.
            absent_ids = set(revision_ids.intersection(ghosts))
            if absent_ids:
                # If all absent_ids are present in target, no error is needed.
                absent_ids.difference_update(
                    self.target.has_revisions(absent_ids))
                if absent_ids:
                    raise errors.NoSuchRevision(self.source, absent_ids.pop())
            # we don't care about other ghosts as we can't fetch them and
            # haven't been asked to.
            next_revs = set(next_revs)
            # we always have NULL_REVISION present.
            have_revs = self.target.has_revisions(next_revs).union(null_set)
            # Do not walk past revisions the target already has; the
            # searcher itself tracks what remains for get_result().
            searcher.stop_searching_any(have_revs)
        return searcher.get_result()

    @deprecated_method(one_two)
    @needs_read_lock
    def missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """Return the revision ids that source has that target does not.

        These are returned in topological order.

        :param revision_id: only return revision ids included by this
                            revision_id.
        :param find_ghosts: If True find missing revisions in deep history
            rather than just finding the surface difference.
        :return: A list built from the keys of the result of
            search_missing_revision_ids.
        """
        return list(self.search_missing_revision_ids(
            revision_id, find_ghosts).get_keys())

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """Return the revision ids that source has that target does not.

        :param revision_id: only return revision ids included by this
                            revision_id.
        :param find_ghosts: If True find missing revisions in deep history
            rather than just finding the surface difference.
        :return: A bzrlib.graph.SearchResult.
        """
        # stop searching at found target revisions.
        if not find_ghosts and revision_id is not None:
            return self._walk_to_common_revisions([revision_id])
        # generic, possibly worst case, slow code path.
        target_ids = set(self.target.all_revision_ids())
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            # The ancestry list is expected to start with a None entry,
            # which is not a revision id and is dropped.
            assert source_ids[0] is None
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        result_set = set(source_ids).difference(target_ids)
        return self.source.revision_ids_to_search_result(result_set)

    @staticmethod
    def _same_model(source, target):
        """True if source and target have the same data representation.

        Both must agree on supports_rich_root() and use equal _serializer
        objects.
        """
        if source.supports_rich_root() != target.supports_rich_root():
            return False
        if source._serializer != target._serializer:
            return False
        return True
2458
1910.2.15 by Aaron Bentley
Back out inter.get changes, make optimizers an ordered list
2459
2460
class InterSameDataRepository(InterRepository):
    """Inter-repository operations for repositories holding the same data.

    Both the data format and the model of the two repositories must match
    for this code to be usable.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        """Return the repository format to run tests against.

        InterSameData can pull from subtree to subtree and from non-subtree
        to non-subtree, so the richest repository format is used for testing.
        """
        from bzrlib.repofmt import knitrepo
        richest_format = knitrepo.RepositoryFormatKnit3()
        return richest_format

    @staticmethod
    def is_compatible(source, target):
        """Return whether source and target share the same data model."""
        same_model = InterRepository._same_model(source, target)
        return same_model

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """Make a complete copy of the content in self into destination.

        Both the repository's revision data and configuration information
        such as the make_working_trees setting are copied.

        This is a destructive operation! Do not use it on existing
        repositories.

        :param revision_id: Only copy the content needed to construct
                            revision_id and its parents.
        """
        try:
            working_trees = self.source.make_working_trees()
            self.target.set_make_working_trees(working_trees)
        except NotImplementedError:
            # Best effort only: the setting is simply not propagated.
            pass
        # Skip the fetch entirely when a specific, non-null revision was
        # requested and the target already holds it.
        wants_specific = revision_id not in (
            None, _mod_revision.NULL_REVISION)
        if wants_specific and self.target.has_revision(revision_id):
            return
        self.target.fetch(self.source, revision_id=revision_id)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch().

        :return: A (count_copied, failed_revisions) tuple taken from the
            GenericRepoFetcher that performed the copy.
        """
        from bzrlib.fetch import GenericRepoFetcher
        source, target = self.source, self.target
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               source, source._format, target, target._format)
        fetcher = GenericRepoFetcher(
            to_repository=target,
            from_repository=source,
            last_revision=revision_id,
            pb=pb,
            find_ghosts=find_ghosts)
        return fetcher.count_copied, fetcher.failed_revisions
1534.1.31 by Robert Collins
Deprecated fetch.fetch and fetch.greedy_fetch for branch.fetch, and move the Repository.fetch internals to InterRepo and InterWeaveRepo.
2515
1910.2.15 by Aaron Bentley
Back out inter.get changes, make optimizers an ordered list
2516
2241.1.12 by Martin Pool
Restore InterWeaveRepo
2517
class InterWeaveRepo(InterSameDataRepository):
    """Optimised code paths between Weave based repositories.

    This should be in bzrlib/repofmt/weaverepo.py but we have not yet
    implemented lazy inter-object optimisation.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        """Return the repository format to run tests against."""
        from bzrlib.repofmt import weaverepo
        return weaverepo.RepositoryFormat7()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Weave formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.

        :return: True when the _format of both repositories is an instance
            of RepositoryFormat5, RepositoryFormat6 or RepositoryFormat7;
            False otherwise, including when an AttributeError is raised
            while inspecting them.
        """
        from bzrlib.repofmt.weaverepo import (
                RepositoryFormat5,
                RepositoryFormat6,
                RepositoryFormat7,
                )
        try:
            return (isinstance(source._format, (RepositoryFormat5,
                                                RepositoryFormat6,
                                                RepositoryFormat7)) and
                    isinstance(target._format, (RepositoryFormat5,
                                                RepositoryFormat6,
                                                RepositoryFormat7)))
        except AttributeError:
            return False

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """See InterRepository.copy_content().

        When the source transport is listable, the weave store, the
        inventory weave and the revision text store are copied wholesale
        and revision_id is not consulted; otherwise the copy falls back to
        self.target.fetch() limited to revision_id.
        """
        # weave specific optimised path:
        try:
            self.target.set_make_working_trees(self.source.make_working_trees())
        except NotImplementedError:
            # Best effort only: the setting is simply not propagated.
            pass
        # FIXME do not peek!
        if self.source.control_files._transport.listable():
            pb = ui.ui_factory.nested_progress_bar()
            try:
                self.target.weave_store.copy_all_ids(
                    self.source.weave_store,
                    pb=pb,
                    from_transaction=self.source.get_transaction(),
                    to_transaction=self.target.get_transaction())
                pb.update('copying inventory', 0, 1)
                self.target.control_weaves.copy_multi(
                    self.source.control_weaves, ['inventory'],
                    from_transaction=self.source.get_transaction(),
                    to_transaction=self.target.get_transaction())
                self.target._revision_store.text_store.copy_all_ids(
                    self.source._revision_store.text_store,
                    pb=pb)
            finally:
                # Always release the nested progress bar, even on failure.
                pb.finished()
        else:
            self.target.fetch(self.source, revision_id=revision_id)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch().

        :return: A (count_copied, failed_revisions) tuple taken from the
            GenericRepoFetcher that performed the copy.
        """
        from bzrlib.fetch import GenericRepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        f = GenericRepoFetcher(to_repository=self.target,
                               from_repository=self.source,
                               last_revision=revision_id,
                               pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.search_missing_revision_ids().

        :param revision_id: If not None, only consider the ancestry of this
            revision in the source; otherwise consider every id the source
            may hold.
        :param find_ghosts: Accepted for interface compatibility; this
            implementation does not use it.
        :return: The result of
            self.source.revision_ids_to_search_result() for the revisions
            found to be missing from the target.
        :raises AssertionError: if the ancestry returned by the source does
            not start with None.
        """
        # we want all revisions to satisfy revision_id in source.
        # but we don't want to stat every file here and there.
        # we want then, all revisions other needs to satisfy revision_id
        # checked, but not those that we have locally.
        # so the first thing is to get a subset of the revisions to
        # satisfy revision_id in source, and then eliminate those that
        # we do already have.
        # this is slow on high latency connection to self, but as this
        # disk format scales terribly for push anyway due to rewriting
        # inventory.weave, this is considered acceptable.
        # - RBC 20060209
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            # Checked explicitly rather than with an assert statement so
            # the guard still runs under "python -O"; otherwise pop(0)
            # below could silently discard a real revision id.
            if source_ids[0] is not None:
                raise AssertionError(
                    'ancestry of %r does not start with None'
                    % (revision_id,))
            source_ids.pop(0)
        else:
            source_ids = self.source._all_possible_ids()
        source_ids_set = set(source_ids)
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target._all_possible_ids())
        possibly_present_revisions = target_ids.intersection(source_ids_set)
        actually_present_revisions = set(
            self.target._eliminate_revisions_not_present(possibly_present_revisions))
        required_revisions = source_ids_set.difference(actually_present_revisions)
        if revision_id is not None:
            # we used get_ancestry to determine source_ids then we are assured all
            # revisions referenced are present as they are installed in topological order.
            # and the tip revision was validated by get_ancestry.
            result_set = required_revisions
        else:
            # if we just grabbed the possibly available ids, then
            # we only have an estimate of whats available and need to validate
            # that against the revision records.
            result_set = set(
                self.source._eliminate_revisions_not_present(required_revisions))
        return self.source.revision_ids_to_search_result(result_set)
2241.1.12 by Martin Pool
Restore InterWeaveRepo
2636
2637
1910.2.15 by Aaron Bentley
Back out inter.get changes, make optimizers an ordered list
2638
class InterKnitRepo(InterSameDataRepository):
    """Optimised code paths between Knit based repositories."""

    @classmethod
    def _get_repo_format_to_test(self):
        """Return the repository format to run tests against."""
        from bzrlib.repofmt import knitrepo
        return knitrepo.RepositoryFormatKnit1()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Knit formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.

        :return: True when the _format of both repositories is a
            RepositoryFormatKnit and InterRepository._same_model() accepts
            the pair; False otherwise, including when an AttributeError is
            raised while inspecting the formats.
        """
        from bzrlib.repofmt.knitrepo import RepositoryFormatKnit
        try:
            are_knits = (isinstance(source._format, RepositoryFormatKnit) and
                isinstance(target._format, RepositoryFormatKnit))
        except AttributeError:
            return False
        return are_knits and InterRepository._same_model(source, target)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch().

        :return: A (count_copied, failed_revisions) tuple taken from the
            KnitRepoFetcher that performed the copy.
        """
        from bzrlib.fetch import KnitRepoFetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        f = KnitRepoFetcher(to_repository=self.target,
                            from_repository=self.source,
                            last_revision=revision_id,
                            pb=pb, find_ghosts=find_ghosts)
        return f.count_copied, f.failed_revisions

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.search_missing_revision_ids().

        :param revision_id: If not None, only consider the ancestry of this
            revision in the source; otherwise consider all of the source's
            revision ids.
        :param find_ghosts: Accepted for interface compatibility; this
            implementation does not use it.
        :return: The result of
            self.source.revision_ids_to_search_result() for the revisions
            found to be missing from the target.
        :raises AssertionError: if the ancestry returned by the source does
            not start with None.
        """
        if revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            # Checked explicitly rather than with an assert statement so
            # the guard still runs under "python -O"; otherwise pop(0)
            # below could silently discard a real revision id.
            if source_ids[0] is not None:
                raise AssertionError(
                    'ancestry of %r does not start with None'
                    % (revision_id,))
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        source_ids_set = set(source_ids)
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target.all_revision_ids())
        possibly_present_revisions = target_ids.intersection(source_ids_set)
        actually_present_revisions = set(
            self.target._eliminate_revisions_not_present(possibly_present_revisions))
        required_revisions = source_ids_set.difference(actually_present_revisions)
        if revision_id is not None:
            # we used get_ancestry to determine source_ids then we are assured all
            # revisions referenced are present as they are installed in topological order.
            # and the tip revision was validated by get_ancestry.
            result_set = required_revisions
        else:
            # if we just grabbed the possibly available ids, then
            # we only have an estimate of whats available and need to validate
            # that against the revision records.
            result_set = set(
                self.source._eliminate_revisions_not_present(required_revisions))
        return self.source.revision_ids_to_search_result(result_set)
1563.2.31 by Robert Collins
Convert Knit repositories to use knits.
2705
1910.2.17 by Aaron Bentley
Get fetching from 1 to 2 under test
2706
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
2707
class InterPackRepo(InterSameDataRepository):
    """Optimised code paths between Pack based repositories."""

    @classmethod
    def _get_repo_format_to_test(self):
        """Return the repository format to run tests against."""
        from bzrlib.repofmt import pack_repo
        return pack_repo.RepositoryFormatKnitPack1()

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with known Pack formats.

        We don't test for the stores being of specific types because that
        could lead to confusing results, and there is no need to be
        overly general.

        :return: True when the _format of both repositories is a
            RepositoryFormatPack and InterRepository._same_model() accepts
            the pair; False otherwise, including when an AttributeError is
            raised while inspecting the formats.
        """
        from bzrlib.repofmt.pack_repo import RepositoryFormatPack
        try:
            are_packs = (isinstance(source._format, RepositoryFormatPack) and
                isinstance(target._format, RepositoryFormatPack))
        except AttributeError:
            return False
        return are_packs and InterRepository._same_model(source, target)

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch().

        :param revision_id: Revision to fetch. None selects every revision
            id in the source; a null revision means there is nothing to do.
        :param pb: Accepted for interface compatibility; this
            implementation does not use it.
        :param find_ghosts: Passed on to search_missing_revision_ids().
        :return: A (count, failures) tuple. count is the revision count of
            the newly created pack, or 0 when no pack was created; failures
            is always an empty list here.
        :raises errors.InstallFailed: if searching for missing revisions
            raises errors.NoSuchRevision.
        """
        from bzrlib.repofmt.pack_repo import Packer
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target, self.target._format)
        self.count_copied = 0
        if revision_id is None:
            # TODO:
            # everything to do - use pack logic
            # to fetch from all packs to one without
            # inventory parsing etc, IFF nothing to be copied is in the target.
            # till then:
            revision_ids = self.source.all_revision_ids()
            # implementing the TODO will involve:
            # - detecting when all of a pack is selected
            # - avoiding as much as possible pre-selection, so the
            # more-core routines such as create_pack_from_packs can filter in
            # a just-in-time fashion. (though having a HEADS list on a
            # repository might make this a lot easier, because we could
            # sensibly detect 'new revisions' without doing a full index scan.
        elif _mod_revision.is_null(revision_id):
            # nothing to do:
            return (0, [])
        else:
            try:
                revision_ids = self.search_missing_revision_ids(revision_id,
                    find_ghosts=find_ghosts).get_keys()
            except errors.NoSuchRevision:
                raise errors.InstallFailed([revision_id])
        packs = self.source._pack_collection.all_packs()
        pack = Packer(self.target._pack_collection, packs, '.fetch',
            revision_ids).pack()
        if pack is not None:
            self.target._pack_collection._save_pack_names()
            # Trigger an autopack. This may duplicate effort as we've just done
            # a pack creation, but for now it is simpler to think about as
            # 'upload data, then repack if needed'.
            self.target._pack_collection.autopack()
            return (pack.get_revision_count(), [])
        else:
            return (0, [])

    @needs_read_lock
    def search_missing_revision_ids(self, revision_id=None, find_ghosts=True):
        """See InterRepository.search_missing_revision_ids().

        :param revision_id: If not None, only consider the ancestry of this
            revision in the source; otherwise consider all of the source's
            revision ids.
        :param find_ghosts: Find ghosts throughout the ancestry of
            revision_id. When False and revision_id is given, the result
            of self._walk_to_common_revisions([revision_id]) is returned
            instead.
        :return: A search result for the revisions missing from the target.
        :raises AssertionError: if the ancestry returned by the source does
            not start with None.
        """
        if not find_ghosts and revision_id is not None:
            return self._walk_to_common_revisions([revision_id])
        elif revision_id is not None:
            source_ids = self.source.get_ancestry(revision_id)
            # Checked explicitly rather than with an assert statement so
            # the guard still runs under "python -O"; otherwise pop(0)
            # below could silently discard a real revision id.
            if source_ids[0] is not None:
                raise AssertionError(
                    'ancestry of %r does not start with None'
                    % (revision_id,))
            source_ids.pop(0)
        else:
            source_ids = self.source.all_revision_ids()
        # source_ids is the worst possible case we may need to pull.
        # now we want to filter source_ids against what we actually
        # have in target, but don't try to check for existence where we know
        # we do not have a revision as that would be pointless.
        target_ids = set(self.target.all_revision_ids())
        result_set = set(source_ids).difference(target_ids)
        return self.source.revision_ids_to_search_result(result_set)
2592.3.88 by Robert Collins
Move Pack repository logic to bzrlib.repofmt.pack_repo.
2796
2797
1910.2.24 by Aaron Bentley
Got intra-repository fetch working between model1 and 2 for all types
2798
class InterModel1and2(InterRepository):
    """Inter-repository operations from a non-rich-root source to a
    rich-root target.
    """

    @classmethod
    def _get_repo_format_to_test(self):
        """Return None: no repository format is supplied for testing."""
        return None

    @staticmethod
    def is_compatible(source, target):
        """Return True only for a non-rich-root source and rich-root target."""
        if source.supports_rich_root():
            return False
        if target.supports_rich_root():
            return True
        return False

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch().

        :return: A (count_copied, failed_revisions) tuple taken from the
            Model1toKnit2Fetcher that performed the copy.
        """
        from bzrlib.fetch import Model1toKnit2Fetcher
        fetcher = Model1toKnit2Fetcher(
            to_repository=self.target,
            from_repository=self.source,
            last_revision=revision_id,
            pb=pb,
            find_ghosts=find_ghosts)
        return fetcher.count_copied, fetcher.failed_revisions

    @needs_write_lock
    def copy_content(self, revision_id=None):
        """Make a complete copy of the content in self into destination.

        This is a destructive operation! Do not use it on existing
        repositories.

        :param revision_id: Only copy the content needed to construct
                            revision_id and its parents.
        """
        try:
            working_trees = self.source.make_working_trees()
            self.target.set_make_working_trees(working_trees)
        except NotImplementedError:
            # Best effort only: the setting is simply not propagated.
            pass
        # Skip the fetch entirely when a specific, non-null revision was
        # requested and the target already holds it.
        wants_specific = revision_id not in (
            None, _mod_revision.NULL_REVISION)
        if wants_specific and self.target.has_revision(revision_id):
            return
        self.target.fetch(self.source, revision_id=revision_id)
2840
1910.2.24 by Aaron Bentley
Got intra-repository fetch working between model1 and 2 for all types
2841
1910.2.17 by Aaron Bentley
Get fetching from 1 to 2 under test
2842
class InterKnit1and2(InterKnitRepo):
    """InterRepository that fetches with Knit1to2Fetcher.

    It applies when the source format is one of the formats listed without
    subtree support and the target format is one of those listed with it
    (see is_compatible).
    """

    @classmethod
    def _get_repo_format_to_test(cls):
        return None

    @staticmethod
    def is_compatible(source, target):
        """Be compatible with Knit1 source and Knit3 target

        :return: True when source._format is a no-subtree format and
            target._format is a subtree format; False otherwise, including
            when an AttributeError is raised while checking.
        """
        from bzrlib.repofmt.knitrepo import RepositoryFormatKnit3
        try:
            from bzrlib.repofmt.knitrepo import RepositoryFormatKnit1
            from bzrlib.repofmt.pack_repo import (
                RepositoryFormatKnitPack1,
                RepositoryFormatKnitPack3,
                RepositoryFormatPackDevelopment0,
                RepositoryFormatPackDevelopment0Subtree,
                )
            formats_without_subtrees = (
                RepositoryFormatKnit1,
                RepositoryFormatKnitPack1,
                RepositoryFormatPackDevelopment0,
                )
            formats_with_subtrees = (
                RepositoryFormatKnit3,
                RepositoryFormatKnitPack3,
                RepositoryFormatPackDevelopment0Subtree,
                )
            # The target is only inspected once the source has qualified.
            if not isinstance(source._format, formats_without_subtrees):
                return False
            return isinstance(target._format, formats_with_subtrees)
        except AttributeError:
            return False

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch().

        :return: A tuple of the fetcher's count_copied and failed_revisions.
        """
        from bzrlib.fetch import Knit1to2Fetcher
        mutter("Using fetch logic to copy between %s(%s) and %s(%s)",
               self.source, self.source._format, self.target,
               self.target._format)
        fetcher = Knit1to2Fetcher(
            to_repository=self.target,
            from_repository=self.source,
            last_revision=revision_id,
            pb=pb,
            find_ghosts=find_ghosts)
        return fetcher.count_copied, fetcher.failed_revisions
2888
2889
2996.2.1 by Aaron Bentley
Add KnitRepositoryFormat4
2890
class InterDifferingSerializer(InterKnitRepo):
    """InterRepository that copies revisions one at a time.

    Each missing revision is read from the source as a
    (revision, tree, signature) triple and passed to install_revisions()
    for the target.
    """

    @classmethod
    def _get_repo_format_to_test(cls):
        return None

    @staticmethod
    def is_compatible(source, target):
        """Return True if this optimiser can fetch from source into target.

        Both repositories must agree on rich-root support, and a source
        whose format supports tree references needs a target whose format
        supports them too.
        """
        if source.supports_rich_root() != target.supports_rich_root():
            return False
        # Ideally, we'd support fetching if the source had no tree references
        # even if it supported them...
        # The flag is an attribute of the repository's _format object.
        # getattr() does not follow dotted names, so it has to be looked up
        # on the format itself; a missing _format or a missing flag counts
        # as "not supported".
        source_format = getattr(source, '_format', None)
        target_format = getattr(target, '_format', None)
        if (getattr(source_format, 'supports_tree_reference', False) and
            not getattr(target_format, 'supports_tree_reference', False)):
            return False
        return True

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch().

        :param pb: Progress bar to report to; when None a nested progress
            bar is created for the duration of the fetch and then finished.
        :return: A tuple (number of revisions installed, 0).
        """
        revision_ids = self.target.search_missing_revision_ids(self.source,
            revision_id, find_ghosts=find_ghosts).get_keys()
        # Order the revisions with a topological sort of the source's
        # parent map before installing them.
        revision_ids = tsort.topo_sort(
            self.source.get_graph().get_parent_map(revision_ids))
        def revisions_iterator():
            for current_revision_id in revision_ids:
                revision = self.source.get_revision(current_revision_id)
                tree = self.source.revision_tree(current_revision_id)
                try:
                    signature = self.source.get_signature_text(
                        current_revision_id)
                except errors.NoSuchRevision:
                    # No signature is available for this revision.
                    signature = None
                yield revision, tree, signature
        if pb is None:
            my_pb = ui.ui_factory.nested_progress_bar()
            pb = my_pb
        else:
            my_pb = None
        try:
            install_revisions(self.target, revisions_iterator(),
                              len(revision_ids), pb)
        finally:
            # Only finish a progress bar that was created here.
            if my_pb is not None:
                my_pb.finished()
        return len(revision_ids), 0
2937
2938
2535.3.12 by Andrew Bennetts
Add a first cut of a get_data_stream method to Repository.
2939
class InterRemoteToOther(InterRepository):
    """InterRepository for fetching from a RemoteRepository source.

    Fetching is done with RemoteToOtherFetcher.
    """

    def __init__(self, source, target):
        InterRepository.__init__(self, source, target)
        self._real_inter = None

    @staticmethod
    def is_compatible(source, target):
        """Return True for a remote source whose real repository has the
        same model as target (as judged by InterRepository._same_model).
        """
        if isinstance(source, remote.RemoteRepository):
            # Is source's model compatible with target's model?
            source._ensure_real()
            backing_repo = source._real_repository
            assert not isinstance(backing_repo, remote.RemoteRepository), (
                "We don't support remote repos backed by remote repos yet.")
            return InterRepository._same_model(backing_repo, target)
        return False

    @needs_write_lock
    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """See InterRepository.fetch().

        :return: A tuple of the fetcher's count_copied and failed_revisions.
        """
        from bzrlib.fetch import RemoteToOtherFetcher
        mutter("Using fetch logic to copy between %s(remote) and %s(%s)",
               self.source, self.target, self.target._format)
        # TODO: jam 20070210 This should be an assert, not a translate
        revision_id = osutils.safe_revision_id(revision_id)
        fetcher = RemoteToOtherFetcher(
            to_repository=self.target,
            from_repository=self.source,
            last_revision=revision_id,
            pb=pb,
            find_ghosts=find_ghosts)
        return fetcher.count_copied, fetcher.failed_revisions

    @classmethod
    def _get_repo_format_to_test(cls):
        return None
2973
2974
2975
class InterOtherToRemote(InterRepository):
    """InterRepository for a RemoteRepository target.

    Operations are delegated to the InterRepository found for the source
    and the target's real repository; that object is looked up on first
    use and then kept.
    """

    def __init__(self, source, target):
        InterRepository.__init__(self, source, target)
        # Set by _ensure_real_inter() the first time it is needed.
        self._real_inter = None

    @staticmethod
    def is_compatible(source, target):
        """Return True when target is a RemoteRepository."""
        return isinstance(target, remote.RemoteRepository)

    def _ensure_real_inter(self):
        """Look up the delegate InterRepository if not already done."""
        if self._real_inter is None:
            self.target._ensure_real()
            real_target = self.target._real_repository
            self._real_inter = InterRepository.get(self.source, real_target)

    def copy_content(self, revision_id=None):
        """Delegate copy_content to the real InterRepository."""
        self._ensure_real_inter()
        self._real_inter.copy_content(revision_id=revision_id)

    def fetch(self, revision_id=None, pb=None, find_ghosts=False):
        """Delegate fetch to the real InterRepository.

        :return: Whatever the delegated fetch returns, so that callers get
            a result here just as from the other fetch implementations in
            this file (which return a tuple of copied count and failed
            revisions).
        """
        self._ensure_real_inter()
        return self._real_inter.fetch(revision_id=revision_id, pb=pb,
            find_ghosts=find_ghosts)

    @classmethod
    def _get_repo_format_to_test(cls):
        return None
3005
3006
2996.2.1 by Aaron Bentley
Add KnitRepositoryFormat4
3007
# Register every InterRepository optimiser defined in this module.  The
# classes are registered one by one in exactly the order listed here.
for _optimiser_class in (
    InterDifferingSerializer,
    InterSameDataRepository,
    InterWeaveRepo,
    InterKnitRepo,
    InterModel1and2,
    InterKnit1and2,
    InterPackRepo,
    InterRemoteToOther,
    InterOtherToRemote,
    ):
    InterRepository.register_optimiser(_optimiser_class)
# Do not leave the loop variable behind in the module namespace.
del _optimiser_class
1534.1.31 by Robert Collins
Deprecated fetch.fetch and fetch.greedy_fetch for branch.fetch, and move the Repository.fetch internals to InterRepo and InterWeaveRepo.
3016
3017
1556.1.4 by Robert Collins
Add a new format for what will become knit, and the surrounding logic to upgrade repositories within metadirs, and tests for the same.
3018
class CopyConverter(object):
    """A repository conversion tool which just performs a copy of the content.

    This is slow but quite reliable.
    """

    def __init__(self, target_format):
        """Create a CopyConverter.

        :param target_format: The format the resulting repository should be.
        """
        self.target_format = target_format

    def convert(self, repo, pb):
        """Perform the conversion of repo, giving feedback via pb.

        The existing 'repository' directory is moved to 'repository.backup',
        a new repository in the target format is initialized in its place,
        the content is copied across, and the backup is deleted.

        :param repo: The repository to convert.
        :param pb: a progress bar to use for progress information.
        """
        self.pb = pb
        self.count = 0
        # Number of step() calls made below.
        self.total = 4
        # this is only useful with metadir layouts - separated repo content.
        # trigger an assertion if not such
        repo._format.get_format_string()
        self.repo_dir = repo.bzrdir
        # Validate the target format before touching anything on disk, so a
        # rejected conversion does not leave the repository moved aside.
        repo._format.check_conversion_target(self.target_format)
        self.step('Moving repository to repository.backup')
        self.repo_dir.transport.move('repository', 'repository.backup')
        backup_transport = self.repo_dir.transport.clone('repository.backup')
        # Reopen the original repository from its backup location.
        self.source_repo = repo._format.open(self.repo_dir,
            _found=True,
            _override_transport=backup_transport)
        self.step('Creating new repository')
        converted = self.target_format.initialize(self.repo_dir,
                                                  self.source_repo.is_shared())
        converted.lock_write()
        try:
            self.step('Copying content into repository.')
            self.source_repo.copy_content_into(converted)
        finally:
            converted.unlock()
        self.step('Deleting old repository content.')
        self.repo_dir.transport.delete_tree('repository.backup')
        self.pb.note('repository converted')

    def step(self, message):
        """Update the pb by a step."""
        self.count += 1
        self.pb.update(message, self.count, self.total)
1596.1.1 by Martin Pool
Use simple xml unescaping rather than importing xml.sax
3068
3069
1843.2.4 by Aaron Bentley
Switch to John Meinel's _unescape_xml implementation
3070
# Replacement text for each named entity handled by _unescaper, keyed by
# the entity name (the text between '&' and ';').
_unescape_map = dict(
    apos="'",
    quot='"',
    amp='&',
    lt='<',
    gt='>',
)


def _unescaper(match, _map=_unescape_map):
    """Return the replacement text for one matched entity.

    :param match: A match object whose group 1 is the entity name.
    :param _map: Mapping of entity names to their replacement text.
    :return: The mapped text for a known name; for a name starting with
        '#', the utf8 encoding of the character whose decimal code follows
        the '#'.
    :raises KeyError: if the name is not in _map and does not start with '#'.
    """
    entity = match.group(1)
    try:
        return _map[entity]
    except KeyError:
        if entity.startswith('#'):
            return unichr(int(entity[1:])).encode('utf8')
        raise
1843.2.4 by Aaron Bentley
Switch to John Meinel's _unescape_xml implementation
3087
3088
3089
# Compiled entity pattern used by _unescape_xml; built on first use.
_unescape_re = None


def _unescape_xml(data):
    """Unescape predefined XML entities in a string of data.

    Every '&name;' span in data is replaced with the text _unescaper
    returns for it.

    :param data: The string to unescape.
    :return: data with each entity replaced.
    """
    global _unescape_re
    if _unescape_re is None:
        # Raw string: '\&' is not a valid string escape, so the pattern is
        # spelled as a raw literal (the pattern text itself is unchanged).
        _unescape_re = re.compile(r'\&([^;]*);')
    return _unescape_re.sub(_unescaper, data)
2745.6.3 by Aaron Bentley
Implement versionedfile checking for bzr check
3098
3099
3036.1.3 by Robert Collins
Privatise VersionedFileChecker.
3100
class _VersionedFileChecker(object):
    """Check the parents recorded in versioned files.

    The expected parents come from the text key index that the repository
    generates when the checker is created.
    """

    def __init__(self, repository):
        self.repository = repository
        self.text_index = self.repository._generate_text_key_index()

    def calculate_file_version_parents(self, revision_id, file_id):
        """Calculate the correct parents for a file version according to
        the inventories.

        :return: A tuple of parent revision ids; empty when the index lists
            only the null revision as parent.
        :raises KeyError: if (file_id, revision_id) is not in the text index.
        """
        parent_keys = self.text_index[(file_id, revision_id)]
        if parent_keys == [_mod_revision.NULL_REVISION]:
            return ()
        # strip the file_id, for the weave api
        parent_revisions = []
        for _parent_file_id, parent_revision_id in parent_keys:
            parent_revisions.append(parent_revision_id)
        return tuple(parent_revisions)

    def check_file_version_parents(self, weave, file_id):
        """Check the parents stored in a versioned file are correct.

        It also detects file versions that are not referenced by their
        corresponding revision's inventory.

        :returns: A tuple of (wrong_parents, dangling_file_versions).
            wrong_parents is a dict mapping {revision_id: (stored_parents,
            correct_parents)} for each revision_id where the stored parents
            are not correct.  dangling_file_versions is the set of
            revision ids of versions that are present in this versioned
            file, but have no entry in the text index.
        """
        wrong_parents = {}
        unused_versions = set()
        versions = weave.versions()
        parent_map = weave.get_parent_map(versions)
        for revision_id in versions:
            try:
                correct_parents = self.calculate_file_version_parents(
                    revision_id, file_id)
            except KeyError:
                # The version is not part of the used keys.
                unused_versions.add(revision_id)
                continue
            try:
                knit_parents = tuple(parent_map[revision_id])
            except errors.RevisionNotPresent:
                knit_parents = None
            if correct_parents != knit_parents:
                wrong_parents[revision_id] = (knit_parents, correct_parents)
        return wrong_parents, unused_versions
3287.6.8 by Robert Collins
Reduce code duplication as per review.
3148
3149
3150
def _old_get_graph(repository, revision_id):
    """DO NOT USE. That is all. I'm serious.

    Builds a dict from the repository graph's iter_ancestry([revision_id]),
    leaving out entries whose value is None, and returns it after passing
    it through _strip_NULL_ghosts.
    """
    ancestry = repository.get_graph().iter_ancestry([revision_id])
    revision_graph = {}
    for key, value in ancestry:
        if value is not None:
            revision_graph[key] = value
    return _strip_NULL_ghosts(revision_graph)
3156
3157
3158
def _strip_NULL_ghosts(revision_graph):
    """Also don't use this. more compatibility code for unmigrated clients.

    Modifies revision_graph in place: the null revision's entry is removed
    and every parents value becomes a tuple holding only the parents that
    are themselves keys of the graph.

    :return: The same revision_graph dict.
    """
    # Filter ghosts, and null:
    revision_graph.pop(_mod_revision.NULL_REVISION, None)
    for key, parents in revision_graph.items():
        known_parents = []
        for parent in parents:
            if parent in revision_graph:
                known_parents.append(parent)
        revision_graph[key] = tuple(known_parents)
    return revision_graph