/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.17.9 by Robert Collins
Initial stab at repository format support.
1
# groupcompress, a bzr plugin providing improved disk utilisation
2
# Copyright (C) 2008 Canonical Limited.
3
# 
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License version 2 as published
6
# by the Free Software Foundation.
7
# 
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
# 
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
16
# 
17
18
"""Repostory formats using B+Tree indices and groupcompress compression."""
19
20
import md5
21
import time
22
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
23
from bzrlib import (
24
    debug,
25
    errors,
26
    knit,
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
27
    inventory,
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
28
    pack,
29
    repository,
0.23.28 by John Arbash Meinel
Gotta import 'trace' if you want to use trace.mutter()
30
    trace,
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
31
    ui,
32
    )
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
33
from bzrlib.btree_index import (
34
    BTreeBuilder,
35
    BTreeGraphIndex,
36
    )
0.17.9 by Robert Collins
Initial stab at repository format support.
37
from bzrlib.index import GraphIndex, GraphIndexBuilder
38
from bzrlib.repository import InterPackRepo
39
from bzrlib.plugins.groupcompress.groupcompress import (
40
    _GCGraphIndex,
41
    GroupCompressVersionedFiles,
42
    )
43
from bzrlib.osutils import rand_chars
44
from bzrlib.repofmt.pack_repo import (
45
    Pack,
46
    NewPack,
47
    KnitPackRepository,
48
    RepositoryPackCollection,
0.17.31 by John Arbash Meinel
Bring in the 'rabin' experiment.
49
    RepositoryFormatKnitPack6,
50
    RepositoryFormatKnitPack6RichRoot,
0.17.9 by Robert Collins
Initial stab at repository format support.
51
    Packer,
52
    ReconcilePacker,
53
    OptimisingPacker,
54
    )
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
55
try:
56
    from bzrlib.repofmt.pack_repo import (
0.17.26 by Robert Collins
Working better --gc-plain-chk.
57
    CHKInventoryRepository,
0.21.1 by John Arbash Meinel
Start basing the groupcompress chk formats on the dev5 formats.
58
    RepositoryFormatPackDevelopment5,
59
    RepositoryFormatPackDevelopment5Hash16,
0.22.3 by John Arbash Meinel
Play with some experimental alternate hashes, comment them out for now.
60
##    RepositoryFormatPackDevelopment5Hash16b,
61
##    RepositoryFormatPackDevelopment5Hash63,
62
##    RepositoryFormatPackDevelopment5Hash127a,
63
##    RepositoryFormatPackDevelopment5Hash127b,
0.21.3 by John Arbash Meinel
Start putting together a GroupCompress format that is built on dev5
64
    RepositoryFormatPackDevelopment5Hash255,
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
65
    )
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
66
    from bzrlib import chk_map
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
67
    chk_support = True
68
except ImportError:
69
    chk_support = False
0.17.9 by Robert Collins
Initial stab at repository format support.
70
71
72
def open_pack(self):
    """Build a new (empty) pack via the collection's pack_factory.

    This is patched onto Packer (below) so that packers create packs
    through the collection's pluggable ``pack_factory`` attribute.
    """
    collection = self._pack_collection
    file_mode = collection.repo.bzrdir._get_file_mode()
    return collection.pack_factory(collection,
                                   upload_suffix=self.suffix,
                                   file_mode=file_mode)
76
77
78
Packer.open_pack = open_pack
79
80
81
class GCPack(NewPack):
    """A NewPack variant whose inventory and text indices use a single
    reference list (compression-parent graph only), as needed by
    groupcompress storage.
    """

    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Create a NewPack instance.

        :param pack_collection: A PackCollection into which this is being
            inserted.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g '.autopack'
        :param file_mode: An optional file mode to create the new files with.
        """
        # replaced from bzr.dev to:
        # - change inventory reference list length to 1
        # - change texts reference lists to 1
        # TODO: patch this to be parameterised upstream

        # The relative locations of the packs are constrained, but all are
        # passed in because the caller has them, so as to avoid object churn.
        index_builder_class = pack_collection._index_builder_class
        if chk_support:
            # from brisbane-core
            if pack_collection.chk_index is not None:
                chk_index = index_builder_class(reference_lists=0)
            else:
                chk_index = None
            Pack.__init__(self,
                # Revisions: parents list, no text compression.
                index_builder_class(reference_lists=1),
                # Inventory: We want to map compression only, but currently the
                # knit code hasn't been updated enough to understand that, so we
                # have a regular 2-list index giving parents and compression
                # source.
                index_builder_class(reference_lists=1),
                # Texts: compression and per file graph, for all fileids - so two
                # reference lists and two elements in the key tuple.
                index_builder_class(reference_lists=1, key_elements=2),
                # Signatures: Just blobs to store, no compression, no parents
                # listing.
                index_builder_class(reference_lists=0),
                # CHK based storage - just blobs, no compression or parents.
                chk_index=chk_index
                )
        else:
            # from bzr.dev (no chk index support in this bzrlib)
            Pack.__init__(self,
                # Revisions: parents list, no text compression.
                index_builder_class(reference_lists=1),
                # Inventory: compressed, with graph for compatibility with other
                # existing bzrlib code.
                index_builder_class(reference_lists=1),
                # Texts: per file graph:
                index_builder_class(reference_lists=1, key_elements=2),
                # Signatures: Just blobs to store, no compression, no parents
                # listing.
                index_builder_class(reference_lists=0),
                )
        self._pack_collection = pack_collection
        # When we make readonly indices, we need this.
        self.index_class = pack_collection._index_class
        # where should the new pack be opened
        self.upload_transport = pack_collection._upload_transport
        # where are indices written out to
        self.index_transport = pack_collection._index_transport
        # where is the pack renamed to when it is finished?
        self.pack_transport = pack_collection._pack_transport
        # What file mode to upload the pack and indices with.
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        # NOTE(review): md5.new() is the pre-hashlib API; hashlib.md5() is the
        # modern equivalent -- confirm the minimum supported Python version.
        self._hash = md5.new()
        # a four-tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        # is not safe unless the client knows it won't be reading from the pack
        # under creation.
        self._cache_limit = 0
        # the temporary pack file name.
        self.random_name = rand_chars(20) + upload_suffix
        # when was this pack started ?
        self.start_time = time.time()
        # open an output stream for the data added to the pack.
        self.write_stream = self.upload_transport.open_write_stream(
            self.random_name, mode=self._file_mode)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
                time.ctime(), self.upload_transport.base, self.random_name,
                time.time() - self.start_time)
        # A list of byte sequences to be written to the new pack, and the
        # aggregate size of them.  Stored as a list rather than separate
        # variables so that the _write_data closure below can update them.
        self._buffer = [[], 0]
        # create a callable for adding data
        #
        # robertc says- this is a closure rather than a method on the object
        # so that the variables are locals, and faster than accessing object
        # members.
        def _write_data(bytes, flush=False, _buffer=self._buffer,
            _write=self.write_stream.write, _update=self._hash.update):
            _buffer[0].append(bytes)
            _buffer[1] += len(bytes)
            # buffer cap: flush the accumulated bytes once we exceed the
            # cache limit (or on an explicit flush request).
            if _buffer[1] > self._cache_limit or flush:
                bytes = ''.join(_buffer[0])
                _write(bytes)
                _update(bytes)
                _buffer[:] = [[], 0]
        # expose this on self, for the occasion when clients want to add data.
        self._write_data = _write_data
        # a pack writer object to serialise pack records.
        self._writer = pack.ContainerWriter(self._write_data)
        self._writer.begin()
        # what state is the pack in? (open, finished, aborted)
        self._state = 'open'
195
196
197
RepositoryPackCollection.pack_factory = NewPack
198
199
class GCRepositoryPackCollection(RepositoryPackCollection):
    """A pack collection that builds GCPack packs and B+Tree indices,
    and repacks content through GroupCompressVersionedFiles.
    """

    # Create GCPack instances rather than plain NewPack.
    pack_factory = GCPack

    def _make_index(self, name, suffix):
        """Overridden to use BTreeGraphIndex objects."""
        size_offset = self._suffix_offsets[suffix]
        index_name = name + suffix
        index_size = self._names[name][size_offset]
        return BTreeGraphIndex(
            self._index_transport, index_name, index_size)

    def _start_write_group(self):
        """Open a new GCPack and route index writes/callbacks to it."""
        # Overridden to add 'self.pack_factory()'
        # Do not permit preparation for writing if we're not in a 'write lock'.
        if not self.repo.is_write_locked():
            raise errors.NotWriteLocked(self)
        self._new_pack = self.pack_factory(self, upload_suffix='.pack',
            file_mode=self.repo.bzrdir._get_file_mode())
        # allow writing: queue writes to a new index
        self.revision_index.add_writable_index(self._new_pack.revision_index,
            self._new_pack)
        self.inventory_index.add_writable_index(self._new_pack.inventory_index,
            self._new_pack)
        self.text_index.add_writable_index(self._new_pack.text_index,
            self._new_pack)
        self.signature_index.add_writable_index(self._new_pack.signature_index,
            self._new_pack)
        if chk_support and self.chk_index is not None:
            self.chk_index.add_writable_index(self._new_pack.chk_index,
                self._new_pack)
            self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback

        self.repo.inventories._index._add_callback = self.inventory_index.add_callback
        self.repo.revisions._index._add_callback = self.revision_index.add_callback
        self.repo.signatures._index._add_callback = self.signature_index.add_callback
        self.repo.texts._index._add_callback = self.text_index.add_callback

    def _get_filtered_inv_stream(self, source_vf, keys):
        """Filter the texts of inventories, to find the chk pages.

        :param source_vf: The versioned file to read inventory records from.
        :param keys: The inventory keys to stream.
        :return: A 3-tuple of (record stream, id_roots, p_id_roots).  The two
            root lists are populated lazily as the stream is consumed.
        """
        id_roots = []
        p_id_roots = []
        id_roots_set = set()
        p_id_roots_set = set()
        def _filter_inv_stream(stream):
            for idx, record in enumerate(stream):
                ### child_pb.update('fetch inv', idx, len(inv_keys_to_fetch))
                bytes = record.get_bytes_as('fulltext')
                chk_inv = inventory.CHKInventory.deserialise(None, bytes, record.key)
                # Collect each distinct id_to_entry root exactly once,
                # preserving first-seen order.
                key = chk_inv.id_to_entry.key()
                if key not in id_roots_set:
                    id_roots.append(key)
                    id_roots_set.add(key)
                p_id_map = chk_inv.parent_id_basename_to_file_id
                if p_id_map is not None:
                    key = p_id_map.key()
                    if key not in p_id_roots_set:
                        p_id_roots_set.add(key)
                        p_id_roots.append(key)
                yield record
        stream = source_vf.get_record_stream(keys, 'gc-optimal', True)
        return _filter_inv_stream(stream), id_roots, p_id_roots

    def _get_chk_stream(self, source_vf, keys, id_roots, p_id_roots, pb=None):
        """Yield record streams for chk pages, grouped for compression.

        Yields one sub-stream per tree level (roots first, then their
        referenced nodes grouped by search prefix), first for id_roots,
        then for p_id_roots, then a final stream of any unreferenced keys.
        """
        # We want to stream the keys from 'id_roots', and things they
        # reference, and then stream things from p_id_roots and things they
        # reference, and then any remaining keys that we didn't get to.

        # We also group referenced texts together, so if one root references a
        # text with prefix 'a', and another root references a node with prefix
        # 'a', we want to yield those nodes before we yield the nodes for 'b'
        # This keeps 'similar' nodes together.

        # Note: We probably actually want multiple streams here, to help the
        #       client understand that the different levels won't compress well
        #       against each other.
        #       Test the difference between using one Group per level, and
        #       using 1 Group per prefix. (so '' (root) would get a group, then
        #       all the references to search-key 'a' would get a group, etc.)
        remaining_keys = set(keys)
        # Single-element list so the nested generator can mutate the count.
        counter = [0]
        def _get_referenced_stream(root_keys):
            cur_keys = root_keys
            while cur_keys:
                keys_by_search_prefix = {}
                remaining_keys.difference_update(cur_keys)
                next_keys = set()
                stream = source_vf.get_record_stream(cur_keys, 'as-requested',
                                                     True)
                def next_stream():
                    for record in stream:
                        bytes = record.get_bytes_as('fulltext')
                        # We don't care about search_key_func for this code,
                        # because we only care about external references.
                        node = chk_map._deserialise(bytes, record.key,
                                                    search_key_func=None)
                        # NOTE(review): common_base is never used below.
                        common_base = node._search_prefix
                        if isinstance(node, chk_map.InternalNode):
                            for prefix, value in node._items.iteritems():
                                if not isinstance(value, tuple):
                                    raise AssertionError("value is %s when"
                                        " tuple expected" % (value.__class__))
                                if value not in next_keys:
                                    keys_by_search_prefix.setdefault(prefix,
                                        []).append(value)
                                    next_keys.add(value)
                        counter[0] += 1
                        if pb is not None:
                            pb.update('chk node', counter[0])
                        yield record
                yield next_stream()
                # Double check that we won't be emitting any keys twice
                next_keys = next_keys.intersection(remaining_keys)
                cur_keys = []
                for prefix in sorted(keys_by_search_prefix):
                    cur_keys.extend(keys_by_search_prefix[prefix])
        for stream in _get_referenced_stream(id_roots):
            yield stream
        for stream in _get_referenced_stream(p_id_roots):
            yield stream
        if remaining_keys:
            trace.note('There were %d keys in the chk index, which were not'
                       ' referenced from inventories', len(remaining_keys))
            stream = source_vf.get_record_stream(remaining_keys, 'unordered',
                                                 True)
            yield stream

    def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
                                 reload_func=None):
        """Execute a series of pack operations.

        :param pack_operations: A list of [revision_count, packs_to_combine].
        :param _packer_class: The class of packer to use (default: Packer).
        :param reload_func: Accepted for API compatibility with the base
            class; not used by this implementation.
        :return: None.
        """
        for revision_count, packs in pack_operations:
            # we may have no-ops from the setup logic
            if len(packs) == 0:
                continue
            # Create a new temp VersionedFile instance based on these packs,
            # and then just fetch everything into the target

            to_copy = [('revision_index', 'revisions'),
                       ('inventory_index', 'inventories'),
                       ('text_index', 'texts'),
                       ('signature_index', 'signatures'),
                      ]
            # TODO: This is a very non-optimal ordering for chk_bytes. The
            #       issue is that pages that are similar are not transmitted
            #       together. Perhaps get_record_stream('gc-optimal') should be
            #       taught about how to group chk pages?
            has_chk = False
            if getattr(self, 'chk_index', None) is not None:
                has_chk = True
                # Insert after inventories so inventory roots are known
                # before the chk pages are streamed.
                to_copy.insert(2, ('chk_index', 'chk_bytes'))

            # Shouldn't we start_write_group around this?
            if self._new_pack is not None:
                raise errors.BzrError('call to %s.pack() while another pack is'
                                      ' being written.'
                                      % (self.__class__.__name__,))
            new_pack = self.pack_factory(self, '.autopack',
                file_mode=self.repo.bzrdir._get_file_mode())
            new_pack.set_write_cache_size(1024*1024)
            # TODO: A better alternative is to probably use Packer.open_pack(), and
            #       then create a GroupCompressVersionedFiles() around the
            #       target pack to insert into.
            pb = ui.ui_factory.nested_progress_bar()
            try:
                for idx, (index_name, vf_name) in enumerate(to_copy):
                    pb.update('repacking %s' % (vf_name,), idx + 1, len(to_copy))
                    keys = set()
                    new_index = getattr(new_pack, index_name)
                    new_index.set_optimize(for_size=True)
                    # Union of all keys across the packs being combined.
                    for pack in packs:
                        source_index = getattr(pack, index_name)
                        keys.update(e[1] for e in source_index.iter_all_entries())
                    trace.mutter('repacking %s with %d keys',
                                 vf_name, len(keys))
                    source_vf = getattr(self.repo, vf_name)
                    target_access = knit._DirectPackAccess({})
                    target_access.set_writer(new_pack._writer, new_index,
                                             new_pack.access_tuple())
                    target_vf = GroupCompressVersionedFiles(
                        _GCGraphIndex(new_index,
                                      add_callback=new_index.add_nodes,
                                      parents=source_vf._index._parents,
                                      is_locked=self.repo.is_locked),
                        access=target_access,
                        delta=source_vf._delta)
                    stream = None
                    child_pb = ui.ui_factory.nested_progress_bar()
                    try:
                        if has_chk:
                            if vf_name == 'inventories':
                                # id_roots/p_id_roots are reused for the
                                # 'chk_bytes' iteration that follows.
                                stream, id_roots, p_id_roots = self._get_filtered_inv_stream(
                                    source_vf, keys)
                            elif vf_name == 'chk_bytes':
                                for stream in self._get_chk_stream(source_vf, keys,
                                                    id_roots, p_id_roots,
                                                    pb=child_pb):
                                    target_vf.insert_record_stream(stream)
                                # No more to copy
                                stream = []
                        if stream is None:
                            # Default path: copy everything with a progress bar.
                            def pb_stream():
                                substream = source_vf.get_record_stream(keys, 'gc-optimal', True)
                                for idx, record in enumerate(substream):
                                    child_pb.update(vf_name, idx + 1, len(keys))
                                    yield record
                            stream = pb_stream()
                        target_vf.insert_record_stream(stream)
                    finally:
                        child_pb.finished()
                new_pack._check_references() # shouldn't be needed
            except:
                pb.finished()
                new_pack.abort()
                raise
            else:
                pb.finished()
                if not new_pack.data_inserted():
                    raise AssertionError('We copied from pack files,'
                                         ' but had no data copied')
                    # we need to abort somehow, because we don't want to remove
                    # the other packs
                new_pack.finish()
                self.allocate(new_pack)
            for pack in packs:
                self._remove_pack_from_memory(pack)
        # record the newly available packs and stop advertising the old
        # packs
        self._save_pack_names(clear_obsolete_packs=True)
        # Move the old packs out of the way now they are no longer referenced.
        for revision_count, packs in pack_operations:
            self._obsolete_packs(packs)
0.20.7 by John Arbash Meinel
(ugly hack) autopacking doesn't work, so don't do it.
435
0.17.9 by Robert Collins
Initial stab at repository format support.
436
437
438
class GCPackRepository(KnitPackRepository):
    """GC customisation of KnitPackRepository."""

    # Note: I think the CHK support can be dropped from this class as it's
    # implemented via the GCCHKPackRepository class defined next. IGC 20090301

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Overridden to change pack collection class."""
        KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        # Discard the pack collection and versioned-files objects the base
        # class just built and install groupcompress-aware replacements.
        index_transport = self._transport.clone('indices')
        collection_args = (self, self._transport, index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class)
        if chk_support:
            self._pack_collection = GCRepositoryPackCollection(
                *collection_args,
                use_chk_index=self._format.supports_chks)
        else:
            self._pack_collection = GCRepositoryPackCollection(
                *collection_args)
        collection = self._pack_collection
        self.inventories = GroupCompressVersionedFiles(
            _GCGraphIndex(collection.inventory_index.combined_index,
                add_callback=collection.inventory_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=collection.inventory_index.data_access)
        self.revisions = GroupCompressVersionedFiles(
            _GCGraphIndex(collection.revision_index.combined_index,
                add_callback=collection.revision_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=collection.revision_index.data_access,
            delta=False)
        self.signatures = GroupCompressVersionedFiles(
            _GCGraphIndex(collection.signature_index.combined_index,
                add_callback=collection.signature_index.add_callback,
                parents=False, is_locked=self.is_locked),
            access=collection.signature_index.data_access,
            delta=False)
        self.texts = GroupCompressVersionedFiles(
            _GCGraphIndex(collection.text_index.combined_index,
                add_callback=collection.text_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=collection.text_index.data_access)
        if chk_support and _format.supports_chks:
            # No graph, no compression:- references from chks are between
            # different objects not temporal versions of the same; and without
            # some sort of temporal structure knit compression will just fail.
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(collection.chk_index.combined_index,
                    add_callback=collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=collection.chk_index.data_access)
        else:
            self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
        # Note: We cannot unpack a delta that references a text we haven't
        # seen yet. There are 2 options, work in fulltexts, or require
        # topological sorting. Using fulltexts is more optimal for local
        # operations, because the source can be smart about extracting
        # multiple in-a-row (and sharing strings). Topological is better for
        # remote, because we access less data.
        self._fetch_order = 'unordered'
        self._fetch_gc_optimal = True
        self._fetch_uses_deltas = False
0.17.9 by Robert Collins
Initial stab at repository format support.
520
521
0.17.26 by Robert Collins
Working better --gc-plain-chk.
522
if chk_support:
    class GCCHKPackRepository(CHKInventoryRepository):
        """GC customisation of CHKInventoryRepository."""

        def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
            _serializer):
            """Overridden to change pack collection class."""
            KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
                _commit_builder_class, _serializer)
            # and now replace everything it did :)
            index_transport = self._transport.clone('indices')
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class,
                use_chk_index=self._format.supports_chks,
                )
            self.inventories = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                    add_callback=self._pack_collection.inventory_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.inventory_index.data_access)
            self.revisions = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                    add_callback=self._pack_collection.revision_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.revision_index.data_access,
                delta=False)
            self.signatures = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                    add_callback=self._pack_collection.signature_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.signature_index.data_access,
                delta=False)
            self.texts = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.text_index.combined_index,
                    add_callback=self._pack_collection.text_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.text_index.data_access)
            # This class is only reachable via CHK-supporting formats; check
            # explicitly rather than with a bare assert, which is silently
            # stripped when running under python -O.
            if not _format.supports_chks:
                raise AssertionError('GCCHKPackRepository requires a format'
                                     ' with CHK support')
            # No parents, individual CHK pages don't have specific ancestry
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.chk_index.data_access)
            # True when the repository object is 'write locked' (as opposed to
            # the physical lock only taken out around changes to the
            # pack-names list.)
            # Another way to represent this would be a decorator around the
            # control files object that presents logical locks as physical
            # ones - if this gets ugly consider that alternative design.
            # RBC 20071011
            self._write_lock_count = 0
            self._transaction = None
            # for tests
            self._reconcile_does_inventory_gc = True
            self._reconcile_fixes_text_parents = True
            self._reconcile_backsup_inventory = False
            # Note: We cannot unpack a delta that references a text we haven't
            # seen yet. There are 2 options, work in fulltexts, or require
            # topological sorting. Using fulltexts is more optimal for local
            # operations, because the source can be smart about extracting
            # multiple in-a-row (and sharing strings). Topological is better
            # for remote, because we access less data.
            self._fetch_order = 'unordered'
            self._fetch_gc_optimal = True
            self._fetch_uses_deltas = False
0.17.26 by Robert Collins
Working better --gc-plain-chk.
590
591
0.17.31 by John Arbash Meinel
Bring in the 'rabin' experiment.
592
class RepositoryFormatPackGCPlain(RepositoryFormatKnitPack6):
    """A B+Tree index using pack repository."""

    repository_class = GCPackRepository
    rich_root_data = False

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        # On-disk format marker: must remain byte-for-byte identical or
        # existing repositories stop being recognised.
        return ("Bazaar development format - btree+gc "
            "(needs bzr.dev from 1.13)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        # Display-only text, so it is safe to fix the stray space the
        # original concatenation left before the comma
        # ("...groupcompress , interoperates...").
        return ("Development repository format - btree+groupcompress"
            ", interoperates with pack-0.92\n")
607
608
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
609
if chk_support:
    class RepositoryFormatPackGCCHK16(RepositoryFormatPackDevelopment5Hash16):
        """A hashed CHK+group compress pack repository."""

        repository_class = GCCHKPackRepository
        rich_root_data = True

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            # On-disk marker; keep the bytes stable.
            return ('Bazaar development format - hash16chk+gc rich-root'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return "Development repository format - hash16chk+groupcompress"


    class RepositoryFormatPackGCCHK255(RepositoryFormatPackDevelopment5Hash255):
        """A hashed CHK+group compress pack repository."""

        repository_class = GCCHKPackRepository
        # Setting this to True causes us to use InterModel1And2, so for now set
        # it to False which uses InterDifferingSerializer. When IM1&2 is
        # removed (as it is in bzr.dev) we can set this back to True.
        rich_root_data = False

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            # On-disk marker; keep the bytes stable.
            return ('Bazaar development format - hash255chk+gc rich-root'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return "Development repository format - hash255chk+groupcompress"
643
644
0.17.9 by Robert Collins
Initial stab at repository format support.
645
def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
    """Be incompatible with the regular fetch code."""
    # Collect every groupcompress format class; chk-based ones only exist
    # when chk_support is available.
    gc_formats = [RepositoryFormatPackGCPlain]
    if chk_support:
        gc_formats.append(RepositoryFormatPackGCCHK16)
        gc_formats.append(RepositoryFormatPackGCCHK255)
    gc_formats = tuple(gc_formats)
    # If either side is a groupcompress repository, refuse the regular
    # pack-to-pack optimiser; otherwise defer to the original check.
    if isinstance(source._format, gc_formats):
        return False
    if isinstance(target._format, gc_formats):
        return False
    return orig_method(source, target)


InterPackRepo.is_compatible = staticmethod(pack_incompatible)