/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.17.9 by Robert Collins
Initial stab at repository format support.
1
# groupcompress, a bzr plugin providing improved disk utilisation
2
# Copyright (C) 2008 Canonical Limited.
3
# 
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License version 2 as published
6
# by the Free Software Foundation.
7
# 
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
# 
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
16
# 
17
18
"""Repository formats using B+Tree indices and groupcompress compression."""
19
20
import md5
21
import time
22
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
23
from bzrlib import (
24
    debug,
25
    errors,
26
    knit,
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
27
    inventory,
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
28
    pack,
29
    repository,
0.23.28 by John Arbash Meinel
Gotta import 'trace' if you want to use trace.mutter()
30
    trace,
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
31
    ui,
32
    )
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
33
from bzrlib.btree_index import (
34
    BTreeBuilder,
35
    BTreeGraphIndex,
36
    )
0.17.9 by Robert Collins
Initial stab at repository format support.
37
from bzrlib.index import GraphIndex, GraphIndexBuilder
38
from bzrlib.repository import InterPackRepo
0.23.1 by John Arbash Meinel
Start a quick experimentation with a different 'diff' algorithm.
39
from bzrlib.plugins.groupcompress_rabin.groupcompress import (
0.17.9 by Robert Collins
Initial stab at repository format support.
40
    _GCGraphIndex,
41
    GroupCompressVersionedFiles,
42
    )
43
from bzrlib.osutils import rand_chars
44
from bzrlib.repofmt.pack_repo import (
45
    Pack,
46
    NewPack,
47
    KnitPackRepository,
48
    RepositoryPackCollection,
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
49
    RepositoryFormatPackDevelopment2,
50
    RepositoryFormatPackDevelopment2Subtree,
0.17.9 by Robert Collins
Initial stab at repository format support.
51
    RepositoryFormatKnitPack1,
52
    RepositoryFormatKnitPack3,
53
    RepositoryFormatKnitPack4,
54
    Packer,
55
    ReconcilePacker,
56
    OptimisingPacker,
57
    )
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
58
try:
59
    from bzrlib.repofmt.pack_repo import (
0.17.26 by Robert Collins
Working better --gc-plain-chk.
60
    CHKInventoryRepository,
0.21.1 by John Arbash Meinel
Start basing the groupcompress chk formats on the dev5 formats.
61
    RepositoryFormatPackDevelopment5,
62
    RepositoryFormatPackDevelopment5Hash16,
0.22.3 by John Arbash Meinel
Play with some experimental alternate hashes, comment them out for now.
63
##    RepositoryFormatPackDevelopment5Hash16b,
64
##    RepositoryFormatPackDevelopment5Hash63,
65
##    RepositoryFormatPackDevelopment5Hash127a,
66
##    RepositoryFormatPackDevelopment5Hash127b,
0.21.3 by John Arbash Meinel
Start putting together a GroupCompress format that is built on dev5
67
    RepositoryFormatPackDevelopment5Hash255,
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
68
    )
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
69
    from bzrlib import chk_map
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
70
    chk_support = True
71
except ImportError:
72
    chk_support = False
0.17.9 by Robert Collins
Initial stab at repository format support.
73
from bzrlib import ui
74
75
76
def open_pack(self):
    """Open a new pack via the owning collection's pack factory.

    Installed onto Packer (below) so that a packer creates whatever pack
    class its collection's ``pack_factory`` attribute names.
    """
    collection = self._pack_collection
    mode = collection.repo.bzrdir._get_file_mode()
    return collection.pack_factory(collection,
                                   upload_suffix=self.suffix,
                                   file_mode=mode)
80
81
82
# Monkeypatch: install the module-level open_pack (defined above) on Packer,
# so packers create their new pack via the collection's pack_factory instead
# of a hard-coded NewPack.
Packer.open_pack = open_pack
83
84
85
class GCPack(NewPack):
    """A NewPack variant whose indices suit groupcompress storage."""

    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Create a NewPack instance.

        :param pack_collection: The RepositoryPackCollection this pack is
            created for.  It supplies the index builder class, the readonly
            index class, and the upload/index/pack transports.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g '.autopack'
        :param file_mode: An optional file mode to create the new files with.
        """
        # replaced from bzr.dev to:
        # - change inventory reference list length to 1
        # - change texts reference lists to 1
        # TODO: patch this to be parameterised upstream
        #
        # The relative locations of the packs are constrained, but all are
        # passed in because the caller has them, so as to avoid object churn.
        index_builder_class = pack_collection._index_builder_class
        if chk_support:
            # from brisbane-core
            if pack_collection.chk_index is not None:
                chk_index = index_builder_class(reference_lists=0)
            else:
                chk_index = None
            Pack.__init__(self,
                # Revisions: parents list, no text compression.
                index_builder_class(reference_lists=1),
                # Inventory: We want to map compression only, but currently the
                # knit code hasn't been updated enough to understand that, so we
                # have a regular 2-list index giving parents and compression
                # source.
                index_builder_class(reference_lists=1),
                # Texts: compression and per file graph, for all fileids - so two
                # reference lists and two elements in the key tuple.
                index_builder_class(reference_lists=1, key_elements=2),
                # Signatures: Just blobs to store, no compression, no parents
                # listing.
                index_builder_class(reference_lists=0),
                # CHK based storage - just blobs, no compression or parents.
                chk_index=chk_index
                )
        else:
            # from bzr.dev
            Pack.__init__(self,
                # Revisions: parents list, no text compression.
                index_builder_class(reference_lists=1),
                # Inventory: compressed, with graph for compatibility with other
                # existing bzrlib code.
                index_builder_class(reference_lists=1),
                # Texts: per file graph:
                index_builder_class(reference_lists=1, key_elements=2),
                # Signatures: Just blobs to store, no compression, no parents
                # listing.
                index_builder_class(reference_lists=0),
                )
        self._pack_collection = pack_collection
        # When we make readonly indices, we need this.
        self.index_class = pack_collection._index_class
        # where should the new pack be opened
        self.upload_transport = pack_collection._upload_transport
        # where are indices written out to
        self.index_transport = pack_collection._index_transport
        # where is the pack renamed to when it is finished?
        self.pack_transport = pack_collection._pack_transport
        # What file mode to upload the pack and indices with.
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        self._hash = md5.new()
        # a four-tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        # is not safe unless the client knows it won't be reading from the pack
        # under creation.
        self._cache_limit = 0
        # the temporary pack file name.
        self.random_name = rand_chars(20) + upload_suffix
        # when was this pack started ?
        self.start_time = time.time()
        # open an output stream for the data added to the pack.
        self.write_stream = self.upload_transport.open_write_stream(
            self.random_name, mode=self._file_mode)
        if 'pack' in debug.debug_flags:
            # Bug fix: this module imports 'trace' but never imports 'mutter'
            # by name, so a bare mutter(...) raised NameError whenever the
            # 'pack' debug flag was set.  Use trace.mutter instead.
            trace.mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
                time.ctime(), self.upload_transport.base, self.random_name,
                time.time() - self.start_time)
        # A list of byte sequences to be written to the new pack, and the
        # aggregate size of them.  Stored as a list rather than separate
        # variables so that the _write_data closure below can update them.
        self._buffer = [[], 0]
        # create a callable for adding data
        #
        # robertc says- this is a closure rather than a method on the object
        # so that the variables are locals, and faster than accessing object
        # members.
        def _write_data(bytes, flush=False, _buffer=self._buffer,
            _write=self.write_stream.write, _update=self._hash.update):
            _buffer[0].append(bytes)
            _buffer[1] += len(bytes)
            # buffer cap
            if _buffer[1] > self._cache_limit or flush:
                bytes = ''.join(_buffer[0])
                _write(bytes)
                _update(bytes)
                _buffer[:] = [[], 0]
        # expose this on self, for the occasion when clients want to add data.
        self._write_data = _write_data
        # a pack writer object to serialise pack records.
        self._writer = pack.ContainerWriter(self._write_data)
        self._writer.begin()
        # what state is the pack in? (open, finished, aborted)
        self._state = 'open'
204
205
206
# Give the base collection class a pack_factory attribute (the GC collection
# below overrides it with GCPack as a class attribute).
# NOTE(review): this assigns NewPack rather than GCPack — it looks deliberate
# (plain collections keep plain packs) but is worth confirming.
RepositoryPackCollection.pack_factory = NewPack
207
208
class GCRepositoryPackCollection(RepositoryPackCollection):
    """A pack collection that creates GC packs with B+Tree indices."""

    pack_factory = GCPack

    def _make_index(self, name, suffix):
        """Overridden to use BTreeGraphIndex objects."""
        size_offset = self._suffix_offsets[suffix]
        index_name = name + suffix
        index_size = self._names[name][size_offset]
        return BTreeGraphIndex(
            self._index_transport, index_name, index_size)

    def _start_write_group(self):
        # Overridden to add 'self.pack_factory()'
        # Do not permit preparation for writing if we're not in a 'write lock'.
        if not self.repo.is_write_locked():
            raise errors.NotWriteLocked(self)
        self._new_pack = self.pack_factory(self, upload_suffix='.pack',
            file_mode=self.repo.bzrdir._get_file_mode())
        # allow writing: queue writes to a new index
        self.revision_index.add_writable_index(self._new_pack.revision_index,
            self._new_pack)
        self.inventory_index.add_writable_index(self._new_pack.inventory_index,
            self._new_pack)
        self.text_index.add_writable_index(self._new_pack.text_index,
            self._new_pack)
        self.signature_index.add_writable_index(self._new_pack.signature_index,
            self._new_pack)
        if chk_support and self.chk_index is not None:
            self.chk_index.add_writable_index(self._new_pack.chk_index,
                self._new_pack)
            self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback

        self.repo.inventories._index._add_callback = self.inventory_index.add_callback
        self.repo.revisions._index._add_callback = self.revision_index.add_callback
        self.repo.signatures._index._add_callback = self.signature_index.add_callback
        self.repo.texts._index._add_callback = self.text_index.add_callback

    def _get_filtered_inv_stream(self, source_vf, keys):
        """Filter the texts of inventories, to find the chk pages.

        :param source_vf: The versionedfile to read inventory texts from.
        :param keys: The inventory keys to stream.
        :return: A (stream, id_roots, p_id_roots) tuple.  id_roots and
            p_id_roots are filled in lazily as the stream is consumed.
        """
        id_roots = []
        p_id_roots = []
        id_roots_set = set()
        p_id_roots_set = set()
        def _filter_inv_stream(stream):
            for idx, record in enumerate(stream):
                ### child_pb.update('fetch inv', idx, len(inv_keys_to_fetch))
                bytes = record.get_bytes_as('fulltext')
                chk_inv = inventory.CHKInventory.deserialise(None, bytes, record.key)
                key = chk_inv.id_to_entry.key()
                if key not in id_roots_set:
                    id_roots.append(key)
                    id_roots_set.add(key)
                p_id_map = chk_inv.parent_id_basename_to_file_id
                if p_id_map is not None:
                    key = p_id_map.key()
                    if key not in p_id_roots_set:
                        p_id_roots_set.add(key)
                        p_id_roots.append(key)
                yield record
        stream = source_vf.get_record_stream(keys, 'gc-optimal', True)
        return _filter_inv_stream(stream), id_roots, p_id_roots

    def _get_chk_stream(self, source_vf, keys, id_roots, p_id_roots, pb=None):
        """Yield record streams for chk pages, grouped by search prefix.

        :param pb: Optional progress bar, updated once per chk node.
        """
        # We want to stream the keys from 'id_roots', and things they
        # reference, and then stream things from p_id_roots and things they
        # reference, and then any remaining keys that we didn't get to.

        # We also group referenced texts together, so if one root references a
        # text with prefix 'a', and another root references a node with prefix
        # 'a', we want to yield those nodes before we yield the nodes for 'b'
        # This keeps 'similar' nodes together

        # Note: We probably actually want multiple streams here, to help the
        #       client understand that the different levels won't compress well
        #       against each other
        #       Test the difference between using one Group per level, and
        #       using 1 Group per prefix. (so '' (root) would get a group, then
        #       all the references to search-key 'a' would get a group, etc.)
        remaining_keys = set(keys)
        # Mutable counter so the nested generator can update it.
        counter = [0]
        def _get_referenced_stream(root_keys):
            cur_keys = root_keys
            while cur_keys:
                keys_by_search_prefix = {}
                remaining_keys.difference_update(cur_keys)
                next_keys = set()
                stream = source_vf.get_record_stream(cur_keys, 'as-requested',
                                                     True)
                def next_stream():
                    for record in stream:
                        bytes = record.get_bytes_as('fulltext')
                        # We don't care about search_key_func for this code,
                        # because we only care about external references.
                        node = chk_map._deserialise(bytes, record.key,
                                                    search_key_func=None)
                        common_base = node._search_prefix
                        if isinstance(node, chk_map.InternalNode):
                            for prefix, value in node._items.iteritems():
                                # Explicit raise rather than 'assert' so the
                                # check survives running under python -O.
                                if not isinstance(value, tuple):
                                    raise AssertionError(
                                        'expected a key tuple, got %r'
                                        % (value,))
                                if value not in next_keys:
                                    keys_by_search_prefix.setdefault(prefix,
                                        []).append(value)
                                    next_keys.add(value)
                        counter[0] += 1
                        if pb is not None:
                            pb.update('chk node', counter[0])
                        yield record
                yield next_stream()
                # Double check that we won't be emitting any keys twice
                next_keys = next_keys.intersection(remaining_keys)
                cur_keys = []
                for prefix in sorted(keys_by_search_prefix):
                    cur_keys.extend(keys_by_search_prefix[prefix])
        for stream in _get_referenced_stream(id_roots):
            yield stream
        for stream in _get_referenced_stream(p_id_roots):
            yield stream
        if remaining_keys:
            trace.note('There were %d keys in the chk index, which'
                       ' were not referenced from inventories',
                       len(remaining_keys))
            stream = source_vf.get_record_stream(remaining_keys, 'unordered',
                                                 True)
            yield stream

    def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
                                 reload_func=None):
        """Execute a series of pack operations.

        :param pack_operations: A list of [revision_count, packs_to_combine].
        :param _packer_class: The class of packer to use (default: Packer).
            Accepted for signature compatibility with the base class; this
            implementation copies through GroupCompressVersionedFiles and
            does not instantiate a packer.
        :param reload_func: Accepted for signature compatibility; unused.
        :return: None.
        """
        for revision_count, packs in pack_operations:
            # we may have no-ops from the setup logic
            if len(packs) == 0:
                continue
            # Create a new temp VersionedFile instance based on these packs,
            # and then just fetch everything into the target

            # XXX: Find a way to 'set_optimize' on the newly created pack
            #      indexes
            #    def open_pack(self):
            #       """Open a pack for the pack we are creating."""
            #       new_pack = super(OptimisingPacker, self).open_pack()
            #       # Turn on the optimization flags for all the index builders.
            #       new_pack.revision_index.set_optimize(for_size=True)
            #       new_pack.inventory_index.set_optimize(for_size=True)
            #       new_pack.text_index.set_optimize(for_size=True)
            #       new_pack.signature_index.set_optimize(for_size=True)
            #       return new_pack
            to_copy = [('revision_index', 'revisions'),
                       ('inventory_index', 'inventories'),
                       ('text_index', 'texts'),
                       ('signature_index', 'signatures'),
                      ]
            # TODO: This is a very non-optimal ordering for chk_bytes. The
            #       issue is that pages that are similar are not transmitted
            #       together. Perhaps get_record_stream('gc-optimal') should be
            #       taught about how to group chk pages?
            has_chk = False
            if getattr(self, 'chk_index', None) is not None:
                has_chk = True
                to_copy.insert(2, ('chk_index', 'chk_bytes'))

            # Shouldn't we start_write_group around this?
            if self._new_pack is not None:
                raise errors.BzrError('call to %s.pack() while another pack is'
                                      ' being written.'
                                      % (self.__class__.__name__,))
            new_pack = self.pack_factory(self, 'autopack',
                                         self.repo.bzrdir._get_file_mode())
            new_pack.set_write_cache_size(1024*1024)
            # TODO: A better alternative is to probably use Packer.open_pack(), and
            #       then create a GroupCompressVersionedFiles() around the
            #       target pack to insert into.
            pb = ui.ui_factory.nested_progress_bar()
            try:
                for idx, (index_name, vf_name) in enumerate(to_copy):
                    pb.update('repacking %s' % (vf_name,), idx + 1, len(to_copy))
                    keys = set()
                    new_index = getattr(new_pack, index_name)
                    new_index.set_optimize(for_size=True)
                    # Named 'source_pack' (not 'pack') so the bzrlib.pack
                    # module imported at the top of the file is not shadowed.
                    for source_pack in packs:
                        source_index = getattr(source_pack, index_name)
                        keys.update(e[1] for e in source_index.iter_all_entries())
                    trace.mutter('repacking %s with %d keys',
                                 vf_name, len(keys))
                    source_vf = getattr(self.repo, vf_name)
                    target_access = knit._DirectPackAccess({})
                    target_access.set_writer(new_pack._writer, new_index,
                                             new_pack.access_tuple())
                    target_vf = GroupCompressVersionedFiles(
                        _GCGraphIndex(new_index,
                                      add_callback=new_index.add_nodes,
                                      parents=source_vf._index._parents,
                                      is_locked=self.repo.is_locked),
                        access=target_access,
                        delta=source_vf._delta)
                    stream = None
                    child_pb = ui.ui_factory.nested_progress_bar()
                    try:
                        if has_chk:
                            if vf_name == 'inventories':
                                stream, id_roots, p_id_roots = self._get_filtered_inv_stream(
                                    source_vf, keys)
                            elif vf_name == 'chk_bytes':
                                for stream in self._get_chk_stream(source_vf, keys,
                                                    id_roots, p_id_roots,
                                                    pb=child_pb):
                                    target_vf.insert_record_stream(stream)
                                # No more to copy
                                stream = []
                        if stream is None:
                            # Wrap the plain stream so the progress bar ticks
                            # per record copied.
                            def pb_stream():
                                substream = source_vf.get_record_stream(keys, 'gc-optimal', True)
                                for idx, record in enumerate(substream):
                                    child_pb.update(vf_name, idx, len(keys))
                                    yield record
                            stream = pb_stream()
                        target_vf.insert_record_stream(stream)
                    finally:
                        child_pb.finished()
                new_pack._check_references() # shouldn't be needed
            # Bare except on purpose: abort the pack and re-raise whatever
            # happened (including KeyboardInterrupt) after cleanup.
            except:
                pb.finished()
                new_pack.abort()
                raise
            else:
                pb.finished()
                if not new_pack.data_inserted():
                    raise AssertionError('We copied from pack files,'
                                         ' but had no data copied')
                    # we need to abort somehow, because we don't want to remove
                    # the other packs
                new_pack.finish()
                self.allocate(new_pack)
            for source_pack in packs:
                self._remove_pack_from_memory(source_pack)
        # record the newly available packs and stop advertising the old
        # packs
        self._save_pack_names(clear_obsolete_packs=True)
        # Move the old packs out of the way now they are no longer referenced.
        for revision_count, packs in pack_operations:
            self._obsolete_packs(packs)
0.20.7 by John Arbash Meinel
(ugly hack) autopacking doesn't work, so don't do it.
454
0.17.9 by Robert Collins
Initial stab at repository format support.
455
456
0.23.1 by John Arbash Meinel
Start a quick experimentation with a different 'diff' algorithm.
457
class GCRPackRepository(KnitPackRepository):
    """GC customisation of KnitPackRepository.

    After the base class __init__ runs, every versioned-files attribute it
    set up is replaced with a GroupCompressVersionedFiles instance backed by
    a _GCGraphIndex wrapper, and the pack collection is swapped for a
    GCRepositoryPackCollection.
    """

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Overridden to change pack collection class."""
        KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        # and now replace everything it did :)
        index_transport = self._transport.clone('indices')
        # Only pass use_chk_index when chk support exists in this bzrlib;
        # presumably older GCRepositoryPackCollection signatures lack the
        # keyword — TODO confirm against the collection's definition.
        if chk_support:
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class,
                use_chk_index=self._format.supports_chks,
                )
        else:
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class)
        # Inventories and texts keep parent graphs; revisions and signatures
        # additionally disable deltas (delta=False) below.
        self.inventories = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.inventory_index.data_access)
        self.revisions = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.revision_index.data_access,
            delta=False)
        self.signatures = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                parents=False, is_locked=self.is_locked),
            access=self._pack_collection.signature_index.data_access,
            delta=False)
        self.texts = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.text_index.data_access)
        if chk_support and _format.supports_chks:
            # No graph, no compression:- references from chks are between
            # different objects not temporal versions of the same; and without
            # some sort of temporal structure knit compression will just fail.
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.chk_index.data_access)
        else:
            # Formats without chk support expose no chk_bytes store.
            self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
        # Note: We cannot unpack a delta that references a text we haven't seen yet.
        #       there are 2 options, work in fulltexts, or require topological
        #       sorting. Using fulltexts is more optimal for local operations,
        #       because the source can be smart about extracting multiple
        #       in-a-row (and sharing strings). Topological is better for
        #       remote, because we access less data.
        self._fetch_order = 'unordered'
        self._fetch_gc_optimal = True
        self._fetch_uses_deltas = False
0.17.26 by Robert Collins
Working better --gc-plain-chk.
538
if chk_support:
    class GCRCHKPackRepository(CHKInventoryRepository):
        """GC customisation of CHKInventoryRepository.

        Only defined when this bzrlib provides chk support.  Mirrors
        GCRPackRepository but always builds a chk index/bytes store.
        """

        def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
            _serializer):
            """Overridden to change pack collection class."""
            # NOTE(review): calls KnitPackRepository.__init__ directly, not
            # CHKInventoryRepository.__init__ — appears intentional since
            # everything it sets up is replaced below, but worth confirming.
            KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
                _commit_builder_class, _serializer)
            # and now replace everything it did :)
            index_transport = self._transport.clone('indices')
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class,
                use_chk_index=self._format.supports_chks,
                )
            self.inventories = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                    add_callback=self._pack_collection.inventory_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.inventory_index.data_access)
            self.revisions = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                    add_callback=self._pack_collection.revision_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.revision_index.data_access,
                delta=False)
            self.signatures = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                    add_callback=self._pack_collection.signature_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.signature_index.data_access,
                delta=False)
            self.texts = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.text_index.combined_index,
                    add_callback=self._pack_collection.text_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.text_index.data_access)
            assert _format.supports_chks
            # No parents, individual CHK pages don't have specific ancestry
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.chk_index.data_access)
            # True when the repository object is 'write locked' (as opposed to the
            # physical lock only taken out around changes to the pack-names list.)
            # Another way to represent this would be a decorator around the control
            # files object that presents logical locks as physical ones - if this
            # gets ugly consider that alternative design. RBC 20071011
            self._write_lock_count = 0
            self._transaction = None
            # for tests
            self._reconcile_does_inventory_gc = True
            self._reconcile_fixes_text_parents = True
            self._reconcile_backsup_inventory = False
            self._fetch_order = 'unordered'
            self._fetch_gc_optimal = True
            self._fetch_uses_deltas = False
0.23.1 by John Arbash Meinel
Start a quick experimentation with a different 'diff' algorithm.
602
class RepositoryFormatPackGCRabin(RepositoryFormatPackDevelopment2):
    """A B+Tree index using pack repository.

    Pairs the development2 pack format with the groupcompress
    GCRPackRepository implementation.
    """

    repository_class = GCRPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        # On-disk format marker: must stay byte-identical or existing
        # repositories become unrecognisable.  Do not edit.
        return ("Bazaar development format - btree+gcr "
            "(needs bzr.dev from 1.13)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        # Human-readable description only.  Fixed here: the original
        # concatenation produced a stray space before the comma
        # ("...groupcompress , interoperates...") and, unlike the sibling
        # CHK format descriptions below, ended with a trailing newline.
        return ("Development repository format - btree+groupcompress"
                ", interoperates with pack-0.92")
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
618
if chk_support:
    class RepositoryFormatPackGCRabinCHK16(RepositoryFormatPackDevelopment5Hash16):
        """A hashed CHK+group compress pack repository."""

        # Use the groupcompress CHK-aware repository implementation.
        repository_class = GCRCHKPackRepository

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            # On-disk format marker: must stay byte-identical or existing
            # repositories become unrecognisable.  Do not edit.
            return ('Bazaar development format - hash16chk+gcr'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return ("Development repository format - hash16chk+groupcompress")


    class RepositoryFormatPackGCRabinCHK255(RepositoryFormatPackDevelopment5Hash255):
        """A hashed CHK+group compress pack repository."""

        # Same repository implementation as the hash16 variant; only the
        # underlying CHK hashing width differs (via the base format class).
        repository_class = GCRCHKPackRepository

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            # On-disk format marker: must stay byte-identical.  Do not edit.
            return ('Bazaar development format - hash255chk+gcr'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return ("Development repository format - hash255chk+groupcompress")
0.17.9 by Robert Collins
Initial stab at repository format support.
649
def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
    """Be incompatible with the regular fetch code.

    Installed over InterPackRepo.is_compatible (below): answers False when
    either side is a groupcompress format, otherwise defers to the original
    is_compatible implementation captured in *orig_method* at import time.
    """
    # Assemble the tuple of groupcompress formats that must opt out of the
    # generic InterPackRepo fetch path.
    gc_formats = [RepositoryFormatPackGCRabin]
    if chk_support:
        gc_formats.append(RepositoryFormatPackGCRabinCHK16)
        gc_formats.append(RepositoryFormatPackGCRabinCHK255)
    gc_formats = tuple(gc_formats)
    # Guard clauses: either endpoint being a gc format disables the optimiser.
    if isinstance(source._format, gc_formats):
        return False
    if isinstance(target._format, gc_formats):
        return False
    return orig_method(source, target)


InterPackRepo.is_compatible = staticmethod(pack_incompatible)