# This tree was published at:
#   bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
#
# groupcompress, a bzr plugin providing improved disk utilisation
# Copyright (C) 2008 Canonical Limited.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as published
# by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
#

"""Repository formats using B+Tree indices and groupcompress compression."""
import md5
import time

from bzrlib import (
    debug,
    errors,
    inventory,
    knit,
    pack,
    repository,
    trace,
    ui,
    )
from bzrlib.btree_index import (
    BTreeBuilder,
    BTreeGraphIndex,
    )
from bzrlib.index import GraphIndex, GraphIndexBuilder
from bzrlib.osutils import rand_chars
from bzrlib.repofmt.pack_repo import (
    Pack,
    NewPack,
    KnitPackRepository,
    RepositoryPackCollection,
    RepositoryFormatPackDevelopment2,
    RepositoryFormatPackDevelopment2Subtree,
    RepositoryFormatKnitPack1,
    RepositoryFormatKnitPack3,
    RepositoryFormatKnitPack4,
    Packer,
    ReconcilePacker,
    OptimisingPacker,
    )
from bzrlib.repository import InterPackRepo
from bzrlib.trace import mutter

from bzrlib.plugins.groupcompress_rabin.groupcompress import (
    _GCGraphIndex,
    GroupCompressVersionedFiles,
    )

# CHK (content-hash-key) inventory support only exists in newer bzrlib;
# probe for it and record the result so the rest of the module can branch.
try:
    from bzrlib.repofmt.pack_repo import (
        CHKInventoryRepository,
        RepositoryFormatPackDevelopment5,
        RepositoryFormatPackDevelopment5Hash16,
##        RepositoryFormatPackDevelopment5Hash16b,
##        RepositoryFormatPackDevelopment5Hash63,
##        RepositoryFormatPackDevelopment5Hash127a,
##        RepositoryFormatPackDevelopment5Hash127b,
        RepositoryFormatPackDevelopment5Hash255,
        )
    from bzrlib import chk_map
    chk_support = True
except ImportError:
    chk_support = False
def open_pack(self):
    """Open a pack for the pack we are creating.

    Delegates creation to the pack collection's ``pack_factory`` so that
    collections can substitute their own pack class (e.g. GCPack).
    """
    return self._pack_collection.pack_factory(self._pack_collection,
        upload_suffix=self.suffix,
        file_mode=self._pack_collection.repo.bzrdir._get_file_mode())


# Monkey-patch: route Packer.open_pack through the collection's pack_factory
# so packers used against a GC collection create GCPack instances.
Packer.open_pack = open_pack
class GCPack(NewPack):
    """A NewPack variant for groupcompress repositories.

    The only behavioural difference from NewPack is the shape of the index
    builders: the inventory index uses a single reference list (compression
    only) and, when the running bzrlib supports CHK inventories, an extra
    chk index is created.
    """

    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Create a GCPack instance.

        :param pack_collection: The RepositoryPackCollection this pack
            belongs to.  It supplies the index builder/reader classes and
            the upload/index/pack transports used below.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g '.autopack'
        :param file_mode: An optional file mode to create the new files with.
        """
        # replaced from bzr.dev to:
        # - change inventory reference list length to 1
        # - change texts reference lists to 1
        # TODO: patch this to be parameterised upstream
        index_builder_class = pack_collection._index_builder_class
        if chk_support:
            # from brisbane-core
            if pack_collection.chk_index is not None:
                chk_index = index_builder_class(reference_lists=0)
            else:
                chk_index = None
            Pack.__init__(self,
                # Revisions: parents list, no text compression.
                index_builder_class(reference_lists=1),
                # Inventory: We want to map compression only, but currently the
                # knit code hasn't been updated enough to understand that, so we
                # have a regular 2-list index giving parents and compression
                # source.
                index_builder_class(reference_lists=1),
                # Texts: compression and per file graph, for all fileids - so two
                # reference lists and two elements in the key tuple.
                index_builder_class(reference_lists=1, key_elements=2),
                # Signatures: Just blobs to store, no compression, no parents
                # listing.
                index_builder_class(reference_lists=0),
                # CHK based storage - just blobs, no compression or parents.
                chk_index=chk_index
                )
        else:
            # from bzr.dev
            Pack.__init__(self,
                # Revisions: parents list, no text compression.
                index_builder_class(reference_lists=1),
                # Inventory: compressed, with graph for compatibility with other
                # existing bzrlib code.
                index_builder_class(reference_lists=1),
                # Texts: per file graph:
                index_builder_class(reference_lists=1, key_elements=2),
                # Signatures: Just blobs to store, no compression, no parents
                # listing.
                index_builder_class(reference_lists=0),
                )
        self._pack_collection = pack_collection
        # When we make readonly indices, we need this.
        self.index_class = pack_collection._index_class
        # where should the new pack be opened
        self.upload_transport = pack_collection._upload_transport
        # where are indices written out to
        self.index_transport = pack_collection._index_transport
        # where is the pack renamed to when it is finished?
        self.pack_transport = pack_collection._pack_transport
        # What file mode to upload the pack and indices with.
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        self._hash = md5.new()
        # a four-tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        # is not safe unless the client knows it won't be reading from the pack
        # under creation.
        self._cache_limit = 0
        # the temporary pack file name.
        self.random_name = rand_chars(20) + upload_suffix
        # when was this pack started ?
        self.start_time = time.time()
        # open an output stream for the data added to the pack.
        self.write_stream = self.upload_transport.open_write_stream(
            self.random_name, mode=self._file_mode)
        if 'pack' in debug.debug_flags:
            mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
                time.ctime(), self.upload_transport.base, self.random_name,
                time.time() - self.start_time)
        # A list of byte sequences to be written to the new pack, and the
        # aggregate size of them.  Stored as a list rather than separate
        # variables so that the _write_data closure below can update them.
        self._buffer = [[], 0]
        # create a callable for adding data
        #
        # robertc says- this is a closure rather than a method on the object
        # so that the variables are locals, and faster than accessing object
        # members.
        def _write_data(bytes, flush=False, _buffer=self._buffer,
            _write=self.write_stream.write, _update=self._hash.update):
            _buffer[0].append(bytes)
            _buffer[1] += len(bytes)
            # buffer cap
            if _buffer[1] > self._cache_limit or flush:
                bytes = ''.join(_buffer[0])
                _write(bytes)
                _update(bytes)
                _buffer[:] = [[], 0]
        # expose this on self, for the occasion when clients want to add data.
        self._write_data = _write_data
        # a pack writer object to serialise pack records.
        self._writer = pack.ContainerWriter(self._write_data)
        self._writer.begin()
        # what state is the pack in? (open, finished, aborted)
        self._state = 'open'
# Give the stock RepositoryPackCollection an explicit pack_factory attribute
# (pointing at the default NewPack) so that subclasses can override it and
# the patched Packer.open_pack above can dispatch through it uniformly.
RepositoryPackCollection.pack_factory = NewPack
class GCRepositoryPackCollection(RepositoryPackCollection):
    """A pack collection that creates groupcompress (GC) packs.

    Overrides index creation to use B+Tree indices, wires write groups to
    GCPack, and reimplements pack/autopack by streaming all records through
    GroupCompressVersionedFiles into a single new pack.
    """

    pack_factory = GCPack

    def _make_index(self, name, suffix):
        """Overridden to use BTreeGraphIndex objects."""
        size_offset = self._suffix_offsets[suffix]
        index_name = name + suffix
        index_size = self._names[name][size_offset]
        return BTreeGraphIndex(
            self._index_transport, index_name, index_size)

    def _start_write_group(self):
        # Overridden to add 'self.pack_factory()'
        # Do not permit preparation for writing if we're not in a 'write lock'.
        if not self.repo.is_write_locked():
            raise errors.NotWriteLocked(self)
        self._new_pack = self.pack_factory(self, upload_suffix='.pack',
            file_mode=self.repo.bzrdir._get_file_mode())
        # allow writing: queue writes to a new index
        self.revision_index.add_writable_index(self._new_pack.revision_index,
            self._new_pack)
        self.inventory_index.add_writable_index(self._new_pack.inventory_index,
            self._new_pack)
        self.text_index.add_writable_index(self._new_pack.text_index,
            self._new_pack)
        self.signature_index.add_writable_index(self._new_pack.signature_index,
            self._new_pack)
        if chk_support and self.chk_index is not None:
            self.chk_index.add_writable_index(self._new_pack.chk_index,
                self._new_pack)
            self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback

        self.repo.inventories._index._add_callback = self.inventory_index.add_callback
        self.repo.revisions._index._add_callback = self.revision_index.add_callback
        self.repo.signatures._index._add_callback = self.signature_index.add_callback
        self.repo.texts._index._add_callback = self.text_index.add_callback

    def _get_filtered_inv_stream(self, source_vf, keys):
        """Filter the texts of inventories, to find the chk pages.

        :param source_vf: The VersionedFiles holding the inventory texts.
        :param keys: The inventory keys to stream.
        :return: A tuple of (record stream, id_roots, p_id_roots).  The root
            lists are populated lazily as the stream is consumed.
        """
        id_roots = []
        p_id_roots = []
        id_roots_set = set()
        p_id_roots_set = set()
        def _filter_inv_stream(stream):
            for idx, record in enumerate(stream):
                ### child_pb.update('fetch inv', idx, len(inv_keys_to_fetch))
                bytes = record.get_bytes_as('fulltext')
                chk_inv = inventory.CHKInventory.deserialise(None, bytes, record.key)
                key = chk_inv.id_to_entry.key()
                if key not in id_roots_set:
                    id_roots.append(key)
                    id_roots_set.add(key)
                p_id_map = chk_inv.parent_id_basename_to_file_id
                if p_id_map is not None:
                    key = p_id_map.key()
                    if key not in p_id_roots_set:
                        p_id_roots_set.add(key)
                        p_id_roots.append(key)
                yield record
        stream = source_vf.get_record_stream(keys, 'gc-optimal', True)
        return _filter_inv_stream(stream), id_roots, p_id_roots

    def _get_chk_stream(self, source_vf, keys, id_roots, p_id_roots, pb=None):
        # We want to stream the keys from 'id_roots', and things they
        # reference, and then stream things from p_id_roots and things they
        # reference, and then any remaining keys that we didn't get to.

        # We also group referenced texts together, so if one root references a
        # text with prefix 'a', and another root references a node with prefix
        # 'a', we want to yield those nodes before we yield the nodes for 'b'
        # This keeps 'similar' nodes together

        # Note: We probably actually want multiple streams here, to help the
        #       client understand that the different levels won't compress well
        #       against eachother
        #       Test the difference between using one Group per level, and
        #       using 1 Group per prefix. (so '' (root) would get a group, then
        #       all the references to search-key 'a' would get a group, etc.)
        remaining_keys = set(keys)
        counter = [0]
        def _get_referenced_stream(root_keys):
            cur_keys = root_keys
            while cur_keys:
                keys_by_search_prefix = {}
                remaining_keys.difference_update(cur_keys)
                next_keys = set()
                stream = source_vf.get_record_stream(cur_keys, 'as-requested',
                                                     True)
                def next_stream():
                    for record in stream:
                        bytes = record.get_bytes_as('fulltext')
                        # We don't care about search_key_func for this code,
                        # because we only care about external references.
                        node = chk_map._deserialise(bytes, record.key,
                                                    search_key_func=None)
                        if isinstance(node, chk_map.InternalNode):
                            for prefix, value in node._items.iteritems():
                                assert isinstance(value, tuple)
                                if value not in next_keys:
                                    keys_by_search_prefix.setdefault(prefix,
                                        []).append(value)
                                    next_keys.add(value)
                        counter[0] += 1
                        if pb is not None:
                            pb.update('chk node', counter[0])
                        yield record
                yield next_stream()
                # Double check that we won't be emitting any keys twice
                next_keys = next_keys.intersection(remaining_keys)
                cur_keys = []
                for prefix in sorted(keys_by_search_prefix):
                    cur_keys.extend(keys_by_search_prefix[prefix])
        for stream in _get_referenced_stream(id_roots):
            yield stream
        for stream in _get_referenced_stream(p_id_roots):
            yield stream
        if remaining_keys:
            trace.note('There were %d keys in the chk index, which'
                       ' were not referenced from inventories',
                       len(remaining_keys))
            stream = source_vf.get_record_stream(remaining_keys, 'unordered',
                                                 True)
            yield stream

    def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
                                 reload_func=None):
        """Execute a series of pack operations.

        :param pack_operations: A list of [revision_count, packs_to_combine].
        :param _packer_class: The class of packer to use (default: Packer).
        :param reload_func: Accepted for API compatibility; not used here.
        :return: None.
        """
        for revision_count, packs in pack_operations:
            # we may have no-ops from the setup logic
            if len(packs) == 0:
                continue
            # Create a new temp VersionedFile instance based on these packs,
            # and then just fetch everything into the target

            # XXX: Find a way to 'set_optimize' on the newly created pack
            #      indexes
            #    def open_pack(self):
            #       """Open a pack for the pack we are creating."""
            #       new_pack = super(OptimisingPacker, self).open_pack()
            #       # Turn on the optimization flags for all the index builders.
            #       new_pack.revision_index.set_optimize(for_size=True)
            #       new_pack.inventory_index.set_optimize(for_size=True)
            #       new_pack.text_index.set_optimize(for_size=True)
            #       new_pack.signature_index.set_optimize(for_size=True)
            #       return new_pack
            to_copy = [('revision_index', 'revisions'),
                       ('inventory_index', 'inventories'),
                       ('text_index', 'texts'),
                       ('signature_index', 'signatures'),
                      ]
            # TODO: This is a very non-optimal ordering for chk_bytes. The
            #       issue is that pages that are similar are not transmitted
            #       together. Perhaps get_record_stream('gc-optimal') should be
            #       taught about how to group chk pages?
            has_chk = False
            if getattr(self, 'chk_index', None) is not None:
                has_chk = True
                to_copy.insert(2, ('chk_index', 'chk_bytes'))

            # Shouldn't we start_write_group around this?
            if self._new_pack is not None:
                raise errors.BzrError('call to %s.pack() while another pack is'
                                      ' being written.'
                                      % (self.__class__.__name__,))
            new_pack = self.pack_factory(self, 'autopack',
                                         self.repo.bzrdir._get_file_mode())
            new_pack.set_write_cache_size(1024*1024)
            # TODO: A better alternative is to probably use Packer.open_pack(), and
            #       then create a GroupCompressVersionedFiles() around the
            #       target pack to insert into.
            pb = ui.ui_factory.nested_progress_bar()
            try:
                for idx, (index_name, vf_name) in enumerate(to_copy):
                    pb.update('repacking %s' % (vf_name,), idx + 1, len(to_copy))
                    keys = set()
                    new_index = getattr(new_pack, index_name)
                    new_index.set_optimize(for_size=True)
                    # Note: named 'a_pack' (not 'pack') to avoid shadowing the
                    # bzrlib.pack module imported at the top of the file.
                    for a_pack in packs:
                        source_index = getattr(a_pack, index_name)
                        keys.update(e[1] for e in source_index.iter_all_entries())
                    source_vf = getattr(self.repo, vf_name)
                    target_access = knit._DirectPackAccess({})
                    target_access.set_writer(new_pack._writer, new_index,
                                             new_pack.access_tuple())
                    target_vf = GroupCompressVersionedFiles(
                        _GCGraphIndex(new_index,
                                      add_callback=new_index.add_nodes,
                                      parents=source_vf._index._parents,
                                      is_locked=self.repo.is_locked),
                        access=target_access,
                        delta=source_vf._delta)
                    stream = None
                    child_pb = ui.ui_factory.nested_progress_bar()
                    try:
                        if has_chk:
                            if vf_name == 'inventories':
                                stream, id_roots, p_id_roots = self._get_filtered_inv_stream(
                                    source_vf, keys)
                            elif vf_name == 'chk_bytes':
                                for stream in self._get_chk_stream(source_vf, keys,
                                                    id_roots, p_id_roots,
                                                    pb=child_pb):
                                    target_vf.insert_record_stream(stream)
                                # No more to copy
                                stream = []
                        if stream is None:
                            def pb_stream():
                                substream = source_vf.get_record_stream(keys, 'gc-optimal', True)
                                for idx, record in enumerate(substream):
                                    child_pb.update(vf_name, idx, len(keys))
                                    yield record
                            stream = pb_stream()
                        target_vf.insert_record_stream(stream)
                    finally:
                        child_pb.finished()
                new_pack._check_references() # shouldn't be needed
            except:
                pb.finished()
                new_pack.abort()
                raise
            else:
                pb.finished()
                if not new_pack.data_inserted():
                    raise AssertionError('We copied from pack files,'
                                         ' but had no data copied')
                    # we need to abort somehow, because we don't want to remove
                    # the other packs
                new_pack.finish()
                self.allocate(new_pack)
            for a_pack in packs:
                self._remove_pack_from_memory(a_pack)
        # record the newly available packs and stop advertising the old
        # packs
        self._save_pack_names(clear_obsolete_packs=True)
        # Move the old packs out of the way now they are no longer referenced.
        for revision_count, packs in pack_operations:
            self._obsolete_packs(packs)
0.17.9 by Robert Collins
Initial stab at repository format support.
452
453
0.23.1 by John Arbash Meinel
Start a quick experimentation with a different 'diff' algorithm.
454
class GCRPackRepository(KnitPackRepository):
    """GC customisation of KnitPackRepository.

    Lets KnitPackRepository.__init__ run, then replaces the pack
    collection and every versioned-file store it built with
    groupcompress-backed equivalents.
    """

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Overridden to change pack collection class."""
        KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        # and now replace everything it did :)
        index_transport = self._transport.clone('indices')
        # Only pass use_chk_index when the running bzrlib knows about chk
        # indices; older bzrlib versions reject the keyword entirely.
        # (Previously the whole constructor call was duplicated in both
        # branches just to add this one keyword.)
        extra_kwargs = {}
        if chk_support:
            extra_kwargs['use_chk_index'] = self._format.supports_chks
        self._pack_collection = GCRepositoryPackCollection(self,
            self._transport, index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            **extra_kwargs)
        self.inventories = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.inventory_index.data_access)
        self.revisions = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.revision_index.data_access,
            delta=False)
        self.signatures = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                parents=False, is_locked=self.is_locked),
            access=self._pack_collection.signature_index.data_access,
            delta=False)
        self.texts = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.text_index.data_access)
        if chk_support and _format.supports_chks:
            # No graph, no compression:- references from chks are between
            # different objects not temporal versions of the same; and without
            # some sort of temporal structure knit compression will just fail.
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.chk_index.data_access)
        else:
            self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
        # Note: We cannot unpack a delta that references a text we haven't
        #       seen yet. There are 2 options, work in fulltexts, or require
        #       topological sorting. Using fulltexts is more optimal for local
        #       operations, because the source can be smart about extracting
        #       multiple in-a-row (and sharing strings). Topological is better
        #       for remote, because we access less data.
        self._fetch_order = 'unordered'
        self._fetch_gc_optimal = True
        self._fetch_uses_deltas = False
0.17.26 by Robert Collins
Working better --gc-plain-chk.
535
if chk_support:
    class GCRCHKPackRepository(CHKInventoryRepository):
        """GC customisation of CHKInventoryRepository.

        Only defined when the running bzrlib has chk support; uses a
        chk_bytes store backed by groupcompress as well.
        """

        def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
            _serializer):
            """Overridden to change pack collection class."""
            KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
                _commit_builder_class, _serializer)
            # and now replace everything it did :)
            index_transport = self._transport.clone('indices')
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class,
                use_chk_index=self._format.supports_chks,
                )
            self.inventories = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                    add_callback=self._pack_collection.inventory_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.inventory_index.data_access)
            self.revisions = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                    add_callback=self._pack_collection.revision_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.revision_index.data_access,
                delta=False)
            self.signatures = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                    add_callback=self._pack_collection.signature_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.signature_index.data_access,
                delta=False)
            self.texts = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.text_index.combined_index,
                    add_callback=self._pack_collection.text_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.text_index.data_access)
            # This repository class only makes sense for chk-capable formats.
            # A bare 'assert' would be stripped under python -O, so fail
            # explicitly instead.
            if not _format.supports_chks:
                raise AssertionError('GCRCHKPackRepository requires a format'
                                     ' with supports_chks set')
            # No parents, individual CHK pages don't have specific ancestry
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.chk_index.data_access)
            # True when the repository object is 'write locked' (as opposed to
            # the physical lock only taken out around changes to the
            # pack-names list.)
            # Another way to represent this would be a decorator around the
            # control files object that presents logical locks as physical
            # ones - if this gets ugly consider that alternative design.
            # RBC 20071011
            self._write_lock_count = 0
            self._transaction = None
            # for tests
            self._reconcile_does_inventory_gc = True
            self._reconcile_fixes_text_parents = True
            self._reconcile_backsup_inventory = False
            self._fetch_order = 'unordered'
            self._fetch_gc_optimal = True
            self._fetch_uses_deltas = False
0.23.1 by John Arbash Meinel
Start a quick experimentation with a different 'diff' algorithm.
599
class RepositoryFormatPackGCRabin(RepositoryFormatPackDevelopment2):
    """A B+Tree index using pack repository."""

    # Repositories opened with this format get the groupcompress pack
    # repository implementation.
    repository_class = GCRPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        # On-disk format marker; must stay byte-for-byte stable.
        return "Bazaar development format - btree+gcr (needs bzr.dev from 1.13)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Development repository format - btree+groupcompress , interoperates with pack-0.92\n"
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
615
if chk_support:
    class RepositoryFormatPackGCRabinCHK16(RepositoryFormatPackDevelopment5Hash16):
        """A hashed CHK+group compress pack repository."""

        # Open repositories of this format with the chk-aware GC repository.
        repository_class = GCRCHKPackRepository

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            # On-disk format marker; must stay byte-for-byte stable.
            return "Bazaar development format - hash16chk+gcr (needs bzr.dev from 1.13)\n"

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return "Development repository format - hash16chk+groupcompress"


    class RepositoryFormatPackGCRabinCHK255(RepositoryFormatPackDevelopment5Hash255):
        """A hashed CHK+group compress pack repository."""

        # Open repositories of this format with the chk-aware GC repository.
        repository_class = GCRCHKPackRepository

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            # On-disk format marker; must stay byte-for-byte stable.
            return "Bazaar development format - hash255chk+gcr (needs bzr.dev from 1.13)\n"

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return "Development repository format - hash255chk+groupcompress"
0.17.9 by Robert Collins
Initial stab at repository format support.
646
def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
    """Be incompatible with the regular fetch code.

    Returns False whenever either side of the fetch uses a groupcompress
    format, so the generic InterPackRepo optimiser never runs for them;
    otherwise defers to the original is_compatible check (captured as a
    default argument at definition time).
    """
    gc_format_list = [RepositoryFormatPackGCRabin]
    if chk_support:
        gc_format_list.append(RepositoryFormatPackGCRabinCHK16)
        gc_format_list.append(RepositoryFormatPackGCRabinCHK255)
    gc_formats = tuple(gc_format_list)
    if isinstance(source._format, gc_formats):
        return False
    if isinstance(target._format, gc_formats):
        return False
    return orig_method(source, target)
656
657
658
# Monkeypatch bzrlib: route InterPackRepo compatibility checks through
# pack_incompatible so groupcompress formats opt out of the generic
# pack-to-pack fetch path while all other formats behave as before.
InterPackRepo.is_compatible = staticmethod(pack_incompatible)