/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.17.9 by Robert Collins
Initial stab at repository format support.
1
# groupcompress, a bzr plugin providing improved disk utilisation
2
# Copyright (C) 2008 Canonical Limited.
3
# 
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License version 2 as published
6
# by the Free Software Foundation.
7
# 
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
# 
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
16
# 
17
18
"""Repostory formats using B+Tree indices and groupcompress compression."""
19
20
import md5
21
import time
22
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
23
from bzrlib import (
24
    debug,
25
    errors,
26
    knit,
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
27
    inventory,
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
28
    pack,
29
    repository,
30
    ui,
31
    )
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
32
from bzrlib.btree_index import (
33
    BTreeBuilder,
34
    BTreeGraphIndex,
35
    )
0.17.9 by Robert Collins
Initial stab at repository format support.
36
from bzrlib.index import GraphIndex, GraphIndexBuilder
37
from bzrlib.repository import InterPackRepo
38
from bzrlib.plugins.groupcompress.groupcompress import (
39
    _GCGraphIndex,
40
    GroupCompressVersionedFiles,
41
    )
42
from bzrlib.osutils import rand_chars
43
from bzrlib.repofmt.pack_repo import (
44
    Pack,
45
    NewPack,
46
    KnitPackRepository,
47
    RepositoryPackCollection,
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
48
    RepositoryFormatPackDevelopment2,
49
    RepositoryFormatPackDevelopment2Subtree,
0.17.9 by Robert Collins
Initial stab at repository format support.
50
    RepositoryFormatKnitPack1,
51
    RepositoryFormatKnitPack3,
52
    RepositoryFormatKnitPack4,
53
    Packer,
54
    ReconcilePacker,
55
    OptimisingPacker,
56
    )
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
57
try:
58
    from bzrlib.repofmt.pack_repo import (
0.17.26 by Robert Collins
Working better --gc-plain-chk.
59
    CHKInventoryRepository,
0.21.1 by John Arbash Meinel
Start basing the groupcompress chk formats on the dev5 formats.
60
    RepositoryFormatPackDevelopment5,
61
    RepositoryFormatPackDevelopment5Hash16,
0.22.3 by John Arbash Meinel
Play with some experimental alternate hashes, comment them out for now.
62
##    RepositoryFormatPackDevelopment5Hash16b,
63
##    RepositoryFormatPackDevelopment5Hash63,
64
##    RepositoryFormatPackDevelopment5Hash127a,
65
##    RepositoryFormatPackDevelopment5Hash127b,
0.21.3 by John Arbash Meinel
Start putting together a GroupCompress format that is built on dev5
66
    RepositoryFormatPackDevelopment5Hash255,
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
67
    )
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
68
    from bzrlib import chk_map
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
69
    chk_support = True
70
except ImportError:
71
    chk_support = False
0.17.9 by Robert Collins
Initial stab at repository format support.
72
from bzrlib import ui
73
74
75
def open_pack(self):
    """Open the output pack for a packer via its collection's pack factory.

    This is written as a free function taking ``self`` so that it can be
    installed as a method on ``Packer``.

    :return: Whatever the collection's ``pack_factory`` returns when called
        with the collection, the packer's ``suffix`` as ``upload_suffix`` and
        the bzrdir's file mode as ``file_mode``.
    """
    collection = self._pack_collection
    suffix = self.suffix
    mode = collection.repo.bzrdir._get_file_mode()
    return collection.pack_factory(
        collection, upload_suffix=suffix, file_mode=mode)
79
80
81
# Monkey-patch Packer: replace its open_pack method with the module-level
# open_pack() above, which creates the output pack through the owning pack
# collection's pack_factory.
Packer.open_pack = open_pack
82
83
84
class GCPack(NewPack):
    """A pack under construction whose indices use a groupcompress layout.

    The constructor replaces NewPack.__init__ wholesale (it calls
    Pack.__init__ directly) so that the inventory and text indices are built
    with a single reference list each.
    """

    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Create a GCPack instance.

        :param pack_collection: The pack collection this pack is created for.
            Its ``_index_builder_class``, ``_index_class``,
            ``_upload_transport``, ``_index_transport`` and
            ``_pack_transport`` attributes are read here, as is ``chk_index``
            when chk support was importable.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g '.autopack'
        :param file_mode: An optional file mode to create the new files with.
        """
        # replaced from bzr.dev to:
        # - change inventory reference list length to 1
        # - change texts reference lists to 1
        # TODO: patch this to be parameterised upstream
        index_builder_class = pack_collection._index_builder_class
        pack_kwargs = {}
        if chk_support:
            # from brisbane-core: only pass a chk index to Pack.__init__ when
            # chk support is available; the builder is only created when the
            # collection itself carries a chk index.
            if pack_collection.chk_index is not None:
                # CHK based storage - just blobs, no compression or parents.
                pack_kwargs['chk_index'] = index_builder_class(
                    reference_lists=0)
            else:
                pack_kwargs['chk_index'] = None
        Pack.__init__(self,
            # Revisions: parents list, no text compression.
            index_builder_class(reference_lists=1),
            # Inventory: a single reference list (the graph), kept for
            # compatibility with other existing bzrlib code.
            index_builder_class(reference_lists=1),
            # Texts: per file graph - one reference list and two elements
            # in the key tuple.
            index_builder_class(reference_lists=1, key_elements=2),
            # Signatures: Just blobs to store, no compression, no parents
            # listing.
            index_builder_class(reference_lists=0),
            **pack_kwargs)
        self._pack_collection = pack_collection
        # When we make readonly indices, we need this.
        self.index_class = pack_collection._index_class
        # where should the new pack be opened
        self.upload_transport = pack_collection._upload_transport
        # where are indices written out to
        self.index_transport = pack_collection._index_transport
        # where is the pack renamed to when it is finished?
        self.pack_transport = pack_collection._pack_transport
        # What file mode to upload the pack and indices with.
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        self._hash = md5.new()
        # a four-tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        # is not safe unless the client knows it won't be reading from the pack
        # under creation.
        self._cache_limit = 0
        # the temporary pack file name.
        self.random_name = rand_chars(20) + upload_suffix
        # when was this pack started ?
        self.start_time = time.time()
        # open an output stream for the data added to the pack.
        self.write_stream = self.upload_transport.open_write_stream(
            self.random_name, mode=self._file_mode)
        if 'pack' in debug.debug_flags:
            # mutter is not imported at module level, so import it here;
            # without this the debug path raised NameError.
            from bzrlib.trace import mutter
            mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
                time.ctime(), self.upload_transport.base, self.random_name,
                time.time() - self.start_time)
        # A list of byte sequences to be written to the new pack, and the
        # aggregate size of them.  Stored as a list rather than separate
        # variables so that the _write_data closure below can update them.
        self._buffer = [[], 0]
        # create a callable for adding data
        #
        # robertc says- this is a closure rather than a method on the object
        # so that the variables are locals, and faster than accessing object
        # members.
        def _write_data(bytes, flush=False, _buffer=self._buffer,
            _write=self.write_stream.write, _update=self._hash.update):
            _buffer[0].append(bytes)
            _buffer[1] += len(bytes)
            # buffer cap: self._cache_limit is read on every call so that
            # later changes to the limit take effect.
            if _buffer[1] > self._cache_limit or flush:
                bytes = ''.join(_buffer[0])
                _write(bytes)
                _update(bytes)
                # reset in place so the default-argument alias stays valid.
                _buffer[:] = [[], 0]
        # expose this on self, for the occasion when clients want to add data.
        self._write_data = _write_data
        # a pack writer object to serialise pack records.
        self._writer = pack.ContainerWriter(self._write_data)
        self._writer.begin()
        # what state is the pack in? (open, finished, aborted)
        self._state = 'open'
203
204
205
# Give the stock RepositoryPackCollection a pack_factory attribute pointing at
# NewPack. NOTE(review): presumably this keeps the patched Packer.open_pack
# (which calls pack_collection.pack_factory) working for non-GC collections -
# confirm against the bzrlib version in use.
RepositoryPackCollection.pack_factory = NewPack
206
207
class GCRepositoryPackCollection(RepositoryPackCollection):
    """Pack collection that creates GCPack packs and repacks by re-streaming.

    Packing is implemented by fetching every record of the source packs into
    a freshly created pack through GroupCompressVersionedFiles, rather than
    by copying pack records directly.
    """

    pack_factory = GCPack

    def _make_index(self, name, suffix):
        """Overridden to use BTreeGraphIndex objects.

        :param name: The pack name, used as a key into ``self._names``.
        :param suffix: The index suffix, used as a key into
            ``self._suffix_offsets`` and appended to ``name`` to form the
            index file name.
        :return: A BTreeGraphIndex on the index transport.
        """
        size_offset = self._suffix_offsets[suffix]
        index_name = name + suffix
        index_size = self._names[name][size_offset]
        return BTreeGraphIndex(
            self._index_transport, index_name, index_size)

    def _start_write_group(self):
        """Create a new pack and route all index writes to it.

        Overridden to create the pack with ``self.pack_factory()``.

        :raises errors.NotWriteLocked: If the repository is not write locked.
        """
        # Do not permit preparation for writing if we're not in a 'write lock'.
        if not self.repo.is_write_locked():
            raise errors.NotWriteLocked(self)
        self._new_pack = self.pack_factory(self, upload_suffix='.pack',
            file_mode=self.repo.bzrdir._get_file_mode())
        new_pack = self._new_pack
        # allow writing: queue writes to a new index
        self.revision_index.add_writable_index(new_pack.revision_index,
            new_pack)
        self.inventory_index.add_writable_index(new_pack.inventory_index,
            new_pack)
        self.text_index.add_writable_index(new_pack.text_index,
            new_pack)
        self.signature_index.add_writable_index(new_pack.signature_index,
            new_pack)
        if chk_support and self.chk_index is not None:
            self.chk_index.add_writable_index(new_pack.chk_index,
                new_pack)
            self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback

        # Point each versioned-files index at the aggregate index's callback
        # so that newly added nodes land in the writable index.
        self.repo.inventories._index._add_callback = self.inventory_index.add_callback
        self.repo.revisions._index._add_callback = self.revision_index.add_callback
        self.repo.signatures._index._add_callback = self.signature_index.add_callback
        self.repo.texts._index._add_callback = self.text_index.add_callback

    def _get_filtered_inv_stream(self, source_vf, keys):
        """Filter the texts of inventories, to find the chk pages.

        :param source_vf: The versioned files to read inventories from.
        :param keys: The inventory keys to stream.
        :return: A tuple ``(stream, id_roots, p_id_roots)``. ``stream`` is a
            generator yielding the records unchanged; the two sets are filled
            in lazily, as the stream is consumed, with the root keys of each
            inventory's id_to_entry map and (when present)
            parent_id_basename_to_file_id map.
        """
        id_roots = set()
        p_id_roots = set()
        def _filter_inv_stream(stream):
            for record in stream:
                bytes = record.get_bytes_as('fulltext')
                chk_inv = inventory.CHKInventory.deserialise(None, bytes, record.key)
                id_roots.add(chk_inv.id_to_entry.key())
                p_id_map = chk_inv.parent_id_basename_to_file_id
                if p_id_map is not None:
                    p_id_roots.add(p_id_map.key())
                yield record
        stream = source_vf.get_record_stream(keys, 'gc-optimal', True)
        return _filter_inv_stream(stream), id_roots, p_id_roots

    def _get_chk_stream(self, source_vf, keys, id_roots, p_id_roots):
        """Yield chk page records grouped by the map they belong to.

        Records reachable from ``id_roots`` are yielded first, level by
        level, then those reachable from ``p_id_roots``, then any keys that
        were not referenced from either set of roots.

        :param source_vf: The versioned files to read chk pages from.
        :param keys: All chk keys that should be streamed.
        :param id_roots: Root keys of the id_to_entry maps.
        :param p_id_roots: Root keys of the parent_id_basename_to_file_id
            maps.
        """
        # Note: We probably actually want multiple streams here, to help the
        #       client understand that the different levels won't compress well
        #       against each other
        remaining_keys = set(keys)
        def _get_referenced_stream(root_keys):
            cur_keys = root_keys
            while cur_keys:
                remaining_keys.difference_update(cur_keys)
                next_keys = set()
                stream = source_vf.get_record_stream(cur_keys, 'unordered',
                                                     True)
                for record in stream:
                    bytes = record.get_bytes_as('fulltext')
                    # We don't care about search_key_func for this code,
                    # because we only care about external references.
                    node = chk_map._deserialise(bytes, record.key,
                                                search_key_func=None)
                    next_keys.update(node.refs())
                    yield record
                # Only descend into pages that have not been emitted yet.
                cur_keys = next_keys.intersection(remaining_keys)
        for record in _get_referenced_stream(id_roots):
            yield record
        for record in _get_referenced_stream(p_id_roots):
            yield record
        if remaining_keys:
            # trace is not imported at module level, so import it here;
            # without this the note() call raised NameError.
            from bzrlib import trace
            trace.note('There were %d keys in the chk index, which'
                       ' were not referenced from inventories',
                       len(remaining_keys))
            stream = source_vf.get_record_stream(remaining_keys, 'unordered',
                                                 True)
            for record in stream:
                yield record

    def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
                                 reload_func=None):
        """Execute a series of pack operations.

        Each operation is carried out by creating a new pack and streaming
        every key found in the source packs' indices into it.

        :param pack_operations: A list of [revision_count, packs_to_combine].
        :param _packer_class: Accepted for interface compatibility; not used
            by this implementation.
        :param reload_func: Accepted for interface compatibility; not used by
            this implementation.
        :return: None.
        :raises errors.BzrError: If another pack is already being written.
        :raises AssertionError: If nothing was copied into the new pack; the
            new pack is aborted before this propagates.
        """
        for revision_count, packs in pack_operations:
            # we may have no-ops from the setup logic
            if len(packs) == 0:
                continue
            # Create a new temp VersionedFile instance based on these packs,
            # and then just fetch everything into the target
            to_copy = [('revision_index', 'revisions'),
                       ('inventory_index', 'inventories'),
                       ('text_index', 'texts'),
                       ('signature_index', 'signatures'),
                      ]
            # TODO: This is a very non-optimal ordering for chk_bytes. The
            #       issue is that pages that are similar are not transmitted
            #       together. Perhaps get_record_stream('gc-optimal') should be
            #       taught about how to group chk pages?
            has_chk = False
            if getattr(self, 'chk_index', None) is not None:
                has_chk = True
                # chk pages are copied right after inventories, because the
                # inventory pass discovers the chk root keys.
                to_copy.insert(2, ('chk_index', 'chk_bytes'))

            # Shouldn't we start_write_group around this?
            if self._new_pack is not None:
                raise errors.BzrError('call to %s.pack() while another pack is'
                                      ' being written.'
                                      % (self.__class__.__name__,))
            # NOTE(review): the suffix has no leading dot, unlike the
            # '.autopack' example in GCPack's docstring - confirm intended.
            new_pack = self.pack_factory(self, 'autopack',
                                         self.repo.bzrdir._get_file_mode())
            new_pack.set_write_cache_size(1024*1024)
            # TODO: A better alternative is to probably use Packer.open_pack(), and
            #       then create a GroupCompressVersionedFiles() around the
            #       target pack to insert into.
            pb = ui.ui_factory.nested_progress_bar()
            try:
                try:
                    for idx, (index_name, vf_name) in enumerate(to_copy):
                        pb.update('repacking %s' % (vf_name,), idx + 1, len(to_copy))
                        keys = set()
                        new_index = getattr(new_pack, index_name)
                        new_index.set_optimize(for_size=True)
                        for source_pack in packs:
                            source_index = getattr(source_pack, index_name)
                            keys.update(e[1] for e in source_index.iter_all_entries())
                        source_vf = getattr(self.repo, vf_name)
                        target_access = knit._DirectPackAccess({})
                        target_access.set_writer(new_pack._writer, new_index,
                                                 new_pack.access_tuple())
                        target_vf = GroupCompressVersionedFiles(
                            _GCGraphIndex(new_index,
                                          add_callback=new_index.add_nodes,
                                          parents=source_vf._index._parents,
                                          is_locked=self.repo.is_locked),
                            access=target_access,
                            delta=source_vf._delta)
                        stream = None
                        if has_chk:
                            if vf_name == 'inventories':
                                # id_roots / p_id_roots are filled in while
                                # the inventory stream is consumed below, so
                                # they are complete by the chk_bytes pass.
                                stream, id_roots, p_id_roots = self._get_filtered_inv_stream(
                                    source_vf, keys)
                            elif vf_name == 'chk_bytes':
                                stream = self._get_chk_stream(source_vf, keys,
                                                              id_roots, p_id_roots)
                        if stream is None:
                            stream = source_vf.get_record_stream(keys, 'gc-optimal', True)
                        target_vf.insert_record_stream(stream)
                    new_pack._check_references() # shouldn't be needed
                    if not new_pack.data_inserted():
                        # Raised inside the guarded region so that the new
                        # pack is aborted and the source packs are left
                        # untouched.
                        raise AssertionError('We copied from pack files,'
                                             ' but had no data copied')
                except:
                    # Bare except is deliberate: clean up on any failure,
                    # including interrupts, and always re-raise.
                    new_pack.abort()
                    raise
            finally:
                pb.finished()
            new_pack.finish()
            self.allocate(new_pack)
            for source_pack in packs:
                self._remove_pack_from_memory(source_pack)
        # record the newly available packs and stop advertising the old
        # packs
        self._save_pack_names(clear_obsolete_packs=True)
        # Move the old packs out of the way now they are no longer referenced.
        for revision_count, packs in pack_operations:
            self._obsolete_packs(packs)
0.20.7 by John Arbash Meinel
(ugly hack) autopacking doesn't work, so don't do it.
405
0.17.9 by Robert Collins
Initial stab at repository format support.
406
407
408
class GCPackRepository(KnitPackRepository):
    """KnitPackRepository specialised to store its data with groupcompress."""

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Initialise as a KnitPackRepository, then swap in GC collaborators.

        After the base initialiser runs, the pack collection and the
        inventories / revisions / signatures / texts (and, when supported,
        chk_bytes) versioned files are replaced by groupcompress-based ones.
        """
        KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        # and now replace everything it did :)
        indices_transport = self._transport.clone('indices')
        collection_args = [self,
            self._transport, indices_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class]
        collection_kwargs = {}
        if chk_support:
            # use_chk_index is only passed when chk support was importable.
            collection_kwargs['use_chk_index'] = self._format.supports_chks
        self._pack_collection = GCRepositoryPackCollection(
            *collection_args, **collection_kwargs)
        collection = self._pack_collection

        inv_index = collection.inventory_index
        self.inventories = GroupCompressVersionedFiles(
            _GCGraphIndex(inv_index.combined_index,
                add_callback=inv_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=inv_index.data_access)

        rev_index = collection.revision_index
        self.revisions = GroupCompressVersionedFiles(
            _GCGraphIndex(rev_index.combined_index,
                add_callback=rev_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=rev_index.data_access,
            delta=False)

        sig_index = collection.signature_index
        self.signatures = GroupCompressVersionedFiles(
            _GCGraphIndex(sig_index.combined_index,
                add_callback=sig_index.add_callback,
                parents=False, is_locked=self.is_locked),
            access=sig_index.data_access,
            delta=False)

        text_index = collection.text_index
        self.texts = GroupCompressVersionedFiles(
            _GCGraphIndex(text_index.combined_index,
                add_callback=text_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=text_index.data_access)

        if chk_support and _format.supports_chks:
            # No graph, no compression:- references from chks are between
            # different objects not temporal versions of the same; and without
            # some sort of temporal structure knit compression will just fail.
            chk_index = collection.chk_index
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(chk_index.combined_index,
                    add_callback=chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=chk_index.data_access)
        else:
            self.chk_bytes = None

        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
        # Note: We cannot unpack a delta that references a text we haven't seen yet.
        #       there are 2 options, work in fulltexts, or require topological
        #       sorting. Using fulltexts is more optimal for local operations,
        #       because the source can be smart about extracting multiple
        #       in-a-row (and sharing strings). Topological is better for
        #       remote, because we access less data.
        self._fetch_order = 'unordered'
        self._fetch_gc_optimal = True
        self._fetch_uses_deltas = False
0.17.9 by Robert Collins
Initial stab at repository format support.
487
488
0.17.26 by Robert Collins
Working better --gc-plain-chk.
489
if chk_support:
    class GCCHKPackRepository(CHKInventoryRepository):
        """GC customisation of CHKInventoryRepository.

        Only defined when the chk-capable bzrlib classes could be imported.
        """

        def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
            _serializer):
            """Overridden to change pack collection class.

            :raises AssertionError: If ``_format`` does not support chks.
            """
            # NOTE(review): this calls KnitPackRepository.__init__ directly
            # rather than CHKInventoryRepository.__init__ - presumably
            # deliberate, since everything it sets up is replaced below;
            # confirm against the base class.
            KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
                _commit_builder_class, _serializer)
            # and now replace everything it did :)
            index_transport = self._transport.clone('indices')
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class,
                use_chk_index=self._format.supports_chks,
                )
            self.inventories = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                    add_callback=self._pack_collection.inventory_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.inventory_index.data_access)
            self.revisions = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                    add_callback=self._pack_collection.revision_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.revision_index.data_access,
                delta=False)
            self.signatures = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                    add_callback=self._pack_collection.signature_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.signature_index.data_access,
                delta=False)
            self.texts = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.text_index.combined_index,
                    add_callback=self._pack_collection.text_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.text_index.data_access)
            # Explicit raise rather than an assert statement, so the check
            # still runs when Python is started with -O.
            if not _format.supports_chks:
                raise AssertionError(
                    'GCCHKPackRepository requires a format that supports chks')
            # No parents, individual CHK pages don't have specific ancestry
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.chk_index.data_access)
            # True when the repository object is 'write locked' (as opposed to the
            # physical lock only taken out around changes to the pack-names list.)
            # Another way to represent this would be a decorator around the control
            # files object that presents logical locks as physical ones - if this
            # gets ugly consider that alternative design. RBC 20071011
            self._write_lock_count = 0
            self._transaction = None
            # for tests
            self._reconcile_does_inventory_gc = True
            self._reconcile_fixes_text_parents = True
            self._reconcile_backsup_inventory = False
            # Fetch settings: records are requested unordered, in gc-optimal
            # grouping, as fulltexts rather than deltas.
            self._fetch_order = 'unordered'
            self._fetch_gc_optimal = True
            self._fetch_uses_deltas = False
0.17.26 by Robert Collins
Working better --gc-plain-chk.
551
552
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
553
class RepositoryFormatPackGCPlain(RepositoryFormatPackDevelopment2):
    """Development pack format: btree indices plus groupcompress.

    Repositories of this format are instances of GCPackRepository.
    """

    repository_class = GCPackRepository

    def get_format_string(self):
        """Return the string identifying this format.

        See RepositoryFormat.get_format_string().
        """
        marker = "Bazaar development format - btree+gc"
        requirement = " (needs bzr.dev from 1.6)\n"
        return marker + requirement

    def get_format_description(self):
        """Return the human readable description of this format.

        See RepositoryFormat.get_format_description().
        """
        summary = "Development repository format - btree+groupcompress"
        interop = " , interoperates with pack-0.92\n"
        return summary + interop
567
568
569
class RepositoryFormatPackGCRichRoot(RepositoryFormatKnitPack4):
    """Development rich-root pack format: btree indices plus groupcompress.

    Repositories of this format are instances of GCPackRepository.
    """

    repository_class = GCPackRepository

    def get_format_string(self):
        """Return the string identifying this format.

        See RepositoryFormat.get_format_string().
        """
        marker = "Bazaar development format - btree+gc-rich-root"
        requirement = " (needs bzr.dev from 1.6)\n"
        return marker + requirement

    def get_format_description(self):
        """Return the human readable description of this format.

        See RepositoryFormat.get_format_description().
        """
        summary = "Development repository format - btree+groupcompress"
        interop = " , interoperates with rich-root-pack\n"
        return summary + interop
583
584
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
585
class RepositoryFormatPackGCSubtrees(RepositoryFormatPackDevelopment2Subtree):
    """Development subtree pack format: btree indices plus groupcompress.

    Repositories of this format are instances of GCPackRepository.
    """

    repository_class = GCPackRepository

    def get_format_string(self):
        """Return the string identifying this format.

        See RepositoryFormat.get_format_string().
        """
        marker = "Bazaar development format - btree+gc-subtrees"
        requirement = " (needs bzr.dev from 1.6)\n"
        return marker + requirement

    def get_format_description(self):
        """Return the human readable description of this format.

        See RepositoryFormat.get_format_description().
        """
        summary = "Development repository format - btree+groupcompress"
        interop = " , interoperates with pack-0.92-subtrees\n"
        return summary + interop
599
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
600
if chk_support:
0.21.1 by John Arbash Meinel
Start basing the groupcompress chk formats on the dev5 formats.
601
    class RepositoryFormatPackGCPlainCHK(RepositoryFormatPackDevelopment5):
        """Development pack format combining CHK with groupcompress.

        Repositories of this format are instances of GCCHKPackRepository.
        """

        repository_class = GCCHKPackRepository

        def get_format_string(self):
            """Return the string identifying this format.

            See RepositoryFormat.get_format_string().
            """
            marker = 'Bazaar development format - chk+gc'
            requirement = ' (needs bzr.dev from 1.13)\n'
            return marker + requirement

        def get_format_description(self):
            """Return the human readable description of this format.

            See RepositoryFormat.get_format_description().
            """
            description = "Development repository format - chk+groupcompress"
            return description
614
0.21.2 by John Arbash Meinel
Bring in the trunk simplifications.
615
0.21.1 by John Arbash Meinel
Start basing the groupcompress chk formats on the dev5 formats.
616
    class RepositoryFormatPackGCPlainCHK16(RepositoryFormatPackDevelopment5Hash16):
        """Development pack format combining hash16 CHK with groupcompress.

        Repositories of this format are instances of GCCHKPackRepository.
        """

        repository_class = GCCHKPackRepository

        def get_format_string(self):
            """Return the string identifying this format.

            See RepositoryFormat.get_format_string().
            """
            marker = 'Bazaar development format - hash16chk+gc'
            requirement = ' (needs bzr.dev from 1.13)\n'
            return marker + requirement

        def get_format_description(self):
            """Return the human readable description of this format.

            See RepositoryFormat.get_format_description().
            """
            description = (
                "Development repository format - hash16chk+groupcompress")
            return description
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
629
630
0.22.3 by John Arbash Meinel
Play with some experimental alternate hashes, comment them out for now.
631
##    class RepositoryFormatPackGCPlainCHK16b(RepositoryFormatPackDevelopment5Hash16b):
632
##        """A hashed CHK+group compress pack repository."""
633
##
634
##        repository_class = GCCHKPackRepository
635
##
636
##        def get_format_string(self):
637
##            """See RepositoryFormat.get_format_string()."""
638
##            return ('Bazaar development format - hash16bchk+gc'
639
##                    ' (needs bzr.dev from 1.13)\n')
640
##
641
##        def get_format_description(self):
642
##            """See RepositoryFormat.get_format_description()."""
643
##            return ("Development repository format - hash16bchk+groupcompress")
644
##
645
##
646
##    class RepositoryFormatPackGCPlainCHK63(RepositoryFormatPackDevelopment5Hash63):
647
##        """A hashed CHK+group compress pack repository."""
648
##
649
##        repository_class = GCCHKPackRepository
650
##
651
##        def get_format_string(self):
652
##            """See RepositoryFormat.get_format_string()."""
653
##            return ('Bazaar development format - hash63+gc'
654
##                    ' (needs bzr.dev from 1.13)\n')
655
##
656
##        def get_format_description(self):
657
##            """See RepositoryFormat.get_format_description()."""
658
##            return ("Development repository format - hash63+groupcompress")
659
##
660
##
661
##    class RepositoryFormatPackGCPlainCHK127a(RepositoryFormatPackDevelopment5Hash127a):
662
##        """A hashed CHK+group compress pack repository."""
663
##
664
##        repository_class = GCCHKPackRepository
665
##
666
##        def get_format_string(self):
667
##            """See RepositoryFormat.get_format_string()."""
668
##            return ('Bazaar development format - hash127a+gc'
669
##                    ' (needs bzr.dev from 1.13)\n')
670
##
671
##        def get_format_description(self):
672
##            """See RepositoryFormat.get_format_description()."""
673
##            return ("Development repository format - hash127a+groupcompress")
674
##
675
##
676
##    class RepositoryFormatPackGCPlainCHK127b(RepositoryFormatPackDevelopment5Hash127b):
677
##        """A hashed CHK+group compress pack repository."""
678
##
679
##        repository_class = GCCHKPackRepository
680
##
681
##        def get_format_string(self):
682
##            """See RepositoryFormat.get_format_string()."""
683
##            return ('Bazaar development format - hash127b+gc'
684
##                    ' (needs bzr.dev from 1.13)\n')
685
##
686
##        def get_format_description(self):
687
##            """See RepositoryFormat.get_format_description()."""
688
##            return ("Development repository format - hash127b+groupcompress")
689
690
0.21.3 by John Arbash Meinel
Start putting together a GroupCompress format that is built on dev5
691
    class RepositoryFormatPackGCPlainCHK255(RepositoryFormatPackDevelopment5Hash255):
        """Development pack format combining hash255 CHK with groupcompress.

        Repositories of this format are instances of GCCHKPackRepository.
        """

        repository_class = GCCHKPackRepository

        def get_format_string(self):
            """Return the string identifying this format.

            See RepositoryFormat.get_format_string().
            """
            marker = 'Bazaar development format - hash255chk+gc'
            requirement = ' (needs bzr.dev from 1.13)\n'
            return marker + requirement

        def get_format_description(self):
            """Return the human readable description of this format.

            See RepositoryFormat.get_format_description().
            """
            description = (
                "Development repository format - hash255chk+groupcompress")
            return description
704
705
0.17.9 by Robert Collins
Initial stab at repository format support.
706
def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
    """Be incompatible with the regular fetch code for groupcompress formats.

    :param source: Repository object; its ``_format`` attribute is checked.
    :param target: Repository object; its ``_format`` attribute is checked.
    :param orig_method: Callable used when neither repository has a
        groupcompress format. Defaults to the value InterPackRepo.is_compatible
        had when this function was defined.
    :return: False when either repository's format is one of the
        groupcompress formats, otherwise the result of
        ``orig_method(source, target)``.
    """
    gc_formats = [
        RepositoryFormatPackGCPlain,
        RepositoryFormatPackGCRichRoot,
        RepositoryFormatPackGCSubtrees,
    ]
    if chk_support:
        # Only the CHK formats that are actually defined are listed; the
        # experimental 16b/63/127a/127b hash variants are commented out.
        gc_formats.extend([
            RepositoryFormatPackGCPlainCHK,
            RepositoryFormatPackGCPlainCHK16,
            RepositoryFormatPackGCPlainCHK255,
        ])
    # isinstance() needs a tuple (not a list) of classes.
    gc_formats = tuple(gc_formats)
    # Check source before target, stopping at the first groupcompress format.
    for repo in (source, target):
        if isinstance(repo._format, gc_formats):
            return False
    return orig_method(source, target)
722
723
724
# Replace InterPackRepo.is_compatible with pack_incompatible (wrapped as a
# staticmethod). pack_incompatible captured the previous is_compatible as the
# default for its orig_method parameter, and calls it whenever neither
# repository uses one of the groupcompress formats.
InterPackRepo.is_compatible = staticmethod(pack_incompatible)