/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.17.9 by Robert Collins
Initial stab at repository format support.
1
# groupcompress, a bzr plugin providing improved disk utilisation
2
# Copyright (C) 2008 Canonical Limited.
3
# 
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License version 2 as published
6
# by the Free Software Foundation.
7
# 
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
# 
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
16
# 
17
18
"""Repository formats using B+Tree indices and groupcompress compression."""
19
20
import md5
21
import time
22
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
23
from bzrlib import (
24
    debug,
25
    errors,
26
    knit,
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
27
    inventory,
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
28
    pack,
29
    repository,
30
    ui,
31
    )
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
32
from bzrlib.btree_index import (
33
    BTreeBuilder,
34
    BTreeGraphIndex,
35
    )
0.17.9 by Robert Collins
Initial stab at repository format support.
36
from bzrlib.index import GraphIndex, GraphIndexBuilder
37
from bzrlib.repository import InterPackRepo
38
from bzrlib.plugins.groupcompress.groupcompress import (
39
    _GCGraphIndex,
40
    GroupCompressVersionedFiles,
41
    )
42
from bzrlib.osutils import rand_chars
43
from bzrlib.repofmt.pack_repo import (
44
    Pack,
45
    NewPack,
46
    KnitPackRepository,
47
    RepositoryPackCollection,
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
48
    RepositoryFormatPackDevelopment2,
49
    RepositoryFormatPackDevelopment2Subtree,
0.17.9 by Robert Collins
Initial stab at repository format support.
50
    RepositoryFormatKnitPack1,
51
    RepositoryFormatKnitPack3,
52
    RepositoryFormatKnitPack4,
53
    Packer,
54
    ReconcilePacker,
55
    OptimisingPacker,
56
    )
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
57
try:
58
    from bzrlib.repofmt.pack_repo import (
0.17.26 by Robert Collins
Working better --gc-plain-chk.
59
    CHKInventoryRepository,
0.21.1 by John Arbash Meinel
Start basing the groupcompress chk formats on the dev5 formats.
60
    RepositoryFormatPackDevelopment5,
61
    RepositoryFormatPackDevelopment5Hash16,
0.22.3 by John Arbash Meinel
Play with some experimental alternate hashes, comment them out for now.
62
##    RepositoryFormatPackDevelopment5Hash16b,
63
##    RepositoryFormatPackDevelopment5Hash63,
64
##    RepositoryFormatPackDevelopment5Hash127a,
65
##    RepositoryFormatPackDevelopment5Hash127b,
0.21.3 by John Arbash Meinel
Start putting together a GroupCompress format that is built on dev5
66
    RepositoryFormatPackDevelopment5Hash255,
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
67
    )
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
68
    from bzrlib import chk_map
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
69
    chk_support = True
70
except ImportError:
71
    chk_support = False
0.17.9 by Robert Collins
Initial stab at repository format support.
72
from bzrlib import ui
73
74
75
def open_pack(self):
    """Open the new pack that this packer will write into.

    The pack is created through the ``pack_factory`` attribute of the
    packer's pack collection rather than through a hard-coded pack class, so
    a collection can choose which kind of pack gets built.

    :return: The object returned by the collection's ``pack_factory``.
    """
    collection = self._pack_collection
    make_pack = collection.pack_factory
    return make_pack(
        collection,
        upload_suffix=self.suffix,
        file_mode=collection.repo.bzrdir._get_file_mode())
79
80
81
# Monkeypatch bzrlib's Packer so that every packer opens its new pack through
# the pack collection's ``pack_factory`` (see open_pack above) instead of a
# fixed pack class.
Packer.open_pack = open_pack
82
83
84
class GCPack(NewPack):
    """A NewPack whose index builders are configured for groupcompress.

    Only construction is overridden: every graph index is created with at
    most one reference list, and an optional chk index is added when the
    running bzrlib supports CHK inventories.
    """

    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Create a GCPack instance.

        :param pack_collection: The pack collection the new pack belongs to.
            It supplies the index builder class, the index class, and the
            upload, index and pack transports.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g '.autopack'
        :param file_mode: An optional file mode to create the new files with.
        """
        # Replaced from bzr.dev's NewPack.__init__ to:
        # - change inventory reference list length to 1
        # - change texts reference lists to 1
        # TODO: patch this to be parameterised upstream
        index_builder_class = pack_collection._index_builder_class
        # Only pass chk_index through when bzrlib knows about it; older
        # bzrlib Pack.__init__ is called with the four classic indices only.
        optional_indices = {}
        if chk_support:
            # from brisbane-core
            if pack_collection.chk_index is not None:
                # CHK based storage - just blobs, no compression or parents.
                optional_indices['chk_index'] = index_builder_class(
                    reference_lists=0)
            else:
                optional_indices['chk_index'] = None
        Pack.__init__(self,
            # Revisions: parents list, no text compression.
            index_builder_class(reference_lists=1),
            # Inventory: a single reference list (the graph), kept for
            # compatibility with other existing bzrlib code.
            index_builder_class(reference_lists=1),
            # Texts: per file graph for all fileids - so one reference list
            # and two elements in the key tuple.
            index_builder_class(reference_lists=1, key_elements=2),
            # Signatures: Just blobs to store, no compression, no parents
            # listing.
            index_builder_class(reference_lists=0),
            **optional_indices)
        self._pack_collection = pack_collection
        # When we make readonly indices, we need this.
        self.index_class = pack_collection._index_class
        # where should the new pack be opened
        self.upload_transport = pack_collection._upload_transport
        # where are indices written out to
        self.index_transport = pack_collection._index_transport
        # where is the pack renamed to when it is finished?
        self.pack_transport = pack_collection._pack_transport
        # What file mode to upload the pack and indices with.
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        self._hash = md5.new()
        # a four-tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        # is not safe unless the client knows it won't be reading from the pack
        # under creation.
        self._cache_limit = 0
        # the temporary pack file name.
        self.random_name = rand_chars(20) + upload_suffix
        # when was this pack started ?
        self.start_time = time.time()
        # open an output stream for the data added to the pack.
        self.write_stream = self.upload_transport.open_write_stream(
            self.random_name, mode=self._file_mode)
        if 'pack' in debug.debug_flags:
            # mutter is not imported at module level; import it here so that
            # enabling the 'pack' debug flag does not raise NameError.
            from bzrlib.trace import mutter
            mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
                time.ctime(), self.upload_transport.base, self.random_name,
                time.time() - self.start_time)
        # A list of byte sequences to be written to the new pack, and the
        # aggregate size of them.  Stored as a list rather than separate
        # variables so that the _write_data closure below can update them.
        self._buffer = [[], 0]
        # create a callable for adding data
        #
        # robertc says- this is a closure rather than a method on the object
        # so that the variables are locals, and faster than accessing object
        # members.
        def _write_data(bytes, flush=False, _buffer=self._buffer,
            _write=self.write_stream.write, _update=self._hash.update):
            _buffer[0].append(bytes)
            _buffer[1] += len(bytes)
            # buffer cap: once exceeded (or on an explicit flush) write the
            # pending chunks out and feed the same data to the running hash.
            if _buffer[1] > self._cache_limit or flush:
                pending = ''.join(_buffer[0])
                _write(pending)
                _update(pending)
                # Reset in place so the default-argument alias stays valid.
                _buffer[:] = [[], 0]
        # expose this on self, for the occasion when clients want to add data.
        self._write_data = _write_data
        # a pack writer object to serialise pack records.
        self._writer = pack.ContainerWriter(self._write_data)
        self._writer.begin()
        # what state is the pack in? (open, finished, aborted)
        self._state = 'open'
203
204
205
# Give the stock pack collection a default ``pack_factory`` so that the
# patched Packer.open_pack above, which reads ``pack_factory`` from the
# collection, keeps creating plain NewPack objects for non-groupcompress
# repositories.
RepositoryPackCollection.pack_factory = NewPack
206
207
class GCRepositoryPackCollection(RepositoryPackCollection):
    """Pack collection that creates GCPack packs and BTree indices."""

    pack_factory = GCPack

    def _make_index(self, name, suffix):
        """Overridden to use BTreeGraphIndex objects.

        :param name: The pack name; used to look up the recorded index sizes
            in ``self._names``.
        :param suffix: The index file suffix; selects which recorded size to
            use via ``self._suffix_offsets``.
        :return: A BTreeGraphIndex on the index transport.
        """
        size_offset = self._suffix_offsets[suffix]
        index_name = name + suffix
        index_size = self._names[name][size_offset]
        return BTreeGraphIndex(
            self._index_transport, index_name, index_size)

    def _start_write_group(self):
        """Open a new pack and route all index writes to it.

        Overridden to create the pack with ``self.pack_factory()``.

        :raises errors.NotWriteLocked: If the repository is not write locked.
        """
        # Do not permit preparation for writing if we're not in a 'write lock'.
        if not self.repo.is_write_locked():
            raise errors.NotWriteLocked(self)
        self._new_pack = self.pack_factory(self, upload_suffix='.pack',
            file_mode=self.repo.bzrdir._get_file_mode())
        # allow writing: queue writes to a new index
        self.revision_index.add_writable_index(self._new_pack.revision_index,
            self._new_pack)
        self.inventory_index.add_writable_index(self._new_pack.inventory_index,
            self._new_pack)
        self.text_index.add_writable_index(self._new_pack.text_index,
            self._new_pack)
        self.signature_index.add_writable_index(self._new_pack.signature_index,
            self._new_pack)
        if chk_support and self.chk_index is not None:
            self.chk_index.add_writable_index(self._new_pack.chk_index,
                self._new_pack)
            self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback

        # Point each versioned-files index at the aggregate index callback so
        # that new nodes land in the pack opened above.
        self.repo.inventories._index._add_callback = self.inventory_index.add_callback
        self.repo.revisions._index._add_callback = self.revision_index.add_callback
        self.repo.signatures._index._add_callback = self.signature_index.add_callback
        self.repo.texts._index._add_callback = self.text_index.add_callback

    def _get_filtered_inv_stream(self, source_vf, keys):
        """Filter the texts of inventories, to find the chk pages.

        :param source_vf: The versioned files to read the inventories from.
        :param keys: The inventory keys to stream.
        :return: A tuple ``(stream, id_roots, p_id_roots)``. ``stream`` is a
            generator yielding the inventory records unchanged. The two lists
            hold the distinct root keys of the id_to_entry and
            parent_id_basename_to_file_id maps, in first-seen order. They are
            filled in as ``stream`` is consumed, so they are only complete
            once the stream has been exhausted.
        """
        id_roots = []
        p_id_roots = []
        # Sets mirror the lists for cheap membership tests; the lists keep
        # the order in which roots were first seen.
        id_roots_set = set()
        p_id_roots_set = set()
        def _filter_inv_stream(stream):
            for record in stream:
                inv_bytes = record.get_bytes_as('fulltext')
                chk_inv = inventory.CHKInventory.deserialise(None, inv_bytes,
                                                             record.key)
                key = chk_inv.id_to_entry.key()
                if key not in id_roots_set:
                    id_roots.append(key)
                    id_roots_set.add(key)
                p_id_map = chk_inv.parent_id_basename_to_file_id
                if p_id_map is not None:
                    key = p_id_map.key()
                    if key not in p_id_roots_set:
                        p_id_roots_set.add(key)
                        p_id_roots.append(key)
                yield record
        stream = source_vf.get_record_stream(keys, 'gc-optimal', True)
        return _filter_inv_stream(stream), id_roots, p_id_roots

    def _get_chk_stream(self, source_vf, keys, id_roots, p_id_roots):
        """Yield chk page records, clustered by the map that references them.

        We stream the keys from 'id_roots' and the things they reference,
        then the keys from 'p_id_roots' and the things they reference, and
        finally any remaining keys that we didn't get to.

        We also group referenced pages together: if one root references a
        page with prefix 'a', and another root references a page with prefix
        'a', we yield those pages before we yield the pages for 'b'. This
        keeps 'similar' pages together.

        :param source_vf: The versioned files holding the chk pages.
        :param keys: All chk keys that are expected to be emitted.
        :param id_roots: Root keys of the id_to_entry maps.
        :param p_id_roots: Root keys of the parent_id_basename_to_file_id
            maps.
        :return: A generator of records.
        """
        # Note: We probably actually want multiple streams here, to help the
        #       client understand that the different levels won't compress well
        #       against eachother
        remaining_keys = set(keys)
        def _get_referenced_stream(root_keys):
            cur_keys = root_keys
            while cur_keys:
                keys_by_search_prefix = {}
                remaining_keys.difference_update(cur_keys)
                # Children already queued for the next level.
                next_keys = set()
                stream = source_vf.get_record_stream(cur_keys, 'as-requested',
                                                     True)
                for record in stream:
                    node_bytes = record.get_bytes_as('fulltext')
                    # We don't care about search_key_func for this code,
                    # because we only care about external references.
                    node = chk_map._deserialise(node_bytes, record.key,
                                                search_key_func=None)
                    if isinstance(node, chk_map.InternalNode):
                        for prefix, value in node._items.iteritems():
                            assert isinstance(value, tuple)
                            # Make sure we won't be emitting any keys twice:
                            # skip children already queued for the next level
                            # and children that are no longer (or never were)
                            # in the set of keys still to be emitted.
                            if value in next_keys:
                                continue
                            if value not in remaining_keys:
                                continue
                            keys_by_search_prefix.setdefault(prefix,
                                []).append(value)
                            next_keys.add(value)
                    yield record
                cur_keys = []
                for prefix in sorted(keys_by_search_prefix):
                    cur_keys.extend(keys_by_search_prefix[prefix])
        for record in _get_referenced_stream(id_roots):
            yield record
        for record in _get_referenced_stream(p_id_roots):
            yield record
        if remaining_keys:
            # trace is not imported at module level; import it here so that
            # reporting unreferenced pages does not raise NameError.
            from bzrlib import trace
            trace.note('There were %d keys in the chk index, which'
                       ' were not referenced from inventories',
                       len(remaining_keys))
            stream = source_vf.get_record_stream(remaining_keys, 'unordered',
                                                 True)
            for record in stream:
                yield record

    def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
                                 reload_func=None):
        """Execute a series of pack operations.

        Each group of packs is combined by streaming every record found in
        the source packs' indices into one freshly created pack.

        :param pack_operations: A list of [revision_count, packs_to_combine].
        :param _packer_class: Not used by this implementation.
        :param reload_func: Not used by this implementation.
        :return: None.
        :raises errors.BzrError: If another pack is already being written.
        :raises AssertionError: If nothing was copied into the new pack.
        """
        for revision_count, packs in pack_operations:
            # we may have no-ops from the setup logic
            if not packs:
                continue
            # Create a new temp VersionedFile instance based on these packs,
            # and then just fetch everything into the target
            to_copy = [('revision_index', 'revisions'),
                       ('inventory_index', 'inventories'),
                       ('text_index', 'texts'),
                       ('signature_index', 'signatures'),
                      ]
            # TODO: This is a very non-optimal ordering for chk_bytes. The
            #       issue is that pages that are similar are not transmitted
            #       together. Perhaps get_record_stream('gc-optimal') should be
            #       taught about how to group chk pages?
            has_chk = getattr(self, 'chk_index', None) is not None
            if has_chk:
                # chk pages must be copied right after the inventories,
                # because the inventory pass collects the chk root keys.
                to_copy.insert(2, ('chk_index', 'chk_bytes'))

            # Shouldn't we start_write_group around this?
            if self._new_pack is not None:
                raise errors.BzrError('call to %s.pack() while another pack is'
                                      ' being written.'
                                      % (self.__class__.__name__,))
            # The suffix carries a leading dot, like the '.pack' suffix used
            # by _start_write_group.
            new_pack = self.pack_factory(self, '.autopack',
                                         self.repo.bzrdir._get_file_mode())
            new_pack.set_write_cache_size(1024*1024)
            # TODO: A better alternative is to probably use Packer.open_pack(), and
            #       then create a GroupCompressVersionedFiles() around the
            #       target pack to insert into.
            id_roots = []
            p_id_roots = []
            pb = ui.ui_factory.nested_progress_bar()
            try:
                try:
                    for idx, (index_name, vf_name) in enumerate(to_copy):
                        pb.update('repacking %s' % (vf_name,), idx + 1,
                                  len(to_copy))
                        keys = set()
                        new_index = getattr(new_pack, index_name)
                        new_index.set_optimize(for_size=True)
                        for source_pack in packs:
                            source_index = getattr(source_pack, index_name)
                            keys.update(e[1] for e in
                                        source_index.iter_all_entries())
                        source_vf = getattr(self.repo, vf_name)
                        target_access = knit._DirectPackAccess({})
                        target_access.set_writer(new_pack._writer, new_index,
                                                 new_pack.access_tuple())
                        target_vf = GroupCompressVersionedFiles(
                            _GCGraphIndex(new_index,
                                          add_callback=new_index.add_nodes,
                                          parents=source_vf._index._parents,
                                          is_locked=self.repo.is_locked),
                            access=target_access,
                            delta=source_vf._delta)
                        stream = None
                        if has_chk:
                            if vf_name == 'inventories':
                                (stream, id_roots,
                                 p_id_roots) = self._get_filtered_inv_stream(
                                    source_vf, keys)
                            elif vf_name == 'chk_bytes':
                                stream = self._get_chk_stream(source_vf, keys,
                                                              id_roots,
                                                              p_id_roots)
                        if stream is None:
                            stream = source_vf.get_record_stream(
                                keys, 'gc-optimal', True)
                        target_vf.insert_record_stream(stream)
                    new_pack._check_references() # shouldn't be needed
                finally:
                    # Always release the progress bar, whether or not the
                    # copy succeeded.
                    pb.finished()
            except:
                # Deliberately bare: whatever went wrong (including an
                # interrupt), discard the partial pack and re-raise.
                new_pack.abort()
                raise
            if not new_pack.data_inserted():
                # we need to abort somehow, because we don't want to remove
                # the other packs
                raise AssertionError('We copied from pack files,'
                                     ' but had no data copied')
            new_pack.finish()
            self.allocate(new_pack)
            for old_pack in packs:
                self._remove_pack_from_memory(old_pack)
        # record the newly available packs and stop advertising the old
        # packs
        self._save_pack_names(clear_obsolete_packs=True)
        # Move the old packs out of the way now they are no longer referenced.
        for revision_count, packs in pack_operations:
            self._obsolete_packs(packs)
0.20.7 by John Arbash Meinel
(ugly hack) autopacking doesn't work, so don't do it.
430
0.17.9 by Robert Collins
Initial stab at repository format support.
431
432
433
class GCPackRepository(KnitPackRepository):
    """KnitPackRepository whose stores are GroupCompressVersionedFiles."""

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Run the base initialiser, then install the GC pack collection.

        The pack collection and the inventories, revisions, signatures, texts
        and chk_bytes attributes assigned here replace whatever
        KnitPackRepository.__init__ set up.
        """
        KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        index_transport = self._transport.clone('indices')
        collection_args = (
            self,
            self._transport,
            index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            )
        # use_chk_index is only passed when chk support is available.
        collection_kwargs = {}
        if chk_support:
            collection_kwargs['use_chk_index'] = self._format.supports_chks
        self._pack_collection = GCRepositoryPackCollection(
            *collection_args, **collection_kwargs)

        def gc_versioned_files(aggregate_index, parents, **vf_options):
            # Wrap one index of the pack collection in a group-compress store.
            graph_index = _GCGraphIndex(aggregate_index.combined_index,
                add_callback=aggregate_index.add_callback,
                parents=parents, is_locked=self.is_locked)
            return GroupCompressVersionedFiles(graph_index,
                access=aggregate_index.data_access, **vf_options)

        collection = self._pack_collection
        self.inventories = gc_versioned_files(collection.inventory_index, True)
        self.revisions = gc_versioned_files(collection.revision_index, True,
            delta=False)
        self.signatures = gc_versioned_files(collection.signature_index, False,
            delta=False)
        self.texts = gc_versioned_files(collection.text_index, True)
        self.chk_bytes = None
        if chk_support and _format.supports_chks:
            # No graph, no compression: chk references link different objects
            # rather than temporal versions of the same one, and without some
            # sort of temporal structure knit compression will just fail.
            self.chk_bytes = gc_versioned_files(collection.chk_index, False)
        # Counts the logical 'write lock' on the repository object, as opposed
        # to the physical lock that is only taken out around changes to the
        # pack-names list. A decorator around the control files object that
        # presents logical locks as physical ones would be an alternative
        # design - consider it if this gets ugly. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
        # A delta that references a text we have not seen yet cannot be
        # unpacked, so either work in fulltexts or require topological
        # sorting. Fulltexts suit local operations better, because the source
        # can be smart about extracting several in a row (and sharing
        # strings); topological suits remote better, because less data is
        # accessed.
        self._fetch_order = 'unordered'
        self._fetch_gc_optimal = True
        self._fetch_uses_deltas = False
0.17.9 by Robert Collins
Initial stab at repository format support.
512
513
0.17.26 by Robert Collins
Working better --gc-plain-chk.
514
if chk_support:
    class GCCHKPackRepository(CHKInventoryRepository):
        """GC customisation of CHKInventoryRepository.

        Only defined when chk_support is true.
        """

        def __init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer):
            """Overridden to change pack collection class.

            After the base initialiser has run, the pack collection and the
            inventories, revisions, signatures, texts and chk_bytes stores are
            replaced with group-compress backed ones.

            :raises AssertionError: if _format.supports_chks is false.
            """
            # NOTE(review): this calls KnitPackRepository.__init__ directly
            # although the declared base is CHKInventoryRepository - confirm
            # that is intended.
            KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
                _commit_builder_class, _serializer)
            # and now replace everything it did :)
            index_transport = self._transport.clone('indices')
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class,
                use_chk_index=self._format.supports_chks,
                )
            self.inventories = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                    add_callback=self._pack_collection.inventory_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.inventory_index.data_access)
            self.revisions = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                    add_callback=self._pack_collection.revision_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.revision_index.data_access,
                delta=False)
            self.signatures = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                    add_callback=self._pack_collection.signature_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.signature_index.data_access,
                delta=False)
            self.texts = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.text_index.combined_index,
                    add_callback=self._pack_collection.text_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.text_index.data_access)
            # Raise explicitly instead of using an assert statement, so the
            # check is still performed when Python runs with -O.
            if not _format.supports_chks:
                raise AssertionError(
                    'GCCHKPackRepository requires a format that supports chks')
            # No parents, individual CHK pages don't have specific ancestry
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.chk_index.data_access)
            # True when the repository object is 'write locked' (as opposed to the
            # physical lock only taken out around changes to the pack-names list.)
            # Another way to represent this would be a decorator around the control
            # files object that presents logical locks as physical ones - if this
            # gets ugly consider that alternative design. RBC 20071011
            self._write_lock_count = 0
            self._transaction = None
            # for tests
            self._reconcile_does_inventory_gc = True
            self._reconcile_fixes_text_parents = True
            self._reconcile_backsup_inventory = False
            # Fetch settings: unordered, gc-optimal, and no deltas.
            self._fetch_order = 'unordered'
            self._fetch_gc_optimal = True
            self._fetch_uses_deltas = False
0.17.26 by Robert Collins
Working better --gc-plain-chk.
576
577
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
578
class RepositoryFormatPackGCPlain(RepositoryFormatPackDevelopment2):
    """Development pack format (btree+gc) backed by GCPackRepository."""

    repository_class = GCPackRepository

    def get_format_string(self):
        """Return the format string; see RepositoryFormat.get_format_string()."""
        format_string = ("Bazaar development format - "
            "btree+gc (needs bzr.dev from 1.6)\n")
        return format_string

    def get_format_description(self):
        """Return the description; see RepositoryFormat.get_format_description()."""
        # The space before the comma is part of the existing text and is
        # kept verbatim.
        description = ("Development repository format - "
            "btree+groupcompress , interoperates with pack-0.92\n")
        return description
592
593
594
class RepositoryFormatPackGCRichRoot(RepositoryFormatKnitPack4):
    """Development pack format (btree+gc-rich-root) backed by GCPackRepository."""

    repository_class = GCPackRepository

    def get_format_string(self):
        """Return the format string; see RepositoryFormat.get_format_string()."""
        format_string = ("Bazaar development format - "
            "btree+gc-rich-root (needs bzr.dev from 1.6)\n")
        return format_string

    def get_format_description(self):
        """Return the description; see RepositoryFormat.get_format_description()."""
        # The space before the comma is part of the existing text and is
        # kept verbatim.
        description = ("Development repository format - "
            "btree+groupcompress , interoperates with rich-root-pack\n")
        return description
608
609
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
610
class RepositoryFormatPackGCSubtrees(RepositoryFormatPackDevelopment2Subtree):
    """Development pack format (btree+gc-subtrees) backed by GCPackRepository."""

    repository_class = GCPackRepository

    def get_format_string(self):
        """Return the format string; see RepositoryFormat.get_format_string()."""
        format_string = ("Bazaar development format - "
            "btree+gc-subtrees (needs bzr.dev from 1.6)\n")
        return format_string

    def get_format_description(self):
        """Return the description; see RepositoryFormat.get_format_description()."""
        # The space before the comma is part of the existing text and is
        # kept verbatim.
        description = ("Development repository format - "
            "btree+groupcompress , interoperates with pack-0.92-subtrees\n")
        return description
624
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
625
if chk_support:
    class RepositoryFormatPackGCPlainCHK(RepositoryFormatPackDevelopment5):
        """Development pack format (chk+gc) backed by GCCHKPackRepository."""

        repository_class = GCCHKPackRepository

        def get_format_string(self):
            """Return the format string; see RepositoryFormat.get_format_string()."""
            format_string = ("Bazaar development format - "
                "chk+gc (needs bzr.dev from 1.13)\n")
            return format_string

        def get_format_description(self):
            """Return the description; see RepositoryFormat.get_format_description()."""
            description = "Development repository format - chk+groupcompress"
            return description
639
0.21.2 by John Arbash Meinel
Bring in the trunk simplifications.
640
0.21.1 by John Arbash Meinel
Start basing the groupcompress chk formats on the dev5 formats.
641
    class RepositoryFormatPackGCPlainCHK16(RepositoryFormatPackDevelopment5Hash16):
        """Development pack format (hash16chk+gc) backed by GCCHKPackRepository."""

        repository_class = GCCHKPackRepository

        def get_format_string(self):
            """Return the format string; see RepositoryFormat.get_format_string()."""
            format_string = ("Bazaar development format - "
                "hash16chk+gc (needs bzr.dev from 1.13)\n")
            return format_string

        def get_format_description(self):
            """Return the description; see RepositoryFormat.get_format_description()."""
            description = ("Development repository format - "
                "hash16chk+groupcompress")
            return description
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
654
655
0.22.3 by John Arbash Meinel
Play with some experimental alternate hashes, comment them out for now.
656
##    class RepositoryFormatPackGCPlainCHK16b(RepositoryFormatPackDevelopment5Hash16b):
657
##        """A hashed CHK+group compress pack repository."""
658
##
659
##        repository_class = GCCHKPackRepository
660
##
661
##        def get_format_string(self):
662
##            """See RepositoryFormat.get_format_string()."""
663
##            return ('Bazaar development format - hash16bchk+gc'
664
##                    ' (needs bzr.dev from 1.13)\n')
665
##
666
##        def get_format_description(self):
667
##            """See RepositoryFormat.get_format_description()."""
668
##            return ("Development repository format - hash16bchk+groupcompress")
669
##
670
##
671
##    class RepositoryFormatPackGCPlainCHK63(RepositoryFormatPackDevelopment5Hash63):
672
##        """A hashed CHK+group compress pack repository."""
673
##
674
##        repository_class = GCCHKPackRepository
675
##
676
##        def get_format_string(self):
677
##            """See RepositoryFormat.get_format_string()."""
678
##            return ('Bazaar development format - hash63+gc'
679
##                    ' (needs bzr.dev from 1.13)\n')
680
##
681
##        def get_format_description(self):
682
##            """See RepositoryFormat.get_format_description()."""
683
##            return ("Development repository format - hash63+groupcompress")
684
##
685
##
686
##    class RepositoryFormatPackGCPlainCHK127a(RepositoryFormatPackDevelopment5Hash127a):
687
##        """A hashed CHK+group compress pack repository."""
688
##
689
##        repository_class = GCCHKPackRepository
690
##
691
##        def get_format_string(self):
692
##            """See RepositoryFormat.get_format_string()."""
693
##            return ('Bazaar development format - hash127a+gc'
694
##                    ' (needs bzr.dev from 1.13)\n')
695
##
696
##        def get_format_description(self):
697
##            """See RepositoryFormat.get_format_description()."""
698
##            return ("Development repository format - hash127a+groupcompress")
699
##
700
##
701
##    class RepositoryFormatPackGCPlainCHK127b(RepositoryFormatPackDevelopment5Hash127b):
702
##        """A hashed CHK+group compress pack repository."""
703
##
704
##        repository_class = GCCHKPackRepository
705
##
706
##        def get_format_string(self):
707
##            """See RepositoryFormat.get_format_string()."""
708
##            return ('Bazaar development format - hash127b+gc'
709
##                    ' (needs bzr.dev from 1.13)\n')
710
##
711
##        def get_format_description(self):
712
##            """See RepositoryFormat.get_format_description()."""
713
##            return ("Development repository format - hash127b+groupcompress")
714
715
0.21.3 by John Arbash Meinel
Start putting together a GroupCompress format that is built on dev5
716
    class RepositoryFormatPackGCPlainCHK255(RepositoryFormatPackDevelopment5Hash255):
        """Development pack format (hash255chk+gc) backed by GCCHKPackRepository."""

        repository_class = GCCHKPackRepository

        def get_format_string(self):
            """Return the format string; see RepositoryFormat.get_format_string()."""
            format_string = ("Bazaar development format - "
                "hash255chk+gc (needs bzr.dev from 1.13)\n")
            return format_string

        def get_format_description(self):
            """Return the description; see RepositoryFormat.get_format_description()."""
            description = ("Development repository format - "
                "hash255chk+groupcompress")
            return description
729
730
0.17.9 by Robert Collins
Initial stab at repository format support.
731
def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
    """Be incompatible with the regular fetch code.

    Returns False when the _format of either source or target is one of the
    groupcompress repository formats; any other pair is passed on to
    orig_method, whose result is returned.
    """
    gc_formats = [
        RepositoryFormatPackGCPlain,
        RepositoryFormatPackGCRichRoot,
        RepositoryFormatPackGCSubtrees,
        ]
    if chk_support:
        gc_formats.extend([
            RepositoryFormatPackGCPlainCHK,
            RepositoryFormatPackGCPlainCHK16,
            ## RepositoryFormatPackGCPlainCHK16b,
            ## RepositoryFormatPackGCPlainCHK63,
            ## RepositoryFormatPackGCPlainCHK127a,
            ## RepositoryFormatPackGCPlainCHK127b,
            RepositoryFormatPackGCPlainCHK255,
            ])
    gc_formats = tuple(gc_formats)
    # Check source first and stop at the first match.
    for repo in (source, target):
        if isinstance(repo._format, gc_formats):
            return False
    return orig_method(source, target)


# The orig_method default above was bound to the original is_compatible when
# the function was defined, so installing the replacement here does not make
# pack_incompatible call itself.
InterPackRepo.is_compatible = staticmethod(pack_incompatible)