# groupcompress, a bzr plugin providing improved disk utilisation
# Copyright (C) 2008 Canonical Limited.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as published
# by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
#

"""Repository formats using B+Tree indices and groupcompress compression."""

import md5
import time

from bzrlib import (
    debug,
    errors,
    knit,
    inventory,
    pack,
    repository,
    trace,
    ui,
    )
from bzrlib.btree_index import (
    BTreeBuilder,
    BTreeGraphIndex,
    )
from bzrlib.index import GraphIndex, GraphIndexBuilder
from bzrlib.repository import InterPackRepo
from bzrlib.plugins.groupcompress.groupcompress import (
    _GCGraphIndex,
    GroupCompressVersionedFiles,
    )
from bzrlib.osutils import rand_chars
from bzrlib.trace import mutter
from bzrlib.repofmt.pack_repo import (
    Pack,
    NewPack,
    KnitPackRepository,
    RepositoryPackCollection,
    RepositoryFormatPackDevelopment2,
    RepositoryFormatPackDevelopment2Subtree,
    RepositoryFormatKnitPack1,
    RepositoryFormatKnitPack3,
    RepositoryFormatKnitPack4,
    Packer,
    ReconcilePacker,
    OptimisingPacker,
    )
try:
    from bzrlib.repofmt.pack_repo import (
    CHKInventoryRepository,
    RepositoryFormatPackDevelopment5,
    RepositoryFormatPackDevelopment5Hash16,
##    RepositoryFormatPackDevelopment5Hash16b,
##    RepositoryFormatPackDevelopment5Hash63,
##    RepositoryFormatPackDevelopment5Hash127a,
##    RepositoryFormatPackDevelopment5Hash127b,
    RepositoryFormatPackDevelopment5Hash255,
    )
    from bzrlib import chk_map
    chk_support = True
except ImportError:
    chk_support = False
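
# 'chk_support' records whether this copy of bzrlib ships the experimental
# CHK (brisbane-core) repository code.  Everything CHK-specific below is
# guarded on it, so the plugin still imports cleanly against a bzr.dev that
# predates those classes.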


def open_pack(self):
    return self._pack_collection.pack_factory(self._pack_collection,
        upload_suffix=self.suffix,
        file_mode=self._pack_collection.repo.bzrdir._get_file_mode())


Packer.open_pack = open_pack
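# The assignment above replaces Packer.open_pack for the whole process: every
# packer now asks its pack collection's 'pack_factory' for new packs, which is
# what lets GCRepositoryPackCollection (below) substitute GCPack objects
# without copying the rest of the Packer machinery.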


class GCPack(NewPack):

    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Create a GCPack instance.

        :param pack_collection: The RepositoryPackCollection this pack belongs
            to; it supplies the index builder class and the upload, index and
            pack transports.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g. '.autopack'
        :param file_mode: An optional file mode to create the new files with.
        """
        # Copied from bzr.dev's NewPack.__init__ and changed to:
        # - change inventory reference list length to 1
        # - change texts reference lists to 1
        # TODO: patch this to be parameterised upstream

        # The relative locations of the packs are constrained, but all are
        # passed in because the caller has them, so as to avoid object churn.
        index_builder_class = pack_collection._index_builder_class
        if chk_support:
            # from brisbane-core
            if pack_collection.chk_index is not None:
                chk_index = index_builder_class(reference_lists=0)
            else:
                chk_index = None
            Pack.__init__(self,
                # Revisions: parents list, no text compression.
                index_builder_class(reference_lists=1),
                # Inventory: We want to map compression only, but currently the
                # knit code hasn't been updated enough to understand that, so
                # we have a regular single-list index giving parents.
                index_builder_class(reference_lists=1),
                # Texts: per file graph, for all fileids - one reference list
                # and two elements in the key tuple.
                index_builder_class(reference_lists=1, key_elements=2),
                # Signatures: Just blobs to store, no compression, no parents
                # listing.
                index_builder_class(reference_lists=0),
                # CHK based storage - just blobs, no compression or parents.
                chk_index=chk_index
                )
        else:
            # from bzr.dev
            Pack.__init__(self,
                # Revisions: parents list, no text compression.
                index_builder_class(reference_lists=1),
                # Inventory: compressed, with graph for compatibility with other
                # existing bzrlib code.
                index_builder_class(reference_lists=1),
                # Texts: per file graph:
                index_builder_class(reference_lists=1, key_elements=2),
                # Signatures: Just blobs to store, no compression, no parents
                # listing.
                index_builder_class(reference_lists=0),
                )
        self._pack_collection = pack_collection
        # When we make readonly indices, we need this.
        self.index_class = pack_collection._index_class
        # where should the new pack be opened
        self.upload_transport = pack_collection._upload_transport
        # where are indices written out to
        self.index_transport = pack_collection._index_transport
        # where is the pack renamed to when it is finished?
        self.pack_transport = pack_collection._pack_transport
        # What file mode to upload the pack and indices with.
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        self._hash = md5.new()
        # a four-tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        # is not safe unless the client knows it won't be reading from the pack
        # under creation.
        self._cache_limit = 0
        # the temporary pack file name.
        self.random_name = rand_chars(20) + upload_suffix
        # when was this pack started?
        self.start_time = time.time()
        # open an output stream for the data added to the pack.
        self.write_stream = self.upload_transport.open_write_stream(
            self.random_name, mode=self._file_mode)
        if 'pack' in debug.debug_flags:
            mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
                time.ctime(), self.upload_transport.base, self.random_name,
                time.time() - self.start_time)
        # A list of byte sequences to be written to the new pack, and the
        # aggregate size of them.  Stored as a list rather than separate
        # variables so that the _write_data closure below can update them.
        self._buffer = [[], 0]
        # create a callable for adding data
        #
        # robertc says- this is a closure rather than a method on the object
        # so that the variables are locals, and faster than accessing object
        # members.
        def _write_data(bytes, flush=False, _buffer=self._buffer,
            _write=self.write_stream.write, _update=self._hash.update):
            _buffer[0].append(bytes)
            _buffer[1] += len(bytes)
            # buffer cap
            if _buffer[1] > self._cache_limit or flush:
                bytes = ''.join(_buffer[0])
                _write(bytes)
                _update(bytes)
                _buffer[:] = [[], 0]
        # expose this on self, for the occasion when clients want to add data.
        self._write_data = _write_data
        # a pack writer object to serialise pack records.
        self._writer = pack.ContainerWriter(self._write_data)
        self._writer.begin()
        # what state is the pack in? (open, finished, aborted)
        self._state = 'open'


RepositoryPackCollection.pack_factory = NewPack
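

# Illustrative sketch only -- not used by the plugin.  It shows, in isolation,
# the buffering that GCPack._write_data performs above: byte strings are
# accumulated in a [chunks, total_size] pair and handed to the writer in one
# batch once the total exceeds the cache limit (or on an explicit flush).
# 'write' stands in for write_stream.write; the md5 update that the real
# closure also performs is omitted for brevity, and the names here are
# hypothetical.
def _example_buffered_writes(chunks, write, cache_limit=0):
    pending = [[], 0]
    for bytes in chunks:
        pending[0].append(bytes)
        pending[1] += len(bytes)
        if pending[1] > cache_limit:
            # flush everything buffered so far as a single write
            write(''.join(pending[0]))
            pending[:] = [[], 0]
    if pending[0]:
        # final flush for anything left in the buffer
        write(''.join(pending[0]))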

class GCRepositoryPackCollection(RepositoryPackCollection):

    pack_factory = GCPack

    def _make_index(self, name, suffix):
        """Overridden to use BTreeGraphIndex objects."""
        size_offset = self._suffix_offsets[suffix]
        index_name = name + suffix
        index_size = self._names[name][size_offset]
        return BTreeGraphIndex(
            self._index_transport, index_name, index_size)

    def _start_write_group(self):
        # Overridden to use 'self.pack_factory()' so a GCPack is created.
        # Do not permit preparation for writing if we're not in a 'write lock'.
        if not self.repo.is_write_locked():
            raise errors.NotWriteLocked(self)
        self._new_pack = self.pack_factory(self, upload_suffix='.pack',
            file_mode=self.repo.bzrdir._get_file_mode())
        # allow writing: queue writes to a new index
        self.revision_index.add_writable_index(self._new_pack.revision_index,
            self._new_pack)
        self.inventory_index.add_writable_index(self._new_pack.inventory_index,
            self._new_pack)
        self.text_index.add_writable_index(self._new_pack.text_index,
            self._new_pack)
        self.signature_index.add_writable_index(self._new_pack.signature_index,
            self._new_pack)
        if chk_support and self.chk_index is not None:
            self.chk_index.add_writable_index(self._new_pack.chk_index,
                self._new_pack)
            self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback

        self.repo.inventories._index._add_callback = self.inventory_index.add_callback
        self.repo.revisions._index._add_callback = self.revision_index.add_callback
        self.repo.signatures._index._add_callback = self.signature_index.add_callback
        self.repo.texts._index._add_callback = self.text_index.add_callback

    def _get_filtered_inv_stream(self, source_vf, keys):
        """Yield the inventory texts unchanged, collecting their chk root keys."""
        id_roots = []
        p_id_roots = []
        id_roots_set = set()
        p_id_roots_set = set()
        def _filter_inv_stream(stream):
            for idx, record in enumerate(stream):
                ### child_pb.update('fetch inv', idx, len(inv_keys_to_fetch))
                bytes = record.get_bytes_as('fulltext')
                chk_inv = inventory.CHKInventory.deserialise(None, bytes, record.key)
                key = chk_inv.id_to_entry.key()
                if key not in id_roots_set:
                    id_roots.append(key)
                    id_roots_set.add(key)
                p_id_map = chk_inv.parent_id_basename_to_file_id
                if p_id_map is not None:
                    key = p_id_map.key()
                    if key not in p_id_roots_set:
                        p_id_roots_set.add(key)
                        p_id_roots.append(key)
                yield record
        stream = source_vf.get_record_stream(keys, 'gc-optimal', True)
        return _filter_inv_stream(stream), id_roots, p_id_roots

    def _get_chk_stream(self, source_vf, keys, id_roots, p_id_roots, pb=None):
        # We want to stream the keys from 'id_roots', and things they
        # reference, and then stream things from p_id_roots and things they
        # reference, and then any remaining keys that we didn't get to.

        # We also group referenced texts together, so if one root references a
        # text with prefix 'a', and another root references a node with prefix
        # 'a', we want to yield those nodes before we yield the nodes for 'b'.
        # This keeps 'similar' nodes together.  (A simplified standalone
        # sketch of this walk appears after this class.)

        # Note: We probably actually want multiple streams here, to help the
        #       client understand that the different levels won't compress well
        #       against each other
        remaining_keys = set(keys)
        counter = [0]
        def _get_referenced_stream(root_keys):
            cur_keys = root_keys
            while cur_keys:
                keys_by_search_prefix = {}
                remaining_keys.difference_update(cur_keys)
                next_keys = set()
                stream = source_vf.get_record_stream(cur_keys, 'as-requested',
                                                     True)
                def next_stream():
                    for record in stream:
                        bytes = record.get_bytes_as('fulltext')
                        # We don't care about search_key_func for this code,
                        # because we only care about external references.
                        node = chk_map._deserialise(bytes, record.key,
                                                    search_key_func=None)
                        common_base = node._search_prefix
                        if isinstance(node, chk_map.InternalNode):
                            for prefix, value in node._items.iteritems():
                                assert isinstance(value, tuple)
                                if value not in next_keys:
                                    keys_by_search_prefix.setdefault(prefix,
                                        []).append(value)
                                    next_keys.add(value)
                        counter[0] += 1
                        if pb is not None:
                            pb.update('chk node', counter[0])
                        yield record
                yield next_stream()
                # Double check that we won't be emitting any keys twice
                next_keys = next_keys.intersection(remaining_keys)
                cur_keys = []
                for prefix in sorted(keys_by_search_prefix):
                    cur_keys.extend(keys_by_search_prefix[prefix])
        for stream in _get_referenced_stream(id_roots):
            yield stream
        for stream in _get_referenced_stream(p_id_roots):
            yield stream
        if remaining_keys:
            trace.note('There were %d keys in the chk index, which'
                       ' were not referenced from inventories',
                       len(remaining_keys))
            stream = source_vf.get_record_stream(remaining_keys, 'unordered',
                                                 True)
            yield stream

    def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
                                 reload_func=None):
        """Execute a series of pack operations.

        :param pack_operations: A list of [revision_count, packs_to_combine].
        :param _packer_class: The class of packer to use (default: Packer).
        :return: None.
        """
        for revision_count, packs in pack_operations:
            # we may have no-ops from the setup logic
            if len(packs) == 0:
                continue
            # Create a new temp VersionedFile instance based on these packs,
            # and then just fetch everything into the target

            # XXX: Find a way to 'set_optimize' on the newly created pack
            #      indexes
            #    def open_pack(self):
            #       """Open a pack for the pack we are creating."""
            #       new_pack = super(OptimisingPacker, self).open_pack()
            #       # Turn on the optimization flags for all the index builders.
            #       new_pack.revision_index.set_optimize(for_size=True)
            #       new_pack.inventory_index.set_optimize(for_size=True)
            #       new_pack.text_index.set_optimize(for_size=True)
            #       new_pack.signature_index.set_optimize(for_size=True)
            #       return new_pack
            to_copy = [('revision_index', 'revisions'),
                       ('inventory_index', 'inventories'),
                       ('text_index', 'texts'),
                       ('signature_index', 'signatures'),
                      ]
            # TODO: This is a very non-optimal ordering for chk_bytes. The
            #       issue is that pages that are similar are not transmitted
            #       together. Perhaps get_record_stream('gc-optimal') should be
            #       taught about how to group chk pages?
            has_chk = False
            if getattr(self, 'chk_index', None) is not None:
                has_chk = True
                to_copy.insert(2, ('chk_index', 'chk_bytes'))

            # Shouldn't we start_write_group around this?
            if self._new_pack is not None:
                raise errors.BzrError('call to %s.pack() while another pack is'
                                      ' being written.'
                                      % (self.__class__.__name__,))
            new_pack = self.pack_factory(self, 'autopack',
                                         self.repo.bzrdir._get_file_mode())
            new_pack.set_write_cache_size(1024*1024)
            # TODO: A better alternative is to probably use Packer.open_pack(), and
            #       then create a GroupCompressVersionedFiles() around the
            #       target pack to insert into.
            pb = ui.ui_factory.nested_progress_bar()
            try:
                for idx, (index_name, vf_name) in enumerate(to_copy):
                    pb.update('repacking %s' % (vf_name,), idx + 1, len(to_copy))
                    keys = set()
                    new_index = getattr(new_pack, index_name)
                    new_index.set_optimize(for_size=True)
                    for pack in packs:
                        source_index = getattr(pack, index_name)
                        keys.update(e[1] for e in source_index.iter_all_entries())
                    source_vf = getattr(self.repo, vf_name)
                    target_access = knit._DirectPackAccess({})
                    target_access.set_writer(new_pack._writer, new_index,
                                             new_pack.access_tuple())
                    target_vf = GroupCompressVersionedFiles(
                        _GCGraphIndex(new_index,
                                      add_callback=new_index.add_nodes,
                                      parents=source_vf._index._parents,
                                      is_locked=self.repo.is_locked),
                        access=target_access,
                        delta=source_vf._delta)
                    stream = None
                    child_pb = ui.ui_factory.nested_progress_bar()
                    try:
                        if has_chk:
                            if vf_name == 'inventories':
                                stream, id_roots, p_id_roots = self._get_filtered_inv_stream(
                                    source_vf, keys)
                            elif vf_name == 'chk_bytes':
                                for stream in self._get_chk_stream(source_vf, keys,
                                                    id_roots, p_id_roots,
                                                    pb=child_pb):
                                    target_vf.insert_record_stream(stream)
                                # No more to copy
                                stream = []
                        if stream is None:
                            def pb_stream():
                                substream = source_vf.get_record_stream(keys, 'gc-optimal', True)
                                for idx, record in enumerate(substream):
                                    child_pb.update(vf_name, idx, len(keys))
                                    yield record
                            stream = pb_stream()
                        target_vf.insert_record_stream(stream)
                    finally:
                        child_pb.finished()
                new_pack._check_references() # shouldn't be needed
            except:
                pb.finished()
                new_pack.abort()
                raise
            else:
                pb.finished()
                if not new_pack.data_inserted():
                    raise AssertionError('We copied from pack files,'
                                         ' but had no data copied')
                    # we need to abort somehow, because we don't want to remove
                    # the other packs
                new_pack.finish()
                self.allocate(new_pack)
            for pack in packs:
                self._remove_pack_from_memory(pack)
        # record the newly available packs and stop advertising the old
        # packs
        self._save_pack_names(clear_obsolete_packs=True)
        # Move the old packs out of the way now they are no longer referenced.
        for revision_count, packs in pack_operations:
            self._obsolete_packs(packs)
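

# Illustrative sketch only -- not part of the plugin.  It shows the ordering
# idea behind _get_filtered_inv_stream/_get_chk_stream above, reduced to plain
# data: starting from the chk root keys collected while streaming inventories,
# each level of the CHK map is emitted before the next, and the keys for the
# next level are grouped by their search prefix so that similar pages end up
# adjacent in the stream (which helps groupcompress).  'get_children' is a
# hypothetical callable returning (prefix, child_key) pairs for internal nodes
# and an empty sequence for leaf nodes.
def _example_prefix_grouped_walk(root_keys, get_children):
    seen = set()
    cur_keys = list(root_keys)
    while cur_keys:
        keys_by_prefix = {}
        for key in cur_keys:
            if key in seen:
                continue
            seen.add(key)
            # emit this page before descending to the next level
            yield key
            for prefix, child in get_children(key):
                keys_by_prefix.setdefault(prefix, []).append(child)
        # next level: keys with the same prefix stay adjacent
        cur_keys = []
        for prefix in sorted(keys_by_prefix):
            cur_keys.extend(keys_by_prefix[prefix])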


class GCPackRepository(KnitPackRepository):
    """GC customisation of KnitPackRepository."""

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Overridden to change pack collection class."""
        KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        # and now replace everything it did :)
        index_transport = self._transport.clone('indices')
        if chk_support:
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class,
                use_chk_index=self._format.supports_chks,
                )
        else:
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class)
        self.inventories = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.inventory_index.data_access)
        self.revisions = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.revision_index.data_access,
            delta=False)
        self.signatures = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                parents=False, is_locked=self.is_locked),
            access=self._pack_collection.signature_index.data_access,
            delta=False)
        self.texts = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.text_index.data_access)
        if chk_support and _format.supports_chks:
            # No graph, no compression:- references from chks are between
            # different objects not temporal versions of the same; and without
            # some sort of temporal structure knit compression will just fail.
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.chk_index.data_access)
        else:
            self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
        # Note: We cannot unpack a delta that references a text we haven't seen yet.
        #       there are 2 options, work in fulltexts, or require topological
        #       sorting. Using fulltexts is more optimal for local operations,
        #       because the source can be smart about extracting multiple
        #       in-a-row (and sharing strings). Topological is better for
        #       remote, because we access less data.
        self._fetch_order = 'unordered'
        self._fetch_gc_optimal = True
        self._fetch_uses_deltas = False


if chk_support:
    class GCCHKPackRepository(CHKInventoryRepository):
        """GC customisation of CHKInventoryRepository."""

        def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
            _serializer):
            """Overridden to change pack collection class."""
            KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
                _commit_builder_class, _serializer)
            # and now replace everything it did :)
            index_transport = self._transport.clone('indices')
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class,
                use_chk_index=self._format.supports_chks,
                )
            self.inventories = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                    add_callback=self._pack_collection.inventory_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.inventory_index.data_access)
            self.revisions = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                    add_callback=self._pack_collection.revision_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.revision_index.data_access,
                delta=False)
            self.signatures = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                    add_callback=self._pack_collection.signature_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.signature_index.data_access,
                delta=False)
            self.texts = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.text_index.combined_index,
                    add_callback=self._pack_collection.text_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.text_index.data_access)
            assert _format.supports_chks
            # No parents, individual CHK pages don't have specific ancestry
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.chk_index.data_access)
            # True when the repository object is 'write locked' (as opposed to the
            # physical lock only taken out around changes to the pack-names list.)
            # Another way to represent this would be a decorator around the control
            # files object that presents logical locks as physical ones - if this
            # gets ugly consider that alternative design. RBC 20071011
            self._write_lock_count = 0
            self._transaction = None
            # for tests
            self._reconcile_does_inventory_gc = True
            self._reconcile_fixes_text_parents = True
            self._reconcile_backsup_inventory = False
            self._fetch_order = 'unordered'
            self._fetch_gc_optimal = True
            self._fetch_uses_deltas = False


class RepositoryFormatPackGCPlain(RepositoryFormatPackDevelopment2):
    """A pack repository using B+Tree indices and groupcompress."""

    repository_class = GCPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format - btree+gc "
            "(needs bzr.dev from 1.6)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - btree+groupcompress, "
            "interoperates with pack-0.92\n")


class RepositoryFormatPackGCRichRoot(RepositoryFormatKnitPack4):
    """A rich-root pack repository using B+Tree indices and groupcompress."""

    repository_class = GCPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format - btree+gc-rich-root "
            "(needs bzr.dev from 1.6)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - btree+groupcompress, "
            "interoperates with rich-root-pack\n")


class RepositoryFormatPackGCSubtrees(RepositoryFormatPackDevelopment2Subtree):
    """A subtree pack repository using B+Tree indices and groupcompress."""

    repository_class = GCPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format - btree+gc-subtrees "
            "(needs bzr.dev from 1.6)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - btree+groupcompress, "
            "interoperates with pack-0.92-subtrees\n")


if chk_support:
    class RepositoryFormatPackGCPlainCHK(RepositoryFormatPackDevelopment5):
        """A CHK+group compress pack repository."""

        repository_class = GCCHKPackRepository

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            return ('Bazaar development format - chk+gc'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return ("Development repository format - chk+groupcompress")


    class RepositoryFormatPackGCPlainCHK16(RepositoryFormatPackDevelopment5Hash16):
        """A hashed CHK+group compress pack repository."""

        repository_class = GCCHKPackRepository

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            return ('Bazaar development format - hash16chk+gc'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return ("Development repository format - hash16chk+groupcompress")


##    class RepositoryFormatPackGCPlainCHK16b(RepositoryFormatPackDevelopment5Hash16b):
##        """A hashed CHK+group compress pack repository."""
##
##        repository_class = GCCHKPackRepository
##
##        def get_format_string(self):
##            """See RepositoryFormat.get_format_string()."""
##            return ('Bazaar development format - hash16bchk+gc'
##                    ' (needs bzr.dev from 1.13)\n')
##
##        def get_format_description(self):
##            """See RepositoryFormat.get_format_description()."""
##            return ("Development repository format - hash16bchk+groupcompress")
##
##
##    class RepositoryFormatPackGCPlainCHK63(RepositoryFormatPackDevelopment5Hash63):
##        """A hashed CHK+group compress pack repository."""
##
##        repository_class = GCCHKPackRepository
##
##        def get_format_string(self):
##            """See RepositoryFormat.get_format_string()."""
##            return ('Bazaar development format - hash63+gc'
##                    ' (needs bzr.dev from 1.13)\n')
##
##        def get_format_description(self):
##            """See RepositoryFormat.get_format_description()."""
##            return ("Development repository format - hash63+groupcompress")
##
##
##    class RepositoryFormatPackGCPlainCHK127a(RepositoryFormatPackDevelopment5Hash127a):
##        """A hashed CHK+group compress pack repository."""
##
##        repository_class = GCCHKPackRepository
##
##        def get_format_string(self):
##            """See RepositoryFormat.get_format_string()."""
##            return ('Bazaar development format - hash127a+gc'
##                    ' (needs bzr.dev from 1.13)\n')
##
##        def get_format_description(self):
##            """See RepositoryFormat.get_format_description()."""
##            return ("Development repository format - hash127a+groupcompress")
##
##
##    class RepositoryFormatPackGCPlainCHK127b(RepositoryFormatPackDevelopment5Hash127b):
##        """A hashed CHK+group compress pack repository."""
##
##        repository_class = GCCHKPackRepository
##
##        def get_format_string(self):
##            """See RepositoryFormat.get_format_string()."""
##            return ('Bazaar development format - hash127b+gc'
##                    ' (needs bzr.dev from 1.13)\n')
##
##        def get_format_description(self):
##            """See RepositoryFormat.get_format_description()."""
##            return ("Development repository format - hash127b+groupcompress")


    class RepositoryFormatPackGCPlainCHK255(RepositoryFormatPackDevelopment5Hash255):
        """A hashed CHK+group compress pack repository."""

        repository_class = GCCHKPackRepository

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            return ('Bazaar development format - hash255chk+gc'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return ("Development repository format - hash255chk+groupcompress")


def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
    """Be incompatible with the regular fetch code."""
    formats = (RepositoryFormatPackGCPlain, RepositoryFormatPackGCRichRoot,
        RepositoryFormatPackGCSubtrees)
    if chk_support:
        formats = formats + (RepositoryFormatPackGCPlainCHK,
                             RepositoryFormatPackGCPlainCHK16,
                             ## RepositoryFormatPackGCPlainCHK16b,
                             ## RepositoryFormatPackGCPlainCHK63,
                             ## RepositoryFormatPackGCPlainCHK127a,
                             ## RepositoryFormatPackGCPlainCHK127b,
                             RepositoryFormatPackGCPlainCHK255)
    if isinstance(source._format, formats) or isinstance(target._format, formats):
        return False
    else:
        return orig_method(source, target)


InterPackRepo.is_compatible = staticmethod(pack_incompatible)
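
# The override above keeps the stock behaviour reachable: the original
# InterPackRepo.is_compatible is captured as the 'orig_method' default
# argument, so repository pairs that do not involve a groupcompress format
# still take the regular InterPackRepo path, while any pair touching one of
# the formats above is pushed onto the generic fetch code instead.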