To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

# groupcompress, a bzr plugin providing improved disk utilisation
# Copyright (C) 2008 Canonical Limited.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as published
# by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
#

"""Repostory formats using B+Tree indices and groupcompress compression."""

import md5
import time

from bzrlib import (
    debug,
    errors,
    knit,
    inventory,
    pack,
    repository,
    ui,
    )
from bzrlib.btree_index import (
    BTreeBuilder,
    BTreeGraphIndex,
    )
from bzrlib.index import GraphIndex, GraphIndexBuilder
from bzrlib.repository import InterPackRepo
from bzrlib.plugins.groupcompress.groupcompress import (
    _GCGraphIndex,
    GroupCompressVersionedFiles,
    )
from bzrlib.osutils import rand_chars
from bzrlib.repofmt.pack_repo import (
    Pack,
    NewPack,
    KnitPackRepository,
    RepositoryPackCollection,
    RepositoryFormatPackDevelopment2,
    RepositoryFormatPackDevelopment2Subtree,
    RepositoryFormatKnitPack1,
    RepositoryFormatKnitPack3,
    RepositoryFormatKnitPack4,
    Packer,
    ReconcilePacker,
    OptimisingPacker,
    )
try:
    from bzrlib.repofmt.pack_repo import (
        CHKInventoryRepository,
        RepositoryFormatPackDevelopment5,
        RepositoryFormatPackDevelopment5Hash16,
##        RepositoryFormatPackDevelopment5Hash16b,
##        RepositoryFormatPackDevelopment5Hash63,
##        RepositoryFormatPackDevelopment5Hash127a,
##        RepositoryFormatPackDevelopment5Hash127b,
        RepositoryFormatPackDevelopment5Hash255,
        )
    from bzrlib import chk_map
    chk_support = True
except ImportError:
    chk_support = False
from bzrlib.trace import mutter, note

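# The plugin works by monkey-patching bzrlib's stock pack machinery rather
# than forking it: Packer.open_pack below is replaced so that new packs are
# created through the owning collection's pack_factory attribute, which the
# GC classes further down point at GCPack.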
def open_pack(self):
    return self._pack_collection.pack_factory(self._pack_collection,
        upload_suffix=self.suffix,
        file_mode=self._pack_collection.repo.bzrdir._get_file_mode())


Packer.open_pack = open_pack


class GCPack(NewPack):

    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Create a NewPack instance.

        :param pack_collection: A PackCollection into which this is being
            inserted.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation, e.g. '.autopack'.
        :param file_mode: An optional file mode to create the new files with.
        """
        # replaced from bzr.dev to:
        # - change inventory reference list length to 1
        # - change texts reference lists to 1
        # TODO: patch this to be parameterised upstream

        # The relative locations of the packs are constrained, but all are
        # passed in because the caller has them, so as to avoid object churn.
        index_builder_class = pack_collection._index_builder_class
        if chk_support:
            # from brisbane-core
            if pack_collection.chk_index is not None:
                chk_index = index_builder_class(reference_lists=0)
            else:
                chk_index = None
            Pack.__init__(self,
                # Revisions: parents list, no text compression.
                index_builder_class(reference_lists=1),
                # Inventory: We want to map compression only, but currently the
                # knit code hasn't been updated enough to understand that, so we
                # have a regular 2-list index giving parents and compression
                # source.
                index_builder_class(reference_lists=1),
                # Texts: compression and per file graph, for all fileids - so two
                # reference lists and two elements in the key tuple.
                index_builder_class(reference_lists=1, key_elements=2),
                # Signatures: Just blobs to store, no compression, no parents
                # listing.
                index_builder_class(reference_lists=0),
                # CHK based storage - just blobs, no compression or parents.
                chk_index=chk_index
                )
        else:
            # from bzr.dev
            Pack.__init__(self,
                # Revisions: parents list, no text compression.
                index_builder_class(reference_lists=1),
                # Inventory: compressed, with graph for compatibility with other
                # existing bzrlib code.
                index_builder_class(reference_lists=1),
                # Texts: per file graph:
                index_builder_class(reference_lists=1, key_elements=2),
                # Signatures: Just blobs to store, no compression, no parents
                # listing.
                index_builder_class(reference_lists=0),
                )
        self._pack_collection = pack_collection
        # When we make readonly indices, we need this.
        self.index_class = pack_collection._index_class
        # where should the new pack be opened
        self.upload_transport = pack_collection._upload_transport
        # where are indices written out to
        self.index_transport = pack_collection._index_transport
        # where is the pack renamed to when it is finished?
        self.pack_transport = pack_collection._pack_transport
        # What file mode to upload the pack and indices with.
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        self._hash = md5.new()
        # a four-tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        # is not safe unless the client knows it won't be reading from the pack
        # under creation.
        self._cache_limit = 0
        # the temporary pack file name.
        self.random_name = rand_chars(20) + upload_suffix
        # when was this pack started ?
        self.start_time = time.time()
        # open an output stream for the data added to the pack.
        self.write_stream = self.upload_transport.open_write_stream(
            self.random_name, mode=self._file_mode)
        if 'pack' in debug.debug_flags:
            mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
                time.ctime(), self.upload_transport.base, self.random_name,
                time.time() - self.start_time)
        # A list of byte sequences to be written to the new pack, and the
        # aggregate size of them.  Stored as a list rather than separate
        # variables so that the _write_data closure below can update them.
        self._buffer = [[], 0]
        # create a callable for adding data
        #
        # robertc says- this is a closure rather than a method on the object
        # so that the variables are locals, and faster than accessing object
        # members.
        def _write_data(bytes, flush=False, _buffer=self._buffer,
            _write=self.write_stream.write, _update=self._hash.update):
            _buffer[0].append(bytes)
            _buffer[1] += len(bytes)
            # buffer cap
            if _buffer[1] > self._cache_limit or flush:
                bytes = ''.join(_buffer[0])
                _write(bytes)
                _update(bytes)
                _buffer[:] = [[], 0]
        # expose this on self, for the occasion when clients want to add data.
        self._write_data = _write_data
        # a pack writer object to serialise pack records.
        self._writer = pack.ContainerWriter(self._write_data)
        self._writer.begin()
        # what state is the pack in? (open, finished, aborted)
        self._state = 'open'

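# The stock collection class gains a pack_factory attribute pointing at its
# existing NewPack behaviour; GCRepositoryPackCollection below overrides it
# with GCPack so the same code paths create group-compress packs instead.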
RepositoryPackCollection.pack_factory = NewPack


class GCRepositoryPackCollection(RepositoryPackCollection):

    pack_factory = GCPack

    def _make_index(self, name, suffix):
        """Overridden to use BTreeGraphIndex objects."""
        size_offset = self._suffix_offsets[suffix]
        index_name = name + suffix
        index_size = self._names[name][size_offset]
        return BTreeGraphIndex(
            self._index_transport, index_name, index_size)

    def _start_write_group(self):
        # Overridden to add 'self.pack_factory()'
        # Do not permit preparation for writing if we're not in a 'write lock'.
        if not self.repo.is_write_locked():
            raise errors.NotWriteLocked(self)
        self._new_pack = self.pack_factory(self, upload_suffix='.pack',
            file_mode=self.repo.bzrdir._get_file_mode())
        # allow writing: queue writes to a new index
        self.revision_index.add_writable_index(self._new_pack.revision_index,
            self._new_pack)
        self.inventory_index.add_writable_index(self._new_pack.inventory_index,
            self._new_pack)
        self.text_index.add_writable_index(self._new_pack.text_index,
            self._new_pack)
        self.signature_index.add_writable_index(self._new_pack.signature_index,
            self._new_pack)
        if chk_support and self.chk_index is not None:
            self.chk_index.add_writable_index(self._new_pack.chk_index,
                self._new_pack)
            self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback

        self.repo.inventories._index._add_callback = self.inventory_index.add_callback
        self.repo.revisions._index._add_callback = self.revision_index.add_callback
        self.repo.signatures._index._add_callback = self.signature_index.add_callback
        self.repo.texts._index._add_callback = self.text_index.add_callback

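    # The inventory stream is wrapped rather than consumed here: the CHK root
    # keys are collected as records flow past, so the returned id_roots and
    # p_id_roots lists are only fully populated once the caller has exhausted
    # the stream (which _execute_pack_operations does before it asks for the
    # chk_bytes stream).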
    def _get_filtered_inv_stream(self, source_vf, keys):
        """Filter the texts of inventories, to find the chk pages."""
        id_roots = []
        p_id_roots = []
        id_roots_set = set()
        p_id_roots_set = set()
        def _filter_inv_stream(stream):
            for idx, record in enumerate(stream):
                ### child_pb.update('fetch inv', idx, len(inv_keys_to_fetch))
                bytes = record.get_bytes_as('fulltext')
                chk_inv = inventory.CHKInventory.deserialise(None, bytes, record.key)
                key = chk_inv.id_to_entry.key()
                if key not in id_roots_set:
                    id_roots.append(key)
                    id_roots_set.add(key)
                p_id_map = chk_inv.parent_id_basename_to_file_id
                if p_id_map is not None:
                    key = p_id_map.key()
                    if key not in p_id_roots_set:
                        p_id_roots_set.add(key)
                        p_id_roots.append(key)
                yield record
        stream = source_vf.get_record_stream(keys, 'gc-optimal', True)
        return _filter_inv_stream(stream), id_roots, p_id_roots

    def _get_chk_stream(self, source_vf, keys, id_roots, p_id_roots, pb=None):
        # We want to stream the keys from 'id_roots', and things they
        # reference, and then stream things from p_id_roots and things they
        # reference, and then any remaining keys that we didn't get to.

        # We also group referenced texts together, so if one root references a
        # text with prefix 'a', and another root references a node with prefix
        # 'a', we want to yield those nodes before we yield the nodes for 'b'
        # This keeps 'similar' nodes together.

        # Note: We probably actually want multiple streams here, to help the
        #       client understand that the different levels won't compress well
        #       against each other.
        #       Test the difference between using one Group per level, and
        #       using 1 Group per prefix. (so '' (root) would get a group, then
        #       all the references to search-key 'a' would get a group, etc.)
        remaining_keys = set(keys)
        counter = [0]
        def _get_referenced_stream(root_keys):
            cur_keys = root_keys
            while cur_keys:
                keys_by_search_prefix = {}
                remaining_keys.difference_update(cur_keys)
                next_keys = set()
                stream = source_vf.get_record_stream(cur_keys, 'as-requested',
                                                     True)
                def next_stream():
                    for record in stream:
                        bytes = record.get_bytes_as('fulltext')
                        # We don't care about search_key_func for this code,
                        # because we only care about external references.
                        node = chk_map._deserialise(bytes, record.key,
                                                    search_key_func=None)
                        common_base = node._search_prefix
                        if isinstance(node, chk_map.InternalNode):
                            for prefix, value in node._items.iteritems():
                                if not isinstance(value, tuple):
                                    raise AssertionError("value is %s when"
                                        " tuple expected" % (value.__class__))
                                if value not in next_keys:
                                    keys_by_search_prefix.setdefault(prefix,
                                        []).append(value)
                                    next_keys.add(value)
                        counter[0] += 1
                        if pb is not None:
                            pb.update('chk node', counter[0])
                        yield record
                yield next_stream()
                # Double check that we won't be emitting any keys twice
                next_keys = next_keys.intersection(remaining_keys)
                cur_keys = []
                for prefix in sorted(keys_by_search_prefix):
                    cur_keys.extend(keys_by_search_prefix[prefix])
        for stream in _get_referenced_stream(id_roots):
            yield stream
        for stream in _get_referenced_stream(p_id_roots):
            yield stream
        if remaining_keys:
            note('There were %d keys in the chk index, which were not'
                ' referenced from inventories', len(remaining_keys))
            stream = source_vf.get_record_stream(remaining_keys, 'unordered',
                                                 True)
            yield stream

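    # Repacking below is a wholesale copy: for each index/VersionedFiles pair
    # the keys held by the packs being combined are read back out of the
    # source repository and re-inserted into a single new GC pack through
    # GroupCompressVersionedFiles.insert_record_stream, rather than copying
    # raw pack bytes across as the stock Packer does.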
    def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
                                 reload_func=None):
        """Execute a series of pack operations.

        :param pack_operations: A list of [revision_count, packs_to_combine].
        :param _packer_class: The class of packer to use (default: Packer).
        :return: None.
        """
        for revision_count, packs in pack_operations:
            # we may have no-ops from the setup logic
            if len(packs) == 0:
                continue
            # Create a new temp VersionedFile instance based on these packs,
            # and then just fetch everything into the target

            to_copy = [('revision_index', 'revisions'),
                       ('inventory_index', 'inventories'),
                       ('text_index', 'texts'),
                       ('signature_index', 'signatures'),
                      ]
            # TODO: This is a very non-optimal ordering for chk_bytes. The
            #       issue is that pages that are similar are not transmitted
            #       together. Perhaps get_record_stream('gc-optimal') should be
            #       taught about how to group chk pages?
            has_chk = False
            if getattr(self, 'chk_index', None) is not None:
                has_chk = True
                to_copy.insert(2, ('chk_index', 'chk_bytes'))

            # Shouldn't we start_write_group around this?
            if self._new_pack is not None:
                raise errors.BzrError('call to %s.pack() while another pack is'
                                      ' being written.'
                                      % (self.__class__.__name__,))
            new_pack = self.pack_factory(self, '.autopack',
                file_mode=self.repo.bzrdir._get_file_mode())
            new_pack.set_write_cache_size(1024*1024)
            # TODO: A better alternative is to probably use Packer.open_pack(), and
            #       then create a GroupCompressVersionedFiles() around the
            #       target pack to insert into.
            pb = ui.ui_factory.nested_progress_bar()
            try:
                for idx, (index_name, vf_name) in enumerate(to_copy):
                    pb.update('repacking %s' % (vf_name,), idx + 1, len(to_copy))
                    keys = set()
                    new_index = getattr(new_pack, index_name)
                    new_index.set_optimize(for_size=True)
                    for pack in packs:
                        source_index = getattr(pack, index_name)
                        keys.update(e[1] for e in source_index.iter_all_entries())
                    source_vf = getattr(self.repo, vf_name)
                    target_access = knit._DirectPackAccess({})
                    target_access.set_writer(new_pack._writer, new_index,
                                             new_pack.access_tuple())
                    target_vf = GroupCompressVersionedFiles(
                        _GCGraphIndex(new_index,
                                      add_callback=new_index.add_nodes,
                                      parents=source_vf._index._parents,
                                      is_locked=self.repo.is_locked),
                        access=target_access,
                        delta=source_vf._delta)
                    stream = None
                    child_pb = ui.ui_factory.nested_progress_bar()
                    try:
                        if has_chk:
                            if vf_name == 'inventories':
                                stream, id_roots, p_id_roots = self._get_filtered_inv_stream(
                                    source_vf, keys)
                            elif vf_name == 'chk_bytes':
                                for stream in self._get_chk_stream(source_vf, keys,
                                                    id_roots, p_id_roots,
                                                    pb=child_pb):
                                    target_vf.insert_record_stream(stream)
                                # No more to copy
                                stream = []
                        if stream is None:
                            def pb_stream():
                                substream = source_vf.get_record_stream(keys, 'gc-optimal', True)
                                for idx, record in enumerate(substream):
                                    child_pb.update(vf_name, idx + 1, len(keys))
                                    yield record
                            stream = pb_stream()
                        target_vf.insert_record_stream(stream)
                    finally:
                        child_pb.finished()
                new_pack._check_references() # shouldn't be needed
            except:
                pb.finished()
                new_pack.abort()
                raise
            else:
                pb.finished()
                if not new_pack.data_inserted():
                    raise AssertionError('We copied from pack files,'
                                         ' but had no data copied')
                    # we need to abort somehow, because we don't want to remove
                    # the other packs
                new_pack.finish()
                self.allocate(new_pack)
            for pack in packs:
                self._remove_pack_from_memory(pack)
        # record the newly available packs and stop advertising the old
        # packs
        self._save_pack_names(clear_obsolete_packs=True)
        # Move the old packs out of the way now they are no longer referenced.
        for revision_count, packs in pack_operations:
            self._obsolete_packs(packs)

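# The repository classes call the stock __init__ and then rebuild the
# revisions/inventories/texts/signatures (and optionally chk_bytes)
# attributes on top of GroupCompressVersionedFiles and _GCGraphIndex,
# replacing the knit-backed objects the base class created.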
class GCPackRepository(KnitPackRepository):
    """GC customisation of KnitPackRepository."""

    # Note: I think the CHK support can be dropped from this class as it's
    # implemented via the GCCHKPackRepository class defined next. IGC 20090301

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Overridden to change pack collection class."""
        KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        # and now replace everything it did :)
        index_transport = self._transport.clone('indices')
        if chk_support:
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class,
                use_chk_index=self._format.supports_chks,
                )
        else:
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class)
        self.inventories = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.inventory_index.data_access)
        self.revisions = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.revision_index.data_access,
            delta=False)
        self.signatures = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                parents=False, is_locked=self.is_locked),
            access=self._pack_collection.signature_index.data_access,
            delta=False)
        self.texts = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.text_index.data_access)
        if chk_support and _format.supports_chks:
            # No graph, no compression:- references from chks are between
            # different objects not temporal versions of the same; and without
            # some sort of temporal structure knit compression will just fail.
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.chk_index.data_access)
        else:
            self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
        # Note: We cannot unpack a delta that references a text we haven't seen yet.
        #       There are 2 options, work in fulltexts, or require topological
        #       sorting. Using fulltexts is more optimal for local operations,
        #       because the source can be smart about extracting multiple
        #       in-a-row (and sharing strings). Topological is better for
        #       remote, because we access less data.
        self._fetch_order = 'unordered'
        self._fetch_gc_optimal = True
        self._fetch_uses_deltas = False


if chk_support:
    class GCCHKPackRepository(CHKInventoryRepository):
        """GC customisation of CHKInventoryRepository."""

        def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
            _serializer):
            """Overridden to change pack collection class."""
            KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
                _commit_builder_class, _serializer)
            # and now replace everything it did :)
            index_transport = self._transport.clone('indices')
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class,
                use_chk_index=self._format.supports_chks,
                )
            self.inventories = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                    add_callback=self._pack_collection.inventory_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.inventory_index.data_access)
            self.revisions = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                    add_callback=self._pack_collection.revision_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.revision_index.data_access,
                delta=False)
            self.signatures = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                    add_callback=self._pack_collection.signature_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.signature_index.data_access,
                delta=False)
            self.texts = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.text_index.combined_index,
                    add_callback=self._pack_collection.text_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.text_index.data_access)
            assert _format.supports_chks
            # No parents, individual CHK pages don't have specific ancestry
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.chk_index.data_access)
            # True when the repository object is 'write locked' (as opposed to the
            # physical lock only taken out around changes to the pack-names list.)
            # Another way to represent this would be a decorator around the control
            # files object that presents logical locks as physical ones - if this
            # gets ugly consider that alternative design. RBC 20071011
            self._write_lock_count = 0
            self._transaction = None
            # for tests
            self._reconcile_does_inventory_gc = True
            self._reconcile_fixes_text_parents = True
            self._reconcile_backsup_inventory = False
            # Note: We cannot unpack a delta that references a text we haven't
            # seen yet. There are 2 options, work in fulltexts, or require
            # topological sorting. Using fulltexts is more optimal for local
            # operations, because the source can be smart about extracting
            # multiple in-a-row (and sharing strings). Topological is better
            # for remote, because we access less data.
            self._fetch_order = 'unordered'
            self._fetch_gc_optimal = True
            self._fetch_uses_deltas = False

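# Each format class below pairs an existing bzr pack/development format with
# the GC repository implementation and a distinct format string, so converted
# repositories are self-describing on disk while keeping the parent format's
# data model (plain, rich-root, subtrees, or CHK inventories).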
class RepositoryFormatPackGCPlain(RepositoryFormatPackDevelopment2):
    """A pack repository format using B+Tree indices and groupcompress."""

    repository_class = GCPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format - btree+gc "
            "(needs bzr.dev from 1.6)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - btree+groupcompress"
            ", interoperates with pack-0.92\n")


class RepositoryFormatPackGCRichRoot(RepositoryFormatKnitPack4):
    """A rich-root pack repository format using B+Tree indices and groupcompress."""

    repository_class = GCPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format - btree+gc-rich-root "
            "(needs bzr.dev from 1.6)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - btree+groupcompress"
            ", interoperates with rich-root-pack\n")


class RepositoryFormatPackGCSubtrees(RepositoryFormatPackDevelopment2Subtree):
    """A subtree pack repository format using B+Tree indices and groupcompress."""

    repository_class = GCPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format - btree+gc-subtrees "
            "(needs bzr.dev from 1.6)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - btree+groupcompress"
            ", interoperates with pack-0.92-subtrees\n")

if chk_support:
    class RepositoryFormatPackGCPlainCHK(RepositoryFormatPackDevelopment5):
        """A CHK+group compress pack repository."""

        repository_class = GCCHKPackRepository

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            return ('Bazaar development format - chk+gc'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return ("Development repository format - chk+groupcompress")


    class RepositoryFormatPackGCPlainCHK16(RepositoryFormatPackDevelopment5Hash16):
        """A hashed CHK+group compress pack repository."""

        repository_class = GCCHKPackRepository

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            return ('Bazaar development format - hash16chk+gc'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return ("Development repository format - hash16chk+groupcompress")


##    class RepositoryFormatPackGCPlainCHK16b(RepositoryFormatPackDevelopment5Hash16b):
##        """A hashed CHK+group compress pack repository."""
##
##        repository_class = GCCHKPackRepository
##
##        def get_format_string(self):
##            """See RepositoryFormat.get_format_string()."""
##            return ('Bazaar development format - hash16bchk+gc'
##                    ' (needs bzr.dev from 1.13)\n')
##
##        def get_format_description(self):
##            """See RepositoryFormat.get_format_description()."""
##            return ("Development repository format - hash16bchk+groupcompress")
##
##
##    class RepositoryFormatPackGCPlainCHK63(RepositoryFormatPackDevelopment5Hash63):
##        """A hashed CHK+group compress pack repository."""
##
##        repository_class = GCCHKPackRepository
##
##        def get_format_string(self):
##            """See RepositoryFormat.get_format_string()."""
##            return ('Bazaar development format - hash63+gc'
##                    ' (needs bzr.dev from 1.13)\n')
##
##        def get_format_description(self):
##            """See RepositoryFormat.get_format_description()."""
##            return ("Development repository format - hash63+groupcompress")
##
##
##    class RepositoryFormatPackGCPlainCHK127a(RepositoryFormatPackDevelopment5Hash127a):
##        """A hashed CHK+group compress pack repository."""
##
##        repository_class = GCCHKPackRepository
##
##        def get_format_string(self):
##            """See RepositoryFormat.get_format_string()."""
##            return ('Bazaar development format - hash127a+gc'
##                    ' (needs bzr.dev from 1.13)\n')
##
##        def get_format_description(self):
##            """See RepositoryFormat.get_format_description()."""
##            return ("Development repository format - hash127a+groupcompress")
##
##
##    class RepositoryFormatPackGCPlainCHK127b(RepositoryFormatPackDevelopment5Hash127b):
##        """A hashed CHK+group compress pack repository."""
##
##        repository_class = GCCHKPackRepository
##
##        def get_format_string(self):
##            """See RepositoryFormat.get_format_string()."""
##            return ('Bazaar development format - hash127b+gc'
##                    ' (needs bzr.dev from 1.13)\n')
##
##        def get_format_description(self):
##            """See RepositoryFormat.get_format_description()."""
##            return ("Development repository format - hash127b+groupcompress")


    class RepositoryFormatPackGCPlainCHK255(RepositoryFormatPackDevelopment5Hash255):
        """A hashed CHK+group compress pack repository."""

        repository_class = GCCHKPackRepository

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            return ('Bazaar development format - hash255chk+gc'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return ("Development repository format - hash255chk+groupcompress")

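# InterPackRepo provides bzrlib's optimised pack-to-pack fetch path, which
# assumes knit-style pack contents; the wrapper below declares any GC format
# incompatible with it so fetches fall back to the generic VersionedFiles
# code instead.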
def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
    """Be incompatible with the regular fetch code."""
    formats = (RepositoryFormatPackGCPlain, RepositoryFormatPackGCRichRoot,
        RepositoryFormatPackGCSubtrees)
    if chk_support:
        formats = formats + (RepositoryFormatPackGCPlainCHK,
                             RepositoryFormatPackGCPlainCHK16,
                             ## RepositoryFormatPackGCPlainCHK16b,
                             ## RepositoryFormatPackGCPlainCHK63,
                             ## RepositoryFormatPackGCPlainCHK127a,
                             ## RepositoryFormatPackGCPlainCHK127b,
                             RepositoryFormatPackGCPlainCHK255)
    if isinstance(source._format, formats) or isinstance(target._format, formats):
        return False
    else:
        return orig_method(source, target)


InterPackRepo.is_compatible = staticmethod(pack_incompatible)
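# Note: defining these classes does not by itself make the formats available
# to the bzr command line; they are presumably registered with bzrlib's
# repository format registry from the plugin's __init__.py.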