/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.17.9 by Robert Collins
Initial stab at repository format support.
1
# groupcompress, a bzr plugin providing improved disk utilisation
2
# Copyright (C) 2008 Canonical Limited.
3
# 
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License version 2 as published
6
# by the Free Software Foundation.
7
# 
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
# 
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
16
# 
17
18
"""Repostory formats using B+Tree indices and groupcompress compression."""
19
20
import md5
21
import time
22
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
23
from bzrlib import (
24
    debug,
25
    errors,
26
    knit,
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
27
    inventory,
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
28
    pack,
29
    repository,
0.23.28 by John Arbash Meinel
Gotta import 'trace' if you want to use trace.mutter()
30
    trace,
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
31
    ui,
32
    )
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
33
from bzrlib.btree_index import (
34
    BTreeBuilder,
35
    BTreeGraphIndex,
36
    )
0.17.9 by Robert Collins
Initial stab at repository format support.
37
from bzrlib.index import GraphIndex, GraphIndexBuilder
38
from bzrlib.repository import InterPackRepo
39
from bzrlib.plugins.groupcompress.groupcompress import (
40
    _GCGraphIndex,
41
    GroupCompressVersionedFiles,
42
    )
43
from bzrlib.osutils import rand_chars
44
from bzrlib.repofmt.pack_repo import (
45
    Pack,
46
    NewPack,
47
    KnitPackRepository,
48
    RepositoryPackCollection,
0.17.31 by John Arbash Meinel
Bring in the 'rabin' experiment.
49
    RepositoryFormatKnitPack6,
50
    RepositoryFormatKnitPack6RichRoot,
0.17.9 by Robert Collins
Initial stab at repository format support.
51
    Packer,
52
    ReconcilePacker,
53
    OptimisingPacker,
54
    )
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
55
try:
56
    from bzrlib.repofmt.pack_repo import (
0.17.26 by Robert Collins
Working better --gc-plain-chk.
57
    CHKInventoryRepository,
0.21.1 by John Arbash Meinel
Start basing the groupcompress chk formats on the dev5 formats.
58
    RepositoryFormatPackDevelopment5,
59
    RepositoryFormatPackDevelopment5Hash16,
0.22.3 by John Arbash Meinel
Play with some experimental alternate hashes, comment them out for now.
60
##    RepositoryFormatPackDevelopment5Hash16b,
61
##    RepositoryFormatPackDevelopment5Hash63,
62
##    RepositoryFormatPackDevelopment5Hash127a,
63
##    RepositoryFormatPackDevelopment5Hash127b,
0.21.3 by John Arbash Meinel
Start putting together a GroupCompress format that is built on dev5
64
    RepositoryFormatPackDevelopment5Hash255,
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
65
    )
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
66
    from bzrlib import chk_map
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
67
    chk_support = True
68
except ImportError:
69
    chk_support = False
0.17.9 by Robert Collins
Initial stab at repository format support.
70
71
72
def open_pack(self):
    """Open and return a new pack for writing.

    Replacement for ``Packer.open_pack``: instead of hard-coding ``NewPack``,
    the pack is created through the collection's ``pack_factory`` hook, so a
    subclassed collection (e.g. GCRepositoryPackCollection) can substitute
    its own pack implementation.

    :return: Whatever ``self._pack_collection.pack_factory`` returns — a
        newly-created, writable pack object.
    """
    return self._pack_collection.pack_factory(self._pack_collection,
        upload_suffix=self.suffix,
        file_mode=self._pack_collection.repo.bzrdir._get_file_mode())
76
77
78
# Monkey-patch: route every Packer's pack creation through the collection's
# pack_factory hook (see open_pack above) instead of a hard-coded NewPack.
Packer.open_pack = open_pack
79
80
81
class GCPack(NewPack):
    """A NewPack variant with indices suited to groupcompress storage.

    Unlike the stock NewPack, the inventory and text indices are built with
    a single reference list each (groupcompress does not track a separate
    compression-parent list), and an optional chk index is created when the
    pack collection supports CHK (content-hash-keyed) storage.
    """

    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Create a NewPack instance.

        :param pack_collection: A PackCollection into which this is being
            inserted.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g '.autopack'
        :param file_mode: An optional file mode to create the new files with.
        """
        # replaced from bzr.dev to:
        # - change inventory reference list length to 1
        # - change texts reference lists to 1
        # TODO: patch this to be parameterised upstream
        #
        # The relative locations of the packs are constrained, but all are
        # passed in because the caller has them, so as to avoid object churn.
        index_builder_class = pack_collection._index_builder_class
        if chk_support:
            # from brisbane-core: build the extra chk index when the
            # collection carries one.
            if pack_collection.chk_index is not None:
                chk_index = index_builder_class(reference_lists=0)
            else:
                chk_index = None
            Pack.__init__(self,
                # Revisions: parents list, no text compression.
                index_builder_class(reference_lists=1),
                # Inventory: single (parents) reference list — groupcompress
                # does not need a separate compression-source list, unlike
                # the knit-based upstream layout.
                index_builder_class(reference_lists=1),
                # Texts: single per-file-graph reference list, two-element
                # keys (file_id, revision_id).
                index_builder_class(reference_lists=1, key_elements=2),
                # Signatures: Just blobs to store, no compression, no parents
                # listing.
                index_builder_class(reference_lists=0),
                # CHK based storage - just blobs, no compression or parents.
                chk_index=chk_index
                )
        else:
            # from bzr.dev: same layout, minus the chk index.
            Pack.__init__(self,
                # Revisions: parents list, no text compression.
                index_builder_class(reference_lists=1),
                # Inventory: single (parents) reference list.
                index_builder_class(reference_lists=1),
                # Texts: per file graph, two-element keys.
                index_builder_class(reference_lists=1, key_elements=2),
                # Signatures: Just blobs to store, no compression, no parents
                # listing.
                index_builder_class(reference_lists=0),
                )
        self._pack_collection = pack_collection
        # When we make readonly indices, we need this.
        self.index_class = pack_collection._index_class
        # where should the new pack be opened
        self.upload_transport = pack_collection._upload_transport
        # where are indices written out to
        self.index_transport = pack_collection._index_transport
        # where is the pack renamed to when it is finished?
        self.pack_transport = pack_collection._pack_transport
        # What file mode to upload the pack and indices with.
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        self._hash = md5.new()
        # a four-tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        # is not safe unless the client knows it won't be reading from the pack
        # under creation.
        self._cache_limit = 0
        # the temporary pack file name.
        self.random_name = rand_chars(20) + upload_suffix
        # when was this pack started ?
        self.start_time = time.time()
        # open an output stream for the data added to the pack.
        self.write_stream = self.upload_transport.open_write_stream(
            self.random_name, mode=self._file_mode)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
                time.ctime(), self.upload_transport.base, self.random_name,
                time.time() - self.start_time)
        # A list of byte sequences to be written to the new pack, and the
        # aggregate size of them.  Stored as a list rather than separate
        # variables so that the _write_data closure below can update them.
        self._buffer = [[], 0]
        # create a callable for adding data
        #
        # robertc says- this is a closure rather than a method on the object
        # so that the variables are locals, and faster than accessing object
        # members.
        def _write_data(bytes, flush=False, _buffer=self._buffer,
            _write=self.write_stream.write, _update=self._hash.update):
            _buffer[0].append(bytes)
            _buffer[1] += len(bytes)
            # buffer cap: flush once the buffered data exceeds the limit (or
            # when explicitly asked to).
            if _buffer[1] > self._cache_limit or flush:
                bytes = ''.join(_buffer[0])
                _write(bytes)
                _update(bytes)
                _buffer[:] = [[], 0]
        # expose this on self, for the occasion when clients want to add data.
        self._write_data = _write_data
        # a pack writer object to serialise pack records.
        self._writer = pack.ContainerWriter(self._write_data)
        self._writer.begin()
        # what state is the pack in? (open, finished, aborted)
        self._state = 'open'
195
196
197
# Monkey-patch: give the stock collection class a pack_factory attribute so
# the open_pack() override above works for plain (non-GC) collections too.
RepositoryPackCollection.pack_factory = NewPack
198
199
class GCRepositoryPackCollection(RepositoryPackCollection):
    """A pack collection that creates GCPack packs and B+Tree indices."""

    # Used by the open_pack()/_start_write_group() hooks to create new packs.
    pack_factory = GCPack

    def _make_index(self, name, suffix):
        """Overridden to use BTreeGraphIndex objects.

        :param name: The base name of the pack.
        :param suffix: The index suffix (e.g. '.rix') identifying which
            index of the pack to open.
        :return: A BTreeGraphIndex over the named index file.
        """
        size_offset = self._suffix_offsets[suffix]
        index_name = name + suffix
        index_size = self._names[name][size_offset]
        return BTreeGraphIndex(
            self._index_transport, index_name, index_size)

    def _start_write_group(self):
        # Overridden to add 'self.pack_factory()'
        # Do not permit preparation for writing if we're not in a 'write lock'.
        if not self.repo.is_write_locked():
            raise errors.NotWriteLocked(self)
        self._new_pack = self.pack_factory(self, upload_suffix='.pack',
            file_mode=self.repo.bzrdir._get_file_mode())
        # allow writing: queue writes to a new index
        self.revision_index.add_writable_index(self._new_pack.revision_index,
            self._new_pack)
        self.inventory_index.add_writable_index(self._new_pack.inventory_index,
            self._new_pack)
        self.text_index.add_writable_index(self._new_pack.text_index,
            self._new_pack)
        self.signature_index.add_writable_index(self._new_pack.signature_index,
            self._new_pack)
        if chk_support and self.chk_index is not None:
            self.chk_index.add_writable_index(self._new_pack.chk_index,
                self._new_pack)
            self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback

        # Route new-node callbacks from the repository's VersionedFiles into
        # the aggregate (writable) indices.
        self.repo.inventories._index._add_callback = self.inventory_index.add_callback
        self.repo.revisions._index._add_callback = self.revision_index.add_callback
        self.repo.signatures._index._add_callback = self.signature_index.add_callback
        self.repo.texts._index._add_callback = self.text_index.add_callback

    def _get_filtered_inv_stream(self, source_vf, keys, pb=None):
        """Filter the texts of inventories, to find the chk pages.

        :param source_vf: The VersionedFiles to read inventory texts from.
        :param keys: The inventory keys to stream.
        :param pb: An optional progress bar, updated per inventory.
        :return: A 3-tuple (stream, id_roots, p_id_roots). The two root lists
            are filled in lazily *as the stream is consumed* — they are only
            complete once the stream has been fully iterated.
        """
        id_roots = []
        p_id_roots = []
        # Parallel sets for O(1) de-duplication; the lists preserve first-seen
        # order for later clustering.
        id_roots_set = set()
        p_id_roots_set = set()
        total_keys = len(keys)
        def _filter_inv_stream(stream):
            for idx, record in enumerate(stream):
                ### child_pb.update('fetch inv', idx, len(inv_keys_to_fetch))
                bytes = record.get_bytes_as('fulltext')
                chk_inv = inventory.CHKInventory.deserialise(None, bytes, record.key)
                if pb is not None:
                    pb.update('inv', idx, total_keys)
                key = chk_inv.id_to_entry.key()
                if key not in id_roots_set:
                    id_roots.append(key)
                    id_roots_set.add(key)
                p_id_map = chk_inv.parent_id_basename_to_file_id
                if p_id_map is not None:
                    key = p_id_map.key()
                    if key not in p_id_roots_set:
                        p_id_roots_set.add(key)
                        p_id_roots.append(key)
                yield record
        stream = source_vf.get_record_stream(keys, 'gc-optimal', True)
        return _filter_inv_stream(stream), id_roots, p_id_roots

    def _get_chk_stream(self, source_vf, keys, id_roots, p_id_roots, pb=None):
        """Yield record streams for chk pages, clustered for compression.

        We want to stream the keys from 'id_roots', and things they
        reference, and then stream things from p_id_roots and things they
        reference, and then any remaining keys that we didn't get to.

        We also group referenced texts together, so if one root references a
        text with prefix 'a', and another root references a node with prefix
        'a', we want to yield those nodes before we yield the nodes for 'b'.
        This keeps 'similar' nodes together.

        Note: We probably actually want multiple streams here, to help the
              client understand that the different levels won't compress well
              against each other.
              Test the difference between using one Group per level, and
              using 1 Group per prefix. (so '' (root) would get a group, then
              all the references to search-key 'a' would get a group, etc.)

        :param source_vf: The VersionedFiles holding the chk pages.
        :param keys: All chk keys being copied; any not reachable from the
            roots are streamed last, unordered.
        :param id_roots: Root keys of the id_to_entry maps.
        :param p_id_roots: Root keys of the parent_id_basename maps.
        :param pb: An optional progress bar.
        """
        total_keys = len(keys)
        remaining_keys = set(keys)
        # A one-element list so the nested generators can mutate the shared
        # progress counter.
        counter = [0]
        def _get_referenced_stream(root_keys):
            # Breadth-first walk: yield one sub-stream per tree level, with
            # the next level's keys grouped by search prefix.
            cur_keys = root_keys
            while cur_keys:
                keys_by_search_prefix = {}
                remaining_keys.difference_update(cur_keys)
                next_keys = set()
                stream = source_vf.get_record_stream(cur_keys, 'as-requested',
                                                     True)
                def next_stream():
                    for record in stream:
                        bytes = record.get_bytes_as('fulltext')
                        # We don't care about search_key_func for this code,
                        # because we only care about external references.
                        node = chk_map._deserialise(bytes, record.key,
                                                    search_key_func=None)
                        # NOTE(review): common_base is unused; kept because
                        # _search_prefix may be computed lazily on access.
                        common_base = node._search_prefix
                        if isinstance(node, chk_map.InternalNode):
                            for prefix, value in node._items.iteritems():
                                if not isinstance(value, tuple):
                                    raise AssertionError("value is %s when"
                                        " tuple expected" % (value.__class__))
                                if value not in next_keys:
                                    keys_by_search_prefix.setdefault(prefix,
                                        []).append(value)
                                    next_keys.add(value)
                        counter[0] += 1
                        if pb is not None:
                            pb.update('chk node', counter[0], total_keys)
                        yield record
                yield next_stream()
                # Double check that we won't be emitting any keys twice
                next_keys = next_keys.intersection(remaining_keys)
                cur_keys = []
                for prefix in sorted(keys_by_search_prefix):
                    cur_keys.extend(keys_by_search_prefix[prefix])
        for stream in _get_referenced_stream(id_roots):
            yield stream
        for stream in _get_referenced_stream(p_id_roots):
            yield stream
        # Anything left over was not referenced from any inventory we saw;
        # copy it anyway so no data is dropped.
        if remaining_keys:
            trace.note('There were %d keys in the chk index, which were not'
                       ' referenced from inventories', len(remaining_keys))
            stream = source_vf.get_record_stream(remaining_keys, 'unordered',
                                                 True)
            yield stream

    def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
                                 reload_func=None):
        """Execute a series of pack operations.

        :param pack_operations: A list of [revision_count, packs_to_combine].
        :param _packer_class: The class of packer to use (default: Packer).
        :param reload_func: Accepted for interface compatibility with the
            base class; not used by this implementation.
        :return: None.
        """
        for revision_count, packs in pack_operations:
            # we may have no-ops from the setup logic
            if len(packs) == 0:
                continue
            # Create a new temp VersionedFile instance based on these packs,
            # and then just fetch everything into the target

            to_copy = [('revision_index', 'revisions'),
                       ('inventory_index', 'inventories'),
                       ('text_index', 'texts'),
                       ('signature_index', 'signatures'),
                      ]
            # TODO: This is a very non-optimal ordering for chk_bytes. The
            #       issue is that pages that are similar are not transmitted
            #       together. Perhaps get_record_stream('gc-optimal') should be
            #       taught about how to group chk pages?
            has_chk = False
            if getattr(self, 'chk_index', None) is not None:
                has_chk = True
                to_copy.insert(2, ('chk_index', 'chk_bytes'))

            # Shouldn't we start_write_group around this?
            if self._new_pack is not None:
                raise errors.BzrError('call to %s.pack() while another pack is'
                                      ' being written.'
                                      % (self.__class__.__name__,))
            new_pack = self.pack_factory(self, '.autopack',
                file_mode=self.repo.bzrdir._get_file_mode())
            new_pack.set_write_cache_size(1024*1024)
            # TODO: A better alternative is to probably use Packer.open_pack(), and
            #       then create a GroupCompressVersionedFiles() around the
            #       target pack to insert into.
            pb = ui.ui_factory.nested_progress_bar()
            try:
                for idx, (index_name, vf_name) in enumerate(to_copy):
                    pb.update('repacking %s' % (vf_name,), idx + 1, len(to_copy))
                    keys = set()
                    new_index = getattr(new_pack, index_name)
                    new_index.set_optimize(for_size=True)
                    for pack in packs:
                        source_index = getattr(pack, index_name)
                        keys.update(e[1] for e in source_index.iter_all_entries())
                    trace.mutter('repacking %s with %d keys',
                                 vf_name, len(keys))
                    source_vf = getattr(self.repo, vf_name)
                    target_access = knit._DirectPackAccess({})
                    target_access.set_writer(new_pack._writer, new_index,
                                             new_pack.access_tuple())
                    target_vf = GroupCompressVersionedFiles(
                        _GCGraphIndex(new_index,
                                      add_callback=new_index.add_nodes,
                                      parents=source_vf._index._parents,
                                      is_locked=self.repo.is_locked),
                        access=target_access,
                        delta=source_vf._delta)
                    stream = None
                    child_pb = ui.ui_factory.nested_progress_bar()
                    try:
                        if has_chk:
                            if vf_name == 'inventories':
                                # Capture the chk roots while copying the
                                # inventories; used for 'chk_bytes' below.
                                stream, id_roots, p_id_roots = self._get_filtered_inv_stream(
                                    source_vf, keys, pb=child_pb)
                            elif vf_name == 'chk_bytes':
                                for stream in self._get_chk_stream(source_vf, keys,
                                                    id_roots, p_id_roots,
                                                    pb=child_pb):
                                    target_vf.insert_record_stream(stream)
                                # No more to copy
                                stream = []
                        if stream is None:
                            # Default path: copy everything gc-optimally,
                            # updating the progress bar per record.
                            def pb_stream():
                                substream = source_vf.get_record_stream(keys, 'gc-optimal', True)
                                for idx, record in enumerate(substream):
                                    child_pb.update(vf_name, idx + 1, len(keys))
                                    yield record
                            stream = pb_stream()
                        target_vf.insert_record_stream(stream)
                    finally:
                        child_pb.finished()
                new_pack._check_references() # shouldn't be needed
            except:
                pb.finished()
                new_pack.abort()
                raise
            else:
                pb.finished()
                if not new_pack.data_inserted():
                    raise AssertionError('We copied from pack files,'
                                         ' but had no data copied')
                    # we need to abort somehow, because we don't want to remove
                    # the other packs
                new_pack.finish()
                self.allocate(new_pack)
            for pack in packs:
                self._remove_pack_from_memory(pack)
        # record the newly available packs and stop advertising the old
        # packs
        self._save_pack_names(clear_obsolete_packs=True)
        # Move the old packs out of the way now they are no longer referenced.
        for revision_count, packs in pack_operations:
            self._obsolete_packs(packs)
0.20.7 by John Arbash Meinel
(ugly hack) autopacking doesn't work, so don't do it.
439
0.17.9 by Robert Collins
Initial stab at repository format support.
440
class GCPackRepository(KnitPackRepository):
    """GC customisation of KnitPackRepository.

    The base-class constructor builds knit-based versioned-file stores; this
    subclass lets it run and then replaces every store (revisions,
    inventories, signatures, texts and optionally chk_bytes) with
    GroupCompressVersionedFiles equivalents backed by the GC pack collection,
    so all data is written group-compressed.
    """

    # Note: I think the CHK support can be dropped from this class as it's
    # implemented via the GCCHKPackRepository class defined next. IGC 20090301

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Overridden to change pack collection class."""
        KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        # and now replace everything it did :)
        index_transport = self._transport.clone('indices')
        # Older bzrlib versions (no chk support) do not accept the
        # use_chk_index keyword, so only pass it when chk support exists.
        collection_kwargs = {}
        if chk_support:
            collection_kwargs['use_chk_index'] = self._format.supports_chks
        self._pack_collection = GCRepositoryPackCollection(self,
            self._transport, index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            **collection_kwargs)
        self.inventories = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.inventory_index.data_access)
        self.revisions = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.revision_index.data_access,
            delta=False)
        self.signatures = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                parents=False, is_locked=self.is_locked),
            access=self._pack_collection.signature_index.data_access,
            delta=False)
        self.texts = GroupCompressVersionedFiles(
            _GCGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=self._pack_collection.text_index.data_access)
        if chk_support and _format.supports_chks:
            # No graph, no compression:- references from chks are between
            # different objects not temporal versions of the same; and without
            # some sort of temporal structure knit compression will just fail.
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.chk_index.data_access)
        else:
            self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
if chk_support:
    class GCCHKPackRepository(CHKInventoryRepository):
        """GC customisation of CHKInventoryRepository.

        Like GCPackRepository, but for CHK-inventory repositories: all the
        versioned-file stores set up by the base constructor are replaced
        with GroupCompressVersionedFiles equivalents, including a mandatory
        chk_bytes store.
        """

        def __init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer):
            """Overridden to change pack collection class."""
            # Deliberately call KnitPackRepository.__init__ rather than
            # super(): everything it sets up is replaced below anyway.
            KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
                _commit_builder_class, _serializer)
            # and now replace everything it did :)
            index_transport = self._transport.clone('indices')
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class,
                use_chk_index=self._format.supports_chks,
                )
            self.inventories = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                    add_callback=self._pack_collection.inventory_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.inventory_index.data_access)
            self.revisions = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                    add_callback=self._pack_collection.revision_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.revision_index.data_access,
                delta=False)
            self.signatures = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                    add_callback=self._pack_collection.signature_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.signature_index.data_access,
                delta=False)
            self.texts = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.text_index.combined_index,
                    add_callback=self._pack_collection.text_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.text_index.data_access)
            # This class only makes sense for chk-supporting formats; raise
            # explicitly rather than using 'assert', which is stripped when
            # Python runs with -O.
            if not _format.supports_chks:
                raise AssertionError(
                    'GCCHKPackRepository requires a chk-supporting format')
            # No parents, individual CHK pages don't have specific ancestry
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.chk_index.data_access)
            # True when the repository object is 'write locked' (as opposed to the
            # physical lock only taken out around changes to the pack-names list.)
            # Another way to represent this would be a decorator around the control
            # files object that presents logical locks as physical ones - if this
            # gets ugly consider that alternative design. RBC 20071011
            self._write_lock_count = 0
            self._transaction = None
            # for tests
            self._reconcile_does_inventory_gc = True
            self._reconcile_fixes_text_parents = True
            self._reconcile_backsup_inventory = False
class RepositoryFormatPackGCPlain(RepositoryFormatKnitPack6):
    """A B+Tree index using pack repository.

    Plain (non-chk, non-rich-root) format: pack-0.92 semantics with btree
    indices and groupcompress storage, via GCPackRepository.
    """

    repository_class = GCPackRepository
    rich_root_data = False
    # Note: We cannot unpack a delta that references a text we haven't
    # seen yet. There are 2 options, work in fulltexts, or require
    # topological sorting. Using fulltexts is more optimal for local
    # operations, because the source can be smart about extracting
    # multiple in-a-row (and sharing strings). Topological is better
    # for remote, because we access less data.
    _fetch_order = 'unordered'
    _fetch_gc_optimal = True
    _fetch_uses_deltas = False

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format - btree+gc "
            "(needs bzr.dev from 1.13)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - btree+groupcompress "
            ", interoperates with pack-0.92\n")
if chk_support:
    from bzrlib import chk_serializer

    class RepositoryFormatPackGCCHK16(RepositoryFormatPackDevelopment5Hash16):
        """A hashed CHK+group compress pack repository (16-way fan-out)."""

        repository_class = GCCHKPackRepository
        rich_root_data = True
        # Note: We cannot unpack a delta that references a text we haven't
        # seen yet. There are 2 options, work in fulltexts, or require
        # topological sorting. Using fulltexts is more optimal for local
        # operations, because the source can be smart about extracting
        # multiple in-a-row (and sharing strings). Topological is better
        # for remote, because we access less data.
        _fetch_order = 'unordered'
        _fetch_gc_optimal = True
        _fetch_uses_deltas = False

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            return ('Bazaar development format - hash16chk+gc rich-root'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return ("Development repository format - hash16chk+groupcompress")

    class RepositoryFormatPackGCCHK255(RepositoryFormatPackDevelopment5Hash255):
        """A hashed CHK+group compress pack repository (255-way fan-out)."""

        repository_class = GCCHKPackRepository
        # Setting this to True causes us to use InterModel1And2, so for now set
        # it to False which uses InterDifferingSerializer. When IM1&2 is
        # removed (as it is in bzr.dev) we can set this back to True.
        rich_root_data = True
        # NOTE(review): unlike the sibling formats, this class does not set
        # _fetch_order/_fetch_gc_optimal/_fetch_uses_deltas, so it inherits
        # whatever the dev5 base class defines -- confirm that is intended.

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            return ('Bazaar development format - hash255chk+gc rich-root'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return ("Development repository format - hash255chk+groupcompress")

    # 64kB leaf pages (~255 leaf entries), matching the internal fan-out.
    chk_serializer_255_bigpage = chk_serializer.CHKSerializer(65536, 'hash-255-way')

    class RepositoryFormatPackGCCHK255Big(RepositoryFormatPackGCCHK255):
        """A hashed CHK+group compress pack repository with big leaf pages."""

        repository_class = GCCHKPackRepository
        rich_root_data = True
        _serializer = chk_serializer_255_bigpage
        # Note: We cannot unpack a delta that references a text we haven't
        # seen yet. There are 2 options, work in fulltexts, or require
        # topological sorting. Using fulltexts is more optimal for local
        # operations, because the source can be smart about extracting
        # multiple in-a-row (and sharing strings). Topological is better
        # for remote, because we access less data.
        _fetch_order = 'unordered'
        _fetch_gc_optimal = True
        _fetch_uses_deltas = False

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            return ('Bazaar development format - hash255chk+gc rich-root bigpage'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return ("Development repository format - hash255chk+groupcompress + bigpage")
def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
    """Be incompatible with the regular fetch code.

    Wraps the original InterPackRepo.is_compatible: any fetch where either
    side uses one of the groupcompress formats is declared incompatible, so
    bzr falls back to a fetch path that this plugin can handle; everything
    else is delegated to the original method (captured at import time via
    the default argument).
    """
    formats = (RepositoryFormatPackGCPlain,)
    if chk_support:
        formats = formats + (RepositoryFormatPackGCCHK16,
                             RepositoryFormatPackGCCHK255,
                             RepositoryFormatPackGCCHK255Big)
    if isinstance(source._format, formats) or isinstance(target._format, formats):
        return False
    else:
        return orig_method(source, target)


# Monkeypatch the pack-to-pack optimiser so it refuses groupcompress repos.
InterPackRepo.is_compatible = staticmethod(pack_incompatible)