/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.17.9 by Robert Collins
Initial stab at repository format support.
1
# groupcompress, a bzr plugin providing improved disk utilisation
2
# Copyright (C) 2008 Canonical Limited.
3
# 
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License version 2 as published
6
# by the Free Software Foundation.
7
# 
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
# 
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
16
# 
17
18
"""Repostory formats using B+Tree indices and groupcompress compression."""
19
20
import md5
21
import time
22
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
23
from bzrlib import (
24
    debug,
25
    errors,
26
    knit,
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
27
    inventory,
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
28
    pack,
29
    repository,
0.23.28 by John Arbash Meinel
Gotta import 'trace' if you want to use trace.mutter()
30
    trace,
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
31
    ui,
32
    )
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
33
from bzrlib.btree_index import (
34
    BTreeBuilder,
35
    BTreeGraphIndex,
36
    )
0.17.9 by Robert Collins
Initial stab at repository format support.
37
from bzrlib.index import GraphIndex, GraphIndexBuilder
38
from bzrlib.repository import InterPackRepo
0.23.1 by John Arbash Meinel
Start a quick experimentation with a different 'diff' algorithm.
39
from bzrlib.plugins.groupcompress_rabin.groupcompress import (
0.17.9 by Robert Collins
Initial stab at repository format support.
40
    _GCGraphIndex,
41
    GroupCompressVersionedFiles,
42
    )
43
from bzrlib.osutils import rand_chars
44
from bzrlib.repofmt.pack_repo import (
45
    Pack,
46
    NewPack,
47
    KnitPackRepository,
48
    RepositoryPackCollection,
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
49
    RepositoryFormatPackDevelopment2,
50
    RepositoryFormatPackDevelopment2Subtree,
0.17.9 by Robert Collins
Initial stab at repository format support.
51
    RepositoryFormatKnitPack1,
52
    RepositoryFormatKnitPack3,
53
    RepositoryFormatKnitPack4,
54
    Packer,
55
    ReconcilePacker,
56
    OptimisingPacker,
57
    )
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
58
try:
59
    from bzrlib.repofmt.pack_repo import (
0.17.26 by Robert Collins
Working better --gc-plain-chk.
60
    CHKInventoryRepository,
0.21.1 by John Arbash Meinel
Start basing the groupcompress chk formats on the dev5 formats.
61
    RepositoryFormatPackDevelopment5,
62
    RepositoryFormatPackDevelopment5Hash16,
0.22.3 by John Arbash Meinel
Play with some experimental alternate hashes, comment them out for now.
63
##    RepositoryFormatPackDevelopment5Hash16b,
64
##    RepositoryFormatPackDevelopment5Hash63,
65
##    RepositoryFormatPackDevelopment5Hash127a,
66
##    RepositoryFormatPackDevelopment5Hash127b,
0.21.3 by John Arbash Meinel
Start putting together a GroupCompress format that is built on dev5
67
    RepositoryFormatPackDevelopment5Hash255,
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
68
    )
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
69
    from bzrlib import chk_map
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
70
    chk_support = True
71
except ImportError:
72
    chk_support = False
0.17.9 by Robert Collins
Initial stab at repository format support.
73
74
75
def open_pack(self):
0.17.22 by Robert Collins
really get gc working with 1.10
76
    return self._pack_collection.pack_factory(self._pack_collection,
77
        upload_suffix=self.suffix,
0.17.9 by Robert Collins
Initial stab at repository format support.
78
        file_mode=self._pack_collection.repo.bzrdir._get_file_mode())
79
80
81
Packer.open_pack = open_pack
82
83
84
class GCPack(NewPack):
85
0.17.22 by Robert Collins
really get gc working with 1.10
86
    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
0.17.9 by Robert Collins
Initial stab at repository format support.
87
        """Create a NewPack instance.
88
0.20.30 by Ian Clatworthy
repofmt.py code cleanups
89
        :param pack_collection: A PackCollection into which this is being
90
            inserted.
0.17.9 by Robert Collins
Initial stab at repository format support.
91
        :param upload_suffix: An optional suffix to be given to any temporary
92
            files created during the pack creation. e.g '.autopack'
93
        :param file_mode: An optional file mode to create the new files with.
94
        """
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
95
        # replaced from bzr.dev to:
96
        # - change inventory reference list length to 1
97
        # - change texts reference lists to 1
98
        # TODO: patch this to be parameterised upstream
99
        
0.17.9 by Robert Collins
Initial stab at repository format support.
100
        # The relative locations of the packs are constrained, but all are
101
        # passed in because the caller has them, so as to avoid object churn.
0.17.22 by Robert Collins
really get gc working with 1.10
102
        index_builder_class = pack_collection._index_builder_class
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
103
        if chk_support:
104
            # from brisbane-core
105
            if pack_collection.chk_index is not None:
106
                chk_index = index_builder_class(reference_lists=0)
107
            else:
108
                chk_index = None
109
            Pack.__init__(self,
110
                # Revisions: parents list, no text compression.
111
                index_builder_class(reference_lists=1),
112
                # Inventory: We want to map compression only, but currently the
113
                # knit code hasn't been updated enough to understand that, so we
114
                # have a regular 2-list index giving parents and compression
115
                # source.
116
                index_builder_class(reference_lists=1),
117
                # Texts: compression and per file graph, for all fileids - so two
118
                # reference lists and two elements in the key tuple.
119
                index_builder_class(reference_lists=1, key_elements=2),
120
                # Signatures: Just blobs to store, no compression, no parents
121
                # listing.
122
                index_builder_class(reference_lists=0),
123
                # CHK based storage - just blobs, no compression or parents.
124
                chk_index=chk_index
125
                )
126
        else:
127
            # from bzr.dev
128
            Pack.__init__(self,
129
                # Revisions: parents list, no text compression.
130
                index_builder_class(reference_lists=1),
131
                # Inventory: compressed, with graph for compatibility with other
132
                # existing bzrlib code.
133
                index_builder_class(reference_lists=1),
134
                # Texts: per file graph:
135
                index_builder_class(reference_lists=1, key_elements=2),
136
                # Signatures: Just blobs to store, no compression, no parents
137
                # listing.
138
                index_builder_class(reference_lists=0),
139
                )
0.17.22 by Robert Collins
really get gc working with 1.10
140
        self._pack_collection = pack_collection
141
        # When we make readonly indices, we need this.
142
        self.index_class = pack_collection._index_class
0.17.9 by Robert Collins
Initial stab at repository format support.
143
        # where should the new pack be opened
0.17.22 by Robert Collins
really get gc working with 1.10
144
        self.upload_transport = pack_collection._upload_transport
0.17.9 by Robert Collins
Initial stab at repository format support.
145
        # where are indices written out to
0.17.22 by Robert Collins
really get gc working with 1.10
146
        self.index_transport = pack_collection._index_transport
0.17.9 by Robert Collins
Initial stab at repository format support.
147
        # where is the pack renamed to when it is finished?
0.17.22 by Robert Collins
really get gc working with 1.10
148
        self.pack_transport = pack_collection._pack_transport
0.17.9 by Robert Collins
Initial stab at repository format support.
149
        # What file mode to upload the pack and indices with.
150
        self._file_mode = file_mode
151
        # tracks the content written to the .pack file.
152
        self._hash = md5.new()
153
        # a four-tuple with the length in bytes of the indices, once the pack
154
        # is finalised. (rev, inv, text, sigs)
155
        self.index_sizes = None
156
        # How much data to cache when writing packs. Note that this is not
157
        # synchronised with reads, because it's not in the transport layer, so
158
        # is not safe unless the client knows it won't be reading from the pack
159
        # under creation.
160
        self._cache_limit = 0
161
        # the temporary pack file name.
162
        self.random_name = rand_chars(20) + upload_suffix
163
        # when was this pack started ?
164
        self.start_time = time.time()
165
        # open an output stream for the data added to the pack.
166
        self.write_stream = self.upload_transport.open_write_stream(
167
            self.random_name, mode=self._file_mode)
168
        if 'pack' in debug.debug_flags:
0.23.30 by John Arbash Meinel
Merge in Ian's groupcompress trunk updates
169
            trace.mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
0.17.9 by Robert Collins
Initial stab at repository format support.
170
                time.ctime(), self.upload_transport.base, self.random_name,
171
                time.time() - self.start_time)
172
        # A list of byte sequences to be written to the new pack, and the 
173
        # aggregate size of them.  Stored as a list rather than separate 
174
        # variables so that the _write_data closure below can update them.
175
        self._buffer = [[], 0]
176
        # create a callable for adding data 
177
        #
178
        # robertc says- this is a closure rather than a method on the object
179
        # so that the variables are locals, and faster than accessing object
180
        # members.
181
        def _write_data(bytes, flush=False, _buffer=self._buffer,
182
            _write=self.write_stream.write, _update=self._hash.update):
183
            _buffer[0].append(bytes)
184
            _buffer[1] += len(bytes)
185
            # buffer cap
186
            if _buffer[1] > self._cache_limit or flush:
187
                bytes = ''.join(_buffer[0])
188
                _write(bytes)
189
                _update(bytes)
190
                _buffer[:] = [[], 0]
191
        # expose this on self, for the occasion when clients want to add data.
192
        self._write_data = _write_data
193
        # a pack writer object to serialise pack records.
194
        self._writer = pack.ContainerWriter(self._write_data)
195
        self._writer.begin()
196
        # what state is the pack in? (open, finished, aborted)
197
        self._state = 'open'
198
199
200
RepositoryPackCollection.pack_factory = NewPack
201
202
class GCRepositoryPackCollection(RepositoryPackCollection):
203
204
    pack_factory = GCPack
205
206
    def _make_index(self, name, suffix):
207
        """Overridden to use BTreeGraphIndex objects."""
208
        size_offset = self._suffix_offsets[suffix]
209
        index_name = name + suffix
210
        index_size = self._names[name][size_offset]
211
        return BTreeGraphIndex(
212
            self._index_transport, index_name, index_size)
213
214
    def _start_write_group(self):
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
215
        # Overridden to add 'self.pack_factory()'
0.17.9 by Robert Collins
Initial stab at repository format support.
216
        # Do not permit preparation for writing if we're not in a 'write lock'.
217
        if not self.repo.is_write_locked():
218
            raise errors.NotWriteLocked(self)
0.17.22 by Robert Collins
really get gc working with 1.10
219
        self._new_pack = self.pack_factory(self, upload_suffix='.pack',
0.17.9 by Robert Collins
Initial stab at repository format support.
220
            file_mode=self.repo.bzrdir._get_file_mode())
221
        # allow writing: queue writes to a new index
222
        self.revision_index.add_writable_index(self._new_pack.revision_index,
223
            self._new_pack)
224
        self.inventory_index.add_writable_index(self._new_pack.inventory_index,
225
            self._new_pack)
226
        self.text_index.add_writable_index(self._new_pack.text_index,
227
            self._new_pack)
228
        self.signature_index.add_writable_index(self._new_pack.signature_index,
229
            self._new_pack)
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
230
        if chk_support and self.chk_index is not None:
231
            self.chk_index.add_writable_index(self._new_pack.chk_index,
232
                self._new_pack)
233
            self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback
0.17.9 by Robert Collins
Initial stab at repository format support.
234
235
        self.repo.inventories._index._add_callback = self.inventory_index.add_callback
236
        self.repo.revisions._index._add_callback = self.revision_index.add_callback
237
        self.repo.signatures._index._add_callback = self.signature_index.add_callback
238
        self.repo.texts._index._add_callback = self.text_index.add_callback
239
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
240
    def _get_filtered_inv_stream(self, source_vf, keys):
241
        """Filter the texts of inventories, to find the chk pages."""
0.22.6 by John Arbash Meinel
Clustering chk pages properly makes a big difference.
242
        id_roots = []
243
        p_id_roots = []
244
        id_roots_set = set()
245
        p_id_roots_set = set()
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
246
        def _filter_inv_stream(stream):
247
            for idx, record in enumerate(stream):
248
                ### child_pb.update('fetch inv', idx, len(inv_keys_to_fetch))
249
                bytes = record.get_bytes_as('fulltext')
250
                chk_inv = inventory.CHKInventory.deserialise(None, bytes, record.key)
0.22.6 by John Arbash Meinel
Clustering chk pages properly makes a big difference.
251
                key = chk_inv.id_to_entry.key()
252
                if key not in id_roots_set:
253
                    id_roots.append(key)
254
                    id_roots_set.add(key)
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
255
                p_id_map = chk_inv.parent_id_basename_to_file_id
256
                if p_id_map is not None:
0.22.6 by John Arbash Meinel
Clustering chk pages properly makes a big difference.
257
                    key = p_id_map.key()
258
                    if key not in p_id_roots_set:
259
                        p_id_roots_set.add(key)
260
                        p_id_roots.append(key)
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
261
                yield record
262
        stream = source_vf.get_record_stream(keys, 'gc-optimal', True)
263
        return _filter_inv_stream(stream), id_roots, p_id_roots
264
0.20.23 by John Arbash Meinel
Add a progress indicator for chk pages.
265
    def _get_chk_stream(self, source_vf, keys, id_roots, p_id_roots, pb=None):
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
266
        # We want to stream the keys from 'id_roots', and things they
267
        # reference, and then stream things from p_id_roots and things they
268
        # reference, and then any remaining keys that we didn't get to.
269
0.22.6 by John Arbash Meinel
Clustering chk pages properly makes a big difference.
270
        # We also group referenced texts together, so if one root references a
271
        # text with prefix 'a', and another root references a node with prefix
272
        # 'a', we want to yield those nodes before we yield the nodes for 'b'
0.20.30 by Ian Clatworthy
repofmt.py code cleanups
273
        # This keeps 'similar' nodes together.
0.22.6 by John Arbash Meinel
Clustering chk pages properly makes a big difference.
274
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
275
        # Note: We probably actually want multiple streams here, to help the
276
        #       client understand that the different levels won't compress well
0.20.30 by Ian Clatworthy
repofmt.py code cleanups
277
        #       against each other.
0.20.27 by John Arbash Meinel
Update a Note/Todo
278
        #       Test the difference between using one Group per level, and
279
        #       using 1 Group per prefix. (so '' (root) would get a group, then
280
        #       all the references to search-key 'a' would get a group, etc.)
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
281
        remaining_keys = set(keys)
0.20.25 by John Arbash Meinel
As expected, splitting things up into streams of streams
282
        counter = [0]
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
283
        def _get_referenced_stream(root_keys):
284
            cur_keys = root_keys
285
            while cur_keys:
0.22.6 by John Arbash Meinel
Clustering chk pages properly makes a big difference.
286
                keys_by_search_prefix = {}
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
287
                remaining_keys.difference_update(cur_keys)
288
                next_keys = set()
0.22.6 by John Arbash Meinel
Clustering chk pages properly makes a big difference.
289
                stream = source_vf.get_record_stream(cur_keys, 'as-requested',
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
290
                                                     True)
0.20.26 by John Arbash Meinel
Try even harder, now with even *more* streams.
291
                def next_stream():
292
                    for record in stream:
293
                        bytes = record.get_bytes_as('fulltext')
294
                        # We don't care about search_key_func for this code,
295
                        # because we only care about external references.
296
                        node = chk_map._deserialise(bytes, record.key,
297
                                                    search_key_func=None)
298
                        common_base = node._search_prefix
299
                        if isinstance(node, chk_map.InternalNode):
300
                            for prefix, value in node._items.iteritems():
0.20.30 by Ian Clatworthy
repofmt.py code cleanups
301
                                if not isinstance(value, tuple):
302
                                    raise AssertionError("value is %s when"
303
                                        " tuple expected" % (value.__class__))
0.20.26 by John Arbash Meinel
Try even harder, now with even *more* streams.
304
                                if value not in next_keys:
305
                                    keys_by_search_prefix.setdefault(prefix,
306
                                        []).append(value)
307
                                    next_keys.add(value)
308
                        counter[0] += 1
309
                        if pb is not None:
310
                            pb.update('chk node', counter[0])
311
                        yield record
312
                yield next_stream()
0.22.6 by John Arbash Meinel
Clustering chk pages properly makes a big difference.
313
                # Double check that we won't be emitting any keys twice
314
                next_keys = next_keys.intersection(remaining_keys)
315
                cur_keys = []
316
                for prefix in sorted(keys_by_search_prefix):
317
                    cur_keys.extend(keys_by_search_prefix[prefix])
0.20.26 by John Arbash Meinel
Try even harder, now with even *more* streams.
318
        for stream in _get_referenced_stream(id_roots):
319
            yield stream
320
        for stream in _get_referenced_stream(p_id_roots):
321
            yield stream
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
322
        if remaining_keys:
0.23.30 by John Arbash Meinel
Merge in Ian's groupcompress trunk updates
323
            trace.note('There were %d keys in the chk index, which were not'
324
                       ' referenced from inventories', len(remaining_keys))
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
325
            stream = source_vf.get_record_stream(remaining_keys, 'unordered',
326
                                                 True)
0.20.25 by John Arbash Meinel
As expected, splitting things up into streams of streams
327
            yield stream
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
328
0.22.1 by John Arbash Meinel
A first-cut at implementing an auto-pack by copying everything.
329
    def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
330
                                 reload_func=None):
331
        """Execute a series of pack operations.
332
333
        :param pack_operations: A list of [revision_count, packs_to_combine].
334
        :param _packer_class: The class of packer to use (default: Packer).
335
        :return: None.
336
        """
337
        for revision_count, packs in pack_operations:
338
            # we may have no-ops from the setup logic
339
            if len(packs) == 0:
340
                continue
341
            # Create a new temp VersionedFile instance based on these packs,
342
            # and then just fetch everything into the target
343
344
            to_copy = [('revision_index', 'revisions'),
345
                       ('inventory_index', 'inventories'),
346
                       ('text_index', 'texts'),
347
                       ('signature_index', 'signatures'),
348
                      ]
0.22.3 by John Arbash Meinel
Play with some experimental alternate hashes, comment them out for now.
349
            # TODO: This is a very non-optimal ordering for chk_bytes. The
350
            #       issue is that pages that are similar are not transmitted
351
            #       together. Perhaps get_record_stream('gc-optimal') should be
352
            #       taught about how to group chk pages?
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
353
            has_chk = False
0.22.1 by John Arbash Meinel
A first-cut at implementing an auto-pack by copying everything.
354
            if getattr(self, 'chk_index', None) is not None:
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
355
                has_chk = True
0.22.1 by John Arbash Meinel
A first-cut at implementing an auto-pack by copying everything.
356
                to_copy.insert(2, ('chk_index', 'chk_bytes'))
357
358
            # Shouldn't we start_write_group around this?
359
            if self._new_pack is not None:
360
                raise errors.BzrError('call to %s.pack() while another pack is'
361
                                      ' being written.'
362
                                      % (self.__class__.__name__,))
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
363
            new_pack = self.pack_factory(self, 'autopack',
0.20.30 by Ian Clatworthy
repofmt.py code cleanups
364
                file_mode=self.repo.bzrdir._get_file_mode())
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
365
            new_pack.set_write_cache_size(1024*1024)
0.22.1 by John Arbash Meinel
A first-cut at implementing an auto-pack by copying everything.
366
            # TODO: A better alternative is to probably use Packer.open_pack(), and
367
            #       then create a GroupCompressVersionedFiles() around the
368
            #       target pack to insert into.
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
369
            pb = ui.ui_factory.nested_progress_bar()
0.22.1 by John Arbash Meinel
A first-cut at implementing an auto-pack by copying everything.
370
            try:
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
371
                for idx, (index_name, vf_name) in enumerate(to_copy):
372
                    pb.update('repacking %s' % (vf_name,), idx + 1, len(to_copy))
0.22.1 by John Arbash Meinel
A first-cut at implementing an auto-pack by copying everything.
373
                    keys = set()
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
374
                    new_index = getattr(new_pack, index_name)
0.22.1 by John Arbash Meinel
A first-cut at implementing an auto-pack by copying everything.
375
                    new_index.set_optimize(for_size=True)
376
                    for pack in packs:
377
                        source_index = getattr(pack, index_name)
378
                        keys.update(e[1] for e in source_index.iter_all_entries())
0.23.22 by John Arbash Meinel
Add a mutter() while repacking, so that we log progress as we go along.
379
                    trace.mutter('repacking %s with %d keys',
380
                                 vf_name, len(keys))
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
381
                    source_vf = getattr(self.repo, vf_name)
382
                    target_access = knit._DirectPackAccess({})
383
                    target_access.set_writer(new_pack._writer, new_index,
384
                                             new_pack.access_tuple())
385
                    target_vf = GroupCompressVersionedFiles(
386
                        _GCGraphIndex(new_index,
387
                                      add_callback=new_index.add_nodes,
388
                                      parents=source_vf._index._parents,
389
                                      is_locked=self.repo.is_locked),
390
                        access=target_access,
391
                        delta=source_vf._delta)
0.22.5 by John Arbash Meinel
Try a different method of streaming the chk pages.
392
                    stream = None
0.20.23 by John Arbash Meinel
Add a progress indicator for chk pages.
393
                    child_pb = ui.ui_factory.nested_progress_bar()
394
                    try:
395
                        if has_chk:
396
                            if vf_name == 'inventories':
397
                                stream, id_roots, p_id_roots = self._get_filtered_inv_stream(
398
                                    source_vf, keys)
399
                            elif vf_name == 'chk_bytes':
0.20.25 by John Arbash Meinel
As expected, splitting things up into streams of streams
400
                                for stream in self._get_chk_stream(source_vf, keys,
401
                                                    id_roots, p_id_roots,
402
                                                    pb=child_pb):
403
                                    target_vf.insert_record_stream(stream)
404
                                # No more to copy
405
                                stream = []
0.20.23 by John Arbash Meinel
Add a progress indicator for chk pages.
406
                        if stream is None:
0.20.24 by John Arbash Meinel
Add a general progress indicator for other parts of copy.
407
                            def pb_stream():
408
                                substream = source_vf.get_record_stream(keys, 'gc-optimal', True)
409
                                for idx, record in enumerate(substream):
0.20.30 by Ian Clatworthy
repofmt.py code cleanups
410
                                    child_pb.update(vf_name, idx + 1, len(keys))
0.20.24 by John Arbash Meinel
Add a general progress indicator for other parts of copy.
411
                                    yield record
412
                            stream = pb_stream()
0.20.23 by John Arbash Meinel
Add a progress indicator for chk pages.
413
                        target_vf.insert_record_stream(stream)
414
                    finally:
415
                        child_pb.finished()
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
416
                new_pack._check_references() # shouldn't be needed
0.22.1 by John Arbash Meinel
A first-cut at implementing an auto-pack by copying everything.
417
            except:
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
418
                pb.finished()
419
                new_pack.abort()
420
                raise
0.22.1 by John Arbash Meinel
A first-cut at implementing an auto-pack by copying everything.
421
            else:
0.20.19 by John Arbash Meinel
Groupcompress now supports 'autopack' and 'pack'.
422
                pb.finished()
423
                if not new_pack.data_inserted():
424
                    raise AssertionError('We copied from pack files,'
425
                                         ' but had no data copied')
426
                    # we need to abort somehow, because we don't want to remove
427
                    # the other packs
428
                new_pack.finish()
429
                self.allocate(new_pack)
0.22.1 by John Arbash Meinel
A first-cut at implementing an auto-pack by copying everything.
430
            for pack in packs:
431
                self._remove_pack_from_memory(pack)
432
        # record the newly available packs and stop advertising the old
433
        # packs
434
        self._save_pack_names(clear_obsolete_packs=True)
435
        # Move the old packs out of the way now they are no longer referenced.
436
        for revision_count, packs in pack_operations:
437
            self._obsolete_packs(packs)
0.20.7 by John Arbash Meinel
(ugly hack) autopacking doesn't work, so don't do it.
438
0.17.9 by Robert Collins
Initial stab at repository format support.
439
440
0.23.1 by John Arbash Meinel
Start a quick experimentation with a different 'diff' algorithm.
441
class GCRPackRepository(KnitPackRepository):
    """GC customisation of KnitPackRepository."""

    # Note: I think the CHK support can be dropped from this class as it's
    # implemented via the GCCHKPackRepository class defined next. IGC 20090301

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Overridden to change pack collection class."""
        KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        # The base __init__ wired up knit-based state; throw it away and
        # rebuild everything with groupcompress-aware classes.
        index_transport = self._transport.clone('indices')
        upload_transport = self._transport.clone('upload')
        pack_transport = self._transport.clone('packs')
        if chk_support:
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                upload_transport,
                pack_transport,
                _format.index_builder_class,
                _format.index_class,
                use_chk_index=self._format.supports_chks,
                )
        else:
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                upload_transport,
                pack_transport,
                _format.index_builder_class,
                _format.index_class)
        collection = self._pack_collection

        def _make_vf(index_mgr, parents, **kwargs):
            # Build a GroupCompressVersionedFiles over one of the pack
            # collection's aggregate indices; extra kwargs (e.g. delta=)
            # are passed straight through.
            graph_index = _GCGraphIndex(index_mgr.combined_index,
                add_callback=index_mgr.add_callback,
                parents=parents, is_locked=self.is_locked)
            return GroupCompressVersionedFiles(graph_index,
                access=index_mgr.data_access, **kwargs)

        self.inventories = _make_vf(collection.inventory_index, parents=True)
        self.revisions = _make_vf(collection.revision_index, parents=True,
            delta=False)
        self.signatures = _make_vf(collection.signature_index, parents=False,
            delta=False)
        self.texts = _make_vf(collection.text_index, parents=True)
        if chk_support and _format.supports_chks:
            # No graph, no compression:- references from chks are between
            # different objects not temporal versions of the same; and without
            # some sort of temporal structure knit compression will just fail.
            self.chk_bytes = _make_vf(collection.chk_index, parents=False)
        else:
            self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
        # Note: We cannot unpack a delta that references a text we haven't
        # seen yet. There are 2 options, work in fulltexts, or require
        # topological sorting. Using fulltexts is more optimal for local
        # operations, because the source can be smart about extracting
        # multiple in-a-row (and sharing strings). Topological is better
        # for remote, because we access less data.
        self._fetch_order = 'unordered'
        self._fetch_gc_optimal = True
        self._fetch_uses_deltas = False
0.17.9 by Robert Collins
Initial stab at repository format support.
523
524
0.17.26 by Robert Collins
Working better --gc-plain-chk.
525
if chk_support:
    class GCRCHKPackRepository(CHKInventoryRepository):
        """GC customisation of CHKInventoryRepository."""

        def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
            _serializer):
            """Overridden to change pack collection class."""
            # Deliberately skip CHKInventoryRepository.__init__ and call
            # KnitPackRepository.__init__ directly; everything it sets up is
            # replaced below with groupcompress-aware classes.
            KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
                _commit_builder_class, _serializer)
            # and now replace everything it did :)
            index_transport = self._transport.clone('indices')
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class,
                use_chk_index=self._format.supports_chks,
                )
            self.inventories = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                    add_callback=self._pack_collection.inventory_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.inventory_index.data_access)
            self.revisions = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.revision_index.combined_index,
                    add_callback=self._pack_collection.revision_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.revision_index.data_access,
                delta=False)
            self.signatures = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                    add_callback=self._pack_collection.signature_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.signature_index.data_access,
                delta=False)
            self.texts = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.text_index.combined_index,
                    add_callback=self._pack_collection.text_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=self._pack_collection.text_index.data_access)
            # Explicit check rather than a bare 'assert': assert statements
            # are stripped when Python runs with -O, and this invariant must
            # always hold for a CHK repository.
            if not _format.supports_chks:
                raise AssertionError('GCRCHKPackRepository requires a format'
                                     ' with chk support')
            # No parents, individual CHK pages don't have specific ancestry
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                    add_callback=self._pack_collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=self._pack_collection.chk_index.data_access)
            # True when the repository object is 'write locked' (as opposed to the
            # physical lock only taken out around changes to the pack-names list.)
            # Another way to represent this would be a decorator around the control
            # files object that presents logical locks as physical ones - if this
            # gets ugly consider that alternative design. RBC 20071011
            self._write_lock_count = 0
            self._transaction = None
            # for tests
            self._reconcile_does_inventory_gc = True
            self._reconcile_fixes_text_parents = True
            self._reconcile_backsup_inventory = False
            # Note: We cannot unpack a delta that references a text we haven't
            # seen yet. There are 2 options, work in fulltexts, or require
            # topological sorting. Using fulltexts is more optimal for local
            # operations, because the source can be smart about extracting
            # multiple in-a-row (and sharing strings). Topological is better
            # for remote, because we access less data.
            self._fetch_order = 'unordered'
            self._fetch_gc_optimal = True
            self._fetch_uses_deltas = False
0.17.26 by Robert Collins
Working better --gc-plain-chk.
593
594
0.23.1 by John Arbash Meinel
Start a quick experimentation with a different 'diff' algorithm.
595
class RepositoryFormatPackGCRabin(RepositoryFormatPackDevelopment2):
    """A B+Tree index using pack repository."""

    repository_class = GCRPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        # This string is stored on disk to identify the format; it must
        # never change.
        return ("Bazaar development format - btree+gcr "
            "(needs bzr.dev from 1.13)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        # Human-readable only.  The original concatenation produced the
        # malformed "groupcompress , interoperates" and carried a stray
        # trailing newline, unlike the other descriptions in this module.
        return ("Development repository format - btree+groupcompress, "
            "interoperates with pack-0.92")
609
610
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
611
if chk_support:
    class RepositoryFormatPackGCRabinCHK16(RepositoryFormatPackDevelopment5Hash16):
        """A hashed CHK+group compress pack repository."""

        repository_class = GCRCHKPackRepository

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            # On-disk format marker - must not change.
            return 'Bazaar development format - hash16chk+gcr (needs bzr.dev from 1.13)\n'

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return 'Development repository format - hash16chk+groupcompress'

    class RepositoryFormatPackGCRabinCHK255(RepositoryFormatPackDevelopment5Hash255):
        """A hashed CHK+group compress pack repository."""

        repository_class = GCRCHKPackRepository

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            # On-disk format marker - must not change.
            return 'Bazaar development format - hash255chk+gcr (needs bzr.dev from 1.13)\n'

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return 'Development repository format - hash255chk+groupcompress'

    class RepositoryFormatPackGCRabinRichRootCHK255(RepositoryFormatPackGCRabinCHK255):
        """A rich-root variant of the hash255chk+gcr pack repository."""

        rich_root_data = True
        repository_class = GCRCHKPackRepository

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            # On-disk format marker - must not change.
            return 'Bazaar development format - hash255chk+gcr rich-root (needs bzr.dev from 1.13)\n'

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return 'Development repository format - hash255chk+groupcompress+rr'
656
657
0.17.9 by Robert Collins
Initial stab at repository format support.
658
def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
    """Be incompatible with the regular fetch code.

    Declares InterPackRepo incompatible whenever either side uses one of
    the groupcompress formats, and defers to the original is_compatible
    (captured as a default argument) otherwise.
    """
    gc_formats = [RepositoryFormatPackGCRabin]
    if chk_support:
        gc_formats.append(RepositoryFormatPackGCRabinCHK16)
        # RepositoryFormatPackGCRabinRichRootCHK255 subclasses CHK255, so
        # isinstance() against CHK255 covers it as well.
        gc_formats.append(RepositoryFormatPackGCRabinCHK255)
    gc_formats = tuple(gc_formats)
    if isinstance(source._format, gc_formats):
        return False
    if isinstance(target._format, gc_formats):
        return False
    return orig_method(source, target)
668
669
670
# Monkeypatch InterPackRepo so its compatibility check routes through
# pack_incompatible (which vetoes the groupcompress formats defined above
# and otherwise falls back to the original method).
InterPackRepo.is_compatible = staticmethod(pack_incompatible)