/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.17.9 by Robert Collins
Initial stab at repository format support.
1
# groupcompress, a bzr plugin providing improved disk utilisation
2
# Copyright (C) 2008 Canonical Limited.
3
# 
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License version 2 as published
6
# by the Free Software Foundation.
7
# 
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
# 
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
16
# 
17
18
"""Repository formats using B+Tree indices and groupcompress compression."""
19
20
import md5
21
import time
22
23
from bzrlib import debug, errors, pack, repository
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
24
from bzrlib.btree_index import (
25
    BTreeBuilder,
26
    BTreeGraphIndex,
27
    )
0.17.9 by Robert Collins
Initial stab at repository format support.
28
from bzrlib.index import GraphIndex, GraphIndexBuilder
29
from bzrlib.repository import InterPackRepo
30
from bzrlib.plugins.groupcompress.groupcompress import (
31
    _GCGraphIndex,
32
    GroupCompressVersionedFiles,
33
    )
34
from bzrlib.osutils import rand_chars
35
from bzrlib.repofmt.pack_repo import (
36
    Pack,
37
    NewPack,
38
    KnitPackRepository,
39
    RepositoryPackCollection,
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
40
    RepositoryFormatPackDevelopment2,
41
    RepositoryFormatPackDevelopment2Subtree,
0.17.9 by Robert Collins
Initial stab at repository format support.
42
    RepositoryFormatKnitPack1,
43
    RepositoryFormatKnitPack3,
44
    RepositoryFormatKnitPack4,
45
    Packer,
46
    ReconcilePacker,
47
    OptimisingPacker,
48
    )
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
49
try:
50
    from bzrlib.repofmt.pack_repo import (
0.17.26 by Robert Collins
Working better --gc-plain-chk.
51
    CHKInventoryRepository,
0.21.1 by John Arbash Meinel
Start basing the groupcompress chk formats on the dev5 formats.
52
    RepositoryFormatPackDevelopment5,
53
    RepositoryFormatPackDevelopment5Hash16,
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
54
    )
55
    chk_support = True
56
except ImportError:
57
    chk_support = False
0.17.9 by Robert Collins
Initial stab at repository format support.
58
from bzrlib import ui
59
60
61
def open_pack(self):
    """Create the pack this packer writes its output into.

    Written to be installed as ``Packer.open_pack``: the new pack is built
    by calling the ``pack_factory`` of the packer's pack collection, so the
    collection decides which pack class gets instantiated.

    :return: The object returned by ``self._pack_collection.pack_factory``,
        created with this packer's ``suffix`` as the upload suffix and the
        file mode reported by the repository's bzrdir.
    """
    collection = self._pack_collection
    return collection.pack_factory(
        collection,
        upload_suffix=self.suffix,
        file_mode=collection.repo.bzrdir._get_file_mode())
65
66
67
# Monkeypatch bzrlib's Packer so that every packer creates its new pack via
# the open_pack() defined above, i.e. through the collection's pack_factory.
Packer.open_pack = open_pack
68
69
70
class GCPack(NewPack):
    """A NewPack whose index builders are set up for groupcompress.

    The initialiser is a replacement for the bzr.dev one that gives the
    inventory and text indices a single reference list each.
    """

    def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Create a GCPack instance.

        :param pack_collection: The pack collection the new pack belongs to.
            Its ``_index_builder_class`` and ``_index_class`` select the
            index implementations, and its ``_upload_transport``,
            ``_index_transport`` and ``_pack_transport`` say where the pack
            is uploaded, where indices are written and where the finished
            pack is renamed to.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g '.autopack'
        :param file_mode: An optional file mode to create the new files with.
        """
        # replaced from bzr.dev to:
        # - change inventory reference list length to 1
        # - change texts reference lists to 1
        # TODO: patch this to be parameterised upstream

        index_builder_class = pack_collection._index_builder_class
        # Only hand a chk_index to Pack.__init__ when chk support was
        # importable (the "from brisbane-core" case); otherwise call it with
        # the four classic indices only (the "from bzr.dev" case).
        pack_kwargs = {}
        if chk_support:
            if pack_collection.chk_index is not None:
                # CHK based storage - just blobs, no compression or parents.
                pack_kwargs['chk_index'] = index_builder_class(
                    reference_lists=0)
            else:
                pack_kwargs['chk_index'] = None
        Pack.__init__(self,
            # Revisions: parents list, no text compression.
            index_builder_class(reference_lists=1),
            # Inventory: a single reference list (the graph), kept for
            # compatibility with other existing bzrlib code.
            index_builder_class(reference_lists=1),
            # Texts: per file graph, for all fileids - so one reference
            # list and two elements in the key tuple.
            index_builder_class(reference_lists=1, key_elements=2),
            # Signatures: Just blobs to store, no compression, no parents
            # listing.
            index_builder_class(reference_lists=0),
            **pack_kwargs)
        self._pack_collection = pack_collection
        # When we make readonly indices, we need this.
        self.index_class = pack_collection._index_class
        # where should the new pack be opened
        self.upload_transport = pack_collection._upload_transport
        # where are indices written out to
        self.index_transport = pack_collection._index_transport
        # where is the pack renamed to when it is finished?
        self.pack_transport = pack_collection._pack_transport
        # What file mode to upload the pack and indices with.
        self._file_mode = file_mode
        # tracks the content written to the .pack file.
        self._hash = md5.new()
        # a four-tuple with the length in bytes of the indices, once the pack
        # is finalised. (rev, inv, text, sigs)
        self.index_sizes = None
        # How much data to cache when writing packs. Note that this is not
        # synchronised with reads, because it's not in the transport layer, so
        # is not safe unless the client knows it won't be reading from the pack
        # under creation.
        self._cache_limit = 0
        # the temporary pack file name.
        self.random_name = rand_chars(20) + upload_suffix
        # when was this pack started ?
        self.start_time = time.time()
        # open an output stream for the data added to the pack.
        self.write_stream = self.upload_transport.open_write_stream(
            self.random_name, mode=self._file_mode)
        if 'pack' in debug.debug_flags:
            # mutter is not imported at module level in this file; import it
            # here so enabling the 'pack' debug flag does not raise NameError.
            from bzrlib.trace import mutter
            mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
                time.ctime(), self.upload_transport.base, self.random_name,
                time.time() - self.start_time)
        # A list of byte sequences to be written to the new pack, and the
        # aggregate size of them.  Stored as a list rather than separate
        # variables so that the _write_data closure below can update them.
        self._buffer = [[], 0]
        # create a callable for adding data
        #
        # robertc says- this is a closure rather than a method on the object
        # so that the variables are locals, and faster than accessing object
        # members.
        def _write_data(bytes, flush=False, _buffer=self._buffer,
            _write=self.write_stream.write, _update=self._hash.update):
            _buffer[0].append(bytes)
            _buffer[1] += len(bytes)
            # buffer cap
            if _buffer[1] > self._cache_limit or flush:
                bytes = ''.join(_buffer[0])
                _write(bytes)
                _update(bytes)
                # reset in place: the default argument above holds the same
                # list object as self._buffer.
                _buffer[:] = [[], 0]
        # expose this on self, for the occasion when clients want to add data.
        self._write_data = _write_data
        # a pack writer object to serialise pack records.
        self._writer = pack.ContainerWriter(self._write_data)
        self._writer.begin()
        # what state is the pack in? (open, finished, aborted)
        self._state = 'open'
189
190
191
# Set NewPack as the pack_factory of the stock pack collection class;
# GCRepositoryPackCollection below overrides it with GCPack.
# NOTE(review): presumably required because the patched Packer.open_pack
# looks up pack_factory on whatever collection it is given - confirm.
RepositoryPackCollection.pack_factory = NewPack
192
193
class GCRepositoryPackCollection(RepositoryPackCollection):
    """Pack collection that creates GCPack packs and BTreeGraphIndex indices."""

    pack_factory = GCPack

    def _make_index(self, name, suffix):
        """Overridden to use BTreeGraphIndex objects.

        :param name: The pack name, used as a key into ``self._names``.
        :param suffix: The index suffix; selects the size entry through
            ``self._suffix_offsets`` and is appended to ``name`` to form the
            index file name.
        :return: A BTreeGraphIndex on the index transport.
        """
        size_offset = self._suffix_offsets[suffix]
        index_name = name + suffix
        index_size = self._names[name][size_offset]
        return BTreeGraphIndex(
            self._index_transport, index_name, index_size)

    def _start_write_group(self):
        """Open a new pack and route all index writes to it.

        Overridden to add 'self.pack_factory()'.

        :raises errors.NotWriteLocked: If the repository is not write locked.
        """
        # Do not permit preparation for writing if we're not in a 'write lock'.
        if not self.repo.is_write_locked():
            raise errors.NotWriteLocked(self)
        self._new_pack = self.pack_factory(self, upload_suffix='.pack',
            file_mode=self.repo.bzrdir._get_file_mode())
        new_pack = self._new_pack
        # allow writing: queue writes to a new index
        self.revision_index.add_writable_index(new_pack.revision_index,
            new_pack)
        self.inventory_index.add_writable_index(new_pack.inventory_index,
            new_pack)
        self.text_index.add_writable_index(new_pack.text_index,
            new_pack)
        self.signature_index.add_writable_index(new_pack.signature_index,
            new_pack)
        if chk_support and self.chk_index is not None:
            self.chk_index.add_writable_index(new_pack.chk_index,
                new_pack)
            self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback

        # Point the repository's versioned-file indices at the callbacks of
        # the aggregate indices.
        repo = self.repo
        repo.inventories._index._add_callback = self.inventory_index.add_callback
        repo.revisions._index._add_callback = self.revision_index.add_callback
        repo.signatures._index._add_callback = self.signature_index.add_callback
        repo.texts._index._add_callback = self.text_index.add_callback
230
231
232
233
class GCPackRepository(KnitPackRepository):
    """GC customisation of KnitPackRepository."""

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
        _serializer):
        """Overridden to change pack collection class.

        Runs the KnitPackRepository initialiser, then replaces the pack
        collection with a GCRepositoryPackCollection and the inventories,
        revisions, signatures and texts stores (plus chk_bytes when the
        format supports chks) with GroupCompressVersionedFiles.
        """
        KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
            _commit_builder_class, _serializer)
        # and now replace everything it did :)
        index_transport = self._transport.clone('indices')
        # use_chk_index is only passed when chk support could be imported.
        collection_kwargs = {}
        if chk_support:
            collection_kwargs['use_chk_index'] = self._format.supports_chks
        self._pack_collection = GCRepositoryPackCollection(self,
            self._transport, index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            **collection_kwargs)
        collection = self._pack_collection
        self.inventories = GroupCompressVersionedFiles(
            _GCGraphIndex(collection.inventory_index.combined_index,
                add_callback=collection.inventory_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=collection.inventory_index.data_access)
        self.revisions = GroupCompressVersionedFiles(
            _GCGraphIndex(collection.revision_index.combined_index,
                add_callback=collection.revision_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=collection.revision_index.data_access,
            delta=False)
        self.signatures = GroupCompressVersionedFiles(
            _GCGraphIndex(collection.signature_index.combined_index,
                add_callback=collection.signature_index.add_callback,
                parents=False, is_locked=self.is_locked),
            access=collection.signature_index.data_access,
            delta=False)
        self.texts = GroupCompressVersionedFiles(
            _GCGraphIndex(collection.text_index.combined_index,
                add_callback=collection.text_index.add_callback,
                parents=True, is_locked=self.is_locked),
            access=collection.text_index.data_access)
        if chk_support and _format.supports_chks:
            # No graph, no compression:- references from chks are between
            # different objects not temporal versions of the same; and without
            # some sort of temporal structure knit compression will just fail.
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(collection.chk_index.combined_index,
                    add_callback=collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=collection.chk_index.data_access)
        else:
            self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
303
304
0.17.26 by Robert Collins
Working better --gc-plain-chk.
305
if chk_support:
    class GCCHKPackRepository(CHKInventoryRepository):
        """GC customisation of CHKInventoryRepository."""

        def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
            _serializer):
            """Overridden to change pack collection class.

            Replaces the pack collection with a GCRepositoryPackCollection
            and the inventories, revisions, signatures, texts and chk_bytes
            stores with GroupCompressVersionedFiles. ``_format`` must
            support chks.
            """
            # NOTE(review): this calls KnitPackRepository.__init__ directly
            # rather than CHKInventoryRepository.__init__ - presumably on
            # purpose, since everything the initialiser sets up is replaced
            # below; confirm before changing.
            KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
                _commit_builder_class, _serializer)
            # and now replace everything it did :)
            index_transport = self._transport.clone('indices')
            self._pack_collection = GCRepositoryPackCollection(self,
                self._transport, index_transport,
                self._transport.clone('upload'),
                self._transport.clone('packs'),
                _format.index_builder_class,
                _format.index_class,
                use_chk_index=self._format.supports_chks,
                )
            collection = self._pack_collection
            self.inventories = GroupCompressVersionedFiles(
                _GCGraphIndex(collection.inventory_index.combined_index,
                    add_callback=collection.inventory_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=collection.inventory_index.data_access)
            self.revisions = GroupCompressVersionedFiles(
                _GCGraphIndex(collection.revision_index.combined_index,
                    add_callback=collection.revision_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=collection.revision_index.data_access,
                delta=False)
            self.signatures = GroupCompressVersionedFiles(
                _GCGraphIndex(collection.signature_index.combined_index,
                    add_callback=collection.signature_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=collection.signature_index.data_access,
                delta=False)
            self.texts = GroupCompressVersionedFiles(
                _GCGraphIndex(collection.text_index.combined_index,
                    add_callback=collection.text_index.add_callback,
                    parents=True, is_locked=self.is_locked),
                access=collection.text_index.data_access)
            # Internal invariant: this class is only used by chk formats.
            assert _format.supports_chks
            # No parents, individual CHK pages don't have specific ancestry
            self.chk_bytes = GroupCompressVersionedFiles(
                _GCGraphIndex(collection.chk_index.combined_index,
                    add_callback=collection.chk_index.add_callback,
                    parents=False, is_locked=self.is_locked),
                access=collection.chk_index.data_access)
            # True when the repository object is 'write locked' (as opposed to the
            # physical lock only taken out around changes to the pack-names list.)
            # Another way to represent this would be a decorator around the control
            # files object that presents logical locks as physical ones - if this
            # gets ugly consider that alternative design. RBC 20071011
            self._write_lock_count = 0
            self._transaction = None
            # for tests
            self._reconcile_does_inventory_gc = True
            self._reconcile_fixes_text_parents = True
            self._reconcile_backsup_inventory = False
364
365
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
366
class RepositoryFormatPackGCPlain(RepositoryFormatPackDevelopment2):
    """A B+Tree index using pack repository.

    Plain groupcompress format; repositories are opened as GCPackRepository.
    """

    repository_class = GCPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        # This string identifies the format; it must not be altered.
        return ("Bazaar development format - btree+gc "
            "(needs bzr.dev from 1.6)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - btree+groupcompress "
            ", interoperates with pack-0.92\n")
380
381
382
class RepositoryFormatPackGCRichRoot(RepositoryFormatKnitPack4):
    """A B+Tree index using pack repository.

    Rich-root groupcompress format; repositories are opened as
    GCPackRepository.
    """

    repository_class = GCPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        # This string identifies the format; it must not be altered.
        return ("Bazaar development format - btree+gc-rich-root "
            "(needs bzr.dev from 1.6)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - btree+groupcompress "
            ", interoperates with rich-root-pack\n")
396
397
0.17.21 by Robert Collins
Update groupcompress to bzrlib 1.10.
398
class RepositoryFormatPackGCSubtrees(RepositoryFormatPackDevelopment2Subtree):
    """A B+Tree index using pack repository.

    Subtree-capable groupcompress format; repositories are opened as
    GCPackRepository.
    """

    repository_class = GCPackRepository

    def get_format_string(self):
        """See RepositoryFormat.get_format_string()."""
        # This string identifies the format; it must not be altered.
        return ("Bazaar development format - btree+gc-subtrees "
            "(needs bzr.dev from 1.6)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format - btree+groupcompress "
            ", interoperates with pack-0.92-subtrees\n")
412
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
413
if chk_support:
    class RepositoryFormatPackGCPlainCHK(RepositoryFormatPackDevelopment5):
        """A CHK+group compress pack repository."""

        repository_class = GCCHKPackRepository

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            # This string identifies the format; it must not be altered.
            return ('Bazaar development format - chk+gc'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return ("Development repository format - chk+groupcompress")


    class RepositoryFormatPackGCPlainCHK16(RepositoryFormatPackDevelopment5Hash16):
        """A hashed CHK+group compress pack repository."""

        repository_class = GCCHKPackRepository

        def get_format_string(self):
            """See RepositoryFormat.get_format_string()."""
            # This string identifies the format; it must not be altered.
            return ('Bazaar development format - hash16chk+gc'
                    ' (needs bzr.dev from 1.13)\n')

        def get_format_description(self):
            """See RepositoryFormat.get_format_description()."""
            return ("Development repository format - hash16chk+groupcompress")
0.17.25 by Robert Collins
Preliminary --gc-plain-chk support.
442
443
0.17.9 by Robert Collins
Initial stab at repository format support.
444
def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
    """Be incompatible with the regular fetch code.

    :param source: The source repository; its ``_format`` is inspected.
    :param target: The target repository; its ``_format`` is inspected.
    :param orig_method: The compatibility check to defer to when neither
        repository uses a groupcompress format. It defaults to
        ``InterPackRepo.is_compatible`` as it was when this function was
        defined.
    :return: False if either repository uses one of the groupcompress
        formats defined in this module, otherwise the result of
        ``orig_method(source, target)``.
    """
    formats = (RepositoryFormatPackGCPlain, RepositoryFormatPackGCRichRoot,
        RepositoryFormatPackGCSubtrees)
    if chk_support:
        # The CHK formats only exist when chk support could be imported.
        formats = formats + (RepositoryFormatPackGCPlainCHK,
                             RepositoryFormatPackGCPlainCHK16)
    if isinstance(source._format, formats) or isinstance(target._format, formats):
        return False
    return orig_method(source, target)
455
456
457
# Replace InterPackRepo's compatibility check with pack_incompatible, which
# returns False for the groupcompress formats and otherwise defers to the
# original check captured as its orig_method default.
InterPackRepo.is_compatible = staticmethod(pack_incompatible)