# Copyright (C) 2005, 2006, 2007, 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
from itertools import izip
import time

from bzrlib import (
    debug,
    graph,
    osutils,
    pack,
    ui,
    )
from bzrlib.index import (
    CombinedGraphIndex,
    GraphIndexPrefixAdapter,
    )
from bzrlib.knit import (
    KnitPlainFactory,
    KnitVersionedFiles,
    _KnitGraphIndex,
    _DirectPackAccess,
    )
from bzrlib import tsort
""")
from bzrlib import errors

from bzrlib.decorators import needs_write_lock
from bzrlib.btree_index import (
    BTreeBuilder,
    BTreeGraphIndex,
    )
from bzrlib.index import (
    GraphIndex,
    InMemoryGraphIndex,
    )
from bzrlib.repofmt.knitrepo import KnitRepository
from bzrlib.repository import (
    CommitBuilder,
    MetaDirRepositoryFormat,
    RootCommitBuilder,
    )
import bzrlib.revision as _mod_revision
from bzrlib.trace import (
    mutter,
    warning,
    )
class PackCommitBuilder(CommitBuilder):
    """A subclass of CommitBuilder to add texts with pack semantics.

    Specifically this uses one knit object rather than one knit object per
    added text, reducing memory and object pressure.
    """

    def __init__(self, repository, parents, config, timestamp=None,
                 timezone=None, committer=None, revprops=None,
                 revision_id=None):
        CommitBuilder.__init__(self, repository, parents, config,
            timestamp=timestamp, timezone=timezone, committer=committer,
            revprops=revprops, revision_id=revision_id)
        self._file_graph = graph.Graph(
            repository._pack_collection.text_index.combined_index)

    def _heads(self, file_id, revision_ids):
        keys = [(file_id, revision_id) for revision_id in revision_ids]
        return set([key[1] for key in self._file_graph.heads(keys)])
100
class PackRootCommitBuilder(RootCommitBuilder):
    """A subclass of RootCommitBuilder to add texts with pack semantics.

    Specifically this uses one knit object rather than one knit object per
    added text, reducing memory and object pressure.
    """

    def __init__(self, repository, parents, config, timestamp=None,
                 timezone=None, committer=None, revprops=None,
                 revision_id=None):
        CommitBuilder.__init__(self, repository, parents, config,
            timestamp=timestamp, timezone=timezone, committer=committer,
            revprops=revprops, revision_id=revision_id)
        self._file_graph = graph.Graph(
            repository._pack_collection.text_index.combined_index)

    def _heads(self, file_id, revision_ids):
        keys = [(file_id, revision_id) for revision_id in revision_ids]
        return set([key[1] for key in self._file_graph.heads(keys)])
class Pack(object):
    """An in memory proxy for a pack and its indices.

    This is a base class that is not directly used, instead the classes
    ExistingPack and NewPack are used.
    """

    def __init__(self, revision_index, inventory_index, text_index,
        signature_index):
        """Create a pack instance.

        :param revision_index: A GraphIndex for determining what revisions are
            present in the Pack and accessing the locations of their texts.
        :param inventory_index: A GraphIndex for determining what inventories are
            present in the Pack and accessing the locations of their
            texts/deltas.
        :param text_index: A GraphIndex for determining what file texts
            are present in the pack and accessing the locations of their
            texts/deltas (via (fileid, revisionid) tuples).
        :param signature_index: A GraphIndex for determining what signatures are
            present in the Pack and accessing the locations of their texts.
        """
        self.revision_index = revision_index
        self.inventory_index = inventory_index
        self.text_index = text_index
        self.signature_index = signature_index

    def access_tuple(self):
        """Return a tuple (transport, name) for the pack content."""
        return self.pack_transport, self.file_name()

    def file_name(self):
        """Get the file name for the pack on disk."""
        return self.name + '.pack'

    def get_revision_count(self):
        return self.revision_index.key_count()

    def inventory_index_name(self, name):
        """The inv index is the name + .iix."""
        return self.index_name('inventory', name)

    def revision_index_name(self, name):
        """The revision index is the name + .rix."""
        return self.index_name('revision', name)

    def signature_index_name(self, name):
        """The signature index is the name + .six."""
        return self.index_name('signature', name)

    def text_index_name(self, name):
        """The text index is the name + .tix."""
        return self.index_name('text', name)
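# Naming note (illustrative, not from the original source): given the Pack
# index-name helpers above, a pack named from its content hash, e.g.
# name = 'a1b2c3', keeps its data in 'a1b2c3.pack' and its indices in
# 'a1b2c3.rix', 'a1b2c3.iix', 'a1b2c3.tix' and 'a1b2c3.six'.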
class ExistingPack(Pack):
    """An in memory proxy for an existing .pack and its disk indices."""

    def __init__(self, pack_transport, name, revision_index, inventory_index,
        text_index, signature_index):
        """Create an ExistingPack object.

        :param pack_transport: The transport where the pack file resides.
        :param name: The name of the pack on disk in the pack_transport.
        """
        Pack.__init__(self, revision_index, inventory_index, text_index,
            signature_index)
        self.name = name
        self.pack_transport = pack_transport
        if None in (revision_index, inventory_index, text_index,
                signature_index, name, pack_transport):
            raise AssertionError()

    def __eq__(self, other):
        return self.__dict__ == other.__dict__

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "<bzrlib.repofmt.pack_repo.Pack object at 0x%x, %s, %s" % (
            id(self), self.pack_transport, self.name)
class NewPack(Pack):
    """An in memory proxy for a pack which is being created."""

    # A map of index 'type' to the file extension and position in the
    # index_sizes array.
    index_definitions = {
        'revision': ('.rix', 0),
        'inventory': ('.iix', 1),
        'text': ('.tix', 2),
        'signature': ('.six', 3),
        }
def __init__(self, pack_collection, upload_suffix='', file_mode=None):
        """Create a NewPack instance.

        :param pack_collection: A PackCollection into which this is being inserted.
        :param upload_suffix: An optional suffix to be given to any temporary
            files created during the pack creation. e.g. '.autopack'
        :param file_mode: Unix permissions for newly created file.
        """
        # The relative locations of the packs are constrained, but all are
        # passed in because the caller has them, so as to avoid object churn.
        index_builder_class = pack_collection._index_builder_class
        Pack.__init__(self,
            # Revisions: parents list, no text compression.
            index_builder_class(reference_lists=1),
            # Inventory: We want to map compression only, but currently the
            # knit code hasn't been updated enough to understand that, so we
            # have a regular 2-list index giving parents and compression
            # source parents.
            index_builder_class(reference_lists=2),
            # Texts: compression and per file graph, for all fileids - so two
            # reference lists and two elements in the key tuple.
            index_builder_class(reference_lists=2, key_elements=2),
            # Signatures: Just blobs to store, no compression, no parents
            # list, plain indices.
            index_builder_class(reference_lists=0),
            )
self._pack_collection = pack_collection
244
# When we make readonly indices, we need this.
245
self.index_class = pack_collection._index_class
246
# where should the new pack be opened
247
self.upload_transport = pack_collection._upload_transport
248
# where are indices written out to
249
self.index_transport = pack_collection._index_transport
250
# where is the pack renamed to when it is finished?
251
self.pack_transport = pack_collection._pack_transport
252
# What file mode to upload the pack and indices with.
253
self._file_mode = file_mode
254
# tracks the content written to the .pack file.
255
self._hash = osutils.md5()
256
# a four-tuple with the length in bytes of the indices, once the pack
257
# is finalised. (rev, inv, text, sigs)
258
self.index_sizes = None
259
# How much data to cache when writing packs. Note that this is not
260
# synchronised with reads, because it's not in the transport layer, so
261
# is not safe unless the client knows it won't be reading from the pack
263
self._cache_limit = 0
264
# the temporary pack file name.
265
self.random_name = osutils.rand_chars(20) + upload_suffix
266
# when was this pack started ?
267
self.start_time = time.time()
268
# open an output stream for the data added to the pack.
269
self.write_stream = self.upload_transport.open_write_stream(
270
self.random_name, mode=self._file_mode)
271
if 'pack' in debug.debug_flags:
272
mutter('%s: create_pack: pack stream open: %s%s t+%6.3fs',
273
time.ctime(), self.upload_transport.base, self.random_name,
274
time.time() - self.start_time)
275
# A list of byte sequences to be written to the new pack, and the
276
# aggregate size of them. Stored as a list rather than separate
277
# variables so that the _write_data closure below can update them.
278
self._buffer = [[], 0]
279
        # create a callable for adding data
        #
        # robertc says- this is a closure rather than a method on the object
        # so that the variables are locals, and faster than accessing object
        # members.
        def _write_data(bytes, flush=False, _buffer=self._buffer,
            _write=self.write_stream.write, _update=self._hash.update):
            _buffer[0].append(bytes)
            _buffer[1] += len(bytes)
            if _buffer[1] > self._cache_limit or flush:
                bytes = ''.join(_buffer[0])
                _write(bytes)
                _update(bytes)
                _buffer[:] = [[], 0]
        # expose this on self, for the occasion when clients want to add data.
        self._write_data = _write_data
        # a pack writer object to serialise pack records.
        self._writer = pack.ContainerWriter(self._write_data)
        self._writer.begin()
        # what state is the pack in? (open, finished, aborted)
        self._state = 'open'

    def abort(self):
        """Cancel creating this pack."""
        self._state = 'aborted'
305
self.write_stream.close()
306
# Remove the temporary pack file.
307
self.upload_transport.delete(self.random_name)
308
# The indices have no state on disk.
310
def access_tuple(self):
311
"""Return a tuple (transport, name) for the pack content."""
312
if self._state == 'finished':
313
return Pack.access_tuple(self)
314
        elif self._state == 'open':
            return self.upload_transport, self.random_name
        else:
            raise AssertionError(self._state)
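    # Lifecycle note (illustrative): a NewPack starts in state 'open' (data is
    # written to the randomly named upload file), moves to 'finished' once
    # finish() renames it into the pack store, or to 'aborted' if abort()
    # deletes the temporary upload file.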
    def _check_references(self):
        """Make sure our external references are present.

        Packs are allowed to have deltas whose base is not in the pack, but it
        must be present somewhere in this collection.  It is not allowed to
        have deltas based on a fallback repository.
        (See <https://bugs.launchpad.net/bzr/+bug/288751>)
        """
        missing_items = {}
        for (index_name, external_refs, index) in [
            ('texts',
                self.text_index._external_references(),
                self._pack_collection.text_index.combined_index),
            ('inventories',
                self.inventory_index._external_references(),
                self._pack_collection.inventory_index.combined_index),
            ]:
            missing = external_refs.difference(
                k for (idx, k, v, r) in
                index.iter_entries(external_refs))
            if missing:
                missing_items[index_name] = sorted(list(missing))
        if missing_items:
            from pprint import pformat
            raise errors.BzrCheckError(
                "Newly created pack file %r has delta references to "
                "items not in its repository:\n%s"
                % (self, pformat(missing_items)))
def data_inserted(self):
349
"""True if data has been added to this pack."""
350
return bool(self.get_revision_count() or
351
self.inventory_index.key_count() or
352
self.text_index.key_count() or
353
self.signature_index.key_count())
    def finish(self):
        """Finish the new pack.

        This:
         - finalises the content
         - assigns a name (the md5 of the content, currently)
         - writes out the associated indices
         - renames the pack into place.
         - stores the index size tuple for the pack in the index_sizes
           attribute.
        """
        self._writer.end()
        if self._buffer[1]:
            self._write_data('', flush=True)
        self.name = self._hash.hexdigest()
        self._check_references()
372
# XXX: It'd be better to write them all to temporary names, then
373
# rename them all into place, so that the window when only some are
374
# visible is smaller. On the other hand none will be seen until
375
# they're in the names list.
376
self.index_sizes = [None, None, None, None]
377
self._write_index('revision', self.revision_index, 'revision')
378
self._write_index('inventory', self.inventory_index, 'inventory')
379
self._write_index('text', self.text_index, 'file texts')
380
self._write_index('signature', self.signature_index,
381
'revision signatures')
382
self.write_stream.close()
383
# Note that this will clobber an existing pack with the same name,
384
# without checking for hash collisions. While this is undesirable this
385
# is something that can be rectified in a subsequent release. One way
386
# to rectify it may be to leave the pack at the original name, writing
387
# its pack-names entry as something like 'HASH: index-sizes
388
# temporary-name'. Allocate that and check for collisions, if it is
389
# collision free then rename it into place. If clients know this scheme
390
# they can handle missing-file errors by:
391
# - try for HASH.pack
392
# - try for temporary-name
393
# - refresh the pack-list to see if the pack is now absent
394
self.upload_transport.rename(self.random_name,
395
'../packs/' + self.name + '.pack')
396
self._state = 'finished'
397
if 'pack' in debug.debug_flags:
398
# XXX: size might be interesting?
399
mutter('%s: create_pack: pack renamed into place: %s%s->%s%s t+%6.3fs',
400
time.ctime(), self.upload_transport.base, self.random_name,
401
self.pack_transport, self.name,
402
time.time() - self.start_time)
    def flush(self):
        """Flush any current data."""
        if self._buffer[1]:
            bytes = ''.join(self._buffer[0])
            self.write_stream.write(bytes)
            self._hash.update(bytes)
            self._buffer[:] = [[], 0]
412
def index_name(self, index_type, name):
413
"""Get the disk name of an index type for pack name 'name'."""
414
return name + NewPack.index_definitions[index_type][0]
416
def index_offset(self, index_type):
417
"""Get the position in a index_size array for a given index type."""
418
return NewPack.index_definitions[index_type][1]
420
def _replace_index_with_readonly(self, index_type):
421
setattr(self, index_type + '_index',
422
self.index_class(self.index_transport,
423
self.index_name(index_type, self.name),
424
self.index_sizes[self.index_offset(index_type)]))
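    # Illustrative note: after finish(), _replace_index_with_readonly('revision')
    # (for example) rebinds self.revision_index to a read-only index opened
    # from '<name>.rix' on the index transport, sized from self.index_sizes.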
426
def set_write_cache_size(self, size):
427
self._cache_limit = size
429
def _write_index(self, index_type, index, label):
430
"""Write out an index.
432
:param index_type: The type of index to write - e.g. 'revision'.
433
:param index: The index object to serialise.
434
        :param label: What label to give the index e.g. 'revision'.
        """
        index_name = self.index_name(index_type, self.name)
437
self.index_sizes[self.index_offset(index_type)] = \
438
self.index_transport.put_file(index_name, index.finish(),
439
mode=self._file_mode)
440
if 'pack' in debug.debug_flags:
441
# XXX: size might be interesting?
442
mutter('%s: create_pack: wrote %s index: %s%s t+%6.3fs',
443
time.ctime(), label, self.upload_transport.base,
444
self.random_name, time.time() - self.start_time)
445
# Replace the writable index on this object with a readonly,
446
# presently unloaded index. We should alter
447
# the index layer to make its finish() error if add_node is
448
# subsequently used. RBC
449
self._replace_index_with_readonly(index_type)
452
class AggregateIndex(object):
453
"""An aggregated index for the RepositoryPackCollection.
455
AggregateIndex is reponsible for managing the PackAccess object,
456
Index-To-Pack mapping, and all indices list for a specific type of index
457
such as 'revision index'.
459
A CombinedIndex provides an index on a single key space built up
460
from several on-disk indices. The AggregateIndex builds on this
461
to provide a knit access layer, and allows having up to one writable
462
index within the collection.
464
# XXX: Probably 'can be written to' could/should be separated from 'acts
465
# like a knit index' -- mbp 20071024
467
def __init__(self, reload_func=None):
468
"""Create an AggregateIndex.
470
:param reload_func: A function to call if we find we are missing an
471
index. Should have the form reload_func() => True if the list of
472
active pack files has changed.
474
self._reload_func = reload_func
475
self.index_to_pack = {}
476
self.combined_index = CombinedGraphIndex([], reload_func=reload_func)
477
self.data_access = _DirectPackAccess(self.index_to_pack,
478
reload_func=reload_func)
479
self.add_callback = None
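        # Summary of the wiring above (illustrative): queries are answered via
        # self.combined_index, record data is fetched through self.data_access
        # (which maps index entries back to pack files via self.index_to_pack),
        # and new entries flow through self.add_callback once a writable index
        # has been added with add_writable_index().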
481
def replace_indices(self, index_to_pack, indices):
482
"""Replace the current mappings with fresh ones.
484
This should probably not be used eventually, rather incremental add and
485
removal of indices. It has been added during refactoring of existing
488
:param index_to_pack: A mapping from index objects to
489
(transport, name) tuples for the pack file data.
490
:param indices: A list of indices.
492
# refresh the revision pack map dict without replacing the instance.
493
self.index_to_pack.clear()
494
self.index_to_pack.update(index_to_pack)
495
# XXX: API break - clearly a 'replace' method would be good?
496
self.combined_index._indices[:] = indices
497
# the current add nodes callback for the current writable index if
499
self.add_callback = None
501
def add_index(self, index, pack):
502
"""Add index to the aggregate, which is an index for Pack pack.
504
Future searches on the aggregate index will seach this new index
505
before all previously inserted indices.
507
:param index: An Index for the pack.
508
:param pack: A Pack instance.
510
# expose it to the index map
511
self.index_to_pack[index] = pack.access_tuple()
512
# put it at the front of the linear index list
513
self.combined_index.insert_index(0, index)
515
def add_writable_index(self, index, pack):
516
"""Add an index which is able to have data added to it.
518
There can be at most one writable index at any time. Any
519
modifications made to the knit are put into this index.
521
:param index: An index from the pack parameter.
522
        :param pack: A Pack instance.
        """
        if self.add_callback is not None:
525
raise AssertionError(
526
"%s already has a writable index through %s" % \
527
(self, self.add_callback))
528
# allow writing: queue writes to a new index
529
self.add_index(index, pack)
530
# Updates the index to packs mapping as a side effect,
531
self.data_access.set_writer(pack._writer, index, pack.access_tuple())
532
self.add_callback = index.add_nodes
535
"""Reset all the aggregate data to nothing."""
536
self.data_access.set_writer(None, None, (None, None))
537
self.index_to_pack.clear()
538
del self.combined_index._indices[:]
539
self.add_callback = None
541
def remove_index(self, index, pack):
542
"""Remove index from the indices used to answer queries.
544
:param index: An index from the pack parameter.
545
:param pack: A Pack instance.
547
del self.index_to_pack[index]
548
self.combined_index._indices.remove(index)
549
if (self.add_callback is not None and
550
getattr(index, 'add_nodes', None) == self.add_callback):
551
self.add_callback = None
552
self.data_access.set_writer(None, None, (None, None))
555
class Packer(object):
556
"""Create a pack from packs."""
558
    def __init__(self, pack_collection, packs, suffix, revision_ids=None):
        """Create a Packer.

        :param pack_collection: A RepositoryPackCollection object where the
            new pack is being written to.
        :param packs: The packs to combine.
        :param suffix: The suffix to use on the temporary files for the pack.
        :param revision_ids: Revision ids to limit the pack to.
        """
        self.packs = packs
        self.suffix = suffix
        self.revision_ids = revision_ids
        # The pack object we are creating.
        self.new_pack = None
        self._pack_collection = pack_collection
        # The index layer keys for the revisions being copied. None for 'all
        # revisions'.
        self._revision_keys = None
        # What text keys to copy. None for 'all texts'. This is set by
        # _copy_inventory_texts
        self._text_filter = None
        self._extra_init()
def _extra_init(self):
582
"""A template hook to allow extending the constructor trivially."""
584
    def pack(self, pb=None):
        """Create a new pack by reading data from other packs.

        This does little more than a bulk copy of data. One key difference
        is that data with the same item key across multiple packs is elided
        from the output. The new pack is written into the current pack store
        along with its indices, and the name added to the pack names. The
        source packs are not altered and are not required to be in the current
        pack collection.

        :param pb: An optional progress bar to use. A nested bar is created if
            this is None.
        :return: A Pack object, or None if nothing was copied.
        """
        # open a pack - using the same name as the last temporary file
        # - which has already been flushed, so it's safe.
        # XXX: - duplicate code warning with start_write_group; fix before
        #      considering 'done'.
        if self._pack_collection._new_pack is not None:
            raise errors.BzrError('call to create_pack_from_packs while '
                'another pack is being written.')
        if self.revision_ids is not None:
            if len(self.revision_ids) == 0:
                # silly fetch request.
                return None
            else:
                self.revision_ids = frozenset(self.revision_ids)
                self.revision_keys = frozenset((revid,) for revid in
                    self.revision_ids)
        if pb is None:
            self.pb = ui.ui_factory.nested_progress_bar()
        else:
            self.pb = pb
        try:
            return self._create_pack_from_packs()
        finally:
            if pb is None:
                self.pb.finished()
"""Open a pack for the pack we are creating."""
625
return NewPack(self._pack_collection, upload_suffix=self.suffix,
626
file_mode=self._pack_collection.repo.bzrdir._get_file_mode())
628
def _copy_revision_texts(self):
629
"""Copy revision data to the new pack."""
631
if self.revision_ids:
632
            revision_keys = [(revision_id,) for revision_id in self.revision_ids]
        else:
            revision_keys = None
        # select revision keys
636
revision_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(
637
self.packs, 'revision_index')[0]
638
revision_nodes = self._pack_collection._index_contents(revision_index_map, revision_keys)
639
# copy revision keys and adjust values
640
self.pb.update("Copying revision texts", 1)
641
total_items, readv_group_iter = self._revision_node_readv(revision_nodes)
642
list(self._copy_nodes_graph(revision_index_map, self.new_pack._writer,
643
self.new_pack.revision_index, readv_group_iter, total_items))
644
if 'pack' in debug.debug_flags:
645
mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
646
time.ctime(), self._pack_collection._upload_transport.base,
647
self.new_pack.random_name,
648
self.new_pack.revision_index.key_count(),
649
time.time() - self.new_pack.start_time)
650
self._revision_keys = revision_keys
652
def _copy_inventory_texts(self):
653
"""Copy the inventory texts to the new pack.
655
self._revision_keys is used to determine what inventories to copy.
657
        Sets self._text_filter appropriately.
        """
        # select inventory keys
660
inv_keys = self._revision_keys # currently the same keyspace, and note that
661
# querying for keys here could introduce a bug where an inventory item
662
# is missed, so do not change it to query separately without cross
663
# checking like the text key check below.
664
inventory_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(
665
self.packs, 'inventory_index')[0]
666
inv_nodes = self._pack_collection._index_contents(inventory_index_map, inv_keys)
667
# copy inventory keys and adjust values
668
# XXX: Should be a helper function to allow different inv representation
670
self.pb.update("Copying inventory texts", 2)
671
total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
672
# Only grab the output lines if we will be processing them
673
output_lines = bool(self.revision_ids)
674
inv_lines = self._copy_nodes_graph(inventory_index_map,
675
self.new_pack._writer, self.new_pack.inventory_index,
676
readv_group_iter, total_items, output_lines=output_lines)
677
        if self.revision_ids:
            self._process_inventory_lines(inv_lines)
        else:
            # eat the iterator to cause it to execute.
            list(inv_lines)
            self._text_filter = None
683
if 'pack' in debug.debug_flags:
684
mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
685
time.ctime(), self._pack_collection._upload_transport.base,
686
self.new_pack.random_name,
687
self.new_pack.inventory_index.key_count(),
688
time.time() - self.new_pack.start_time)
690
def _copy_text_texts(self):
692
text_index_map, text_nodes = self._get_text_nodes()
693
if self._text_filter is not None:
694
# We could return the keys copied as part of the return value from
695
# _copy_nodes_graph but this doesn't work all that well with the
696
# need to get line output too, so we check separately, and as we're
697
# going to buffer everything anyway, we check beforehand, which
698
# saves reading knit data over the wire when we know there are
700
text_nodes = set(text_nodes)
701
present_text_keys = set(_node[1] for _node in text_nodes)
702
missing_text_keys = set(self._text_filter) - present_text_keys
703
if missing_text_keys:
704
# TODO: raise a specific error that can handle many missing
706
a_missing_key = missing_text_keys.pop()
707
                raise errors.RevisionNotPresent(a_missing_key[1],
                    a_missing_key[0])
        # copy text keys and adjust values
710
self.pb.update("Copying content texts", 3)
711
total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
712
list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
713
self.new_pack.text_index, readv_group_iter, total_items))
714
self._log_copied_texts()
716
def _create_pack_from_packs(self):
717
self.pb.update("Opening pack", 0, 5)
718
self.new_pack = self.open_pack()
719
new_pack = self.new_pack
720
# buffer data - we won't be reading-back during the pack creation and
721
# this makes a significant difference on sftp pushes.
722
new_pack.set_write_cache_size(1024*1024)
723
if 'pack' in debug.debug_flags:
724
plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
725
for a_pack in self.packs]
726
if self.revision_ids is not None:
727
                rev_count = len(self.revision_ids)
            else:
                rev_count = 'all'
            mutter('%s: create_pack: creating pack from source packs: '
731
'%s%s %s revisions wanted %s t=0',
732
time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
733
plain_pack_list, rev_count)
734
self._copy_revision_texts()
735
self._copy_inventory_texts()
736
self._copy_text_texts()
737
# select signature keys
738
signature_filter = self._revision_keys # same keyspace
739
signature_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(
740
self.packs, 'signature_index')[0]
741
        signature_nodes = self._pack_collection._index_contents(signature_index_map,
            self.revision_keys)
        # copy signature keys and adjust values
744
self.pb.update("Copying signature texts", 4)
745
self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
746
new_pack.signature_index)
747
if 'pack' in debug.debug_flags:
748
mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
749
time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
750
new_pack.signature_index.key_count(),
751
time.time() - new_pack.start_time)
752
new_pack._check_references()
753
        if not self._use_pack(new_pack):
            new_pack.abort()
            return None
        self.pb.update("Finishing pack", 5)
        new_pack.finish()
        self._pack_collection.allocate(new_pack)
        return new_pack
    def _copy_nodes(self, nodes, index_map, writer, write_index):
        """Copy knit nodes between packs with no graph references."""
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_copy_nodes(nodes, index_map, writer,
                write_index, pb)
        finally:
            pb.finished()
def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb):
771
# for record verification
772
knit = KnitVersionedFiles(None, None)
773
# plan a readv on each source pack:
775
nodes = sorted(nodes)
776
# how to map this into knit.py - or knit.py into this?
777
# we don't want the typical knit logic, we want grouping by pack
778
# at this point - perhaps a helper library for the following code
779
        # duplication points?
        request_groups = {}
        for index, key, value in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value))
        record_index = 0
        pb.update("Copied record", record_index, len(nodes))
787
for index, items in request_groups.iteritems():
788
pack_readv_requests = []
789
for key, value in items:
790
# ---- KnitGraphIndex.get_position
791
bits = value[1:].split(' ')
792
offset, length = int(bits[0]), int(bits[1])
793
pack_readv_requests.append((offset, length, (key, value[0])))
794
# linear scan up the pack
795
pack_readv_requests.sort()
797
transport, path = index_map[index]
798
reader = pack.make_readv_reader(transport, path,
799
[offset[0:2] for offset in pack_readv_requests])
800
for (names, read_func), (_1, _2, (key, eol_flag)) in \
801
izip(reader.iter_records(), pack_readv_requests):
802
raw_data = read_func(None)
803
# check the header only
804
                df, _ = knit._parse_record_header(key, raw_data)
                df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
                pb.update("Copied record", record_index)
                record_index += 1
def _copy_nodes_graph(self, index_map, writer, write_index,
812
readv_group_iter, total_items, output_lines=False):
813
"""Copy knit nodes between packs.
815
:param output_lines: Return lines present in the copied data as
816
            an iterator of line,version_id.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for result in self._do_copy_nodes_graph(index_map, writer,
                write_index, output_lines, pb, readv_group_iter, total_items):
                yield result
        except Exception:
            # Python 2.4 does not permit try:finally: in a generator.
            pb.finished()
            raise
        else:
            pb.finished()
def _do_copy_nodes_graph(self, index_map, writer, write_index,
831
output_lines, pb, readv_group_iter, total_items):
832
# for record verification
833
knit = KnitVersionedFiles(None, None)
834
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        record_index = 0
        pb.update("Copied record", record_index, total_items)
839
for index, readv_vector, node_vector in readv_group_iter:
841
transport, path = index_map[index]
842
reader = pack.make_readv_reader(transport, path, readv_vector)
843
for (names, read_func), (key, eol_flag, references) in \
844
izip(reader.iter_records(), node_vector):
845
                raw_data = read_func(None)
                if output_lines:
                    # read the entire thing
                    content, _ = knit._parse_record(key[-1], raw_data)
                    if len(references[-1]) == 0:
                        line_iterator = factory.get_fulltext_content(content)
                    else:
                        line_iterator = factory.get_linedelta_content(content)
                    for line in line_iterator:
                        yield line, key
                else:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)
                pb.update("Copied record", record_index)
                record_index += 1
def _get_text_nodes(self):
865
text_index_map = self._pack_collection._packs_list_to_pack_map_and_index_list(
866
self.packs, 'text_index')[0]
867
        return text_index_map, self._pack_collection._index_contents(text_index_map,
            self._text_filter)
def _least_readv_node_readv(self, nodes):
871
"""Generate request groups for nodes using the least readv's.
873
:param nodes: An iterable of graph index nodes.
874
:return: Total node count and an iterator of the data needed to perform
875
readvs to obtain the data for nodes. Each item yielded by the
876
iterator is a tuple with:
877
index, readv_vector, node_vector. readv_vector is a list ready to
878
            hand to the transport readv method, and node_vector is a list of
            (key, eol_flag, references) for the node retrieved by the
            matching readv_vector.
        """
        # group by pack so we do one readv per pack
        nodes = sorted(nodes)
        total = len(nodes)
        request_groups = {}
        for index, key, value, references in nodes:
887
if index not in request_groups:
888
request_groups[index] = []
889
request_groups[index].append((key, value, references))
        result = []
        for index, items in request_groups.iteritems():
892
pack_readv_requests = []
893
for key, value, references in items:
894
# ---- KnitGraphIndex.get_position
895
bits = value[1:].split(' ')
896
offset, length = int(bits[0]), int(bits[1])
897
pack_readv_requests.append(
898
((offset, length), (key, value[0], references)))
899
# linear scan up the pack to maximum range combining.
900
pack_readv_requests.sort()
901
# split out the readv and the node data.
902
pack_readv = [readv for readv, node in pack_readv_requests]
903
node_vector = [node for readv, node in pack_readv_requests]
904
            result.append((index, pack_readv, node_vector))
        return total, result
def _log_copied_texts(self):
908
if 'pack' in debug.debug_flags:
909
mutter('%s: create_pack: file texts copied: %s%s %d items t+%6.3fs',
910
time.ctime(), self._pack_collection._upload_transport.base,
911
self.new_pack.random_name,
912
self.new_pack.text_index.key_count(),
913
time.time() - self.new_pack.start_time)
915
def _process_inventory_lines(self, inv_lines):
916
"""Use up the inv_lines generator and setup a text key filter."""
917
repo = self._pack_collection.repo
918
fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
919
            inv_lines, self.revision_keys)
        text_filter = []
        for fileid, file_revids in fileid_revisions.iteritems():
922
text_filter.extend([(fileid, file_revid) for file_revid in file_revids])
923
self._text_filter = text_filter
925
def _revision_node_readv(self, revision_nodes):
926
"""Return the total revisions and the readv's to issue.
928
:param revision_nodes: The revision index contents for the packs being
929
incorporated into the new pack.
930
        :return: As per _least_readv_node_readv.
        """
        return self._least_readv_node_readv(revision_nodes)
934
def _use_pack(self, new_pack):
935
"""Return True if new_pack should be used.
937
:param new_pack: The pack that has just been created.
938
        :return: True if the pack should be used.
        """
        return new_pack.data_inserted()
943
class OptimisingPacker(Packer):
944
"""A packer which spends more time to create better disk layouts."""
946
def _revision_node_readv(self, revision_nodes):
947
"""Return the total revisions and the readv's to issue.
949
        This sort places revisions in topological order with the ancestors
        after the children.

        :param revision_nodes: The revision index contents for the packs being
953
incorporated into the new pack.
954
        :return: As per _least_readv_node_readv.
        """
        # build an ancestors dict
        ancestors = {}
        by_key = {}
        for index, key, value, references in revision_nodes:
960
ancestors[key] = references[0]
961
by_key[key] = (index, value, references)
962
        order = tsort.topo_sort(ancestors)
        total = len(order)
        # Single IO is pathological, but it will work as a starting point.
        requests = []
        for key in reversed(order):
967
index, value, references = by_key[key]
968
# ---- KnitGraphIndex.get_position
969
bits = value[1:].split(' ')
970
            offset, length = int(bits[0]), int(bits[1])
            requests.append(
                (index, [(offset, length)], [(key, value[0], references)]))
973
# TODO: combine requests in the same index that are in ascending order.
974
return total, requests
977
"""Open a pack for the pack we are creating."""
978
new_pack = super(OptimisingPacker, self).open_pack()
979
# Turn on the optimization flags for all the index builders.
980
new_pack.revision_index.set_optimize(for_size=True)
981
new_pack.inventory_index.set_optimize(for_size=True)
982
new_pack.text_index.set_optimize(for_size=True)
983
        new_pack.signature_index.set_optimize(for_size=True)
        return new_pack
class ReconcilePacker(Packer):
988
"""A packer which regenerates indices etc as it copies.
990
    This is used by ``bzr reconcile`` to cause parent text pointers to be
    correct.
    """

    def _extra_init(self):
995
self._data_changed = False
997
def _process_inventory_lines(self, inv_lines):
998
"""Generate a text key reference map rather for reconciling with."""
999
repo = self._pack_collection.repo
1000
        refs = repo._find_text_key_references_from_xml_inventory_lines(
            inv_lines)
        self._text_refs = refs
1003
# during reconcile we:
1004
# - convert unreferenced texts to full texts
1005
# - correct texts which reference a text not copied to be full texts
1006
# - copy all others as-is but with corrected parents.
1007
# - so at this point we don't know enough to decide what becomes a full
1009
self._text_filter = None
1011
def _copy_text_texts(self):
1012
"""generate what texts we should have and then copy."""
1013
self.pb.update("Copying content texts", 3)
1014
# we have three major tasks here:
1015
# 1) generate the ideal index
1016
repo = self._pack_collection.repo
1017
ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
1018
_1, key, _2, refs in
1019
self.new_pack.revision_index.iter_all_entries()])
1020
ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
1021
# 2) generate a text_nodes list that contains all the deltas that can
1022
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
1026
NULL_REVISION = _mod_revision.NULL_REVISION
1027
text_index_map, text_nodes = self._get_text_nodes()
1028
for node in text_nodes:
1034
ideal_parents = tuple(ideal_index[node[1]])
1036
discarded_nodes.append(node)
1037
self._data_changed = True
1039
if ideal_parents == (NULL_REVISION,):
1041
if ideal_parents == node[3][0]:
1043
ok_nodes.append(node)
1044
elif ideal_parents[0:1] == node[3][0][0:1]:
1045
# the left most parent is the same, or there are no parents
1046
# today. Either way, we can preserve the representation as
1047
# long as we change the refs to be inserted.
1048
self._data_changed = True
1049
ok_nodes.append((node[0], node[1], node[2],
1050
(ideal_parents, node[3][1])))
1051
self._data_changed = True
1053
# Reinsert this text completely
1054
bad_texts.append((node[1], ideal_parents))
1055
self._data_changed = True
1056
# we're finished with some data.
1059
# 3) bulk copy the ok data
1060
total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
1061
list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
1062
self.new_pack.text_index, readv_group_iter, total_items))
1063
# 4) adhoc copy all the other texts.
1064
# We have to topologically insert all texts otherwise we can fail to
1065
# reconcile when parts of a single delta chain are preserved intact,
1066
# and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
1067
# reinserted, and if d3 has incorrect parents it will also be
1068
# reinserted. If we insert d3 first, d2 is present (as it was bulk
1069
# copied), so we will try to delta, but d2 is not currently able to be
1070
# extracted because it's basis d1 is not present. Topologically sorting
1071
# addresses this. The following generates a sort for all the texts that
1072
# are being inserted without having to reference the entire text key
1073
# space (we only topo sort the revisions, which is smaller).
1074
topo_order = tsort.topo_sort(ancestors)
1075
rev_order = dict(zip(topo_order, range(len(topo_order))))
1076
bad_texts.sort(key=lambda key:rev_order[key[0][1]])
1077
transaction = repo.get_transaction()
1078
file_id_index = GraphIndexPrefixAdapter(
1079
self.new_pack.text_index,
1081
add_nodes_callback=self.new_pack.text_index.add_nodes)
1082
data_access = _DirectPackAccess(
1083
{self.new_pack.text_index:self.new_pack.access_tuple()})
1084
data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
1085
self.new_pack.access_tuple())
1086
output_texts = KnitVersionedFiles(
1087
_KnitGraphIndex(self.new_pack.text_index,
1088
add_callback=self.new_pack.text_index.add_nodes,
1089
deltas=True, parents=True, is_locked=repo.is_locked),
1090
data_access=data_access, max_delta_chain=200)
1091
for key, parent_keys in bad_texts:
1092
# We refer to the new pack to delta data being output.
1093
# A possible improvement would be to catch errors on short reads
1094
# and only flush then.
1095
self.new_pack.flush()
1097
for parent_key in parent_keys:
1098
if parent_key[0] != key[0]:
1099
# Graph parents must match the fileid
1100
raise errors.BzrError('Mismatched key parent %r:%r' %
1102
parents.append(parent_key[1])
1103
text_lines = osutils.split_lines(repo.texts.get_record_stream(
1104
[key], 'unordered', True).next().get_bytes_as('fulltext'))
1105
output_texts.add_lines(key, parent_keys, text_lines,
1106
random_id=True, check_content=False)
1107
# 5) check that nothing inserted has a reference outside the keyspace.
1108
missing_text_keys = self.new_pack.text_index._external_references()
1109
if missing_text_keys:
1110
raise errors.BzrCheckError('Reference to missing compression parents %r'
1111
% (missing_text_keys,))
1112
self._log_copied_texts()
1114
def _use_pack(self, new_pack):
1115
"""Override _use_pack to check for reconcile having changed content."""
1116
# XXX: we might be better checking this at the copy time.
1117
original_inventory_keys = set()
1118
inv_index = self._pack_collection.inventory_index.combined_index
1119
for entry in inv_index.iter_all_entries():
1120
original_inventory_keys.add(entry[1])
1121
new_inventory_keys = set()
1122
for entry in new_pack.inventory_index.iter_all_entries():
1123
new_inventory_keys.add(entry[1])
1124
if new_inventory_keys != original_inventory_keys:
1125
self._data_changed = True
1126
return new_pack.data_inserted() and self._data_changed
1129
class RepositoryPackCollection(object):
1130
"""Management of packs within a repository.
1132
:ivar _names: map of {pack_name: (index_size,)}
1135
def __init__(self, repo, transport, index_transport, upload_transport,
1136
pack_transport, index_builder_class, index_class):
1137
"""Create a new RepositoryPackCollection.
1139
:param transport: Addresses the repository base directory
1140
(typically .bzr/repository/).
1141
:param index_transport: Addresses the directory containing indices.
1142
:param upload_transport: Addresses the directory into which packs are written
1143
while they're being created.
1144
:param pack_transport: Addresses the directory of existing complete packs.
1145
:param index_builder_class: The index builder class to use.
1146
        :param index_class: The index class to use.
        """
        self.repo = repo
        self.transport = transport
1150
self._index_transport = index_transport
1151
self._upload_transport = upload_transport
1152
self._pack_transport = pack_transport
1153
self._index_builder_class = index_builder_class
1154
self._index_class = index_class
1155
        self._suffix_offsets = {'.rix': 0, '.iix': 1, '.tix': 2, '.six': 3}
        self.packs = []
        # name:Pack mapping
        self._packs_by_name = {}
1159
# the previous pack-names content
1160
self._packs_at_load = None
1161
# when a pack is being created by this object, the state of that pack.
1162
self._new_pack = None
1163
# aggregated revision index data
1164
self.revision_index = AggregateIndex(self.reload_pack_names)
1165
self.inventory_index = AggregateIndex(self.reload_pack_names)
1166
self.text_index = AggregateIndex(self.reload_pack_names)
1167
self.signature_index = AggregateIndex(self.reload_pack_names)
1169
def add_pack_to_memory(self, pack):
1170
"""Make a Pack object available to the repository to satisfy queries.
1172
:param pack: A Pack object.
1174
if pack.name in self._packs_by_name:
1175
raise AssertionError()
1176
self.packs.append(pack)
1177
self._packs_by_name[pack.name] = pack
1178
self.revision_index.add_index(pack.revision_index, pack)
1179
self.inventory_index.add_index(pack.inventory_index, pack)
1180
self.text_index.add_index(pack.text_index, pack)
1181
self.signature_index.add_index(pack.signature_index, pack)
1183
def all_packs(self):
1184
"""Return a list of all the Pack objects this repository has.
1186
Note that an in-progress pack being created is not returned.
1188
        :return: A list of Pack objects for all the packs in the repository.
        """
        result = []
        for name in self.names():
            result.append(self.get_pack_by_name(name))
        return result
"""Pack the pack collection incrementally.
1198
This will not attempt global reorganisation or recompression,
1199
rather it will just ensure that the total number of packs does
1200
not grow without bound. It uses the _max_pack_count method to
1201
determine if autopacking is needed, and the pack_distribution
1202
method to determine the number of revisions in each pack.
1204
If autopacking takes place then the packs name collection will have
1205
been flushed to disk - packing requires updating the name collection
1206
        in synchronisation with certain steps. Otherwise the names collection
        is not flushed.

        :return: True if packing took place.
        """
        # XXX: Should not be needed when the management of indices is sane.
1212
total_revisions = self.revision_index.combined_index.key_count()
1213
total_packs = len(self._names)
1214
        if self._max_pack_count(total_revisions) >= total_packs:
            return False
        # XXX: the following may want to be a class, to pack with a given
        # policy.
mutter('Auto-packing repository %s, which has %d pack files, '
1219
'containing %d revisions into %d packs.', self, total_packs,
1220
total_revisions, self._max_pack_count(total_revisions))
1221
# determine which packs need changing
1222
pack_distribution = self.pack_distribution(total_revisions)
        existing_packs = []
        for pack in self.all_packs():
1225
revision_count = pack.get_revision_count()
1226
if revision_count == 0:
1227
# revision less packs are not generated by normal operation,
1228
# only by operations like sign-my-commits, and thus will not
1229
                # tend to grow rapidly or without bound like commit containing
1230
# packs do - leave them alone as packing them really should
1231
# group their data with the relevant commit, and that may
1232
# involve rewriting ancient history - which autopack tries to
1233
# avoid. Alternatively we could not group the data but treat
1234
# each of these as having a single revision, and thus add
1235
# one revision for each to the total revision count, to get
1236
                # a matching distribution.
                continue
            existing_packs.append((revision_count, pack))
1239
pack_operations = self.plan_autopack_combinations(
1240
existing_packs, pack_distribution)
1241
        self._execute_pack_operations(pack_operations)
        return True
def _execute_pack_operations(self, pack_operations, _packer_class=Packer):
1245
"""Execute a series of pack operations.
1247
:param pack_operations: A list of [revision_count, packs_to_combine].
1248
:param _packer_class: The class of packer to use (default: Packer).
1251
for revision_count, packs in pack_operations:
1252
            # we may have no-ops from the setup logic
            if len(packs) == 0:
                continue
            _packer_class(self, packs, '.autopack').pack()
            for pack in packs:
                self._remove_pack_from_memory(pack)
1258
# record the newly available packs and stop advertising the old
1260
self._save_pack_names(clear_obsolete_packs=True)
1261
# Move the old packs out of the way now they are no longer referenced.
1262
for revision_count, packs in pack_operations:
1263
self._obsolete_packs(packs)
1265
def lock_names(self):
1266
"""Acquire the mutex around the pack-names index.
1268
This cannot be used in the middle of a read-only transaction on the
1271
self.repo.control_files.lock_write()
1274
"""Pack the pack collection totally."""
1275
self.ensure_loaded()
1276
total_packs = len(self._names)
1278
# This is arguably wrong because we might not be optimal, but for
1279
# now lets leave it in. (e.g. reconcile -> one pack. But not
1282
total_revisions = self.revision_index.combined_index.key_count()
1283
# XXX: the following may want to be a class, to pack with a given
1285
mutter('Packing repository %s, which has %d pack files, '
1286
'containing %d revisions into 1 packs.', self, total_packs,
1288
# determine which packs need changing
1289
pack_distribution = [1]
1290
pack_operations = [[0, []]]
1291
for pack in self.all_packs():
1292
pack_operations[-1][0] += pack.get_revision_count()
1293
pack_operations[-1][1].append(pack)
1294
self._execute_pack_operations(pack_operations, OptimisingPacker)
1296
def plan_autopack_combinations(self, existing_packs, pack_distribution):
1297
"""Plan a pack operation.
1299
:param existing_packs: The packs to pack. (A list of (revcount, Pack)
1301
:param pack_distribution: A list with the number of revisions desired
1304
if len(existing_packs) <= len(pack_distribution):
1306
existing_packs.sort(reverse=True)
1307
pack_operations = [[0, []]]
1308
# plan out what packs to keep, and what to reorganise
1309
while len(existing_packs):
1310
            # take the largest pack, and if it's less than the head of the
            # distribution chart we will include its contents in the new pack
            # for that position. If it's larger, we remove its size from the
            # distribution chart
1314
next_pack_rev_count, next_pack = existing_packs.pop(0)
1315
if next_pack_rev_count >= pack_distribution[0]:
1316
# this is already packed 'better' than this, so we can
1317
# not waste time packing it.
1318
while next_pack_rev_count > 0:
1319
next_pack_rev_count -= pack_distribution[0]
1320
if next_pack_rev_count >= 0:
1322
del pack_distribution[0]
1324
# didn't use that entire bucket up
1325
pack_distribution[0] = -next_pack_rev_count
1327
# add the revisions we're going to add to the next output pack
1328
pack_operations[-1][0] += next_pack_rev_count
1329
# allocate this pack to the next pack sub operation
1330
pack_operations[-1][1].append(next_pack)
1331
if pack_operations[-1][0] >= pack_distribution[0]:
1332
# this pack is used up, shift left.
1333
del pack_distribution[0]
1334
pack_operations.append([0, []])
1335
# Now that we know which pack files we want to move, shove them all
1336
# into a single pack file.
1338
final_pack_list = []
1339
for num_revs, pack_files in pack_operations:
1340
final_rev_count += num_revs
1341
final_pack_list.extend(pack_files)
1342
if len(final_pack_list) == 1:
1343
raise AssertionError('We somehow generated an autopack with a'
1344
' single pack file being moved.')
1346
return [[final_rev_count, final_pack_list]]
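    # Rough worked example (illustrative, assuming the elided branches follow
    # the pattern above): with pack_distribution([10, 1]) and existing packs
    # holding 5, 4 and 1 revisions, none individually reaches the head of the
    # distribution, so all three accumulate into one [10, [p5, p4, p1]]
    # operation, while a pre-existing single-revision pack that already
    # matches the remaining distribution entry is left alone.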
1348
def ensure_loaded(self):
1349
        # NB: if you see an assertion error here, it's probably access against
1350
# an unlocked repo. Naughty.
1351
if not self.repo.is_locked():
1352
raise errors.ObjectNotLocked(self.repo)
1353
        if self._names is None:
            self._names = {}
            self._packs_at_load = set()
            for index, key, value in self._iter_disk_pack_index():
                name = key[0]
                self._names[name] = self._parse_index_sizes(value)
                self._packs_at_load.add((key, value))
1360
# populate all the metadata.
1363
def _parse_index_sizes(self, value):
1364
"""Parse a string of index sizes."""
1365
return tuple([int(digits) for digits in value.split(' ')])
1367
    def get_pack_by_name(self, name):
        """Get a Pack object by name.

        :param name: The name of the pack - e.g. '123456'
        :return: A Pack object.
        """
        try:
            return self._packs_by_name[name]
        except KeyError:
            rev_index = self._make_index(name, '.rix')
            inv_index = self._make_index(name, '.iix')
            txt_index = self._make_index(name, '.tix')
            sig_index = self._make_index(name, '.six')
            result = ExistingPack(self._pack_transport, name, rev_index,
                inv_index, txt_index, sig_index)
            self.add_pack_to_memory(result)
            return result
def allocate(self, a_new_pack):
1386
"""Allocate name in the list of packs.
1388
:param a_new_pack: A NewPack instance to be added to the collection of
1389
packs for this repository.
1391
self.ensure_loaded()
1392
if a_new_pack.name in self._names:
1393
raise errors.BzrError(
1394
'Pack %r already exists in %s' % (a_new_pack.name, self))
1395
self._names[a_new_pack.name] = tuple(a_new_pack.index_sizes)
1396
self.add_pack_to_memory(a_new_pack)
1398
def _iter_disk_pack_index(self):
1399
"""Iterate over the contents of the pack-names index.
1401
This is used when loading the list from disk, and before writing to
1402
detect updates from others during our write operation.
1403
:return: An iterator of the index contents.
1405
return self._index_class(self.transport, 'pack-names', None
1406
).iter_all_entries()
1408
def _make_index(self, name, suffix):
1409
size_offset = self._suffix_offsets[suffix]
1410
index_name = name + suffix
1411
index_size = self._names[name][size_offset]
1412
return self._index_class(
1413
self._index_transport, index_name, index_size)
1415
    def _max_pack_count(self, total_revisions):
        """Return the maximum number of packs to use for total revisions.

        :param total_revisions: The total number of revisions in the
            repository.
        """
        if not total_revisions:
            return 1
        digits = str(total_revisions)
        result = 0
        for digit in digits:
            result += int(digit)
        return result
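    # e.g. (illustrative) 2341 total revisions -> digit sum 2+3+4+1 = at most
    # 10 pack files before autopacking is triggered.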
1430
"""Provide an order to the underlying names."""
1431
return sorted(self._names.keys())
1433
def _obsolete_packs(self, packs):
1434
"""Move a number of packs which have been obsoleted out of the way.
1436
Each pack and its associated indices are moved out of the way.
1438
Note: for correctness this function should only be called after a new
1439
pack names index has been written without these pack names, and with
1440
        the names of packs that contain the data previously available via these
        packs.

        :param packs: The packs to obsolete.
        :return: None.
        """
        for pack in packs:
            pack.pack_transport.rename(pack.file_name(),
1448
'../obsolete_packs/' + pack.file_name())
1449
# TODO: Probably needs to know all possible indices for this pack
1450
# - or maybe list the directory and move all indices matching this
1451
# name whether we recognize it or not?
1452
for suffix in ('.iix', '.six', '.tix', '.rix'):
1453
self._index_transport.rename(pack.name + suffix,
1454
'../obsolete_packs/' + pack.name + suffix)
1456
    def pack_distribution(self, total_revisions):
        """Generate a list of the number of revisions to put in each pack.

        :param total_revisions: The total number of revisions in the
            repository.
        """
        if total_revisions == 0:
            return [0]
        digits = reversed(str(total_revisions))
        result = []
        for exponent, count in enumerate(digits):
            size = 10 ** exponent
            for pos in range(int(count)):
                result.append(size)
        return list(reversed(result))
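    # e.g. (illustrative) pack_distribution(2341) ->
    # [1000, 1000, 100, 100, 100, 10, 10, 10, 10, 1]: one bucket of size
    # 10**exponent per decimal digit, largest first.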
1472
def _pack_tuple(self, name):
1473
"""Return a tuple with the transport and file name for a pack name."""
1474
return self._pack_transport, name + '.pack'
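    # e.g. (illustrative) _pack_tuple('a1b2c3') ->
    # (self._pack_transport, 'a1b2c3.pack')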
1476
def _remove_pack_from_memory(self, pack):
1477
"""Remove pack from the packs accessed by this repository.
1479
        Only affects memory state, until self._save_pack_names() is invoked.
        """
        self._names.pop(pack.name)
1482
self._packs_by_name.pop(pack.name)
1483
self._remove_pack_indices(pack)
1484
self.packs.remove(pack)
1486
def _remove_pack_indices(self, pack):
1487
"""Remove the indices for pack from the aggregated indices."""
1488
self.revision_index.remove_index(pack.revision_index, pack)
1489
self.inventory_index.remove_index(pack.inventory_index, pack)
1490
self.text_index.remove_index(pack.text_index, pack)
1491
self.signature_index.remove_index(pack.signature_index, pack)
1494
"""Clear all cached data."""
1495
# cached revision data
1496
self.repo._revision_knit = None
1497
self.revision_index.clear()
1498
# cached signature data
1499
self.repo._signature_knit = None
1500
self.signature_index.clear()
1501
# cached file text data
1502
self.text_index.clear()
1503
self.repo._text_knit = None
1504
# cached inventory data
1505
self.inventory_index.clear()
1506
# remove the open pack
1507
self._new_pack = None
1508
        # information about packs.
        self._names = None
        self.packs = []
        self._packs_by_name = {}
1512
self._packs_at_load = None
1514
def _make_index_map(self, index_suffix):
1515
"""Return information on existing indices.
1517
        :param index_suffix: Index suffix added to pack name.

        :returns: (pack_map, indices) where indices is a list of GraphIndex
            objects, and pack_map is a mapping from those objects to the
            pack tuple they describe.
        """
        # TODO: stop using this; it creates new indices unnecessarily.
1524
self.ensure_loaded()
1525
suffix_map = {'.rix': 'revision_index',
1526
'.six': 'signature_index',
1527
'.iix': 'inventory_index',
1528
            '.tix': 'text_index',
            }
        return self._packs_list_to_pack_map_and_index_list(self.all_packs(),
1531
suffix_map[index_suffix])
1533
def _packs_list_to_pack_map_and_index_list(self, packs, index_attribute):
1534
"""Convert a list of packs to an index pack map and index list.
1536
:param packs: The packs list to process.
1537
:param index_attribute: The attribute that the desired index is found
1539
:return: A tuple (map, list) where map contains the dict from
1540
            index:pack_tuple, and list contains the indices in the same order
            as the packs.
        """
        pack_map = {}
        indices = []
        for pack in packs:
            index = getattr(pack, index_attribute)
1547
indices.append(index)
1548
pack_map[index] = (pack.pack_transport, pack.file_name())
1549
return pack_map, indices
1551
def _index_contents(self, pack_map, key_filter=None):
1552
"""Get an iterable of the index contents from a pack_map.
1554
:param pack_map: A map from indices to pack details.
1555
        :param key_filter: An optional filter to limit the
            keys returned.
        """
        indices = [index for index in pack_map.iterkeys()]
1559
all_index = CombinedGraphIndex(indices)
1560
if key_filter is None:
1561
            return all_index.iter_all_entries()
        else:
            return all_index.iter_entries(key_filter)
1565
def _unlock_names(self):
1566
"""Release the mutex around the pack-names index."""
1567
self.repo.control_files.unlock()
1569
def _diff_pack_names(self):
1570
"""Read the pack names from disk, and compare it to the one in memory.
1572
:return: (disk_nodes, deleted_nodes, new_nodes)
1573
disk_nodes The final set of nodes that should be referenced
1574
deleted_nodes Nodes which have been removed from when we started
1575
new_nodes Nodes that are newly introduced
1577
        # load the disk nodes across
        disk_nodes = set()
        for index, key, value in self._iter_disk_pack_index():
1580
disk_nodes.add((key, value))
1582
# do a two-way diff against our original content
1583
current_nodes = set()
1584
        for name, sizes in self._names.iteritems():
            current_nodes.add(
                ((name, ), ' '.join(str(size) for size in sizes)))
1588
# Packs no longer present in the repository, which were present when we
1589
# locked the repository
1590
deleted_nodes = self._packs_at_load - current_nodes
1591
# Packs which this process is adding
1592
new_nodes = current_nodes - self._packs_at_load
1594
# Update the disk_nodes set to include the ones we are adding, and
1595
# remove the ones which were removed by someone else
1596
disk_nodes.difference_update(deleted_nodes)
1597
disk_nodes.update(new_nodes)
1599
return disk_nodes, deleted_nodes, new_nodes
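# Illustrative note (not part of the original source): each node here is a
# (key, value) pair as stored in the pack-names index - key is a
# one-element tuple holding the pack name and value is the space-separated
# index sizes, so a hypothetical pack might appear as
#   (('a1b2c3d4e5',), '1234 5678 91011 1213')
# matching the ' '.join(str(size) for size in sizes) encoding above.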
def _syncronize_pack_names_from_disk_nodes(self, disk_nodes):
"""Given the correct set of pack files, update our saved info.

:return: (removed, added, modified)
removed pack names removed from self._names
added pack names added to self._names
modified pack names that had changed value
"""
removed = []
added = []
modified = []
## self._packs_at_load = disk_nodes
new_names = dict(disk_nodes)
# drop no longer present nodes
for pack in self.all_packs():
if (pack.name,) not in new_names:
removed.append(pack.name)
self._remove_pack_from_memory(pack)
# add new nodes/refresh existing ones
for key, value in disk_nodes:
name = key[0]
sizes = self._parse_index_sizes(value)
if name in self._names:
if sizes != self._names[name]:
# the pack for name has had its indices replaced - rare but
# important to handle. XXX: probably can never happen today
# because the three-way merge code above does not handle it
# - you may end up adding the same key twice to the new
# disk index because the set values are the same, unless
# the only index shows up as deleted by the set difference
# - which it may. Until there is a specific test for this,
# assume it's broken. RBC 20071017.
self._remove_pack_from_memory(self.get_pack_by_name(name))
self._names[name] = sizes
self.get_pack_by_name(name)
modified.append(name)
else:
self._names[name] = sizes
self.get_pack_by_name(name)
added.append(name)
return removed, added, modified
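# Illustrative note (not part of the original source): this pairs with
# _diff_pack_names above; _save_pack_names and reload_pack_names below
# both compute disk_nodes first and then call this method, e.g.
#   disk_nodes, _, _ = self._diff_pack_names()
#   removed, added, modified = \
#       self._syncronize_pack_names_from_disk_nodes(disk_nodes)
# so the in-memory pack list always tracks what was agreed on disk.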
def _save_pack_names(self, clear_obsolete_packs=False):
"""Save the list of packs.

This will take out the mutex around the pack names list for the
duration of the method call. If concurrent updates have been made, a
three-way merge between the current list and the current in memory list
will be performed.

:param clear_obsolete_packs: If True, clear out the contents of the
obsolete_packs directory.
"""
self.lock_names()
try:
builder = self._index_builder_class()
disk_nodes, deleted_nodes, new_nodes = self._diff_pack_names()
# TODO: handle same-name, index-size-changes here -
# e.g. use the value from disk, not ours, *unless* we're the one
# changing it.
for key, value in disk_nodes:
builder.add_node(key, value)
self.transport.put_file('pack-names', builder.finish(),
mode=self.repo.bzrdir._get_file_mode())
# move the baseline forward
self._packs_at_load = disk_nodes
if clear_obsolete_packs:
self._clear_obsolete_packs()
finally:
self._unlock_names()
# synchronise the memory packs list with what we just wrote:
self._syncronize_pack_names_from_disk_nodes(disk_nodes)
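# Illustrative note (not part of the original source): the mutex named in
# the docstring is the physical lock around the pack-names index, so the
# on-disk sequence is roughly: lock, re-read pack-names, three-way merge
# via _diff_pack_names, rewrite pack-names, unlock, then refresh the
# in-memory list; concurrent writers serialise only around this short
# critical section.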
def reload_pack_names(self):
"""Sync our pack listing with what is present in the repository.

This should be called when we find out that something we thought was
present is now missing. This happens when another process re-packs the
repository, etc.
"""
# This is functionally similar to _save_pack_names, but we don't write
# out the new value.
disk_nodes, _, _ = self._diff_pack_names()
self._packs_at_load = disk_nodes
(removed, added,
modified) = self._syncronize_pack_names_from_disk_nodes(disk_nodes)
if removed or added or modified:
return True
return False

def _clear_obsolete_packs(self):
"""Delete everything from the obsolete-packs directory."""
obsolete_pack_transport = self.transport.clone('obsolete_packs')
for filename in obsolete_pack_transport.list_dir('.'):
try:
obsolete_pack_transport.delete(filename)
except (errors.PathError, errors.TransportError), e:
warning("couldn't delete obsolete pack, skipping it:\n%s" % (e,))
def _start_write_group(self):
# Do not permit preparation for writing if we're not in a 'write lock'.
if not self.repo.is_write_locked():
raise errors.NotWriteLocked(self)
self._new_pack = NewPack(self, upload_suffix='.pack',
file_mode=self.repo.bzrdir._get_file_mode())
# allow writing: queue writes to a new index
self.revision_index.add_writable_index(self._new_pack.revision_index,
self._new_pack)
self.inventory_index.add_writable_index(self._new_pack.inventory_index,
self._new_pack)
self.text_index.add_writable_index(self._new_pack.text_index,
self._new_pack)
self.signature_index.add_writable_index(self._new_pack.signature_index,
self._new_pack)

self.repo.inventories._index._add_callback = self.inventory_index.add_callback
self.repo.revisions._index._add_callback = self.revision_index.add_callback
self.repo.signatures._index._add_callback = self.signature_index.add_callback
self.repo.texts._index._add_callback = self.text_index.add_callback
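# Illustrative note (not part of the original source): starting a write
# group opens a NewPack, makes its per-pack indices the writable targets
# of the aggregate indices (add_writable_index), and points each
# versioned-files index _add_callback at the matching aggregate index, so
# texts, inventories, revisions and signatures added during the group all
# land in the one new pack; _abort_write_group and _commit_write_group
# below undo this wiring.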
def _abort_write_group(self):
# FIXME: just drop the transient index.
# forget what names there are
if self._new_pack is not None:
try:
self._new_pack.abort()
finally:
# XXX: If we aborted while in the middle of finishing the write
# group, _remove_pack_indices can fail because the indexes are
# already gone. If they're not there we shouldn't fail in this
# case. -- mbp 20081113
self._remove_pack_indices(self._new_pack)
self._new_pack = None
self.repo._text_knit = None

def _commit_write_group(self):
self._remove_pack_indices(self._new_pack)
if self._new_pack.data_inserted():
# get all the data to disk and ready to use
self._new_pack.finish()
self.allocate(self._new_pack)
self._new_pack = None
if not self.autopack():
# when autopack takes no steps, the names list is still
# unsaved.
self._save_pack_names()
else:
self._new_pack.abort()
self._new_pack = None
self.repo._text_knit = None
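# Illustrative sketch (not part of the original source): at the repository
# level these hooks are reached through the public write-group API,
# roughly:
#   repo.lock_write()
#   repo.start_write_group()
#   # ... add revisions/inventories/texts/signatures ...
#   repo.commit_write_group()   # or repo.abort_write_group() on error
#   repo.unlock()
# with start/commit delegating to the pack collection, as
# KnitPackRepository does further down in this file.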
class KnitPackRepository(KnitRepository):
"""Repository with knit objects stored inside pack containers.

The layering for a KnitPackRepository is:

Graph | HPSS | Repository public layer |
===================================================
Tuple based apis below, string based, and key based apis above
---------------------------------------------------
KnitVersionedFiles
Provides .texts, .revisions etc
This adapts the N-tuple keys to physical knit records which only have a
single string identifier (for historical reasons), which in older formats
was always the revision_id, and in the mapped code for packs is always
the last element of key tuples.
---------------------------------------------------
GraphIndex
A separate GraphIndex is used for each of the
texts/inventories/revisions/signatures contained within each individual
pack file. The GraphIndex layer works in N-tuples and is unaware of any
fallbacks.
===================================================
"""
def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
_serializer):
KnitRepository.__init__(self, _format, a_bzrdir, control_files,
1784
_commit_builder_class, _serializer)
1785
index_transport = self._transport.clone('indices')
1786
self._pack_collection = RepositoryPackCollection(self, self._transport,
index_transport,
self._transport.clone('upload'),
1789
self._transport.clone('packs'),
1790
_format.index_builder_class,
1791
_format.index_class)
1792
self.inventories = KnitVersionedFiles(
1793
_KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
1794
add_callback=self._pack_collection.inventory_index.add_callback,
1795
deltas=True, parents=True, is_locked=self.is_locked),
1796
data_access=self._pack_collection.inventory_index.data_access,
1797
max_delta_chain=200)
1798
self.revisions = KnitVersionedFiles(
1799
_KnitGraphIndex(self._pack_collection.revision_index.combined_index,
1800
add_callback=self._pack_collection.revision_index.add_callback,
1801
deltas=False, parents=True, is_locked=self.is_locked),
1802
data_access=self._pack_collection.revision_index.data_access,
max_delta_chain=0)
self.signatures = KnitVersionedFiles(
1805
_KnitGraphIndex(self._pack_collection.signature_index.combined_index,
1806
add_callback=self._pack_collection.signature_index.add_callback,
1807
deltas=False, parents=False, is_locked=self.is_locked),
1808
data_access=self._pack_collection.signature_index.data_access,
max_delta_chain=0)
self.texts = KnitVersionedFiles(
1811
_KnitGraphIndex(self._pack_collection.text_index.combined_index,
1812
add_callback=self._pack_collection.text_index.add_callback,
1813
deltas=True, parents=True, is_locked=self.is_locked),
1814
data_access=self._pack_collection.text_index.data_access,
1815
max_delta_chain=200)
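# Illustrative note (not part of the original source): inventories and
# texts are delta-compressed (deltas=True) with chains capped at 200
# entries, while revisions and signatures are stored without deltas
# (deltas=False), so no delta chain applies to them.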
# True when the repository object is 'write locked' (as opposed to the
1817
# physical lock only taken out around changes to the pack-names list.)
1818
# Another way to represent this would be a decorator around the control
1819
# files object that presents logical locks as physical ones - if this
1820
# gets ugly consider that alternative design. RBC 20071011
1821
self._write_lock_count = 0
1822
self._transaction = None
1824
self._reconcile_does_inventory_gc = True
1825
self._reconcile_fixes_text_parents = True
1826
self._reconcile_backsup_inventory = False
1827
self._fetch_order = 'unordered'
def _warn_if_deprecated(self):
# This class isn't deprecated, but one sub-format is
if isinstance(self._format, RepositoryFormatKnitPack5RichRootBroken):
from bzrlib import repository
if repository._deprecation_warning_done:
return
repository._deprecation_warning_done = True
warning("Format %s for %s is deprecated - please use"
" 'bzr upgrade --1.6.1-rich-root'"
% (self._format, self.bzrdir.transport.base))
def _abort_write_group(self):
1841
self._pack_collection._abort_write_group()
1843
def _find_inconsistent_revision_parents(self):
1844
"""Find revisions with incorrectly cached parents.
1846
:returns: an iterator yielding tuples of (revision-id, parents-in-index,
parents-in-revision).
"""
if not self.is_locked():
1850
raise errors.ObjectNotLocked(self)
1851
pb = ui.ui_factory.nested_progress_bar()
result = []
revision_nodes = self._pack_collection.revision_index \
1855
.combined_index.iter_all_entries()
1856
index_positions = []
1857
# Get the cached index values for all revisions, and also the location
1858
# in each index of the revision text so we can perform linear IO.
1859
for index, key, value, refs in revision_nodes:
1860
pos, length = value[1:].split(' ')
1861
index_positions.append((index, int(pos), key[0],
1862
tuple(parent[0] for parent in refs[0])))
1863
pb.update("Reading revision index.", 0, 0)
1864
index_positions.sort()
1865
batch_count = len(index_positions) / 1000 + 1
1866
pb.update("Checking cached revision graph.", 0, batch_count)
1867
for offset in xrange(batch_count):
1868
pb.update("Checking cached revision graph.", offset)
1869
to_query = index_positions[offset * 1000:(offset + 1) * 1000]
1872
rev_ids = [item[2] for item in to_query]
1873
revs = self.get_revisions(rev_ids)
1874
for revision, item in zip(revs, to_query):
1875
index_parents = item[3]
1876
rev_parents = tuple(revision.parent_ids)
1877
if index_parents != rev_parents:
1878
result.append((revision.revision_id, index_parents, rev_parents))
1883
@symbol_versioning.deprecated_method(symbol_versioning.one_one)
1884
def get_parents(self, revision_ids):
1885
"""See graph._StackedParentsProvider.get_parents."""
1886
parent_map = self.get_parent_map(revision_ids)
1887
return [parent_map.get(r, None) for r in revision_ids]
1889
def _make_parents_provider(self):
1890
return graph.CachingParentsProvider(self)
1892
def _refresh_data(self):
1893
if self._write_lock_count == 1 or (
1894
self.control_files._lock_count == 1 and
1895
self.control_files._lock_mode == 'r'):
1896
# forget what names there are
1897
self._pack_collection.reset()
1898
# XXX: Better to do an in-memory merge when acquiring a new lock -
1899
# factor out code from _save_pack_names.
1900
self._pack_collection.ensure_loaded()
1902
def _start_write_group(self):
1903
self._pack_collection._start_write_group()
1905
def _commit_write_group(self):
1906
return self._pack_collection._commit_write_group()
1908
def get_transaction(self):
1909
if self._write_lock_count:
1910
return self._transaction
1912
return self.control_files.get_transaction()
1914
def is_locked(self):
1915
return self._write_lock_count or self.control_files.is_locked()
1917
def is_write_locked(self):
1918
return self._write_lock_count
def lock_write(self, token=None):
if not self._write_lock_count and self.is_locked():
raise errors.ReadOnlyError(self)
self._write_lock_count += 1
if self._write_lock_count == 1:
self._transaction = transactions.WriteTransaction()
for repo in self._fallback_repositories:
# Writes don't affect fallback repos
repo.lock_read()
self._refresh_data()

def lock_read(self):
if self._write_lock_count:
self._write_lock_count += 1
else:
self.control_files.lock_read()
for repo in self._fallback_repositories:
# Writes don't affect fallback repos
repo.lock_read()
self._refresh_data()
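# Illustrative note (not part of the original source): write locks are
# counted here rather than taken physically each time, so nested
# lock_write()/unlock() pairs simply adjust _write_lock_count and only the
# outermost unlock finishes the WriteTransaction (see unlock below), e.g.
#   repo.lock_write(); repo.lock_write()   # count goes 1 -> 2
#   repo.unlock(); repo.unlock()           # count goes 2 -> 1 -> 0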
def leave_lock_in_place(self):
1942
# not supported - raise an error
1943
raise NotImplementedError(self.leave_lock_in_place)
1945
def dont_leave_lock_in_place(self):
1946
# not supported - raise an error
1947
raise NotImplementedError(self.dont_leave_lock_in_place)
@needs_write_lock
def pack(self):
"""Compress the data within the repository.

This will pack all the data to a single pack. In future it may
recompress deltas or do other such expensive operations.
"""
self._pack_collection.pack()
def reconcile(self, other=None, thorough=False):
"""Reconcile this repository."""
from bzrlib.reconcile import PackReconciler
reconciler = PackReconciler(self, thorough=thorough)
reconciler.reconcile()
return reconciler

def unlock(self):
if self._write_lock_count == 1 and self._write_group is not None:
self.abort_write_group()
self._transaction = None
self._write_lock_count = 0
raise errors.BzrError(
'Must end write group before releasing write lock on %s'
% self)
if self._write_lock_count:
self._write_lock_count -= 1
if not self._write_lock_count:
transaction = self._transaction
self._transaction = None
transaction.finish()
for repo in self._fallback_repositories:
repo.unlock()
else:
self.control_files.unlock()
for repo in self._fallback_repositories:
repo.unlock()
class RepositoryFormatPack(MetaDirRepositoryFormat):
"""Format logic for pack structured repositories.

This repository format has:
- a list of packs in pack-names
- packs in packs/NAME.pack
- indices in indices/NAME.{iix,six,tix,rix}
- knit deltas in the packs, knit indices mapped to the indices.
- thunk objects to support the knits programming API.
- a format marker of its own
- an optional 'shared-storage' flag
- an optional 'no-working-trees' flag
- a LockDir lock
"""

# Set this attribute in derived classes to control the repository class
# created by open and initialize.
repository_class = None
# Set this attribute in derived classes to control the
# _commit_builder_class that the repository objects will have passed to
# their constructor.
_commit_builder_class = None
# Set this attribute in derived classes to control the _serializer that the
# repository objects will have passed to their constructor.
_serializer = None
# External references are not supported in pack repositories yet.
supports_external_lookups = False
# What index classes to use
index_builder_class = None
index_class = None
def initialize(self, a_bzrdir, shared=False):
"""Create a pack based repository.

:param a_bzrdir: bzrdir to contain the new repository; must already
be initialized.
:param shared: If true the repository will be initialized as a shared
repository.
"""
mutter('creating repository in %s.', a_bzrdir.transport.base)
dirs = ['indices', 'obsolete_packs', 'packs', 'upload']
builder = self.index_builder_class()
files = [('pack-names', builder.finish())]
utf8_files = [('format', self.get_format_string())]

self._upload_blank_content(a_bzrdir, dirs, files, utf8_files, shared)
return self.open(a_bzrdir=a_bzrdir, _found=True)

def open(self, a_bzrdir, _found=False, _override_transport=None):
"""See RepositoryFormat.open().

:param _override_transport: INTERNAL USE ONLY. Allows opening the
repository at a slightly different url
than normal. I.e. during 'upgrade'.
"""
if not _found:
format = RepositoryFormat.find_format(a_bzrdir)
if _override_transport is not None:
repo_transport = _override_transport
else:
repo_transport = a_bzrdir.get_repository_transport(None)
control_files = lockable_files.LockableFiles(repo_transport,
'lock', lockdir.LockDir)
return self.repository_class(_format=self,
a_bzrdir=a_bzrdir,
control_files=control_files,
_commit_builder_class=self._commit_builder_class,
_serializer=self._serializer)
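# Illustrative sketch (not part of the original source): a concrete format
# subclass is normally driven through these two entry points, e.g.
#   format = RepositoryFormatKnitPack1()
#   repo = format.initialize(a_bzrdir)          # lays out dirs + pack-names
#   repo = format.open(a_bzrdir, _found=True)   # opens an existing repo
# where a_bzrdir is an existing bzrdir for the target location; everyday
# code reaches this via the bzrdir format registry rather than directly.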
class RepositoryFormatKnitPack1(RepositoryFormatPack):
"""A no-subtrees parameterized Pack repository.

This format was introduced in 0.92.
"""

repository_class = KnitPackRepository
_commit_builder_class = PackCommitBuilder
@property
def _serializer(self):
return xml5.serializer_v5
# What index classes to use
index_builder_class = InMemoryGraphIndex
index_class = GraphIndex

def _get_matching_bzrdir(self):
return bzrdir.format_registry.make_bzrdir('pack-0.92')

def _ignore_setting_bzrdir(self, format):
pass

_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

def get_format_string(self):
"""See RepositoryFormat.get_format_string()."""
return "Bazaar pack repository format 1 (needs bzr 0.92)\n"

def get_format_description(self):
"""See RepositoryFormat.get_format_description()."""
return "Packs containing knits without subtree support"

def check_conversion_target(self, target_format):
pass
2093
class RepositoryFormatKnitPack3(RepositoryFormatPack):
2094
"""A subtrees parameterized Pack repository.
2096
This repository format uses the xml7 serializer to get:
2097
- support for recording full info about the tree root
2098
- support for recording tree-references
2100
This format was introduced in 0.92.
2103
repository_class = KnitPackRepository
2104
_commit_builder_class = PackRootCommitBuilder
2105
rich_root_data = True
2106
supports_tree_reference = True
@property
def _serializer(self):
2109
return xml7.serializer_v7
2110
# What index classes to use
2111
index_builder_class = InMemoryGraphIndex
2112
index_class = GraphIndex
2114
def _get_matching_bzrdir(self):
2115
return bzrdir.format_registry.make_bzrdir(
2116
'pack-0.92-subtree')
2118
def _ignore_setting_bzrdir(self, format):
2121
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2123
def check_conversion_target(self, target_format):
2124
if not target_format.rich_root_data:
2125
raise errors.BadConversionTarget(
2126
'Does not support rich root data.', target_format)
2127
if not getattr(target_format, 'supports_tree_reference', False):
2128
raise errors.BadConversionTarget(
2129
'Does not support nested trees', target_format)
2131
def get_format_string(self):
2132
"""See RepositoryFormat.get_format_string()."""
2133
return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"
2135
def get_format_description(self):
2136
"""See RepositoryFormat.get_format_description()."""
2137
return "Packs containing knits with subtree support\n"
2140
class RepositoryFormatKnitPack4(RepositoryFormatPack):
2141
"""A rich-root, no subtrees parameterized Pack repository.
2143
This repository format uses the xml6 serializer to get:
2144
- support for recording full info about the tree root
2146
This format was introduced in 1.0.
2149
repository_class = KnitPackRepository
2150
_commit_builder_class = PackRootCommitBuilder
2151
rich_root_data = True
2152
supports_tree_reference = False
@property
def _serializer(self):
2155
return xml6.serializer_v6
2156
# What index classes to use
2157
index_builder_class = InMemoryGraphIndex
2158
index_class = GraphIndex
2160
def _get_matching_bzrdir(self):
2161
return bzrdir.format_registry.make_bzrdir(
2164
def _ignore_setting_bzrdir(self, format):
2167
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2169
def check_conversion_target(self, target_format):
2170
if not target_format.rich_root_data:
2171
raise errors.BadConversionTarget(
2172
'Does not support rich root data.', target_format)
2174
def get_format_string(self):
2175
"""See RepositoryFormat.get_format_string()."""
2176
return ("Bazaar pack repository format 1 with rich root"
2177
" (needs bzr 1.0)\n")
2179
def get_format_description(self):
2180
"""See RepositoryFormat.get_format_description()."""
2181
return "Packs containing knits with rich root support\n"
2184
class RepositoryFormatKnitPack5(RepositoryFormatPack):
2185
"""Repository that supports external references to allow stacking.
2189
Supports external lookups, which results in non-truncated ghosts after
2190
reconcile compared to pack-0.92 formats.
2193
repository_class = KnitPackRepository
2194
_commit_builder_class = PackCommitBuilder
2195
supports_external_lookups = True
2196
# What index classes to use
2197
index_builder_class = InMemoryGraphIndex
2198
index_class = GraphIndex
@property
def _serializer(self):
2202
return xml5.serializer_v5
2204
def _get_matching_bzrdir(self):
2205
return bzrdir.format_registry.make_bzrdir('1.6')
2207
def _ignore_setting_bzrdir(self, format):
2210
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2212
def get_format_string(self):
2213
"""See RepositoryFormat.get_format_string()."""
2214
return "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"
2216
def get_format_description(self):
2217
"""See RepositoryFormat.get_format_description()."""
2218
return "Packs 5 (adds stacking support, requires bzr 1.6)"
2220
def check_conversion_target(self, target_format):
2224
class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
2225
"""A repository with rich roots and stacking.
2227
New in release 1.6.1.
2229
Supports stacking on other repositories, allowing data to be accessed
2230
without being stored locally.
2233
repository_class = KnitPackRepository
2234
_commit_builder_class = PackRootCommitBuilder
2235
rich_root_data = True
2236
supports_tree_reference = False # no subtrees
2237
supports_external_lookups = True
2238
# What index classes to use
2239
index_builder_class = InMemoryGraphIndex
2240
index_class = GraphIndex
@property
def _serializer(self):
2244
return xml6.serializer_v6
2246
def _get_matching_bzrdir(self):
2247
return bzrdir.format_registry.make_bzrdir(
2250
def _ignore_setting_bzrdir(self, format):
2253
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2255
def check_conversion_target(self, target_format):
2256
if not target_format.rich_root_data:
2257
raise errors.BadConversionTarget(
2258
'Does not support rich root data.', target_format)
2260
def get_format_string(self):
2261
"""See RepositoryFormat.get_format_string()."""
2262
return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"
2264
def get_format_description(self):
2265
return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"
2268
class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
2269
"""A repository with rich roots and external references.
2273
Supports external lookups, which results in non-truncated ghosts after
2274
reconcile compared to pack-0.92 formats.
2276
This format was deprecated because the serializer it uses accidentally
2277
supported subtrees, when the format was not intended to. This meant that
2278
someone could accidentally fetch from an incorrect repository.
2281
repository_class = KnitPackRepository
2282
_commit_builder_class = PackRootCommitBuilder
2283
rich_root_data = True
2284
supports_tree_reference = False # no subtrees
2286
supports_external_lookups = True
2287
# What index classes to use
2288
index_builder_class = InMemoryGraphIndex
2289
index_class = GraphIndex
@property
def _serializer(self):
2293
return xml7.serializer_v7
2295
def _get_matching_bzrdir(self):
2296
return bzrdir.format_registry.make_bzrdir(
2299
def _ignore_setting_bzrdir(self, format):
2302
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2304
def check_conversion_target(self, target_format):
2305
if not target_format.rich_root_data:
2306
raise errors.BadConversionTarget(
2307
'Does not support rich root data.', target_format)
2309
def get_format_string(self):
2310
"""See RepositoryFormat.get_format_string()."""
2311
return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"
2313
def get_format_description(self):
2314
return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
2318
class RepositoryFormatKnitPack6(RepositoryFormatPack):
2319
"""A repository with stacking and btree indexes,
2320
without rich roots or subtrees.
2322
This is equivalent to pack-1.6 with B+Tree indices.
2325
repository_class = KnitPackRepository
2326
_commit_builder_class = PackCommitBuilder
2327
supports_external_lookups = True
2328
# What index classes to use
2329
index_builder_class = BTreeBuilder
2330
index_class = BTreeGraphIndex
@property
def _serializer(self):
2334
return xml5.serializer_v5
2336
def _get_matching_bzrdir(self):
2337
return bzrdir.format_registry.make_bzrdir('1.9')
2339
def _ignore_setting_bzrdir(self, format):
2342
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2344
def get_format_string(self):
2345
"""See RepositoryFormat.get_format_string()."""
2346
return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"
2348
def get_format_description(self):
2349
"""See RepositoryFormat.get_format_description()."""
2350
return "Packs 6 (uses btree indexes, requires bzr 1.9)"
2352
def check_conversion_target(self, target_format):
2356
class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
2357
"""A repository with rich roots, no subtrees, stacking and btree indexes.
2359
1.6-rich-root with B+Tree indices.
2362
repository_class = KnitPackRepository
2363
_commit_builder_class = PackRootCommitBuilder
2364
rich_root_data = True
2365
supports_tree_reference = False # no subtrees
2366
supports_external_lookups = True
2367
# What index classes to use
2368
index_builder_class = BTreeBuilder
2369
index_class = BTreeGraphIndex
@property
def _serializer(self):
2373
return xml6.serializer_v6
2375
def _get_matching_bzrdir(self):
2376
return bzrdir.format_registry.make_bzrdir(
2379
def _ignore_setting_bzrdir(self, format):
2382
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2384
def check_conversion_target(self, target_format):
2385
if not target_format.rich_root_data:
2386
raise errors.BadConversionTarget(
2387
'Does not support rich root data.', target_format)
2389
def get_format_string(self):
2390
"""See RepositoryFormat.get_format_string()."""
2391
return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"
2393
def get_format_description(self):
2394
return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"
2397
class RepositoryFormatPackDevelopment2(RepositoryFormatPack):
2398
"""A no-subtrees development repository.
2400
This format should be retained until the second release after bzr 1.7.
2402
This is pack-1.6.1 with B+Tree indices.
2405
repository_class = KnitPackRepository
2406
_commit_builder_class = PackCommitBuilder
2407
supports_external_lookups = True
2408
# What index classes to use
2409
index_builder_class = BTreeBuilder
2410
index_class = BTreeGraphIndex
@property
def _serializer(self):
2414
return xml5.serializer_v5
2416
def _get_matching_bzrdir(self):
2417
return bzrdir.format_registry.make_bzrdir('development2')
2419
def _ignore_setting_bzrdir(self, format):
2422
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2424
def get_format_string(self):
2425
"""See RepositoryFormat.get_format_string()."""
2426
return "Bazaar development format 2 (needs bzr.dev from before 1.8)\n"
2428
def get_format_description(self):
2429
"""See RepositoryFormat.get_format_description()."""
2430
return ("Development repository format, currently the same as "
2431
"1.6.1 with B+Trees.\n")
2433
def check_conversion_target(self, target_format):
2437
class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
2438
"""A subtrees development repository.
2440
This format should be retained until the second release after bzr 1.7.
2442
1.6.1-subtree[as it might have been] with B+Tree indices.
2445
repository_class = KnitPackRepository
2446
_commit_builder_class = PackRootCommitBuilder
2447
rich_root_data = True
2448
supports_tree_reference = True
2449
supports_external_lookups = True
2450
# What index classes to use
2451
index_builder_class = BTreeBuilder
2452
index_class = BTreeGraphIndex
@property
def _serializer(self):
2456
return xml7.serializer_v7
2458
def _get_matching_bzrdir(self):
2459
return bzrdir.format_registry.make_bzrdir(
2460
'development2-subtree')
2462
def _ignore_setting_bzrdir(self, format):
2465
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2467
def check_conversion_target(self, target_format):
2468
if not target_format.rich_root_data:
2469
raise errors.BadConversionTarget(
2470
'Does not support rich root data.', target_format)
2471
if not getattr(target_format, 'supports_tree_reference', False):
2472
raise errors.BadConversionTarget(
2473
'Does not support nested trees', target_format)
2475
def get_format_string(self):
2476
"""See RepositoryFormat.get_format_string()."""
2477
return ("Bazaar development format 2 with subtree support "
2478
"(needs bzr.dev from before 1.8)\n")
2480
def get_format_description(self):
2481
"""See RepositoryFormat.get_format_description()."""
2482
return ("Development repository format, currently the same as "
2483
"1.6.1-subtree with B+Tree indices.\n")