# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Knit-based pack repository formats."""
from __future__ import absolute_import
21
from ..lazy_import import lazy_import
22
lazy_import(globals(), """
32
revision as _mod_revision,
40
from breezy.knit import (
53
GraphIndexPrefixAdapter,
56
from .knitrepo import (
59
from .pack_repo import (
67
PackRootCommitBuilder,
68
RepositoryPackCollection,
70
from ..sixish import (
73
from ..vf_repository import (
78
class KnitPackRepository(PackRepository, KnitRepository):
    """A pack repository whose payload is stored as knit records.

    Wires the four versioned-file stores (revisions, inventories,
    signatures, texts) to the pack collection's GraphIndex-backed
    indices.  CHK content is never supported by knit packs.
    """

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
                 _serializer):
        PackRepository.__init__(self, _format, a_bzrdir, control_files,
                                _commit_builder_class, _serializer)
        if self._format.supports_chks:
            raise AssertionError("chk not supported")
        index_transport = self._transport.clone('indices')
        self._pack_collection = KnitRepositoryPackCollection(self,
            self._transport,
            index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            use_chk_index=False,
            )
        # Inventories and texts are delta-compressed; revisions and
        # signatures are stored as full texts (max_delta_chain=0).
        self.inventories = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.inventory_index.data_access,
            max_delta_chain=200)
        self.revisions = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                deltas=False, parents=True, is_locked=self.is_locked,
                track_external_parent_refs=True),
            data_access=self._pack_collection.revision_index.data_access,
            max_delta_chain=0)
        self.signatures = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                deltas=False, parents=False, is_locked=self.is_locked),
            data_access=self._pack_collection.signature_index.data_access,
            max_delta_chain=0)
        self.texts = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.text_index.data_access,
            max_delta_chain=200)
        self.chk_bytes = None  # knit packs carry no CHK content
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False

    def _get_source(self, to_format):
        # Use the optimised same-format stream source when possible.
        if to_format.network_name() == self._format.network_name():
            return KnitPackStreamSource(self, to_format)
        return PackRepository._get_source(self, to_format)

    def _reconcile_pack(self, collection, packs, extension, revs, pb):
        packer = KnitReconcilePacker(collection, packs, extension, revs)
        return packer.pack(pb)
144
class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A no-subtrees parameterized Pack repository.

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder

    @property
    def _serializer(self):
        return xml5.serializer_v5
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits without subtree support"
177
class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True

    @property
    def _serializer(self):
        return xml7.serializer_v7
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            'pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with subtree support\n"
218
class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False

    @property
    def _serializer(self):
        return xml6.serializer_v6
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            'rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar pack repository format 1 with rich root"
                " (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with rich root support\n"
258
class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir('1.6')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"
296
class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository with rich roots and stacking.

    New in release 1.6.1.

    Supports stacking on other repositories, allowing data to be accessed
    without being stored locally.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            '1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"

    def get_format_description(self):
        return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"
336
class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """A repository with rich roots and external references.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.

    This format was deprecated because the serializer it uses accidentally
    supported subtrees, when the format was not intended to. This meant that
    someone could accidentally fetch from an incorrect repository.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees

    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        matching = controldir.format_registry.make_bzrdir(
            '1.6.1-rich-root')
        matching.repository_format = self
        return matching

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
                " (deprecated)")

    def is_deprecated(self):
        return True
387
class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """A repository with stacking and btree indexes,
    without rich roots or subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir('1.9')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"
423
class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            '1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"
460
class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained in 2.3, to provide an upgrade path from this
    to RepositoryFormat2aSubtree. It can be removed in later releases.

    1.6.1-subtree[as it might have been] with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            'development5-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 2 with subtree support "
                "(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
                "1.6.1-subtree with B+Tree indices.\n")
504
class KnitPackStreamSource(StreamSource):
    """A StreamSource used to transfer data between same-format KnitPack repos.

    This source assumes:
        1) Same serialization format for all objects
        2) Same root information
        3) XML format inventories
        4) Atomic inserts (so we can stream inventory texts before text
           content)
        5) No chk_bytes
    """

    def __init__(self, from_repository, to_format):
        super(KnitPackStreamSource, self).__init__(from_repository, to_format)
        # Text keys referenced by the streamed inventories; filled in while
        # the inventory stream is consumed, read by _get_text_stream.
        self._text_keys = None
        self._text_fetch_order = 'unordered'

    def _get_filtered_inv_stream(self, revision_ids):
        """Stream inventories for revision_ids, collecting their text keys.

        Text keys already introduced by parent inventories are excluded.
        """
        from_repo = self.from_repository
        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
        parent_keys = [(p,) for p in parent_ids]
        find_text_keys = from_repo._serializer._find_text_key_references
        parent_text_keys = set(find_text_keys(
            from_repo._inventory_xml_lines_for_keys(parent_keys)))
        content_text_keys = set()
        knit = KnitVersionedFiles(None, None)
        factory = KnitPlainFactory()

        def find_text_keys_from_content(record):
            if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
                raise ValueError("Unknown content storage kind for"
                    " inventory text: %s" % (record.storage_kind,))
            # It's a knit record, it has a _raw_record field (even if it was
            # reconstituted from a network stream).
            raw_data = record._raw_record
            # read the entire thing
            revision_id = record.key[-1]
            content, _ = knit._parse_record(revision_id, raw_data)
            if record.storage_kind == 'knit-delta-gz':
                line_iterator = factory.get_linedelta_content(content)
            elif record.storage_kind == 'knit-ft-gz':
                line_iterator = factory.get_fulltext_content(content)
            content_text_keys.update(find_text_keys(
                [(line, revision_id) for line in line_iterator]))
        revision_keys = [(r,) for r in revision_ids]

        def _filtered_inv_stream():
            source_vf = from_repo.inventories
            stream = source_vf.get_record_stream(revision_keys,
                                                 'unordered', False)
            for record in stream:
                if record.storage_kind == 'absent':
                    raise errors.NoSuchRevision(from_repo, record.key)
                find_text_keys_from_content(record)
                yield record
            self._text_keys = content_text_keys - parent_text_keys
        return ('inventories', _filtered_inv_stream())

    def _get_text_stream(self):
        # Note: We know we don't have to handle adding root keys, because both
        # the source and target are the identical network name.
        text_stream = self.from_repository.texts.get_record_stream(
            self._text_keys, self._text_fetch_order, False)
        return ('texts', text_stream)

    def get_stream(self, search):
        revision_ids = search.get_keys()
        for stream_info in self._fetch_revision_texts(revision_ids):
            yield stream_info
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
        yield self._get_filtered_inv_stream(revision_ids)
        yield self._get_text_stream()
576
class KnitPacker(Packer):
    """Packer that works with knit packs."""

    def __init__(self, pack_collection, packs, suffix, revision_ids=None,
                 reload_func=None):
        super(KnitPacker, self).__init__(pack_collection, packs, suffix,
                                         revision_ids=revision_ids,
                                         reload_func=reload_func)

    def _pack_map_and_index_list(self, index_attribute):
        """Convert a list of packs to an index pack map and index list.

        :param index_attribute: The attribute that the desired index is found
            on.
        :return: A tuple (map, list) where map contains the dict from
            index:pack_tuple, and list contains the indices in the preferred
            access order.
        """
        indices = []
        pack_map = {}
        for pack_obj in self.packs:
            index = getattr(pack_obj, index_attribute)
            indices.append(index)
            pack_map[index] = pack_obj
        return pack_map, indices

    def _index_contents(self, indices, key_filter=None):
        """Get an iterable of the index contents from a pack_map.

        :param indices: The list of indices to query
        :param key_filter: An optional filter to limit the keys returned.
        """
        all_index = CombinedGraphIndex(indices)
        if key_filter is None:
            return all_index.iter_all_entries()
        else:
            return all_index.iter_entries(key_filter)

    def _copy_nodes(self, nodes, index_map, writer, write_index,
                    output_lines=None):
        """Copy knit nodes between packs with no graph references.

        :param output_lines: Output full texts of copied items.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_copy_nodes(nodes, index_map, writer,
                write_index, pb, output_lines=output_lines)
        finally:
            pb.finished()

    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
                       output_lines=None):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
        # group by pack
        nodes = sorted(nodes)
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        request_groups = {}
        for index, key, value in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value))
        record_index = 0
        pb.update("Copied record", record_index, len(nodes))
        # .items() works on both Python 2 and 3 (.iteritems() is py2-only).
        for index, items in request_groups.items():
            pack_readv_requests = []
            for key, value in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append((offset, length, (key, value[0])))
            # linear scan up the pack
            pack_readv_requests.sort()
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path,
                    [offset[0:2] for offset in pack_readv_requests])
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (_1, _2, (key, eol_flag)) in zip(
                reader.iter_records(), pack_readv_requests):
                raw_data = read_func(None)
                # check the header only
                if output_lines is not None:
                    output_lines(knit._parse_record(key[-1], raw_data)[0])
                else:
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
                pb.update("Copied record", record_index)
                record_index += 1

    def _copy_nodes_graph(self, index_map, writer, write_index,
                          readv_group_iter, total_items, output_lines=False):
        """Copy knit nodes between packs.

        :param output_lines: Return lines present in the copied data as
            an iterator of line,version_id.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for result in self._do_copy_nodes_graph(index_map, writer,
                write_index, output_lines, pb, readv_group_iter, total_items):
                yield result
        except Exception:
            # Python 2.4 does not permit try:finally: in a generator.
            pb.finished()
            raise
        else:
            pb.finished()

    def _do_copy_nodes_graph(self, index_map, writer, write_index,
                             output_lines, pb, readv_group_iter, total_items):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        record_index = 0
        pb.update("Copied record", record_index, total_items)
        for index, readv_vector, node_vector in readv_group_iter:
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path, readv_vector)
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (key, eol_flag, references) in zip(
                reader.iter_records(), node_vector):
                raw_data = read_func(None)
                if output_lines:
                    # read the entire thing
                    content, _ = knit._parse_record(key[-1], raw_data)
                    if len(references[-1]) == 0:
                        line_iterator = factory.get_fulltext_content(content)
                    else:
                        line_iterator = factory.get_linedelta_content(content)
                    for line in line_iterator:
                        yield line, key
                else:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)
                pb.update("Copied record", record_index)
                record_index += 1

    def _process_inventory_lines(self, inv_lines):
        """Use up the inv_lines generator and setup a text key filter."""
        repo = self._pack_collection.repo
        fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
            inv_lines, self.revision_keys)
        text_filter = []
        for fileid, file_revids in fileid_revisions.items():
            text_filter.extend([(fileid, file_revid) for file_revid in file_revids])
        self._text_filter = text_filter

    def _copy_inventory_texts(self):
        # select inventory keys
        inv_keys = self._revision_keys # currently the same keyspace, and note that
        # querying for keys here could introduce a bug where an inventory item
        # is missed, so do not change it to query separately without cross
        # checking like the text key check below.
        inventory_index_map, inventory_indices = self._pack_map_and_index_list(
            'inventory_index')
        inv_nodes = self._index_contents(inventory_indices, inv_keys)
        # copy inventory keys and adjust values
        # XXX: Should be a helper function to allow different inv representation
        # at this point.
        self.pb.update("Copying inventory texts", 2)
        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
        # Only grab the output lines if we will be processing them
        output_lines = bool(self.revision_ids)
        inv_lines = self._copy_nodes_graph(inventory_index_map,
            self.new_pack._writer, self.new_pack.inventory_index,
            readv_group_iter, total_items, output_lines=output_lines)
        if self.revision_ids:
            self._process_inventory_lines(inv_lines)
        else:
            # eat the iterator to cause it to execute.
            list(inv_lines)
            self._text_filter = None
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.inventory_index.key_count(),
                time.time() - self.new_pack.start_time)

    def _update_pack_order(self, entries, index_to_pack_map):
        """Determine how we want our packs to be ordered.

        This changes the sort order of the self.packs list so that packs unused
        by 'entries' will be at the end of the list, so that future requests
        can avoid probing them. Used packs will be at the front of the
        self.packs list, in the order of their first use in 'entries'.

        :param entries: A list of (index, ...) tuples
        :param index_to_pack_map: A mapping from index objects to pack objects.
        """
        packs = []
        seen_indexes = set()
        for entry in entries:
            index = entry[0]
            if index not in seen_indexes:
                packs.append(index_to_pack_map[index])
                seen_indexes.add(index)
        if len(packs) == len(self.packs):
            if 'pack' in debug.debug_flags:
                trace.mutter('Not changing pack list, all packs used.')
            return
        seen_packs = set(packs)
        for pack in self.packs:
            if pack not in seen_packs:
                packs.append(pack)
                seen_packs.add(pack)
        if 'pack' in debug.debug_flags:
            old_names = [p.access_tuple()[1] for p in self.packs]
            new_names = [p.access_tuple()[1] for p in packs]
            trace.mutter('Reordering packs\nfrom: %s\n  to: %s',
                         old_names, new_names)
        self.packs = packs

    def _copy_revision_texts(self):
        """Copy revision data to the new pack."""
        # select revisions
        if self.revision_ids:
            revision_keys = [(revision_id,) for revision_id in self.revision_ids]
        else:
            revision_keys = None
        # select revision keys
        revision_index_map, revision_indices = self._pack_map_and_index_list(
            'revision_index')
        revision_nodes = self._index_contents(revision_indices, revision_keys)
        revision_nodes = list(revision_nodes)
        self._update_pack_order(revision_nodes, revision_index_map)
        # copy revision keys and adjust values
        self.pb.update("Copying revision texts", 1)
        total_items, readv_group_iter = self._revision_node_readv(revision_nodes)
        list(self._copy_nodes_graph(revision_index_map, self.new_pack._writer,
            self.new_pack.revision_index, readv_group_iter, total_items))
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.revision_index.key_count(),
                time.time() - self.new_pack.start_time)
        self._revision_keys = revision_keys

    def _get_text_nodes(self):
        text_index_map, text_indices = self._pack_map_and_index_list(
            'text_index')
        return text_index_map, self._index_contents(text_indices,
            self._text_filter)

    def _copy_text_texts(self):
        # select text keys
        text_index_map, text_nodes = self._get_text_nodes()
        if self._text_filter is not None:
            # We could return the keys copied as part of the return value from
            # _copy_nodes_graph but this doesn't work all that well with the
            # need to get line output too, so we check separately, and as we're
            # going to buffer everything anyway, we check beforehand, which
            # saves reading knit data over the wire when we know there are
            # missing records.
            text_nodes = set(text_nodes)
            present_text_keys = set(_node[1] for _node in text_nodes)
            missing_text_keys = set(self._text_filter) - present_text_keys
            if missing_text_keys:
                # TODO: raise a specific error that can handle many missing
                # keys.
                trace.mutter("missing keys during fetch: %r", missing_text_keys)
                a_missing_key = missing_text_keys.pop()
                raise errors.RevisionNotPresent(a_missing_key[1],
                    a_missing_key[0])
        # copy text keys and adjust values
        self.pb.update("Copying content texts", 3)
        total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        self._log_copied_texts()

    def _create_pack_from_packs(self):
        self.pb.update("Opening pack", 0, 5)
        self.new_pack = self.open_pack()
        new_pack = self.new_pack
        # buffer data - we won't be reading-back during the pack creation and
        # this makes a significant difference on sftp pushes.
        new_pack.set_write_cache_size(1024*1024)
        if 'pack' in debug.debug_flags:
            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
                for a_pack in self.packs]
            if self.revision_ids is not None:
                rev_count = len(self.revision_ids)
            else:
                rev_count = 'all'
            trace.mutter('%s: create_pack: creating pack from source packs: '
                '%s%s %s revisions wanted %s t=0',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                plain_pack_list, rev_count)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_text_texts()
        # select signature keys
        signature_filter = self._revision_keys # same keyspace
        signature_index_map, signature_indices = self._pack_map_and_index_list(
            'signature_index')
        signature_nodes = self._index_contents(signature_indices,
            signature_filter)
        # copy signature keys and adjust values
        self.pb.update("Copying signature texts", 4)
        self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
            new_pack.signature_index)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                new_pack.signature_index.key_count(),
                time.time() - new_pack.start_time)
        new_pack._check_references()
        if not self._use_pack(new_pack):
            new_pack.abort()
            return None
        self.pb.update("Finishing pack", 5)
        new_pack.finish()
        self._pack_collection.allocate(new_pack)
        return new_pack

    def _least_readv_node_readv(self, nodes):
        """Generate request groups for nodes using the least readv's.

        :param nodes: An iterable of graph index nodes.
        :return: Total node count and an iterator of the data needed to perform
            readvs to obtain the data for nodes. Each item yielded by the
            iterator is a tuple with:
            index, readv_vector, node_vector. readv_vector is a list ready to
            hand to the transport readv method, and node_vector is a list of
            (key, eol_flag, references) for the node retrieved by the
            matching readv_vector.
        """
        # group by pack so we do one readv per pack
        nodes = sorted(nodes)
        total = len(nodes)
        request_groups = {}
        for index, key, value, references in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value, references))
        result = []
        for index, items in request_groups.items():
            pack_readv_requests = []
            for key, value, references in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append(
                    ((offset, length), (key, value[0], references)))
            # linear scan up the pack to maximum range combining.
            pack_readv_requests.sort()
            # split out the readv and the node data.
            pack_readv = [readv for readv, node in pack_readv_requests]
            node_vector = [node for readv, node in pack_readv_requests]
            result.append((index, pack_readv, node_vector))
        return total, result

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        return self._least_readv_node_readv(revision_nodes)
963
class KnitReconcilePacker(KnitPacker):
964
"""A packer which regenerates indices etc as it copies.
966
This is used by ``brz reconcile`` to cause parent text pointers to be
970
def __init__(self, *args, **kwargs):
    """Initialise the reconciling packer.

    Tracks whether any copied data had to be altered (corrected
    parents / re-inserted full texts) in self._data_changed.
    """
    super(KnitReconcilePacker, self).__init__(*args, **kwargs)
    self._data_changed = False
974
def _process_inventory_lines(self, inv_lines):
    """Generate a text key reference map rather for reconciling with."""
    repo = self._pack_collection.repo
    refs = repo._serializer._find_text_key_references(inv_lines)
    self._text_refs = refs
    # during reconcile we:
    #  - convert unreferenced texts to full texts
    #  - correct texts which reference a text not copied to be full texts
    #  - copy all others as-is but with corrected parents.
    #  - so at this point we don't know enough to decide what becomes a full
    #    text.
    self._text_filter = None
987
def _copy_text_texts(self):
988
"""generate what texts we should have and then copy."""
989
self.pb.update("Copying content texts", 3)
990
# we have three major tasks here:
991
# 1) generate the ideal index
992
repo = self._pack_collection.repo
993
ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
995
self.new_pack.revision_index.iter_all_entries()])
996
ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
997
# 2) generate a text_nodes list that contains all the deltas that can
998
# be used as-is, with corrected parents.
1001
discarded_nodes = []
1002
NULL_REVISION = _mod_revision.NULL_REVISION
1003
text_index_map, text_nodes = self._get_text_nodes()
1004
for node in text_nodes:
1010
ideal_parents = tuple(ideal_index[node[1]])
1012
discarded_nodes.append(node)
1013
self._data_changed = True
1015
if ideal_parents == (NULL_REVISION,):
1017
if ideal_parents == node[3][0]:
1019
ok_nodes.append(node)
1020
elif ideal_parents[0:1] == node[3][0][0:1]:
1021
# the left most parent is the same, or there are no parents
1022
# today. Either way, we can preserve the representation as
1023
# long as we change the refs to be inserted.
1024
self._data_changed = True
1025
ok_nodes.append((node[0], node[1], node[2],
1026
(ideal_parents, node[3][1])))
1027
self._data_changed = True
1029
# Reinsert this text completely
1030
bad_texts.append((node[1], ideal_parents))
1031
self._data_changed = True
1032
# we're finished with some data.
1035
# 3) bulk copy the ok data
1036
total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
1037
list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
1038
self.new_pack.text_index, readv_group_iter, total_items))
1039
# 4) adhoc copy all the other texts.
1040
# We have to topologically insert all texts otherwise we can fail to
1041
# reconcile when parts of a single delta chain are preserved intact,
1042
# and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
1043
# reinserted, and if d3 has incorrect parents it will also be
1044
# reinserted. If we insert d3 first, d2 is present (as it was bulk
1045
# copied), so we will try to delta, but d2 is not currently able to be
1046
# extracted because its basis d1 is not present. Topologically sorting
1047
# addresses this. The following generates a sort for all the texts that
1048
# are being inserted without having to reference the entire text key
1049
# space (we only topo sort the revisions, which is smaller).
1050
topo_order = tsort.topo_sort(ancestors)
1051
rev_order = dict(zip(topo_order, range(len(topo_order))))
1052
bad_texts.sort(key=lambda key:rev_order.get(key[0][1], 0))
1053
transaction = repo.get_transaction()
1054
file_id_index = GraphIndexPrefixAdapter(
1055
self.new_pack.text_index,
1057
add_nodes_callback=self.new_pack.text_index.add_nodes)
1058
data_access = _DirectPackAccess(
1059
{self.new_pack.text_index:self.new_pack.access_tuple()})
1060
data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
1061
self.new_pack.access_tuple())
1062
output_texts = KnitVersionedFiles(
1063
_KnitGraphIndex(self.new_pack.text_index,
1064
add_callback=self.new_pack.text_index.add_nodes,
1065
deltas=True, parents=True, is_locked=repo.is_locked),
1066
data_access=data_access, max_delta_chain=200)
1067
for key, parent_keys in bad_texts:
1068
# We refer to the new pack to delta data being output.
1069
# A possible improvement would be to catch errors on short reads
1070
# and only flush then.
1071
self.new_pack.flush()
1073
for parent_key in parent_keys:
1074
if parent_key[0] != key[0]:
1075
# Graph parents must match the fileid
1076
raise errors.BzrError('Mismatched key parent %r:%r' %
1078
parents.append(parent_key[1])
1079
text_lines = osutils.split_lines(repo.texts.get_record_stream(
1080
[key], 'unordered', True).next().get_bytes_as('fulltext'))
1081
output_texts.add_lines(key, parent_keys, text_lines,
1082
random_id=True, check_content=False)
1083
# 5) check that nothing inserted has a reference outside the keyspace.
1084
missing_text_keys = self.new_pack.text_index._external_references()
1085
if missing_text_keys:
1086
raise errors.BzrCheckError('Reference to missing compression parents %r'
1087
% (missing_text_keys,))
1088
self._log_copied_texts()
1090
def _use_pack(self, new_pack):
1091
"""Override _use_pack to check for reconcile having changed content."""
1092
# XXX: we might be better checking this at the copy time.
1093
original_inventory_keys = set()
1094
inv_index = self._pack_collection.inventory_index.combined_index
1095
for entry in inv_index.iter_all_entries():
1096
original_inventory_keys.add(entry[1])
1097
new_inventory_keys = set()
1098
for entry in new_pack.inventory_index.iter_all_entries():
1099
new_inventory_keys.add(entry[1])
1100
if new_inventory_keys != original_inventory_keys:
1101
self._data_changed = True
1102
return new_pack.data_inserted() and self._data_changed
1105
class OptimisingKnitPacker(KnitPacker):
    """A packer which spends more time to create better disk layouts."""

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        This sort places revisions in topological order with the ancestors
        after them.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        # build an ancestors dict
        ancestors = {}
        by_key = {}
        for index, key, value, references in revision_nodes:
            # references[0] holds the parent-revision reference list.
            ancestors[key] = references[0]
            by_key[key] = (index, value, references)
        order = tsort.topo_sort(ancestors)
        total = len(order)
        # Single IO is pathological, but it will work as a starting point.
        requests = []
        # Reversed topological order places each revision's ancestors after
        # it on disk.
        for key in reversed(order):
            index, value, references = by_key[key]
            # ---- KnitGraphIndex.get_position
            bits = value[1:].split(' ')
            offset, length = int(bits[0]), int(bits[1])
            requests.append(
                (index, [(offset, length)], [(key, value[0], references)]))
        # TODO: combine requests in the same index that are in ascending order.
        return total, requests

    def open_pack(self):
        """Open a pack for the pack we are creating."""
        new_pack = super(OptimisingKnitPacker, self).open_pack()
        # Turn on the optimization flags for all the index builders.
        new_pack.revision_index.set_optimize(for_size=True)
        new_pack.inventory_index.set_optimize(for_size=True)
        new_pack.text_index.set_optimize(for_size=True)
        new_pack.signature_index.set_optimize(for_size=True)
        # Without this return callers would receive None instead of the
        # newly opened pack.
        return new_pack
1149
class KnitRepositoryPackCollection(RepositoryPackCollection):
    """A knit pack collection."""

    # Pack classes used when creating a new pack and when resuming a
    # previously suspended one.
    pack_factory = NewPack
    resumed_pack_factory = ResumedPack
    # Packer used for ordinary repacking.
    normal_packer_class = KnitPacker
    # Packer that spends extra time producing better disk layouts (per
    # OptimisingKnitPacker's docstring).
    optimising_packer_class = OptimisingKnitPacker