# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Knit-based pack repository formats."""

from __future__ import absolute_import

from ..lazy_import import lazy_import
lazy_import(globals(), """
import time

from breezy import (
    controldir,
    debug,
    errors,
    osutils,
    revision as _mod_revision,
    trace,
    tsort,
    ui,
    )
from breezy.bzr import (
    pack,
    xml5,
    xml6,
    xml7,
    )
from breezy.bzr.knit import (
    _KnitGraphIndex,
    KnitPlainFactory,
    KnitVersionedFiles,
    )
""")

from ..bzr import (
    btree_index,
    )
from ..bzr.index import (
    CombinedGraphIndex,
    GraphIndex,
    GraphIndexPrefixAdapter,
    InMemoryGraphIndex,
    )
from .knitrepo import (
    KnitRepository,
    )
from .pack_repo import (
    _DirectPackAccess,
    NewPack,
    RepositoryFormatPack,
    ResumedPack,
    Packer,
    PackCommitBuilder,
    PackRepository,
    PackRootCommitBuilder,
    RepositoryPackCollection,
    )
from ..sixish import (
    viewitems,
    )
from ..bzr.vf_repository import (
    StreamSource,
    )


class KnitPackRepository(PackRepository, KnitRepository):

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
                 _serializer):
        PackRepository.__init__(self, _format, a_bzrdir, control_files,
                                _commit_builder_class, _serializer)
        if self._format.supports_chks:
            raise AssertionError("chk not supported")
        index_transport = self._transport.clone('indices')
        self._pack_collection = KnitRepositoryPackCollection(self,
            self._transport,
            index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            use_chk_index=False,
            )
        self.inventories = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.inventory_index.data_access,
            max_delta_chain=200)
        self.revisions = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                deltas=False, parents=True, is_locked=self.is_locked,
                track_external_parent_refs=True),
            data_access=self._pack_collection.revision_index.data_access,
            max_delta_chain=0)
        self.signatures = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                deltas=False, parents=False, is_locked=self.is_locked),
            data_access=self._pack_collection.signature_index.data_access,
            max_delta_chain=0)
        self.texts = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.text_index.data_access,
            max_delta_chain=200)
        self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False

    def _get_source(self, to_format):
        if to_format.network_name() == self._format.network_name():
            return KnitPackStreamSource(self, to_format)
        return PackRepository._get_source(self, to_format)

    def _reconcile_pack(self, collection, packs, extension, revs, pb):
        packer = KnitReconcilePacker(collection, packs, extension, revs)
        return packer.pack(pb)
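
# Example (illustrative, not part of the original module): the four
# KnitVersionedFiles attributes configured above share one access pattern.
# Reading a file text from a read-locked repository might look like this,
# where `repo`, `file_id` and `revision_id` are assumed names:
#
#     stream = repo.texts.get_record_stream(
#         [(file_id, revision_id)], 'unordered', True)
#     for record in stream:
#         text = record.get_bytes_as('fulltext')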


class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A no-subtrees parameterized Pack repository.

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    @property
    def _serializer(self):
        return xml5.serializer_v5
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits without subtree support"
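
# Example (illustrative): formats in this module are normally reached through
# the controldir format registry rather than constructed directly, e.g.:
#
#     bd_format = controldir.format_registry.make_bzrdir('pack-0.92')
#     repo_format = bd_format.repository_format  # a RepositoryFormatKnitPack1
#
# which is the same lookup _get_matching_bzrdir above performs.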


class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    @property
    def _serializer(self):
        return xml7.serializer_v7
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            'pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with subtree support\n"


class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False
    @property
    def _serializer(self):
        return xml6.serializer_v6
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            'rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar pack repository format 1 with rich root"
                " (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with rich root support\n"


class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir('1.6')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"


class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository with rich roots and stacking.

    New in release 1.6.1.

    Supports stacking on other repositories, allowing data to be accessed
    without being stored locally.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False  # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            '1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"

    def get_format_description(self):
        return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"


class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """A repository with rich roots and external references.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.

    This format was deprecated because the serializer it uses accidentally
    supported subtrees, when the format was not intended to. This meant that
    someone could accidentally fetch from an incorrect repository.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False  # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        matching = controldir.format_registry.make_bzrdir(
            '1.6.1-rich-root')
        matching.repository_format = self
        return matching

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
                " (deprecated)")

    def is_deprecated(self):
        return True


class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """A repository with stacking and btree indexes,
    without rich roots or subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir('1.9')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False  # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            '1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained in 2.3, to provide an upgrade path from this
    to RepositoryFormat2aSubtree. It can be removed in later releases.

    1.6.1-subtree[as it might have been] with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            'development5-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 2 with subtree support "
                "(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
                "1.6.1-subtree with B+Tree indices.\n")


class KnitPackStreamSource(StreamSource):
    """A StreamSource used to transfer data between same-format KnitPack repos.

    This source assumes:
        1) Same serialization format for all objects
        2) Same root information
        3) XML format inventories
        4) Atomic inserts (so we can stream inventory texts before text
           content)
        5) No chk_bytes
    """

    def __init__(self, from_repository, to_format):
        super(KnitPackStreamSource, self).__init__(from_repository, to_format)
        self._text_keys = None
        self._text_fetch_order = 'unordered'

    def _get_filtered_inv_stream(self, revision_ids):
        from_repo = self.from_repository
        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
        parent_keys = [(p,) for p in parent_ids]
        find_text_keys = from_repo._serializer._find_text_key_references
        parent_text_keys = set(find_text_keys(
            from_repo._inventory_xml_lines_for_keys(parent_keys)))
        content_text_keys = set()
        knit = KnitVersionedFiles(None, None)
        factory = KnitPlainFactory()

        def find_text_keys_from_content(record):
            if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
                raise ValueError("Unknown content storage kind for"
                                 " inventory text: %s" % (record.storage_kind,))
            # It's a knit record, it has a _raw_record field (even if it was
            # reconstituted from a network stream).
            raw_data = record._raw_record
            # read the entire thing
            revision_id = record.key[-1]
            content, _ = knit._parse_record(revision_id, raw_data)
            if record.storage_kind == 'knit-delta-gz':
                line_iterator = factory.get_linedelta_content(content)
            elif record.storage_kind == 'knit-ft-gz':
                line_iterator = factory.get_fulltext_content(content)
            content_text_keys.update(find_text_keys(
                [(line, revision_id) for line in line_iterator]))
        revision_keys = [(r,) for r in revision_ids]

        def _filtered_inv_stream():
            source_vf = from_repo.inventories
            stream = source_vf.get_record_stream(revision_keys,
                                                 'unordered', True)
            for record in stream:
                if record.storage_kind == 'absent':
                    raise errors.NoSuchRevision(from_repo, record.key)
                find_text_keys_from_content(record)
                yield record
            self._text_keys = content_text_keys - parent_text_keys
        return ('inventories', _filtered_inv_stream())

    def _get_text_stream(self):
        # Note: We know we don't have to handle adding root keys, because both
        # the source and target have the identical network name.
        text_stream = self.from_repository.texts.get_record_stream(
            self._text_keys, self._text_fetch_order, False)
        return ('texts', text_stream)

    def get_stream(self, search):
        revision_ids = search.get_keys()
        for stream_info in self._fetch_revision_texts(revision_ids):
            yield stream_info
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
        yield self._get_filtered_inv_stream(revision_ids)
        yield self._get_text_stream()
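
# Example (illustrative): a stream source yields (substream name, record
# stream) pairs which the fetch machinery inserts in order. Consuming
# get_stream could look like the following, where `from_repo`, `to_format`
# and `search` are assumed names:
#
#     source = KnitPackStreamSource(from_repo, to_format)
#     for substream_kind, substream in source.get_stream(search):
#         # revision and signature substreams arrive first, then
#         # 'inventories', then 'texts'.
#         for record in substream:
#             ...  # insert into the target repository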


class KnitPacker(Packer):
    """Packer that works with knit packs."""

    def __init__(self, pack_collection, packs, suffix, revision_ids=None,
                 reload_func=None):
        super(KnitPacker, self).__init__(pack_collection, packs, suffix,
                                         revision_ids=revision_ids,
                                         reload_func=reload_func)

    def _pack_map_and_index_list(self, index_attribute):
        """Convert a list of packs to an index pack map and index list.

        :param index_attribute: The attribute that the desired index is found
            on.
        :return: A tuple (map, list) where map contains the dict from
            index:pack_tuple, and list contains the indices in the preferred
            access order.
        """
        indices = []
        pack_map = {}
        for pack_obj in self.packs:
            index = getattr(pack_obj, index_attribute)
            indices.append(index)
            pack_map[index] = pack_obj
        return pack_map, indices
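
    # For example (illustrative): with two packs p1 and p2 whose revision
    # indexes are i1 and i2, self._pack_map_and_index_list('revision_index')
    # returns ({i1: p1, i2: p2}, [i1, i2]) - the map for readv dispatch, the
    # list in preferred access order.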

    def _index_contents(self, indices, key_filter=None):
        """Get an iterable of the index contents from a pack_map.

        :param indices: The list of indices to query
        :param key_filter: An optional filter to limit the keys returned.
        """
        all_index = CombinedGraphIndex(indices)
        if key_filter is None:
            return all_index.iter_all_entries()
        else:
            return all_index.iter_entries(key_filter)

    def _copy_nodes(self, nodes, index_map, writer, write_index,
                    output_lines=None):
        """Copy knit nodes between packs with no graph references.

        :param output_lines: Output full texts of copied items.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_copy_nodes(nodes, index_map, writer,
                                       write_index, pb, output_lines=output_lines)
        finally:
            pb.finished()

    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
                       output_lines=None):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
        # group by pack
        nodes = sorted(nodes)
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        request_groups = {}
        for index, key, value in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value))
        record_index = 0
        pb.update("Copied record", record_index, len(nodes))
        for index, items in viewitems(request_groups):
            pack_readv_requests = []
            for key, value in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append((offset, length, (key, value[0])))
            # linear scan up the pack
            pack_readv_requests.sort()
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path,
                    [offset[0:2] for offset in pack_readv_requests])
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (_1, _2, (key, eol_flag)) in zip(
                    reader.iter_records(), pack_readv_requests):
                raw_data = read_func(None)
                # check the header only
                if output_lines is not None:
                    output_lines(knit._parse_record(key[-1], raw_data)[0])
                else:
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
                pb.update("Copied record", record_index)
                record_index += 1
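
    # Note on the value format decoded above (as written by _KnitGraphIndex):
    # an index value is a one-byte eol flag ('N' when the text lacks a
    # trailing newline, a space otherwise) followed by "offset length", e.g.
    # 'N0 120' for a no-eol record at offset 0 spanning 120 bytes. So
    # value[1:].split(' ') recovers the byte range and value[0] travels along
    # as the eol flag.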

    def _copy_nodes_graph(self, index_map, writer, write_index,
                          readv_group_iter, total_items, output_lines=False):
        """Copy knit nodes between packs.

        :param output_lines: Return lines present in the copied data as
            an iterator of line,version_id.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for result in self._do_copy_nodes_graph(index_map, writer,
                    write_index, output_lines, pb, readv_group_iter, total_items):
                yield result
        except Exception:
            # Python 2.4 does not permit try:finally: in a generator.
            pb.finished()
            raise
        else:
            pb.finished()

    def _do_copy_nodes_graph(self, index_map, writer, write_index,
                             output_lines, pb, readv_group_iter, total_items):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        record_index = 0
        pb.update("Copied record", record_index, total_items)
        for index, readv_vector, node_vector in readv_group_iter:
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path, readv_vector)
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (key, eol_flag, references) in zip(
                    reader.iter_records(), node_vector):
                raw_data = read_func(None)
                if output_lines:
                    # read the entire thing
                    content, _ = knit._parse_record(key[-1], raw_data)
                    if len(references[-1]) == 0:
                        line_iterator = factory.get_fulltext_content(content)
                    else:
                        line_iterator = factory.get_linedelta_content(content)
                    for line in line_iterator:
                        yield line, key
                else:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)
                pb.update("Copied record", record_index)
                record_index += 1

    def _process_inventory_lines(self, inv_lines):
        """Use up the inv_lines generator and setup a text key filter."""
        repo = self._pack_collection.repo
        fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
            inv_lines, self.revision_keys)
        text_filter = []
        for fileid, file_revids in viewitems(fileid_revisions):
            text_filter.extend([(fileid, file_revid)
                                for file_revid in file_revids])
        self._text_filter = text_filter
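
    # Illustrative example: self._text_filter ends up as a list of
    # (file_id, revision_id) text keys, e.g. [('f-id', 'rev-1'),
    # ('f-id', 'rev-2')], which _copy_text_texts later uses to cross-check
    # that every wanted text is present in the packs being combined.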

    def _copy_inventory_texts(self):
        # select inventory keys
        inv_keys = self._revision_keys  # currently the same keyspace, and note that
        # querying for keys here could introduce a bug where an inventory item
        # is missed, so do not change it to query separately without cross
        # checking like the text key check below.
        inventory_index_map, inventory_indices = self._pack_map_and_index_list(
            'inventory_index')
        inv_nodes = self._index_contents(inventory_indices, inv_keys)
        # copy inventory keys and adjust values
        # XXX: Should be a helper function to allow different inv representation
        # at this point.
        self.pb.update("Copying inventory texts", 2)
        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
        # Only grab the output lines if we will be processing them
        output_lines = bool(self.revision_ids)
        inv_lines = self._copy_nodes_graph(inventory_index_map,
            self.new_pack._writer, self.new_pack.inventory_index,
            readv_group_iter, total_items, output_lines=output_lines)
        if self.revision_ids:
            self._process_inventory_lines(inv_lines)
        else:
            # eat the iterator to cause it to execute.
            list(inv_lines)
            self._text_filter = None
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                         time.ctime(), self._pack_collection._upload_transport.base,
                         self.new_pack.random_name,
                         self.new_pack.inventory_index.key_count(),
                         time.time() - self.new_pack.start_time)

    def _update_pack_order(self, entries, index_to_pack_map):
        """Determine how we want our packs to be ordered.

        This changes the sort order of the self.packs list so that packs unused
        by 'entries' will be at the end of the list, so that future requests
        can avoid probing them. Used packs will be at the front of the
        self.packs list, in the order of their first use in 'entries'.

        :param entries: A list of (index, ...) tuples
        :param index_to_pack_map: A mapping from index objects to pack objects.
        """
        packs = []
        seen_indexes = set()
        for entry in entries:
            index = entry[0]
            if index not in seen_indexes:
                packs.append(index_to_pack_map[index])
                seen_indexes.add(index)
        if len(packs) == len(self.packs):
            if 'pack' in debug.debug_flags:
                trace.mutter('Not changing pack list, all packs used.')
            return
        seen_packs = set(packs)
        for pack in self.packs:
            if pack not in seen_packs:
                packs.append(pack)
                seen_packs.add(pack)
        if 'pack' in debug.debug_flags:
            old_names = [p.access_tuple()[1] for p in self.packs]
            new_names = [p.access_tuple()[1] for p in packs]
            trace.mutter('Reordering packs\nfrom: %s\n to: %s',
                         old_names, new_names)
        self.packs = packs

    def _copy_revision_texts(self):
        """Copy revision data to the new pack."""
        if self.revision_ids:
            revision_keys = [(revision_id,) for revision_id in self.revision_ids]
        else:
            revision_keys = None
        # select revision keys
        revision_index_map, revision_indices = self._pack_map_and_index_list(
            'revision_index')
        revision_nodes = self._index_contents(revision_indices, revision_keys)
        revision_nodes = list(revision_nodes)
        self._update_pack_order(revision_nodes, revision_index_map)
        # copy revision keys and adjust values
        self.pb.update("Copying revision texts", 1)
        total_items, readv_group_iter = self._revision_node_readv(revision_nodes)
        list(self._copy_nodes_graph(revision_index_map, self.new_pack._writer,
            self.new_pack.revision_index, readv_group_iter, total_items))
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
                         time.ctime(), self._pack_collection._upload_transport.base,
                         self.new_pack.random_name,
                         self.new_pack.revision_index.key_count(),
                         time.time() - self.new_pack.start_time)
        self._revision_keys = revision_keys

    def _get_text_nodes(self):
        text_index_map, text_indices = self._pack_map_and_index_list(
            'text_index')
        return text_index_map, self._index_contents(text_indices,
            self._text_filter)

    def _copy_text_texts(self):
        # select text keys
        text_index_map, text_nodes = self._get_text_nodes()
        if self._text_filter is not None:
            # We could return the keys copied as part of the return value from
            # _copy_nodes_graph but this doesn't work all that well with the
            # need to get line output too, so we check separately, and as we're
            # going to buffer everything anyway, we check beforehand, which
            # saves reading knit data over the wire when we know there are
            # missing records.
            text_nodes = set(text_nodes)
            present_text_keys = set(_node[1] for _node in text_nodes)
            missing_text_keys = set(self._text_filter) - present_text_keys
            if missing_text_keys:
                # TODO: raise a specific error that can handle many missing
                # keys.
                trace.mutter("missing keys during fetch: %r", missing_text_keys)
                a_missing_key = missing_text_keys.pop()
                raise errors.RevisionNotPresent(a_missing_key[1],
                                                a_missing_key[0])
        # copy text keys and adjust values
        self.pb.update("Copying content texts", 3)
        total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        self._log_copied_texts()

    def _create_pack_from_packs(self):
        self.pb.update("Opening pack", 0, 5)
        self.new_pack = self.open_pack()
        new_pack = self.new_pack
        # buffer data - we won't be reading-back during the pack creation and
        # this makes a significant difference on sftp pushes.
        new_pack.set_write_cache_size(1024 * 1024)
        if 'pack' in debug.debug_flags:
            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
                               for a_pack in self.packs]
            if self.revision_ids is not None:
                rev_count = len(self.revision_ids)
            else:
                rev_count = 'all'
            trace.mutter('%s: create_pack: creating pack from source packs: '
                         '%s%s %s revisions wanted %s t=0',
                         time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                         plain_pack_list, rev_count)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_text_texts()
        # select signature keys
        signature_filter = self._revision_keys  # same keyspace
        signature_index_map, signature_indices = self._pack_map_and_index_list(
            'signature_index')
        signature_nodes = self._index_contents(signature_indices,
                                               signature_filter)
        # copy signature keys and adjust values
        self.pb.update("Copying signature texts", 4)
        self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
                         new_pack.signature_index)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                         time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                         new_pack.signature_index.key_count(),
                         time.time() - new_pack.start_time)
        new_pack._check_references()
        if not self._use_pack(new_pack):
            new_pack.abort()
            return None
        self.pb.update("Finishing pack", 5)
        new_pack.finish()
        self._pack_collection.allocate(new_pack)
        return new_pack

    def _least_readv_node_readv(self, nodes):
        """Generate request groups for nodes using the least readv's.

        :param nodes: An iterable of graph index nodes.
        :return: Total node count and an iterator of the data needed to perform
            readvs to obtain the data for nodes. Each item yielded by the
            iterator is a tuple with:
            index, readv_vector, node_vector. readv_vector is a list ready to
            hand to the transport readv method, and node_vector is a list of
            (key, eol_flag, references) for the node retrieved by the
            matching readv_vector.
        """
        # group by pack so we do one readv per pack
        nodes = sorted(nodes)
        total = len(nodes)
        request_groups = {}
        for index, key, value, references in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value, references))
        result = []
        for index, items in viewitems(request_groups):
            pack_readv_requests = []
            for key, value, references in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append(
                    ((offset, length), (key, value[0], references)))
            # linear scan up the pack to maximum range combining.
            pack_readv_requests.sort()
            # split out the readv and the node data.
            pack_readv = [readv for readv, node in pack_readv_requests]
            node_vector = [node for readv, node in pack_readv_requests]
            result.append((index, pack_readv, node_vector))
        return total, result
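
    # Illustrative example of one request group produced above:
    #
    #     (index, [(0, 120), (120, 80)],
    #      [(('rev-1',), ' ', refs1), (('rev-2',), 'N', refs2)])
    #
    # i.e. a single readv against the pack that owns `index` fetches both
    # records; `refs1` and `refs2` stand in for the graph references.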

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        return self._least_readv_node_readv(revision_nodes)


class KnitReconcilePacker(KnitPacker):
    """A packer which regenerates indices etc as it copies.

    This is used by ``brz reconcile`` to cause parent text pointers to be
    made consistent.
    """

    def __init__(self, *args, **kwargs):
        super(KnitReconcilePacker, self).__init__(*args, **kwargs)
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map for reconciling with, rather than
        a text key filter.
        """
        repo = self._pack_collection.repo
        refs = repo._serializer._find_text_key_references(inv_lines)
        self._text_refs = refs
        # during reconcile we:
        # - convert unreferenced texts to full texts
        # - correct texts which reference a text not copied to be full texts
        # - copy all others as-is but with corrected parents.
        # - so at this point we don't know enough to decide what becomes a full
        #   text.
        self._text_filter = None

    def _copy_text_texts(self):
        """Generate what texts we should have and then copy."""
        self.pb.update("Copying content texts", 3)
        # we have three major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
                          (_1, key, _2, refs) in
                          self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
        # 2) generate a text_nodes list that contains all the deltas that can
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # 0 - index
            # 1 - key
            # 2 - value
            # 3 - refs
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                discarded_nodes.append(node)
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the left most parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                                     (ideal_parents, node[3][1])))
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        bad_texts.sort(key=lambda key: rev_order.get(key[0][1], 0))
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
            {self.new_pack.text_index: self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
                               self.new_pack.access_tuple())
        output_texts = KnitVersionedFiles(
            _KnitGraphIndex(self.new_pack.text_index,
                add_callback=self.new_pack.text_index.add_nodes,
                deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # We refer to the new pack to delta data being output.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            parents = []
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                                          (key, parent_keys))
                parents.append(parent_key[1])
            text_lines = osutils.split_lines(next(repo.texts.get_record_stream(
                [key], 'unordered', True)).get_bytes_as('fulltext'))
            output_texts.add_lines(key, parent_keys, text_lines,
                                   random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                                       % (missing_text_keys,))
        self._log_copied_texts()

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        # XXX: we might be better checking this at the copy time.
        original_inventory_keys = set()
        inv_index = self._pack_collection.inventory_index.combined_index
        for entry in inv_index.iter_all_entries():
            original_inventory_keys.add(entry[1])
        new_inventory_keys = set()
        for entry in new_pack.inventory_index.iter_all_entries():
            new_inventory_keys.add(entry[1])
        if new_inventory_keys != original_inventory_keys:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed


class OptimisingKnitPacker(KnitPacker):
    """A packer which spends more time to create better disk layouts."""

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        This sort places revisions in topological order with the ancestors
        after the children.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        # build an ancestors dict
        ancestors = {}
        by_key = {}
        for index, key, value, references in revision_nodes:
            ancestors[key] = references[0]
            by_key[key] = (index, value, references)
        order = tsort.topo_sort(ancestors)
        total = len(order)
        # Single IO is pathological, but it will work as a starting point.
        requests = []
        for key in reversed(order):
            index, value, references = by_key[key]
            # ---- KnitGraphIndex.get_position
            bits = value[1:].split(' ')
            offset, length = int(bits[0]), int(bits[1])
            requests.append(
                (index, [(offset, length)], [(key, value[0], references)]))
        # TODO: combine requests in the same index that are in ascending order.
        return total, requests
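
    # Design note: reversing the topological order writes each revision before
    # its ancestors, so a reader that starts at a branch head and walks back
    # through history reads the new pack mostly front-to-back. One readv per
    # revision is pathological (see above) but preserves that ordering.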

    def open_pack(self):
        """Open a pack for the pack we are creating."""
        new_pack = super(OptimisingKnitPacker, self).open_pack()
        # Turn on the optimization flags for all the index builders.
        new_pack.revision_index.set_optimize(for_size=True)
        new_pack.inventory_index.set_optimize(for_size=True)
        new_pack.text_index.set_optimize(for_size=True)
        new_pack.signature_index.set_optimize(for_size=True)
        return new_pack


class KnitRepositoryPackCollection(RepositoryPackCollection):
    """A knit pack collection."""

    pack_factory = NewPack
    resumed_pack_factory = ResumedPack
    normal_packer_class = KnitPacker
    optimising_packer_class = OptimisingKnitPacker
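
# Illustrative sketch: RepositoryPackCollection consults the class attributes
# above when it needs a packer, conceptually:
#
#     packer_class = collection.normal_packer_class      # routine autopack
#     packer_class = collection.optimising_packer_class  # e.g. 'brz pack'
#     new_pack = packer_class(collection, packs, '.autopack').pack()
#
# so the knit-specific behaviour plugs into the generic pack machinery via
# KnitRepositoryPackCollection.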