# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Knit-based pack repository formats."""

from __future__ import absolute_import

from ..lazy_import import lazy_import
lazy_import(globals(), """
import time

from breezy import (
    controldir,
    debug,
    errors,
    osutils,
    revision as _mod_revision,
    trace,
    tsort,
    ui,
    )
from breezy.bzr import (
    btree_index,
    pack,
    xml5,
    xml6,
    xml7,
    )
from breezy.bzr.knit import (
    _KnitGraphIndex,
    KnitPlainFactory,
    KnitVersionedFiles,
    )
""")

from ..bzr.index import (
    CombinedGraphIndex,
    GraphIndex,
    GraphIndexPrefixAdapter,
    InMemoryGraphIndex,
    )
from .knitrepo import (
    KnitRepository,
    )
from .pack_repo import (
    _DirectPackAccess,
    NewPack,
    Packer,
    PackCommitBuilder,
    PackRepository,
    PackRootCommitBuilder,
    RepositoryFormatPack,
    RepositoryPackCollection,
    ResumedPack,
    )
from ..sixish import (
    viewitems,
    )
from ..bzr.vf_repository import (
    StreamSource,
    )


class KnitPackRepository(PackRepository, KnitRepository):

    def __init__(self, _format, a_controldir, control_files, _commit_builder_class,
                 _serializer):
        PackRepository.__init__(self, _format, a_controldir, control_files,
                                _commit_builder_class, _serializer)
        if self._format.supports_chks:
            raise AssertionError("chk not supported")
        index_transport = self._transport.clone('indices')
        self._pack_collection = KnitRepositoryPackCollection(self,
            self._transport,
            index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            use_chk_index=False,
            )
        self.inventories = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.inventory_index.data_access,
            max_delta_chain=200)
        self.revisions = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                deltas=False, parents=True, is_locked=self.is_locked,
                track_external_parent_refs=True),
            data_access=self._pack_collection.revision_index.data_access,
            max_delta_chain=0)
        self.signatures = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                deltas=False, parents=False, is_locked=self.is_locked),
            data_access=self._pack_collection.signature_index.data_access,
            max_delta_chain=0)
        self.texts = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.text_index.data_access,
            max_delta_chain=200)
        self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
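        # Added commentary (not in the original file): the four
        # KnitVersionedFiles stores above share one wiring pattern - a
        # _KnitGraphIndex view over the pack collection's combined index,
        # paired with that index's data_access object. deltas=True
        # (inventories, texts) stores line-deltas against a compression
        # parent, with max_delta_chain bounding how many deltas must be
        # applied to rebuild a fulltext; revisions and signatures are stored
        # as fulltexts only.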

    def _get_source(self, to_format):
        if to_format.network_name() == self._format.network_name():
            return KnitPackStreamSource(self, to_format)
        return PackRepository._get_source(self, to_format)

    def _reconcile_pack(self, collection, packs, extension, revs, pb):
        packer = KnitReconcilePacker(collection, packs, extension, revs)
        return packer.pack(pb)


class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A no-subtrees parameterized Pack repository.

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    @property
    def _serializer(self):
        return xml5.serializer_v5
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits without subtree support"


class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    @property
    def _serializer(self):
        return xml7.serializer_v7
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            'pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with subtree support\n"


class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False
    @property
    def _serializer(self):
        return xml6.serializer_v6
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            'rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar pack repository format 1 with rich root"
                " (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with rich root support\n"


class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir('1.6')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"


class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository with rich roots and stacking.

    New in release 1.6.1.

    Supports stacking on other repositories, allowing data to be accessed
    without being stored locally.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            '1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"

    def get_format_description(self):
        return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"


class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """A repository with rich roots and external references.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.

    This format was deprecated because the serializer it uses accidentally
    supported subtrees, when the format was not intended to. This meant that
    someone could accidentally fetch from an incorrect repository.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees

    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        matching = controldir.format_registry.make_controldir(
            '1.6.1-rich-root')
        matching.repository_format = self
        return matching

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
                " (deprecated)")

    def is_deprecated(self):
        return True


class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """A repository with stacking and btree indexes,
    without rich roots or subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir('1.9')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            '1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained in 2.3, to provide an upgrade path from this
    to RepositoryFormat2aSubtree. It can be removed in later releases.

    1.6.1-subtree [as it might have been] with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            'development5-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 2 with subtree support "
                "(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
                "1.6.1-subtree with B+Tree indices.\n")


class KnitPackStreamSource(StreamSource):
    """A StreamSource used to transfer data between same-format KnitPack repos.

    This source assumes:
     1) Same serialization format for all objects
     2) Same root information
     3) XML format inventories
     4) Atomic inserts (so we can stream inventory texts before text
        content)
     5) No chk_bytes
    """

    def __init__(self, from_repository, to_format):
        super(KnitPackStreamSource, self).__init__(from_repository, to_format)
        self._text_keys = None
        self._text_fetch_order = 'unordered'

    def _get_filtered_inv_stream(self, revision_ids):
        from_repo = self.from_repository
        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
        parent_keys = [(p,) for p in parent_ids]
        find_text_keys = from_repo._serializer._find_text_key_references
        parent_text_keys = set(find_text_keys(
            from_repo._inventory_xml_lines_for_keys(parent_keys)))
        content_text_keys = set()
        knit = KnitVersionedFiles(None, None)
        factory = KnitPlainFactory()
        def find_text_keys_from_content(record):
            if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
                raise ValueError("Unknown content storage kind for"
                    " inventory text: %s" % (record.storage_kind,))
            # It's a knit record, it has a _raw_record field (even if it was
            # reconstituted from a network stream).
            raw_data = record._raw_record
            # read the entire thing
            revision_id = record.key[-1]
            content, _ = knit._parse_record(revision_id, raw_data)
            if record.storage_kind == 'knit-delta-gz':
                line_iterator = factory.get_linedelta_content(content)
            elif record.storage_kind == 'knit-ft-gz':
                line_iterator = factory.get_fulltext_content(content)
            content_text_keys.update(find_text_keys(
                [(line, revision_id) for line in line_iterator]))
        revision_keys = [(r,) for r in revision_ids]
        def _filtered_inv_stream():
            source_vf = from_repo.inventories
            stream = source_vf.get_record_stream(revision_keys,
                                                 'unordered', True)
            for record in stream:
                if record.storage_kind == 'absent':
                    raise errors.NoSuchRevision(from_repo, record.key)
                find_text_keys_from_content(record)
                yield record
            self._text_keys = content_text_keys - parent_text_keys
        return ('inventories', _filtered_inv_stream())

    def _get_text_stream(self):
        # Note: We know we don't have to handle adding root keys, because both
        # the source and target have the identical network name.
        text_stream = self.from_repository.texts.get_record_stream(
            self._text_keys, self._text_fetch_order, False)
        return ('texts', text_stream)

    def get_stream(self, search):
        revision_ids = search.get_keys()
        for stream_info in self._fetch_revision_texts(revision_ids):
            yield stream_info
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
        yield self._get_filtered_inv_stream(revision_ids)
        yield self._get_text_stream()
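
# Illustrative sketch (added; assumed fetch wiring, not part of this file):
# an inter-repository fetch between two same-format KnitPack repositories
# would drive this source roughly as:
#
#   source = KnitPackStreamSource(from_repo, to_repo._format)
#   sink = to_repo._get_sink()
#   sink.insert_stream(source.get_stream(search), from_repo._format, [])
#
# get_stream() yields the revision substreams first, then the filtered
# inventory substream (which computes self._text_keys as a side effect), and
# finally the text substream that depends on those keys.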


class KnitPacker(Packer):
    """Packer that works with knit packs."""

    def __init__(self, pack_collection, packs, suffix, revision_ids=None,
                 reload_func=None):
        super(KnitPacker, self).__init__(pack_collection, packs, suffix,
                                         revision_ids=revision_ids,
                                         reload_func=reload_func)

    def _pack_map_and_index_list(self, index_attribute):
        """Convert a list of packs to an index pack map and index list.

        :param index_attribute: The attribute that the desired index is found
            on.
        :return: A tuple (map, list) where map contains the dict from
            index:pack_tuple, and list contains the indices in the preferred
            access order.
        """
        indices = []
        pack_map = {}
        for pack_obj in self.packs:
            index = getattr(pack_obj, index_attribute)
            indices.append(index)
            pack_map[index] = pack_obj
        return pack_map, indices

    def _index_contents(self, indices, key_filter=None):
        """Get an iterable of the index contents from a pack_map.

        :param indices: The list of indices to query
        :param key_filter: An optional filter to limit the keys returned.
        """
        all_index = CombinedGraphIndex(indices)
        if key_filter is None:
            return all_index.iter_all_entries()
        else:
            return all_index.iter_entries(key_filter)
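
    # Added note (commentary, not in the original): the graph-index entries
    # iterated by the copy routines below are tuples of
    #   (index, key, value)             - indices without reference lists
    #   (index, key, value, references) - indices with reference lists
    # which is why _do_copy_nodes unpacks three elements while the graph
    # variants unpack four.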

    def _copy_nodes(self, nodes, index_map, writer, write_index,
                    output_lines=None):
        """Copy knit nodes between packs with no graph references.

        :param output_lines: Output full texts of copied items.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_copy_nodes(nodes, index_map, writer,
                write_index, pb, output_lines=output_lines)
        finally:
            pb.finished()

    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
                       output_lines=None):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
        # group by pack
        nodes = sorted(nodes)
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        request_groups = {}
        for index, key, value in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value))
        record_index = 0
        pb.update("Copied record", record_index, len(nodes))
        for index, items in viewitems(request_groups):
            pack_readv_requests = []
            for key, value in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append((offset, length, (key, value[0])))
            # linear scan up the pack
            pack_readv_requests.sort()
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path,
                    [offset[0:2] for offset in pack_readv_requests])
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (_1, _2, (key, eol_flag)) in zip(
                    reader.iter_records(), pack_readv_requests):
                raw_data = read_func(None)
                # check the header only
                if output_lines is not None:
                    output_lines(knit._parse_record(key[-1], raw_data)[0])
                else:
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
                pb.update("Copied record", record_index)
                record_index += 1

    def _copy_nodes_graph(self, index_map, writer, write_index,
                          readv_group_iter, total_items, output_lines=False):
        """Copy knit nodes between packs.

        :param output_lines: Return lines present in the copied data as
            an iterator of line,version_id.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for result in self._do_copy_nodes_graph(index_map, writer,
                    write_index, output_lines, pb, readv_group_iter, total_items):
                yield result
        finally:
            pb.finished()

    def _do_copy_nodes_graph(self, index_map, writer, write_index,
                             output_lines, pb, readv_group_iter, total_items):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        record_index = 0
        pb.update("Copied record", record_index, total_items)
        for index, readv_vector, node_vector in readv_group_iter:
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path, readv_vector)
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (key, eol_flag, references) in zip(
                    reader.iter_records(), node_vector):
                raw_data = read_func(None)
                if output_lines:
                    # read the entire thing
                    content, _ = knit._parse_record(key[-1], raw_data)
                    if len(references[-1]) == 0:
                        line_iterator = factory.get_fulltext_content(content)
                    else:
                        line_iterator = factory.get_linedelta_content(content)
                    for line in line_iterator:
                        yield line, key
                else:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)
                pb.update("Copied record", record_index)
                record_index += 1

    def _process_inventory_lines(self, inv_lines):
        """Use up the inv_lines generator and setup a text key filter."""
        repo = self._pack_collection.repo
        fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
            inv_lines, self.revision_keys)
        text_filter = []
        for fileid, file_revids in viewitems(fileid_revisions):
            text_filter.extend([(fileid, file_revid) for file_revid in file_revids])
        self._text_filter = text_filter

    def _copy_inventory_texts(self):
        # select inventory keys
        inv_keys = self._revision_keys # currently the same keyspace, and note that
        # querying for keys here could introduce a bug where an inventory item
        # is missed, so do not change it to query separately without cross
        # checking like the text key check below.
        inventory_index_map, inventory_indices = self._pack_map_and_index_list(
            'inventory_index')
        inv_nodes = self._index_contents(inventory_indices, inv_keys)
        # copy inventory keys and adjust values
        # XXX: Should be a helper function to allow different inv representation
        # at this point.
        self.pb.update("Copying inventory texts", 2)
        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
        # Only grab the output lines if we will be processing them
        output_lines = bool(self.revision_ids)
        inv_lines = self._copy_nodes_graph(inventory_index_map,
            self.new_pack._writer, self.new_pack.inventory_index,
            readv_group_iter, total_items, output_lines=output_lines)
        if self.revision_ids:
            self._process_inventory_lines(inv_lines)
        else:
            # eat the iterator to cause it to execute.
            list(inv_lines)
            self._text_filter = None
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.inventory_index.key_count(),
                time.time() - self.new_pack.start_time)

    def _update_pack_order(self, entries, index_to_pack_map):
        """Determine how we want our packs to be ordered.

        This changes the sort order of the self.packs list so that packs unused
        by 'entries' will be at the end of the list, so that future requests
        can avoid probing them. Used packs will be at the front of the
        self.packs list, in the order of their first use in 'entries'.

        :param entries: A list of (index, ...) tuples
        :param index_to_pack_map: A mapping from index objects to pack objects.
        """
        packs = []
        seen_indexes = set()
        for entry in entries:
            index = entry[0]
            if index not in seen_indexes:
                packs.append(index_to_pack_map[index])
                seen_indexes.add(index)
        if len(packs) == len(self.packs):
            if 'pack' in debug.debug_flags:
                trace.mutter('Not changing pack list, all packs used.')
            return
        seen_packs = set(packs)
        for pack in self.packs:
            if pack not in seen_packs:
                packs.append(pack)
                seen_packs.add(pack)
        if 'pack' in debug.debug_flags:
            old_names = [p.access_tuple()[1] for p in self.packs]
            new_names = [p.access_tuple()[1] for p in packs]
            trace.mutter('Reordering packs\nfrom: %s\n to: %s',
                         old_names, new_names)
        self.packs = packs
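
    # Example (added for illustration): with self.packs == [A, B, C] and
    # entries that touch only C then A, the reordered list is [C, A, B] -
    # used packs first, in first-use order, with unused packs at the tail so
    # later index probes try them last.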

    def _copy_revision_texts(self):
        # select revisions
        if self.revision_ids:
            revision_keys = [(revision_id,) for revision_id in self.revision_ids]
        else:
            revision_keys = None
        # select revision keys
        revision_index_map, revision_indices = self._pack_map_and_index_list(
            'revision_index')
        revision_nodes = self._index_contents(revision_indices, revision_keys)
        revision_nodes = list(revision_nodes)
        self._update_pack_order(revision_nodes, revision_index_map)
        # copy revision keys and adjust values
        self.pb.update("Copying revision texts", 1)
        total_items, readv_group_iter = self._revision_node_readv(revision_nodes)
        list(self._copy_nodes_graph(revision_index_map, self.new_pack._writer,
            self.new_pack.revision_index, readv_group_iter, total_items))
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.revision_index.key_count(),
                time.time() - self.new_pack.start_time)
        self._revision_keys = revision_keys

    def _get_text_nodes(self):
        text_index_map, text_indices = self._pack_map_and_index_list(
            'text_index')
        return text_index_map, self._index_contents(text_indices,
            self._text_filter)

    def _copy_text_texts(self):
        # select text keys
        text_index_map, text_nodes = self._get_text_nodes()
        if self._text_filter is not None:
            # We could return the keys copied as part of the return value from
            # _copy_nodes_graph but this doesn't work all that well with the
            # need to get line output too, so we check separately, and as we're
            # going to buffer everything anyway, we check beforehand, which
            # saves reading knit data over the wire when we know there are
            # missing records.
            text_nodes = set(text_nodes)
            present_text_keys = set(_node[1] for _node in text_nodes)
            missing_text_keys = set(self._text_filter) - present_text_keys
            if missing_text_keys:
                # TODO: raise a specific error that can handle many missing
                # keys.
                trace.mutter("missing keys during fetch: %r", missing_text_keys)
                a_missing_key = missing_text_keys.pop()
                raise errors.RevisionNotPresent(a_missing_key[1],
                                                a_missing_key[0])
        # copy text keys and adjust values
        self.pb.update("Copying content texts", 3)
        total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        self._log_copied_texts()

    def _create_pack_from_packs(self):
        self.pb.update("Opening pack", 0, 5)
        self.new_pack = self.open_pack()
        new_pack = self.new_pack
        # buffer data - we won't be reading-back during the pack creation and
        # this makes a significant difference on sftp pushes.
        new_pack.set_write_cache_size(1024*1024)
        if 'pack' in debug.debug_flags:
            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
                for a_pack in self.packs]
            if self.revision_ids is not None:
                rev_count = len(self.revision_ids)
            else:
                rev_count = 'all'
            trace.mutter('%s: create_pack: creating pack from source packs: '
                '%s%s %s revisions wanted %s t=0',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                plain_pack_list, rev_count)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_text_texts()
        # select signature keys
        signature_filter = self._revision_keys # same keyspace
        signature_index_map, signature_indices = self._pack_map_and_index_list(
            'signature_index')
        signature_nodes = self._index_contents(signature_indices,
            signature_filter)
        # copy signature keys and adjust values
        self.pb.update("Copying signature texts", 4)
        self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
            new_pack.signature_index)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                new_pack.signature_index.key_count(),
                time.time() - new_pack.start_time)
        new_pack._check_references()
        if not self._use_pack(new_pack):
            new_pack.abort()
            return None
        self.pb.update("Finishing pack", 5)
        new_pack.finish()
        self._pack_collection.allocate(new_pack)
        return new_pack

    def _least_readv_node_readv(self, nodes):
        """Generate request groups for nodes using the least readv's.

        :param nodes: An iterable of graph index nodes.
        :return: Total node count and an iterator of the data needed to perform
            readvs to obtain the data for nodes. Each item yielded by the
            iterator is a tuple with:
            index, readv_vector, node_vector. readv_vector is a list ready to
            hand to the transport readv method, and node_vector is a list of
            (key, eol_flag, references) for the node retrieved by the
            matching readv_vector.
        """
        # group by pack so we do one readv per pack
        nodes = sorted(nodes)
        total = len(nodes)
        request_groups = {}
        for index, key, value, references in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value, references))
        result = []
        for index, items in viewitems(request_groups):
            pack_readv_requests = []
            for key, value, references in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append(
                    ((offset, length), (key, value[0], references)))
            # linear scan up the pack to maximum range combining.
            pack_readv_requests.sort()
            # split out the readv and the node data.
            pack_readv = [readv for readv, node in pack_readv_requests]
            node_vector = [node for readv, node in pack_readv_requests]
            result.append((index, pack_readv, node_vector))
        return total, result
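
    # Shape example (added for illustration): two nodes that live in the same
    # pack produce a single group, i.e. one transport readv per source pack:
    #   total == 2
    #   result == [(index, [(0, 100), (100, 50)],
    #               [(key1, eol1, refs1), (key2, eol2, refs2)])]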

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        return self._least_readv_node_readv(revision_nodes)


class KnitReconcilePacker(KnitPacker):
    """A packer which regenerates indices etc as it copies.

    This is used by ``brz reconcile`` to cause parent text pointers to be
    regenerated.
    """

    def __init__(self, *args, **kwargs):
        super(KnitReconcilePacker, self).__init__(*args, **kwargs)
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map for reconciling with."""
        repo = self._pack_collection.repo
        refs = repo._serializer._find_text_key_references(inv_lines)
        self._text_refs = refs
        # during reconcile we:
        #  - convert unreferenced texts to full texts
        #  - correct texts which reference a text not copied to be full texts
        #  - copy all others as-is but with corrected parents.
        #  - so at this point we don't know enough to decide what becomes a full
        #    text.
        self._text_filter = None

    def _copy_text_texts(self):
        """Generate what texts we should have and then copy."""
        self.pb.update("Copying content texts", 3)
        # we have three major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
            _1, key, _2, refs in
            self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
        # 2) generate a text_nodes list that contains all the deltas that can
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # 0 - index
            # 1 - key
            # 2 - value
            # 3 - refs
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                discarded_nodes.append(node)
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the left most parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                        (ideal_parents, node[3][1])))
                    self._data_changed = True
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        bad_texts.sort(key=lambda key: rev_order.get(key[0][1], 0))
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
            {self.new_pack.text_index: self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        output_texts = KnitVersionedFiles(
            _KnitGraphIndex(self.new_pack.text_index,
                add_callback=self.new_pack.text_index.add_nodes,
                deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # We refer to the new pack for delta data being output.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            parents = []
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
                parents.append(parent_key[1])
            text_lines = osutils.split_lines(next(repo.texts.get_record_stream(
                [key], 'unordered', True)).get_bytes_as('fulltext'))
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        # XXX: we might be better checking this at the copy time.
        original_inventory_keys = set()
        inv_index = self._pack_collection.inventory_index.combined_index
        for entry in inv_index.iter_all_entries():
            original_inventory_keys.add(entry[1])
        new_inventory_keys = set()
        for entry in new_pack.inventory_index.iter_all_entries():
            new_inventory_keys.add(entry[1])
        if new_inventory_keys != original_inventory_keys:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed


class OptimisingKnitPacker(KnitPacker):
    """A packer which spends more time to create better disk layouts."""

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        This sort places revisions in topological order with the ancestors
        after the children.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        # build an ancestors dict
        ancestors = {}
        by_key = {}
        for index, key, value, references in revision_nodes:
            ancestors[key] = references[0]
            by_key[key] = (index, value, references)
        order = tsort.topo_sort(ancestors)
        total = len(order)
        # Single IO is pathological, but it will work as a starting point.
        requests = []
        for key in reversed(order):
            index, value, references = by_key[key]
            # ---- KnitGraphIndex.get_position
            bits = value[1:].split(' ')
            offset, length = int(bits[0]), int(bits[1])
            requests.append(
                (index, [(offset, length)], [(key, value[0], references)]))
        # TODO: combine requests in the same index that are in ascending order.
        return total, requests
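
    # Added note (commentary, not in the original): topo_sort emits parents
    # before children, so iterating reversed(order) requests each child before
    # its ancestors - the layout the docstring above describes - at the cost
    # of one readv per revision, as the TODO notes.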

    def open_pack(self):
        """Open a pack for the pack we are creating."""
        new_pack = super(OptimisingKnitPacker, self).open_pack()
        # Turn on the optimization flags for all the index builders.
        new_pack.revision_index.set_optimize(for_size=True)
        new_pack.inventory_index.set_optimize(for_size=True)
        new_pack.text_index.set_optimize(for_size=True)
        new_pack.signature_index.set_optimize(for_size=True)
        return new_pack


class KnitRepositoryPackCollection(RepositoryPackCollection):
    """A knit pack collection."""

    pack_factory = NewPack
    resumed_pack_factory = ResumedPack
    normal_packer_class = KnitPacker
    optimising_packer_class = OptimisingKnitPacker