# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Knit-based pack repository formats."""

from __future__ import absolute_import

from ..lazy_import import lazy_import
lazy_import(globals(), """
import time

from breezy import (
    controldir,
    debug,
    errors,
    revision as _mod_revision,
    trace,
    tsort,
    ui,
    )
from breezy.bzr import (
    pack,
    xml5,
    xml6,
    xml7,
    )
from breezy.bzr.knit import (
    _KnitGraphIndex,
    KnitPlainFactory,
    KnitVersionedFiles,
    )
""")

from ..bzr import (
    btree_index,
    )
from ..bzr.index import (
    CombinedGraphIndex,
    GraphIndex,
    GraphIndexPrefixAdapter,
    InMemoryGraphIndex,
    )
from .knitrepo import (
    KnitRepository,
    )
from .pack_repo import (
    _DirectPackAccess,
    NewPack,
    RepositoryFormatPack,
    ResumedPack,
    Packer,
    PackCommitBuilder,
    PackRepository,
    RepositoryPackCollection,
    )
from ..sixish import (
    viewitems,
    zip,
    )
from ..bzr.vf_repository import (
    StreamSource,
    )
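
# Note: lazy_import above defers loading of the quoted modules until first
# use; once touched, the names behave like ordinary module-level bindings.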


class KnitPackRepository(PackRepository, KnitRepository):

    def __init__(self, _format, a_controldir, control_files, _commit_builder_class,
                 _serializer):
        PackRepository.__init__(self, _format, a_controldir, control_files,
                                _commit_builder_class, _serializer)
        if self._format.supports_chks:
            raise AssertionError("chk not supported")
        index_transport = self._transport.clone('indices')
        self._pack_collection = KnitRepositoryPackCollection(self,
            self._transport,
            index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            use_chk_index=False,
            )
        self.inventories = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.inventory_index.data_access,
            max_delta_chain=200)
        self.revisions = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                deltas=False, parents=True, is_locked=self.is_locked,
                track_external_parent_refs=True),
            data_access=self._pack_collection.revision_index.data_access,
            max_delta_chain=0)
        self.signatures = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                deltas=False, parents=False, is_locked=self.is_locked),
            data_access=self._pack_collection.signature_index.data_access,
            max_delta_chain=0)
        self.texts = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.text_index.data_access,
            max_delta_chain=200)
        self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False

    def _get_source(self, to_format):
        if to_format.network_name() == self._format.network_name():
            return KnitPackStreamSource(self, to_format)
        return PackRepository._get_source(self, to_format)

    def _reconcile_pack(self, collection, packs, extension, revs, pb):
        packer = KnitReconcilePacker(collection, packs, extension, revs)
        return packer.pack(pb)


class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A no-subtrees parameterized Pack repository.

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder

    @property
    def _serializer(self):
        return xml5.serializer_v5
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar pack repository format 1 (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits without subtree support"


class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True

    @property
    def _serializer(self):
        return xml7.serializer_v7
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            'pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with subtree support\n"


class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    supports_tree_reference = False

    @property
    def _serializer(self):
        return xml6.serializer_v6
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            'rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return (b"Bazaar pack repository format 1 with rich root"
                b" (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with rich root support\n"


class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir('1.6')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"


class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository with rich roots and stacking.

    Supports stacking on other repositories, allowing data to be accessed
    without being stored locally.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            '1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"

    def get_format_description(self):
        return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"


class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """A repository with rich roots and external references.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.

    This format was deprecated because the serializer it uses accidentally
    supported subtrees, when the format was not intended to. This meant that
    someone could accidentally fetch from an incorrect repository.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees

    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        matching = controldir.format_registry.make_controldir(
            '1.6.1-rich-root')
        matching.repository_format = self
        return matching

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
                " (deprecated)")

    def is_deprecated(self):
        return True


class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """A repository with stacking and btree indexes,
    without rich roots or subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir('1.9')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            '1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained in 2.3, to provide an upgrade path from this
    to RepositoryFormat2aSubtree. It can be removed in later releases.

    1.6.1-subtree[as it might have been] with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            'development5-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return (b"Bazaar development format 2 with subtree support "
                b"(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
                "1.6.1-subtree with B+Tree indices.\n")


class KnitPackStreamSource(StreamSource):
    """A StreamSource used to transfer data between same-format KnitPack repos.

    This source assumes:
        1) Same serialization format for all objects
        2) Same root information
        3) XML format inventories
        4) Atomic inserts (so we can stream inventory texts before text
           content)
        5) No chk_bytes
    """

    def __init__(self, from_repository, to_format):
        super(KnitPackStreamSource, self).__init__(from_repository, to_format)
        self._text_keys = None
        self._text_fetch_order = 'unordered'

    def _get_filtered_inv_stream(self, revision_ids):
        from_repo = self.from_repository
        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
        parent_keys = [(p,) for p in parent_ids]
        find_text_keys = from_repo._serializer._find_text_key_references
        parent_text_keys = set(find_text_keys(
            from_repo._inventory_xml_lines_for_keys(parent_keys)))
        content_text_keys = set()
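        # A keyless KnitVersionedFiles is used here purely for its record
        # parsing helpers (_parse_record); it never stores anything.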
        knit = KnitVersionedFiles(None, None)
        factory = KnitPlainFactory()

        def find_text_keys_from_content(record):
            if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
                raise ValueError("Unknown content storage kind for"
                    " inventory text: %s" % (record.storage_kind,))
            # It's a knit record, it has a _raw_record field (even if it was
            # reconstituted from a network stream).
            raw_data = record._raw_record
            # read the entire thing
            revision_id = record.key[-1]
            content, _ = knit._parse_record(revision_id, raw_data)
            if record.storage_kind == 'knit-delta-gz':
                line_iterator = factory.get_linedelta_content(content)
            elif record.storage_kind == 'knit-ft-gz':
                line_iterator = factory.get_fulltext_content(content)
            content_text_keys.update(find_text_keys(
                [(line, revision_id) for line in line_iterator]))
        revision_keys = [(r,) for r in revision_ids]

        def _filtered_inv_stream():
            source_vf = from_repo.inventories
            stream = source_vf.get_record_stream(revision_keys,
                                                 'unordered', True)
            for record in stream:
                if record.storage_kind == 'absent':
                    raise errors.NoSuchRevision(from_repo, record.key)
                find_text_keys_from_content(record)
                yield record
            self._text_keys = content_text_keys - parent_text_keys
        return ('inventories', _filtered_inv_stream())

    def _get_text_stream(self):
        # Note: We know we don't have to handle adding root keys, because both
        # the source and target are the identical network name.
        text_stream = self.from_repository.texts.get_record_stream(
            self._text_keys, self._text_fetch_order, False)
        return ('texts', text_stream)

    def get_stream(self, search):
        revision_ids = search.get_keys()
        for stream_info in self._fetch_revision_texts(revision_ids):
            yield stream_info
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
        yield self._get_filtered_inv_stream(revision_ids)
        yield self._get_text_stream()
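
# Sketch of the stream shape produced above (descriptive, not executed):
# get_stream() first yields the substreams from _fetch_revision_texts()
# (signatures and revisions), then an ('inventories', ...) substream filtered
# to the requested revisions, and finally a ('texts', ...) substream
# restricted to the text keys discovered while parsing those inventories.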


class KnitPacker(Packer):
    """Packer that works with knit packs."""

    def __init__(self, pack_collection, packs, suffix, revision_ids=None,
                 reload_func=None):
        super(KnitPacker, self).__init__(pack_collection, packs, suffix,
                                         revision_ids=revision_ids,
                                         reload_func=reload_func)

    def _pack_map_and_index_list(self, index_attribute):
        """Convert a list of packs to an index pack map and index list.

        :param index_attribute: The attribute that the desired index is found
            on.
        :return: A tuple (map, list) where map contains the dict from
            index:pack_tuple, and list contains the indices in the preferred
            access order.
        """
        indices = []
        pack_map = {}
        for pack_obj in self.packs:
            index = getattr(pack_obj, index_attribute)
            indices.append(index)
            pack_map[index] = pack_obj
        return pack_map, indices

    def _index_contents(self, indices, key_filter=None):
        """Get an iterable of the index contents from a pack_map.

        :param indices: The list of indices to query
        :param key_filter: An optional filter to limit the keys returned.
        """
        all_index = CombinedGraphIndex(indices)
        if key_filter is None:
            return all_index.iter_all_entries()
        else:
            return all_index.iter_entries(key_filter)

    def _copy_nodes(self, nodes, index_map, writer, write_index,
                    output_lines=None):
        """Copy knit nodes between packs with no graph references.

        :param output_lines: Output full texts of copied items.
        """
        with ui.ui_factory.nested_progress_bar() as pb:
            return self._do_copy_nodes(nodes, index_map, writer,
                                       write_index, pb, output_lines=output_lines)

    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
                       output_lines=None):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
        # group by pack
        nodes = sorted(nodes)
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        request_groups = {}
        for index, key, value in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value))
        record_index = 0
        pb.update("Copied record", record_index, len(nodes))
        for index, items in viewitems(request_groups):
            pack_readv_requests = []
            for key, value in items:
                # ---- KnitGraphIndex.get_position
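                # Each index value is a bytestring: a one-byte no-eol flag
                # (b'N' or b' ') followed by b'<offset> <length>' into the
                # pack file, so value[1:] parses to the readv coordinates and
                # value[0:1] is carried along as the eol flag.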
                bits = value[1:].split(b' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append((offset, length, (key, value[0:1])))
            # linear scan up the pack
            pack_readv_requests.sort()
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path,
                    [offset[0:2] for offset in pack_readv_requests])
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (_1, _2, (key, eol_flag)) in zip(
                    reader.iter_records(), pack_readv_requests):
                raw_data = read_func(None)
                # check the header only
                if output_lines is not None:
                    output_lines(knit._parse_record(key[-1], raw_data)[0])
                else:
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record([raw_data], len(raw_data), names)
                write_index.add_node(key, eol_flag + b"%d %d" % (pos, size))
                pb.update("Copied record", record_index)
                record_index += 1

    def _copy_nodes_graph(self, index_map, writer, write_index,
                          readv_group_iter, total_items, output_lines=False):
        """Copy knit nodes between packs.

        :param output_lines: Return lines present in the copied data as
            an iterator of line,version_id.
        """
        with ui.ui_factory.nested_progress_bar() as pb:
            for result in self._do_copy_nodes_graph(index_map, writer,
                    write_index, output_lines, pb, readv_group_iter, total_items):
                yield result

    def _do_copy_nodes_graph(self, index_map, writer, write_index,
                             output_lines, pb, readv_group_iter, total_items):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        record_index = 0
        pb.update("Copied record", record_index, total_items)
        for index, readv_vector, node_vector in readv_group_iter:
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path, readv_vector)
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (key, eol_flag, references) in zip(
                    reader.iter_records(), node_vector):
                raw_data = read_func(None)
                if output_lines:
                    # read the entire thing
                    content, _ = knit._parse_record(key[-1], raw_data)
                    if len(references[-1]) == 0:
                        line_iterator = factory.get_fulltext_content(content)
                    else:
                        line_iterator = factory.get_linedelta_content(content)
                    for line in line_iterator:
                        yield line, key
                else:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record([raw_data], len(raw_data), names)
                write_index.add_node(key, eol_flag + b"%d %d" %
                                     (pos, size), references)
                pb.update("Copied record", record_index)
                record_index += 1

    def _process_inventory_lines(self, inv_lines):
        """Use up the inv_lines generator and setup a text key filter."""
        repo = self._pack_collection.repo
        fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
            inv_lines, self.revision_keys)
        text_filter = []
        for fileid, file_revids in viewitems(fileid_revisions):
            text_filter.extend([(fileid, file_revid)
                                for file_revid in file_revids])
        self._text_filter = text_filter
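
    # self._text_filter ends up as a list of (file_id, revision_id) text
    # keys; _get_text_nodes later passes it as the key filter when reading
    # the text indices, and _copy_text_texts cross-checks it against the
    # keys actually present.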

    def _copy_inventory_texts(self):
        # select inventory keys
        inv_keys = self._revision_keys # currently the same keyspace, and note that
        # querying for keys here could introduce a bug where an inventory item
        # is missed, so do not change it to query separately without cross
        # checking like the text key check below.
        inventory_index_map, inventory_indices = self._pack_map_and_index_list(
            'inventory_index')
        inv_nodes = self._index_contents(inventory_indices, inv_keys)
        # copy inventory keys and adjust values
        # XXX: Should be a helper function to allow different inv representation
        # at this point.
        self.pb.update("Copying inventory texts", 2)
        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
        # Only grab the output lines if we will be processing them
        output_lines = bool(self.revision_ids)
        inv_lines = self._copy_nodes_graph(inventory_index_map,
            self.new_pack._writer, self.new_pack.inventory_index,
            readv_group_iter, total_items, output_lines=output_lines)
        if self.revision_ids:
            self._process_inventory_lines(inv_lines)
        else:
            # eat the iterator to cause it to execute.
            list(inv_lines)
            self._text_filter = None
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.inventory_index.key_count(),
                time.time() - self.new_pack.start_time)

    def _update_pack_order(self, entries, index_to_pack_map):
        """Determine how we want our packs to be ordered.

        This changes the sort order of the self.packs list so that packs unused
        by 'entries' will be at the end of the list, so that future requests
        can avoid probing them. Used packs will be at the front of the
        self.packs list, in the order of their first use in 'entries'.

        :param entries: A list of (index, ...) tuples
        :param index_to_pack_map: A mapping from index objects to pack objects.
        """
        packs = []
        seen_indexes = set()
        for entry in entries:
            index = entry[0]
            if index not in seen_indexes:
                packs.append(index_to_pack_map[index])
                seen_indexes.add(index)
        if len(packs) == len(self.packs):
            if 'pack' in debug.debug_flags:
                trace.mutter('Not changing pack list, all packs used.')
            return
        seen_packs = set(packs)
        for pack in self.packs:
            if pack not in seen_packs:
                packs.append(pack)
                seen_packs.add(pack)
        if 'pack' in debug.debug_flags:
            old_names = [p.access_tuple()[1] for p in self.packs]
            new_names = [p.access_tuple()[1] for p in packs]
            trace.mutter('Reordering packs\nfrom: %s\n to: %s',
                         old_names, new_names)
        self.packs = packs

    def _copy_revision_texts(self):
        """Copy revision data to the new pack."""
        if self.revision_ids:
            revision_keys = [(revision_id,)
                             for revision_id in self.revision_ids]
        else:
            revision_keys = None
        # select revision keys
        revision_index_map, revision_indices = self._pack_map_and_index_list(
            'revision_index')
        revision_nodes = self._index_contents(revision_indices, revision_keys)
        revision_nodes = list(revision_nodes)
        self._update_pack_order(revision_nodes, revision_index_map)
        # copy revision keys and adjust values
        self.pb.update("Copying revision texts", 1)
        total_items, readv_group_iter = self._revision_node_readv(
            revision_nodes)
        list(self._copy_nodes_graph(revision_index_map, self.new_pack._writer,
            self.new_pack.revision_index, readv_group_iter, total_items))
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.revision_index.key_count(),
                time.time() - self.new_pack.start_time)
        self._revision_keys = revision_keys

    def _get_text_nodes(self):
        text_index_map, text_indices = self._pack_map_and_index_list(
            'text_index')
        return text_index_map, self._index_contents(text_indices,
                                                    self._text_filter)

    def _copy_text_texts(self):
        # select text keys
        text_index_map, text_nodes = self._get_text_nodes()
        if self._text_filter is not None:
            # We could return the keys copied as part of the return value from
            # _copy_nodes_graph but this doesn't work all that well with the
            # need to get line output too, so we check separately, and as we're
            # going to buffer everything anyway, we check beforehand, which
            # saves reading knit data over the wire when we know there are
            # missing records.
            text_nodes = set(text_nodes)
            present_text_keys = set(_node[1] for _node in text_nodes)
            missing_text_keys = set(self._text_filter) - present_text_keys
            if missing_text_keys:
                # TODO: raise a specific error that can handle many missing
                # keys.
                trace.mutter("missing keys during fetch: %r",
                             missing_text_keys)
                a_missing_key = missing_text_keys.pop()
                raise errors.RevisionNotPresent(a_missing_key[1],
                                                a_missing_key[0])
        # copy text keys and adjust values
        self.pb.update("Copying content texts", 3)
        total_items, readv_group_iter = self._least_readv_node_readv(
            text_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        self._log_copied_texts()

    def _create_pack_from_packs(self):
        self.pb.update("Opening pack", 0, 5)
        self.new_pack = self.open_pack()
        new_pack = self.new_pack
        # buffer data - we won't be reading-back during the pack creation and
        # this makes a significant difference on sftp pushes.
        new_pack.set_write_cache_size(1024 * 1024)
        if 'pack' in debug.debug_flags:
            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
                for a_pack in self.packs]
            if self.revision_ids is not None:
                rev_count = len(self.revision_ids)
            else:
                rev_count = 'all'
            trace.mutter('%s: create_pack: creating pack from source packs: '
                '%s%s %s revisions wanted %s t=0',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                plain_pack_list, rev_count)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_text_texts()
        # select signature keys
        signature_filter = self._revision_keys # same keyspace
        signature_index_map, signature_indices = self._pack_map_and_index_list(
            'signature_index')
        signature_nodes = self._index_contents(signature_indices,
            signature_filter)
        # copy signature keys and adjust values
        self.pb.update("Copying signature texts", 4)
        self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
            new_pack.signature_index)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                new_pack.signature_index.key_count(),
                time.time() - new_pack.start_time)
        new_pack._check_references()
        if not self._use_pack(new_pack):
            new_pack.abort()
            return None
        self.pb.update("Finishing pack", 5)
        new_pack.finish()
        self._pack_collection.allocate(new_pack)
        return new_pack
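
    # The copy pipeline above runs in five pb-tracked steps: open the new
    # pack (0), copy revisions (1), inventories (2), texts (3) and
    # signatures (4), then finish (5). Revisions are copied first so that
    # self._revision_keys can drive the inventory and signature selection.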

    def _least_readv_node_readv(self, nodes):
        """Generate request groups for nodes using the least readv's.

        :param nodes: An iterable of graph index nodes.
        :return: Total node count and an iterator of the data needed to perform
            readvs to obtain the data for nodes. Each item yielded by the
            iterator is a tuple with:
            index, readv_vector, node_vector. readv_vector is a list ready to
            hand to the transport readv method, and node_vector is a list of
            (key, eol_flag, references) for the node retrieved by the
            matching readv_vector.
        """
        # group by pack so we do one readv per pack
        nodes = sorted(nodes)
        total = len(nodes)
        request_groups = {}
        for index, key, value, references in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value, references))
        result = []
        for index, items in viewitems(request_groups):
            pack_readv_requests = []
            for key, value, references in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(b' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append(
                    ((offset, length), (key, value[0:1], references)))
            # linear scan up the pack to maximum range combining.
            pack_readv_requests.sort()
            # split out the readv and the node data.
            pack_readv = [readv for readv, node in pack_readv_requests]
            node_vector = [node for readv, node in pack_readv_requests]
            result.append((index, pack_readv, node_vector))
        return total, result
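
    # For example (illustrative): two nodes in the same pack with index
    # values b' 0 100' and b'N100 50' become one group
    # (index, [(0, 100), (100, 50)], [(key1, b' ', refs1), (key2, b'N', refs2)]),
    # so each source pack is read with a single readv call.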

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        return self._least_readv_node_readv(revision_nodes)


class KnitReconcilePacker(KnitPacker):
    """A packer which regenerates indices etc as it copies.

    This is used by ``brz reconcile`` to cause parent text pointers to be
    regenerated.
    """

    def __init__(self, *args, **kwargs):
        super(KnitReconcilePacker, self).__init__(*args, **kwargs)
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map (rather than a text key filter)
        for reconciling with.
        """
        repo = self._pack_collection.repo
        refs = repo._serializer._find_text_key_references(inv_lines)
        self._text_refs = refs
        # during reconcile we:
        #  - convert unreferenced texts to full texts
        #  - correct texts which reference a text not copied to be full texts
        #  - copy all others as-is but with corrected parents.
        #  - so at this point we don't know enough to decide what becomes a full
        #    text.
        self._text_filter = None

    def _copy_text_texts(self):
        """generate what texts we should have and then copy."""
        self.pb.update("Copying content texts", 3)
        # we have three major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
            _1, key, _2, refs in
            self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
        # 2) generate a text_nodes list that contains all the deltas that can
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # 0 - index
            # 1 - key
            # 2 - value
            # 3 - refs
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                discarded_nodes.append(node)
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the left most parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                        (ideal_parents, node[3][1])))
                    self._data_changed = True
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        bad_texts.sort(key=lambda key: rev_order.get(key[0][1], 0))
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
            {self.new_pack.text_index: self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        output_texts = KnitVersionedFiles(
            _KnitGraphIndex(self.new_pack.text_index,
                add_callback=self.new_pack.text_index.add_nodes,
                deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # We refer to the new pack to delta data being output.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            parents = []
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
                parents.append(parent_key[1])
            text_lines = next(repo.texts.get_record_stream(
                [key], 'unordered', True)).get_bytes_as('lines')
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        # XXX: we might be better checking this at the copy time.
        original_inventory_keys = set()
        inv_index = self._pack_collection.inventory_index.combined_index
        for entry in inv_index.iter_all_entries():
            original_inventory_keys.add(entry[1])
        new_inventory_keys = set()
        for entry in new_pack.inventory_index.iter_all_entries():
            new_inventory_keys.add(entry[1])
        if new_inventory_keys != original_inventory_keys:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed


class OptimisingKnitPacker(KnitPacker):
    """A packer which spends more time to create better disk layouts."""

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        This sort places revisions in topological order with the ancestors
        after the children.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        # build an ancestors dict
        ancestors = {}
        by_key = {}
        for index, key, value, references in revision_nodes:
            ancestors[key] = references[0]
            by_key[key] = (index, value, references)
        order = tsort.topo_sort(ancestors)
        total = len(order)
        # Single IO is pathological, but it will work as a starting point.
        requests = []
        for key in reversed(order):
            index, value, references = by_key[key]
            # ---- KnitGraphIndex.get_position
            bits = value[1:].split(b' ')
            offset, length = int(bits[0]), int(bits[1])
            requests.append(
                (index, [(offset, length)], [(key, value[0:1], references)]))
        # TODO: combine requests in the same index that are in ascending order.
        return total, requests

    def open_pack(self):
        """Open a pack for the pack we are creating."""
        new_pack = super(OptimisingKnitPacker, self).open_pack()
        # Turn on the optimization flags for all the index builders.
        new_pack.revision_index.set_optimize(for_size=True)
        new_pack.inventory_index.set_optimize(for_size=True)
        new_pack.text_index.set_optimize(for_size=True)
        new_pack.signature_index.set_optimize(for_size=True)
        return new_pack
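
    # set_optimize(for_size=True) asks each index builder to favour smaller
    # output over build speed; see the index builder implementations for the
    # exact effect.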


class KnitRepositoryPackCollection(RepositoryPackCollection):
    """A knit pack collection."""

    pack_factory = NewPack
    resumed_pack_factory = ResumedPack
    normal_packer_class = KnitPacker
    optimising_packer_class = OptimisingKnitPacker