# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Knit-based pack repository formats."""

from __future__ import absolute_import

from ..lazy_import import lazy_import
lazy_import(globals(), """
import time

from breezy import (
    controldir,
    debug,
    errors,
    osutils,
    pack,
    revision as _mod_revision,
    trace,
    tsort,
    ui,
    xml5,
    xml6,
    xml7,
    )
from breezy.knit import (
    _KnitGraphIndex,
    KnitPlainFactory,
    KnitVersionedFiles,
    )
""")

from .. import (
    btree_index,
    )
from ..index import (
    CombinedGraphIndex,
    GraphIndex,
    GraphIndexPrefixAdapter,
    InMemoryGraphIndex,
    )
from .knitrepo import (
    KnitRepository,
    )
from .pack_repo import (
    _DirectPackAccess,
    NewPack,
    Packer,
    PackCommitBuilder,
    PackRepository,
    PackRootCommitBuilder,
    RepositoryFormatPack,
    RepositoryPackCollection,
    ResumedPack,
    )
from ..sixish import (
    viewitems,
    )
from ..vf_repository import (
    StreamSource,
    )
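

# A note on the index value format seen throughout this module: as written by
# _KnitGraphIndex, each index value is a one-byte flag ('N' when the stored
# text lacks a trailing EOL, ' ' otherwise) followed by "<offset> <length>"
# locating the record inside its pack file.  A minimal decoding sketch (the
# methods below inline this as ``bits = value[1:].split(' ')``):
#
#   def _decode_knit_index_value(value):
#       # hypothetical helper, not part of this module
#       offset, length = value[1:].split(' ')
#       return value[0], int(offset), int(length)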


class KnitPackRepository(PackRepository, KnitRepository):

    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
                 _serializer):
        PackRepository.__init__(self, _format, a_bzrdir, control_files,
                                _commit_builder_class, _serializer)
        if self._format.supports_chks:
            raise AssertionError("chk not supported")
        index_transport = self._transport.clone('indices')
        self._pack_collection = KnitRepositoryPackCollection(self,
            self._transport,
            index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            use_chk_index=False,
            )
        self.inventories = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.inventory_index.data_access,
            max_delta_chain=200)
        self.revisions = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                deltas=False, parents=True, is_locked=self.is_locked,
                track_external_parent_refs=True),
            data_access=self._pack_collection.revision_index.data_access,
            max_delta_chain=0)
        self.signatures = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                deltas=False, parents=False, is_locked=self.is_locked),
            data_access=self._pack_collection.signature_index.data_access,
            max_delta_chain=0)
        self.texts = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.text_index.data_access,
            max_delta_chain=200)
        self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False

    def _get_source(self, to_format):
        if to_format.network_name() == self._format.network_name():
            return KnitPackStreamSource(self, to_format)
        return PackRepository._get_source(self, to_format)

    def _reconcile_pack(self, collection, packs, extension, revs, pb):
        packer = KnitReconcilePacker(collection, packs, extension, revs)
        return packer.pack(pb)
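
    # Illustrative use only (hypothetical location): repositories of this
    # class are normally reached through a Branch or ControlDir rather than
    # constructed directly.
    #
    #   from breezy import branch as _mod_branch
    #   b = _mod_branch.Branch.open('path/to/branch')
    #   repo = b.repository          # a KnitPackRepository for these formats
    #   repo.lock_read()
    #   try:
    #       rev = repo.get_revision(b.last_revision())
    #   finally:
    #       repo.unlock()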


class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A no-subtrees parameterized Pack repository.

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    @property
    def _serializer(self):
        return xml5.serializer_v5
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits without subtree support"


class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    @property
    def _serializer(self):
        return xml7.serializer_v7
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            'pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with subtree support\n"


class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False
    @property
    def _serializer(self):
        return xml6.serializer_v6
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            'rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar pack repository format 1 with rich root"
                " (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with rich root support\n"


class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir('1.6')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"


class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository with rich roots and stacking.

    New in release 1.6.1.

    Supports stacking on other repositories, allowing data to be accessed
    without being stored locally.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            '1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"

    def get_format_description(self):
        return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"


class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """A repository with rich roots and external references.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.

    This format was deprecated because the serializer it uses accidentally
    supported subtrees, when the format was not intended to. This meant that
    someone could accidentally fetch from an incorrect repository.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees

    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        matching = controldir.format_registry.make_bzrdir(
            '1.6.1-rich-root')
        matching.repository_format = self
        return matching

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
                " (deprecated)")

    def is_deprecated(self):
        return True


class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """A repository with stacking and btree indexes,
    without rich roots or subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir('1.9')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            '1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return "Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained in 2.3, to provide an upgrade path from this
    to RepositoryFormat2aSubtree.  It can be removed in later releases.

    1.6.1-subtree[as it might have been] with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackRootCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_bzrdir(
            'development5-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return ("Bazaar development format 2 with subtree support "
            "(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
            "1.6.1-subtree with B+Tree indices.\n")


class KnitPackStreamSource(StreamSource):
    """A StreamSource used to transfer data between same-format KnitPack repos.

    This source assumes:
        1) Same serialization format for all objects
        2) Same root information
        3) XML format inventories
        4) Atomic inserts (so we can stream inventory texts before text
           content)
        5) No chk_bytes
    """

    def __init__(self, from_repository, to_format):
        super(KnitPackStreamSource, self).__init__(from_repository, to_format)
        self._text_keys = None
        self._text_fetch_order = 'unordered'

    def _get_filtered_inv_stream(self, revision_ids):
        from_repo = self.from_repository
        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
        parent_keys = [(p,) for p in parent_ids]
        find_text_keys = from_repo._serializer._find_text_key_references
        parent_text_keys = set(find_text_keys(
            from_repo._inventory_xml_lines_for_keys(parent_keys)))
        content_text_keys = set()
        knit = KnitVersionedFiles(None, None)
        factory = KnitPlainFactory()
        def find_text_keys_from_content(record):
            if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
                raise ValueError("Unknown content storage kind for"
                    " inventory text: %s" % (record.storage_kind,))
            # It's a knit record, it has a _raw_record field (even if it was
            # reconstituted from a network stream).
            raw_data = record._raw_record
            # read the entire thing
            revision_id = record.key[-1]
            content, _ = knit._parse_record(revision_id, raw_data)
            if record.storage_kind == 'knit-delta-gz':
                line_iterator = factory.get_linedelta_content(content)
            elif record.storage_kind == 'knit-ft-gz':
                line_iterator = factory.get_fulltext_content(content)
            content_text_keys.update(find_text_keys(
                [(line, revision_id) for line in line_iterator]))
        revision_keys = [(r,) for r in revision_ids]
        def _filtered_inv_stream():
            source_vf = from_repo.inventories
            stream = source_vf.get_record_stream(revision_keys,
                                                 'unordered', False)
            for record in stream:
                if record.storage_kind == 'absent':
                    raise errors.NoSuchRevision(from_repo, record.key)
                find_text_keys_from_content(record)
                yield record
            self._text_keys = content_text_keys - parent_text_keys
        return ('inventories', _filtered_inv_stream())
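
    # The generator above forwards each inventory record unchanged while, as a
    # side effect, collecting every text key referenced by the streamed
    # inventories.  Subtracting the keys already referenced by the parent
    # inventories leaves self._text_keys holding exactly the file texts the
    # target lacks; _get_text_stream below depends on that.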

    def _get_text_stream(self):
        # Note: We know we don't have to handle adding root keys, because both
        # the source and target are the identical network name.
        text_stream = self.from_repository.texts.get_record_stream(
                        self._text_keys, self._text_fetch_order, False)
        return ('texts', text_stream)

    def get_stream(self, search):
        revision_ids = search.get_keys()
        for stream_info in self._fetch_revision_texts(revision_ids):
            yield stream_info
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
        yield self._get_filtered_inv_stream(revision_ids)
        yield self._get_text_stream()
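
    # Stream ordering matters here: revision data first, then inventories
    # (which compute self._text_keys as a side effect), then the texts those
    # keys select.  This is only safe because of assumption 4 in the class
    # docstring - inserts are atomic, so an interrupted stream never leaves a
    # partially visible revision in the target.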


class KnitPacker(Packer):
    """Packer that works with knit packs."""

    def __init__(self, pack_collection, packs, suffix, revision_ids=None,
                 reload_func=None):
        super(KnitPacker, self).__init__(pack_collection, packs, suffix,
                                         revision_ids=revision_ids,
                                         reload_func=reload_func)

    def _pack_map_and_index_list(self, index_attribute):
        """Convert a list of packs to an index pack map and index list.

        :param index_attribute: The attribute that the desired index is found
            on.
        :return: A tuple (map, list) where map contains the dict from
            index:pack_tuple, and list contains the indices in the preferred
            access order.
        """
        indices = []
        pack_map = {}
        for pack_obj in self.packs:
            index = getattr(pack_obj, index_attribute)
            indices.append(index)
            pack_map[index] = pack_obj
        return pack_map, indices

    def _index_contents(self, indices, key_filter=None):
        """Get an iterable of the index contents from a pack_map.

        :param indices: The list of indices to query
        :param key_filter: An optional filter to limit the keys returned.
        """
        all_index = CombinedGraphIndex(indices)
        if key_filter is None:
            return all_index.iter_all_entries()
        else:
            return all_index.iter_entries(key_filter)

    def _copy_nodes(self, nodes, index_map, writer, write_index,
                    output_lines=None):
        """Copy knit nodes between packs with no graph references.

        :param output_lines: Output full texts of copied items.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            return self._do_copy_nodes(nodes, index_map, writer,
                write_index, pb, output_lines=output_lines)
        finally:
            pb.finished()

    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
                       output_lines=None):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
        # group by pack
        nodes = sorted(nodes)
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        request_groups = {}
        for index, key, value in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value))
        record_index = 0
        pb.update("Copied record", record_index, len(nodes))
        for index, items in viewitems(request_groups):
            pack_readv_requests = []
            for key, value in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append((offset, length, (key, value[0])))
            # linear scan up the pack
            pack_readv_requests.sort()
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path,
                    [offset[0:2] for offset in pack_readv_requests])
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (_1, _2, (key, eol_flag)) in zip(
                reader.iter_records(), pack_readv_requests):
                raw_data = read_func(None)
                # check the header only
                if output_lines is not None:
                    output_lines(knit._parse_record(key[-1], raw_data)[0])
                else:
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
                pb.update("Copied record", record_index)
                record_index += 1

    def _copy_nodes_graph(self, index_map, writer, write_index,
        readv_group_iter, total_items, output_lines=False):
        """Copy knit nodes between packs.

        :param output_lines: Return lines present in the copied data as
            an iterator of line,version_id.
        """
        pb = ui.ui_factory.nested_progress_bar()
        try:
            for result in self._do_copy_nodes_graph(index_map, writer,
                write_index, output_lines, pb, readv_group_iter, total_items):
                yield result
        except Exception:
            # Python 2.4 does not permit try:finally: in a generator.
            pb.finished()
            raise
        else:
            pb.finished()

    def _do_copy_nodes_graph(self, index_map, writer, write_index,
        output_lines, pb, readv_group_iter, total_items):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        record_index = 0
        pb.update("Copied record", record_index, total_items)
        for index, readv_vector, node_vector in readv_group_iter:
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path, readv_vector)
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (key, eol_flag, references) in zip(
                reader.iter_records(), node_vector):
                raw_data = read_func(None)
                if output_lines:
                    # read the entire thing
                    content, _ = knit._parse_record(key[-1], raw_data)
                    if len(references[-1]) == 0:
                        line_iterator = factory.get_fulltext_content(content)
                    else:
                        line_iterator = factory.get_linedelta_content(content)
                    for line in line_iterator:
                        yield line, key
                else:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)
                pb.update("Copied record", record_index)
                record_index += 1

    def _process_inventory_lines(self, inv_lines):
        """Use up the inv_lines generator and setup a text key filter."""
        repo = self._pack_collection.repo
        fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
            inv_lines, self.revision_keys)
        text_filter = []
        for fileid, file_revids in viewitems(fileid_revisions):
            text_filter.extend([(fileid, file_revid) for file_revid in file_revids])
        self._text_filter = text_filter

    def _copy_inventory_texts(self):
        # select inventory keys
        inv_keys = self._revision_keys # currently the same keyspace, and note that
        # querying for keys here could introduce a bug where an inventory item
        # is missed, so do not change it to query separately without cross
        # checking like the text key check below.
        inventory_index_map, inventory_indices = self._pack_map_and_index_list(
            'inventory_index')
        inv_nodes = self._index_contents(inventory_indices, inv_keys)
        # copy inventory keys and adjust values
        # XXX: Should be a helper function to allow different inv representation
        # at this point.
        self.pb.update("Copying inventory texts", 2)
        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
        # Only grab the output lines if we will be processing them
        output_lines = bool(self.revision_ids)
        inv_lines = self._copy_nodes_graph(inventory_index_map,
            self.new_pack._writer, self.new_pack.inventory_index,
            readv_group_iter, total_items, output_lines=output_lines)
        if self.revision_ids:
            self._process_inventory_lines(inv_lines)
        else:
            # eat the iterator to cause it to execute.
            list(inv_lines)
            self._text_filter = None
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.inventory_index.key_count(),
                time.time() - self.new_pack.start_time)

    def _update_pack_order(self, entries, index_to_pack_map):
        """Determine how we want our packs to be ordered.

        This changes the sort order of the self.packs list so that packs unused
        by 'entries' will be at the end of the list, so that future requests
        can avoid probing them.  Used packs will be at the front of the
        self.packs list, in the order of their first use in 'entries'.

        :param entries: A list of (index, ...) tuples
        :param index_to_pack_map: A mapping from index objects to pack objects.
        """
        packs = []
        seen_indexes = set()
        for entry in entries:
            index = entry[0]
            if index not in seen_indexes:
                packs.append(index_to_pack_map[index])
                seen_indexes.add(index)
        if len(packs) == len(self.packs):
            if 'pack' in debug.debug_flags:
                trace.mutter('Not changing pack list, all packs used.')
            return
        seen_packs = set(packs)
        for pack in self.packs:
            if pack not in seen_packs:
                packs.append(pack)
                seen_packs.add(pack)
        if 'pack' in debug.debug_flags:
            old_names = [p.access_tuple()[1] for p in self.packs]
            new_names = [p.access_tuple()[1] for p in packs]
            trace.mutter('Reordering packs\nfrom: %s\n to: %s',
                         old_names, new_names)
        self.packs = packs
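
    # For example (hypothetical packs): with self.packs == [A, B, C] and
    # entries that touch only B then A, the list becomes [B, A, C]: packs in
    # order of first use, with unused packs moved to the back.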

    def _copy_revision_texts(self):
        # select revisions
        if self.revision_ids:
            revision_keys = [(revision_id,) for revision_id in self.revision_ids]
        else:
            revision_keys = None
        # select revision keys
        revision_index_map, revision_indices = self._pack_map_and_index_list(
            'revision_index')
        revision_nodes = self._index_contents(revision_indices, revision_keys)
        revision_nodes = list(revision_nodes)
        self._update_pack_order(revision_nodes, revision_index_map)
        # copy revision keys and adjust values
        self.pb.update("Copying revision texts", 1)
        total_items, readv_group_iter = self._revision_node_readv(revision_nodes)
        list(self._copy_nodes_graph(revision_index_map, self.new_pack._writer,
            self.new_pack.revision_index, readv_group_iter, total_items))
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.revision_index.key_count(),
                time.time() - self.new_pack.start_time)
        self._revision_keys = revision_keys

    def _get_text_nodes(self):
        text_index_map, text_indices = self._pack_map_and_index_list(
            'text_index')
        return text_index_map, self._index_contents(text_indices,
            self._text_filter)

    def _copy_text_texts(self):
        # select text keys
        text_index_map, text_nodes = self._get_text_nodes()
        if self._text_filter is not None:
            # We could return the keys copied as part of the return value from
            # _copy_nodes_graph but this doesn't work all that well with the
            # need to get line output too, so we check separately, and as we're
            # going to buffer everything anyway, we check beforehand, which
            # saves reading knit data over the wire when we know there are
            # missing records.
            text_nodes = set(text_nodes)
            present_text_keys = set(_node[1] for _node in text_nodes)
            missing_text_keys = set(self._text_filter) - present_text_keys
            if missing_text_keys:
                # TODO: raise a specific error that can handle many missing
                # keys.
                trace.mutter("missing keys during fetch: %r", missing_text_keys)
                a_missing_key = missing_text_keys.pop()
                raise errors.RevisionNotPresent(a_missing_key[1],
                    a_missing_key[0])
        # copy text keys and adjust values
        self.pb.update("Copying content texts", 3)
        total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        self._log_copied_texts()

    def _create_pack_from_packs(self):
        self.pb.update("Opening pack", 0, 5)
        self.new_pack = self.open_pack()
        new_pack = self.new_pack
        # buffer data - we won't be reading-back during the pack creation and
        # this makes a significant difference on sftp pushes.
        new_pack.set_write_cache_size(1024*1024)
        if 'pack' in debug.debug_flags:
            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
                for a_pack in self.packs]
            if self.revision_ids is not None:
                rev_count = len(self.revision_ids)
            else:
                rev_count = 'all'
            trace.mutter('%s: create_pack: creating pack from source packs: '
                '%s%s %s revisions wanted %s t=0',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                plain_pack_list, rev_count)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_text_texts()
        # select signature keys
        signature_filter = self._revision_keys # same keyspace
        signature_index_map, signature_indices = self._pack_map_and_index_list(
            'signature_index')
        signature_nodes = self._index_contents(signature_indices,
            signature_filter)
        # copy signature keys and adjust values
        self.pb.update("Copying signature texts", 4)
        self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
            new_pack.signature_index)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                new_pack.signature_index.key_count(),
                time.time() - new_pack.start_time)
        new_pack._check_references()
        if not self._use_pack(new_pack):
            new_pack.abort()
            return None
        self.pb.update("Finishing pack", 5)
        new_pack.finish()
        self._pack_collection.allocate(new_pack)
        return new_pack

    def _least_readv_node_readv(self, nodes):
        """Generate request groups for nodes using the least readv's.

        :param nodes: An iterable of graph index nodes.
        :return: Total node count and an iterator of the data needed to perform
            readvs to obtain the data for nodes. Each item yielded by the
            iterator is a tuple with:
            index, readv_vector, node_vector. readv_vector is a list ready to
            hand to the transport readv method, and node_vector is a list of
            (key, eol_flag, references) for the node retrieved by the
            matching readv_vector.
        """
        # group by pack so we do one readv per pack
        nodes = sorted(nodes)
        total = len(nodes)
        request_groups = {}
        for index, key, value, references in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value, references))
        result = []
        for index, items in viewitems(request_groups):
            pack_readv_requests = []
            for key, value, references in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append(
                    ((offset, length), (key, value[0], references)))
            # linear scan up the pack to maximum range combining.
            pack_readv_requests.sort()
            # split out the readv and the node data.
            pack_readv = [readv for readv, node in pack_readv_requests]
            node_vector = [node for readv, node in pack_readv_requests]
            result.append((index, pack_readv, node_vector))
        return total, result
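
    # Worked example (hypothetical values): nodes living in packs A and B,
    #   (A, ('k1',), ' 0 10', refs), (B, ('k2',), 'N30 5', refs),
    #   (A, ('k3',), ' 10 20', refs)
    # produce total == 3 and one request group per pack:
    #   (A, [(0, 10), (10, 20)], [node, node]) and (B, [(30, 5)], [node])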

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        return self._least_readv_node_readv(revision_nodes)


class KnitReconcilePacker(KnitPacker):
    """A packer which regenerates indices etc as it copies.

    This is used by ``brz reconcile`` to cause parent text pointers to be
    regenerated.
    """

    def __init__(self, *args, **kwargs):
        super(KnitReconcilePacker, self).__init__(*args, **kwargs)
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map to reconcile with."""
        repo = self._pack_collection.repo
        refs = repo._serializer._find_text_key_references(inv_lines)
        self._text_refs = refs
        # during reconcile we:
        # - convert unreferenced texts to full texts
        # - correct texts which reference a text not copied to be full texts
        # - copy all others as-is but with corrected parents.
        # - so at this point we don't know enough to decide what becomes a full
        #   text.
        self._text_filter = None

    def _copy_text_texts(self):
        """Generate what texts we should have and then copy."""
        self.pb.update("Copying content texts", 3)
        # we have three major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
            _1, key, _2, refs in
            self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
        # 2) generate a text_nodes list that contains all the deltas that can
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # 0 - index
            # 1 - key
            # 2 - value
            # 3 - refs
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                discarded_nodes.append(node)
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the left most parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                        (ideal_parents, node[3][1])))
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        bad_texts.sort(key=lambda key: rev_order.get(key[0][1], 0))
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
            {self.new_pack.text_index:self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        output_texts = KnitVersionedFiles(
            _KnitGraphIndex(self.new_pack.text_index,
                add_callback=self.new_pack.text_index.add_nodes,
                deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # We refer to the new pack to delta data being output.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            parents = []
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
                parents.append(parent_key[1])
            text_lines = osutils.split_lines(next(repo.texts.get_record_stream(
                [key], 'unordered', True)).get_bytes_as('fulltext'))
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        # XXX: we might be better checking this at the copy time.
        original_inventory_keys = set()
        inv_index = self._pack_collection.inventory_index.combined_index
        for entry in inv_index.iter_all_entries():
            original_inventory_keys.add(entry[1])
        new_inventory_keys = set()
        for entry in new_pack.inventory_index.iter_all_entries():
            new_inventory_keys.add(entry[1])
        if new_inventory_keys != original_inventory_keys:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed
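
    # Note: reconcile garbage-collects unreferenced inventories (the
    # repository sets _reconcile_does_inventory_gc), so a changed inventory
    # key set is itself a data change even when every text was copied as-is;
    # that is why the inventory keys are compared here as well.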


class OptimisingKnitPacker(KnitPacker):
    """A packer which spends more time to create better disk layouts."""

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        This sort places revisions in topological order with the ancestors
        after the children.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        # build an ancestors dict
        ancestors = {}
        by_key = {}
        for index, key, value, references in revision_nodes:
            ancestors[key] = references[0]
            by_key[key] = (index, value, references)
        order = tsort.topo_sort(ancestors)
        total = len(order)
        # Single IO is pathological, but it will work as a starting point.
        requests = []
        for key in reversed(order):
            index, value, references = by_key[key]
            # ---- KnitGraphIndex.get_position
            bits = value[1:].split(' ')
            offset, length = int(bits[0]), int(bits[1])
            requests.append(
                (index, [(offset, length)], [(key, value[0], references)]))
        # TODO: combine requests in the same index that are in ascending order.
        return total, requests
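
    # Because the requests are issued in reverse topological order, children
    # are written before their ancestors, so the most recent revisions land at
    # the front of the new pack - a layout that favours the common case of
    # reading recent history.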

    def open_pack(self):
        """Open a pack for the pack we are creating."""
        new_pack = super(OptimisingKnitPacker, self).open_pack()
        # Turn on the optimization flags for all the index builders.
        new_pack.revision_index.set_optimize(for_size=True)
        new_pack.inventory_index.set_optimize(for_size=True)
        new_pack.text_index.set_optimize(for_size=True)
        new_pack.signature_index.set_optimize(for_size=True)
        return new_pack


class KnitRepositoryPackCollection(RepositoryPackCollection):
    """A knit pack collection."""

    pack_factory = NewPack
    resumed_pack_factory = ResumedPack
    normal_packer_class = KnitPacker
    optimising_packer_class = OptimisingKnitPacker
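
# RepositoryPackCollection drives packing through the four class attributes
# above: pack_factory and resumed_pack_factory control how new and resumed
# packs are created, normal_packer_class is used for routine autopacking, and
# optimising_packer_class when an explicit repack (e.g. ``brz pack``) can
# afford the extra work of a better disk layout.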