# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Knit-based pack repository formats."""

from __future__ import absolute_import

from ..lazy_import import lazy_import
lazy_import(globals(), """
import time

from breezy import (
    controldir,
    debug,
    osutils,
    revision as _mod_revision,
    trace,
    tsort,
    ui,
    )
from breezy.bzr import (
    btree_index,
    pack,
    xml5,
    xml6,
    xml7,
    )
from breezy.bzr.knit import (
    _KnitGraphIndex,
    KnitPlainFactory,
    KnitVersionedFiles,
    )
""")

from .. import (
    errors,
    )
from ..bzr.index import (
    CombinedGraphIndex,
    GraphIndex,
    GraphIndexPrefixAdapter,
    InMemoryGraphIndex,
    )
from .knitrepo import (
    KnitRepository,
    )
from .pack_repo import (
    _DirectPackAccess,
    NewPack,
    Packer,
    PackCommitBuilder,
    PackRepository,
    RepositoryFormatPack,
    RepositoryPackCollection,
    ResumedPack,
    )
from ..sixish import (
    viewitems,
    zip,
    )
from ..bzr.vf_repository import (
    StreamSource,
    )

class KnitPackRepository(PackRepository, KnitRepository):

    def __init__(self, _format, a_controldir, control_files, _commit_builder_class,
                 _serializer):
        PackRepository.__init__(self, _format, a_controldir, control_files,
                                _commit_builder_class, _serializer)
        if self._format.supports_chks:
            raise AssertionError("chk not supported")
        index_transport = self._transport.clone('indices')
        self._pack_collection = KnitRepositoryPackCollection(self,
            self._transport,
            index_transport,
            self._transport.clone('upload'),
            self._transport.clone('packs'),
            _format.index_builder_class,
            _format.index_class,
            use_chk_index=False,
            )
        self.inventories = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
                add_callback=self._pack_collection.inventory_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.inventory_index.data_access,
            max_delta_chain=200)
        self.revisions = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
                add_callback=self._pack_collection.revision_index.add_callback,
                deltas=False, parents=True, is_locked=self.is_locked,
                track_external_parent_refs=True),
            data_access=self._pack_collection.revision_index.data_access,
            max_delta_chain=0)
        self.signatures = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.signature_index.combined_index,
                add_callback=self._pack_collection.signature_index.add_callback,
                deltas=False, parents=False, is_locked=self.is_locked),
            data_access=self._pack_collection.signature_index.data_access,
            max_delta_chain=0)
        self.texts = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.text_index.combined_index,
                add_callback=self._pack_collection.text_index.add_callback,
                deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.text_index.data_access,
            max_delta_chain=200)
        self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False

    def _get_source(self, to_format):
        if to_format.network_name() == self._format.network_name():
            return KnitPackStreamSource(self, to_format)
        return PackRepository._get_source(self, to_format)

    def _reconcile_pack(self, collection, packs, extension, revs, pb):
        packer = KnitReconcilePacker(collection, packs, extension, revs)
        return packer.pack(pb)

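# Illustrative sketch, not from the original module: reaching the stacked
# KnitVersionedFiles wired up above.  'sample-repo' is a hypothetical path to
# a repository in one of the pack formats defined below.
#
#   from breezy.repository import Repository
#   repo = Repository.open('sample-repo')
#   with repo.lock_read():
#       # revisions and inventories are keyed by (revision_id,) 1-tuples;
#       # texts are keyed by (file_id, revision_id) 2-tuples.
#       repo.revisions.get_parent_map([(b'a-revision-id',)])
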
class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A no-subtrees parameterized Pack repository.

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    @property
    def _serializer(self):
        return xml5.serializer_v5
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar pack repository format 1 (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits without subtree support"

class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    @property
    def _serializer(self):
        return xml7.serializer_v7
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            'pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with subtree support\n"

class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    supports_tree_reference = False
    @property
    def _serializer(self):
        return xml6.serializer_v6
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            'rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return (b"Bazaar pack repository format 1 with rich root"
                b" (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with rich root support\n"

class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir('1.6')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"

class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository with rich roots and stacking.

    New in release 1.6.1.

    Supports stacking on other repositories, allowing data to be accessed
    without being stored locally.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            '1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"

    def get_format_description(self):
        return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"

class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """A repository with rich roots and external references.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.

    This format was deprecated because the serializer it uses accidentally
    supported subtrees, when the format was not intended to. This meant that
    someone could accidentally fetch from an incorrect repository.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        matching = controldir.format_registry.make_controldir(
            '1.6.1-rich-root')
        matching.repository_format = self
        return matching

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
                " (deprecated)")

    def is_deprecated(self):
        return True

class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """A repository with stacking and btree indexes,
    without rich roots or subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir('1.9')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"

class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    supports_tree_reference = False # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            '1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"

class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained in 2.3, to provide an upgrade path from this
    to RepositoryFormat2aSubtree.  It can be removed in later releases.

    1.6.1-subtree[as it might have been] with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            'development5-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return (b"Bazaar development format 2 with subtree support "
                b"(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
                "1.6.1-subtree with B+Tree indices.\n")

class KnitPackStreamSource(StreamSource):
    """A StreamSource used to transfer data between same-format KnitPack repos.

    This source assumes:
        1) Same serialization format for all objects
        2) Same root information
        3) XML format inventories
        4) Atomic inserts (so we can stream inventory texts before text
           content)
        5) No chk_bytes
    """

    def __init__(self, from_repository, to_format):
        super(KnitPackStreamSource, self).__init__(from_repository, to_format)
        self._text_keys = None
        self._text_fetch_order = 'unordered'

    def _get_filtered_inv_stream(self, revision_ids):
        from_repo = self.from_repository
        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
        parent_keys = [(p,) for p in parent_ids]
        find_text_keys = from_repo._serializer._find_text_key_references
        parent_text_keys = set(find_text_keys(
            from_repo._inventory_xml_lines_for_keys(parent_keys)))
        content_text_keys = set()
        knit = KnitVersionedFiles(None, None)
        factory = KnitPlainFactory()
        def find_text_keys_from_content(record):
            if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
                raise ValueError("Unknown content storage kind for"
                    " inventory text: %s" % (record.storage_kind,))
            # It's a knit record, it has a _raw_record field (even if it was
            # reconstituted from a network stream).
            raw_data = record._raw_record
            # read the entire thing
            revision_id = record.key[-1]
            content, _ = knit._parse_record(revision_id, raw_data)
            if record.storage_kind == 'knit-delta-gz':
                line_iterator = factory.get_linedelta_content(content)
            elif record.storage_kind == 'knit-ft-gz':
                line_iterator = factory.get_fulltext_content(content)
            content_text_keys.update(find_text_keys(
                [(line, revision_id) for line in line_iterator]))
        revision_keys = [(r,) for r in revision_ids]
        def _filtered_inv_stream():
            source_vf = from_repo.inventories
            stream = source_vf.get_record_stream(revision_keys,
                                                 'unordered', False)
            for record in stream:
                if record.storage_kind == 'absent':
                    raise errors.NoSuchRevision(from_repo, record.key)
                find_text_keys_from_content(record)
                yield record
            self._text_keys = content_text_keys - parent_text_keys
        return ('inventories', _filtered_inv_stream())

    def _get_text_stream(self):
        # Note: We know we don't have to handle adding root keys, because both
        # the source and target are the identical network name.
        text_stream = self.from_repository.texts.get_record_stream(
            self._text_keys, self._text_fetch_order, False)
        return ('texts', text_stream)

    def get_stream(self, search):
        revision_ids = search.get_keys()
        for stream_info in self._fetch_revision_texts(revision_ids):
            yield stream_info
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
        yield self._get_filtered_inv_stream(revision_ids)
        yield self._get_text_stream()

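# Ordering note for KnitPackStreamSource.get_stream: the revision-bearing
# substreams from _fetch_revision_texts come first, then the filtered
# 'inventories' substream - whose consumption computes self._text_keys as a
# side effect - and only then the 'texts' substream built from those keys.
# A consumer must therefore drain each substream in order, e.g.:
#
#   for kind, substream in source.get_stream(search):
#       for record in substream:
#           pass  # insert the record into the target repository here
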
class KnitPacker(Packer):
    """Packer that works with knit packs."""

    def __init__(self, pack_collection, packs, suffix, revision_ids=None,
                 reload_func=None):
        super(KnitPacker, self).__init__(pack_collection, packs, suffix,
                                         revision_ids=revision_ids,
                                         reload_func=reload_func)

    def _pack_map_and_index_list(self, index_attribute):
        """Convert a list of packs to an index pack map and index list.

        :param index_attribute: The attribute that the desired index is found
            on.
        :return: A tuple (map, list) where map contains the dict from
            index:pack_tuple, and list contains the indices in the preferred
            access order.
        """
        indices = []
        pack_map = {}
        for pack_obj in self.packs:
            index = getattr(pack_obj, index_attribute)
            indices.append(index)
            pack_map[index] = pack_obj
        return pack_map, indices

    def _index_contents(self, indices, key_filter=None):
        """Get an iterable of the index contents from a pack_map.

        :param indices: The list of indices to query
        :param key_filter: An optional filter to limit the keys returned.
        """
        all_index = CombinedGraphIndex(indices)
        if key_filter is None:
            return all_index.iter_all_entries()
        else:
            return all_index.iter_entries(key_filter)

    def _copy_nodes(self, nodes, index_map, writer, write_index,
                    output_lines=None):
        """Copy knit nodes between packs with no graph references.

        :param output_lines: Output full texts of copied items.
        """
        with ui.ui_factory.nested_progress_bar() as pb:
            return self._do_copy_nodes(nodes, index_map, writer,
                write_index, pb, output_lines=output_lines)

    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
                       output_lines=None):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
        # group by pack
        nodes = sorted(nodes)
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        request_groups = {}
        for index, key, value in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value))
        record_index = 0
        pb.update("Copied record", record_index, len(nodes))
        for index, items in viewitems(request_groups):
            pack_readv_requests = []
            for key, value in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(b' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append((offset, length, (key, value[0:1])))
            # linear scan up the pack
            pack_readv_requests.sort()
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path,
                    [offset[0:2] for offset in pack_readv_requests])
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (_1, _2, (key, eol_flag)) in zip(
                    reader.iter_records(), pack_readv_requests):
                raw_data = read_func(None)
                # check the header only
                if output_lines is not None:
                    output_lines(knit._parse_record(key[-1], raw_data)[0])
                else:
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + b"%d %d" % (pos, size))
                pb.update("Copied record", record_index)
                record_index += 1

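    # Note on the index value layout parsed above, as written by
    # _KnitGraphIndex (an assumption this packer relies on): one flag byte
    # (b'N' when the stored text has no trailing newline, b' ' otherwise)
    # followed by the ascii offset and length of the record in its pack, e.g.:
    #
    #   value = b'N1234 5678'
    #   eol_flag = value[0:1]                                     # b'N'
    #   offset, length = [int(v) for v in value[1:].split(b' ')]  # 1234, 5678
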
    def _copy_nodes_graph(self, index_map, writer, write_index,
                          readv_group_iter, total_items, output_lines=False):
        """Copy knit nodes between packs.

        :param output_lines: Return lines present in the copied data as
            an iterator of line,version_id.
        """
        with ui.ui_factory.nested_progress_bar() as pb:
            for result in self._do_copy_nodes_graph(index_map, writer,
                    write_index, output_lines, pb, readv_group_iter, total_items):
                yield result

    def _do_copy_nodes_graph(self, index_map, writer, write_index,
                             output_lines, pb, readv_group_iter, total_items):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        record_index = 0
        pb.update("Copied record", record_index, total_items)
        for index, readv_vector, node_vector in readv_group_iter:
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path, readv_vector)
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (key, eol_flag, references) in zip(
                    reader.iter_records(), node_vector):
                raw_data = read_func(None)
                if output_lines:
                    # read the entire thing
                    content, _ = knit._parse_record(key[-1], raw_data)
                    if len(references[-1]) == 0:
                        line_iterator = factory.get_fulltext_content(content)
                    else:
                        line_iterator = factory.get_linedelta_content(content)
                    for line in line_iterator:
                        yield line, key
                else:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + b"%d %d" % (pos, size), references)
                pb.update("Copied record", record_index)
                record_index += 1

    def _process_inventory_lines(self, inv_lines):
        """Use up the inv_lines generator and setup a text key filter."""
        repo = self._pack_collection.repo
        fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
            inv_lines, self.revision_keys)
        text_filter = []
        for fileid, file_revids in viewitems(fileid_revisions):
            text_filter.extend([(fileid, file_revid) for file_revid in file_revids])
        self._text_filter = text_filter

    def _copy_inventory_texts(self):
        # select inventory keys
        inv_keys = self._revision_keys # currently the same keyspace, and note that
        # querying for keys here could introduce a bug where an inventory item
        # is missed, so do not change it to query separately without cross
        # checking like the text key check below.
        inventory_index_map, inventory_indices = self._pack_map_and_index_list(
            'inventory_index')
        inv_nodes = self._index_contents(inventory_indices, inv_keys)
        # copy inventory keys and adjust values
        # XXX: Should be a helper function to allow different inv representation
        # at this point.
        self.pb.update("Copying inventory texts", 2)
        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
        # Only grab the output lines if we will be processing them
        output_lines = bool(self.revision_ids)
        inv_lines = self._copy_nodes_graph(inventory_index_map,
            self.new_pack._writer, self.new_pack.inventory_index,
            readv_group_iter, total_items, output_lines=output_lines)
        if self.revision_ids:
            self._process_inventory_lines(inv_lines)
        else:
            # eat the iterator to cause it to execute.
            list(inv_lines)
            self._text_filter = None
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.inventory_index.key_count(),
                time.time() - self.new_pack.start_time)

    def _update_pack_order(self, entries, index_to_pack_map):
        """Determine how we want our packs to be ordered.

        This changes the sort order of the self.packs list so that packs unused
        by 'entries' will be at the end of the list, so that future requests
        can avoid probing them.  Used packs will be at the front of the
        self.packs list, in the order of their first use in 'entries'.

        :param entries: A list of (index, ...) tuples
        :param index_to_pack_map: A mapping from index objects to pack objects.
        """
        packs = []
        seen_indexes = set()
        for entry in entries:
            index = entry[0]
            if index not in seen_indexes:
                packs.append(index_to_pack_map[index])
                seen_indexes.add(index)
        if len(packs) == len(self.packs):
            if 'pack' in debug.debug_flags:
                trace.mutter('Not changing pack list, all packs used.')
            return
        seen_packs = set(packs)
        for pack in self.packs:
            if pack not in seen_packs:
                packs.append(pack)
                seen_packs.add(pack)
        if 'pack' in debug.debug_flags:
            old_names = [p.access_tuple()[1] for p in self.packs]
            new_names = [p.access_tuple()[1] for p in packs]
            trace.mutter('Reordering packs\nfrom: %s\n to: %s',
                old_names, new_names)
        self.packs = packs

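    # Example of the reordering above: with self.packs == [p1, p2, p3] and
    # entries whose first uses touch p3's index and then p1's, the list
    # becomes [p3, p1, p2] - used packs first in first-use order, unused last.
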
    def _copy_revision_texts(self):
        # select revisions
        if self.revision_ids:
            revision_keys = [(revision_id,) for revision_id in self.revision_ids]
        else:
            revision_keys = None
        # select revision keys
        revision_index_map, revision_indices = self._pack_map_and_index_list(
            'revision_index')
        revision_nodes = self._index_contents(revision_indices, revision_keys)
        revision_nodes = list(revision_nodes)
        self._update_pack_order(revision_nodes, revision_index_map)
        # copy revision keys and adjust values
        self.pb.update("Copying revision texts", 1)
        total_items, readv_group_iter = self._revision_node_readv(revision_nodes)
        list(self._copy_nodes_graph(revision_index_map, self.new_pack._writer,
            self.new_pack.revision_index, readv_group_iter, total_items))
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base,
                self.new_pack.random_name,
                self.new_pack.revision_index.key_count(),
                time.time() - self.new_pack.start_time)
        self._revision_keys = revision_keys

    def _get_text_nodes(self):
        text_index_map, text_indices = self._pack_map_and_index_list(
            'text_index')
        return text_index_map, self._index_contents(text_indices,
            self._text_filter)

    def _copy_text_texts(self):
        # select text keys
        text_index_map, text_nodes = self._get_text_nodes()
        if self._text_filter is not None:
            # We could return the keys copied as part of the return value from
            # _copy_nodes_graph but this doesn't work all that well with the
            # need to get line output too, so we check separately, and as we're
            # going to buffer everything anyway, we check beforehand, which
            # saves reading knit data over the wire when we know there are
            # missing records.
            text_nodes = set(text_nodes)
            present_text_keys = set(_node[1] for _node in text_nodes)
            missing_text_keys = set(self._text_filter) - present_text_keys
            if missing_text_keys:
                # TODO: raise a specific error that can handle many missing
                # keys.
                trace.mutter("missing keys during fetch: %r", missing_text_keys)
                a_missing_key = missing_text_keys.pop()
                raise errors.RevisionNotPresent(a_missing_key[1],
                                                a_missing_key[0])
        # copy text keys and adjust values
        self.pb.update("Copying content texts", 3)
        total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        self._log_copied_texts()

    def _create_pack_from_packs(self):
        self.pb.update("Opening pack", 0, 5)
        self.new_pack = self.open_pack()
        new_pack = self.new_pack
        # buffer data - we won't be reading-back during the pack creation and
        # this makes a significant difference on sftp pushes.
        new_pack.set_write_cache_size(1024*1024)
        if 'pack' in debug.debug_flags:
            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
                for a_pack in self.packs]
            if self.revision_ids is not None:
                rev_count = len(self.revision_ids)
            else:
                rev_count = 'all'
            trace.mutter('%s: create_pack: creating pack from source packs: '
                '%s%s %s revisions wanted %s t=0',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                plain_pack_list, rev_count)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_text_texts()
        # select signature keys
        signature_filter = self._revision_keys # same keyspace
        signature_index_map, signature_indices = self._pack_map_and_index_list(
            'signature_index')
        signature_nodes = self._index_contents(signature_indices,
            signature_filter)
        # copy signature keys and adjust values
        self.pb.update("Copying signature texts", 4)
        self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
            new_pack.signature_index)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                new_pack.signature_index.key_count(),
                time.time() - new_pack.start_time)
        new_pack._check_references()
        if not self._use_pack(new_pack):
            new_pack.abort()
            return None
        self.pb.update("Finishing pack", 5)
        new_pack.finish()
        self._pack_collection.allocate(new_pack)
        return new_pack

    def _least_readv_node_readv(self, nodes):
        """Generate request groups for nodes using the least readv's.

        :param nodes: An iterable of graph index nodes.
        :return: Total node count and an iterator of the data needed to perform
            readvs to obtain the data for nodes. Each item yielded by the
            iterator is a tuple with:
            index, readv_vector, node_vector. readv_vector is a list ready to
            hand to the transport readv method, and node_vector is a list of
            (key, eol_flag, references) for the node retrieved by the
            matching readv_vector.
        """
        # group by pack so we do one readv per pack
        nodes = sorted(nodes)
        total = len(nodes)
        request_groups = {}
        for index, key, value, references in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value, references))
        result = []
        for index, items in viewitems(request_groups):
            pack_readv_requests = []
            for key, value, references in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(b' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append(
                    ((offset, length), (key, value[0:1], references)))
            # linear scan up the pack to maximum range combining.
            pack_readv_requests.sort()
            # split out the readv and the node data.
            pack_readv = [readv for readv, node in pack_readv_requests]
            node_vector = [node for readv, node in pack_readv_requests]
            result.append((index, pack_readv, node_vector))
        return total, result

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        return self._least_readv_node_readv(revision_nodes)

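# Sketch of the shape returned by KnitPacker._least_readv_node_readv, assuming
# two records in one pack and one in another (offsets and lengths invented):
#
#   total, groups = packer._least_readv_node_readv(nodes)
#   # total  -> 3
#   # groups -> [(index_a, [(0, 120), (120, 40)],
#   #             [((b'rev-1',), b' ', refs1), ((b'rev-2',), b'N', refs2)]),
#   #            (index_b, [(0, 55)],
#   #             [((b'rev-3',), b' ', refs3)])]
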
class KnitReconcilePacker(KnitPacker):
    """A packer which regenerates indices etc as it copies.

    This is used by ``brz reconcile`` to cause parent text pointers to be
    correct.
    """

    def __init__(self, *args, **kwargs):
        super(KnitReconcilePacker, self).__init__(*args, **kwargs)
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map for reconciling with."""
        repo = self._pack_collection.repo
        refs = repo._serializer._find_text_key_references(inv_lines)
        self._text_refs = refs
        # during reconcile we:
        #  - convert unreferenced texts to full texts
        #  - correct texts which reference a text not copied to be full texts
        #  - copy all others as-is but with corrected parents.
        #  - so at this point we don't know enough to decide what becomes a full
        #    text.
        self._text_filter = None

    def _copy_text_texts(self):
        """Generate what texts we should have and then copy."""
        self.pb.update("Copying content texts", 3)
        # we have three major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
            (_1, key, _2, refs) in
            self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
        # 2) generate a text_nodes list that contains all the deltas that can
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # 0 - index
            # 1 - key
            # 2 - value
            # 3 - refs
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                discarded_nodes.append(node)
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the left most parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                        (ideal_parents, node[3][1])))
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
            self.new_pack.text_index, readv_group_iter, total_items))
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        bad_texts.sort(key=lambda key: rev_order.get(key[0][1], 0))
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
            {self.new_pack.text_index: self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
            self.new_pack.access_tuple())
        output_texts = KnitVersionedFiles(
            _KnitGraphIndex(self.new_pack.text_index,
                add_callback=self.new_pack.text_index.add_nodes,
                deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # We refer to the new pack for delta data being output.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            parents = []
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                        (key, parent_keys))
                parents.append(parent_key[1])
            text_lines = osutils.split_lines(next(repo.texts.get_record_stream(
                [key], 'unordered', True)).get_bytes_as('fulltext'))
            output_texts.add_lines(key, parent_keys, text_lines,
                random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                % (missing_text_keys,))
        self._log_copied_texts()

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        # XXX: we might be better checking this at the copy time.
        original_inventory_keys = set()
        inv_index = self._pack_collection.inventory_index.combined_index
        for entry in inv_index.iter_all_entries():
            original_inventory_keys.add(entry[1])
        new_inventory_keys = set()
        for entry in new_pack.inventory_index.iter_all_entries():
            new_inventory_keys.add(entry[1])
        if new_inventory_keys != original_inventory_keys:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed

class OptimisingKnitPacker(KnitPacker):
    """A packer which spends more time to create better disk layouts."""

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        This sort places revisions in topological order with the ancestors
        after the children.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        # build an ancestors dict
        ancestors = {}
        by_key = {}
        for index, key, value, references in revision_nodes:
            ancestors[key] = references[0]
            by_key[key] = (index, value, references)
        order = tsort.topo_sort(ancestors)
        total = len(order)
        # Single IO is pathological, but it will work as a starting point.
        requests = []
        for key in reversed(order):
            index, value, references = by_key[key]
            # ---- KnitGraphIndex.get_position
            bits = value[1:].split(b' ')
            offset, length = int(bits[0]), int(bits[1])
            requests.append(
                (index, [(offset, length)], [(key, value[0:1], references)]))
        # TODO: combine requests in the same index that are in ascending order.
        return total, requests

    def open_pack(self):
        """Open a pack for the pack we are creating."""
        new_pack = super(OptimisingKnitPacker, self).open_pack()
        # Turn on the optimization flags for all the index builders.
        new_pack.revision_index.set_optimize(for_size=True)
        new_pack.inventory_index.set_optimize(for_size=True)
        new_pack.text_index.set_optimize(for_size=True)
        new_pack.signature_index.set_optimize(for_size=True)
        return new_pack

class KnitRepositoryPackCollection(RepositoryPackCollection):
    """A knit pack collection."""

    pack_factory = NewPack
    resumed_pack_factory = ResumedPack
    normal_packer_class = KnitPacker
    optimising_packer_class = OptimisingKnitPacker

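# Usage sketch (an assumption about callers, not shown in this module): the
# collection is instantiated by KnitPackRepository.__init__ above, and an
# explicit repack of such a repository - what 'brz pack' performs - may route
# through OptimisingKnitPacker via the optimising_packer_class attribute:
#
#   repo = Repository.open('sample-knit-pack-repo')  # hypothetical path
#   with repo.lock_write():
#       repo.pack()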