# Copyright (C) 2007-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Knit-based pack repository formats."""

from __future__ import absolute_import
from ..lazy_import import lazy_import
lazy_import(globals(), """
import time

from breezy import (
    controldir,
    debug,
    errors,
    osutils,
    revision as _mod_revision,
    trace,
    tsort,
    ui,
    )
from breezy.bzr import (
    pack,
    xml5,
    xml6,
    xml7,
    )
from breezy.bzr.knit import (
    _KnitGraphIndex,
    KnitPlainFactory,
    KnitVersionedFiles,
    )
""")

from ..bzr import (
    btree_index,
    )
from ..bzr.index import (
    CombinedGraphIndex,
    GraphIndex,
    GraphIndexPrefixAdapter,
    InMemoryGraphIndex,
    )
from .knitrepo import (
    KnitRepository,
    )
from .pack_repo import (
    _DirectPackAccess,
    NewPack,
    Packer,
    PackCommitBuilder,
    PackRepository,
    RepositoryFormatPack,
    RepositoryPackCollection,
    ResumedPack,
    )
from ..sixish import (
    viewitems,
    )
from ..bzr.vf_repository import (
    StreamSource,
    )


class KnitPackRepository(PackRepository, KnitRepository):

    def __init__(self, _format, a_controldir, control_files, _commit_builder_class,
                 _serializer):
        PackRepository.__init__(self, _format, a_controldir, control_files,
                                _commit_builder_class, _serializer)
        if self._format.supports_chks:
            raise AssertionError("chk not supported")
        index_transport = self._transport.clone('indices')
        self._pack_collection = KnitRepositoryPackCollection(self,
                                                             self._transport,
                                                             index_transport,
                                                             self._transport.clone(
                                                                 'upload'),
                                                             self._transport.clone(
                                                                 'packs'),
                                                             _format.index_builder_class,
                                                             _format.index_class,
                                                             use_chk_index=False,
                                                             )
        self.inventories = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.inventory_index.combined_index,
                            add_callback=self._pack_collection.inventory_index.add_callback,
                            deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.inventory_index.data_access,
            max_delta_chain=200)
        self.revisions = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.revision_index.combined_index,
                            add_callback=self._pack_collection.revision_index.add_callback,
                            deltas=False, parents=True, is_locked=self.is_locked,
                            track_external_parent_refs=True),
            data_access=self._pack_collection.revision_index.data_access,
            max_delta_chain=0)
        self.signatures = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.signature_index.combined_index,
                            add_callback=self._pack_collection.signature_index.add_callback,
                            deltas=False, parents=False, is_locked=self.is_locked),
            data_access=self._pack_collection.signature_index.data_access,
            max_delta_chain=0)
        self.texts = KnitVersionedFiles(
            _KnitGraphIndex(self._pack_collection.text_index.combined_index,
                            add_callback=self._pack_collection.text_index.add_callback,
                            deltas=True, parents=True, is_locked=self.is_locked),
            data_access=self._pack_collection.text_index.data_access,
            max_delta_chain=200)
        self.chk_bytes = None
        # True when the repository object is 'write locked' (as opposed to the
        # physical lock only taken out around changes to the pack-names list.)
        # Another way to represent this would be a decorator around the control
        # files object that presents logical locks as physical ones - if this
        # gets ugly consider that alternative design. RBC 20071011
        self._write_lock_count = 0
        self._transaction = None
        # for tests
        self._reconcile_does_inventory_gc = True
        self._reconcile_fixes_text_parents = True
        self._reconcile_backsup_inventory = False
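        # Editorial note on the max_delta_chain settings above (an assumption
        # documented from the values used, not new behaviour): revisions and
        # signatures are stored as fulltexts (max_delta_chain=0), while
        # inventories and file texts may chain up to 200 deltas before a
        # fresh fulltext is written.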

    def _get_source(self, to_format):
        if to_format.network_name() == self._format.network_name():
            return KnitPackStreamSource(self, to_format)
        return PackRepository._get_source(self, to_format)

    def _reconcile_pack(self, collection, packs, extension, revs, pb):
        packer = KnitReconcilePacker(collection, packs, extension, revs)
        return packer.pack(pb)
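

# A minimal usage sketch (editorial, not part of the original module): the
# format classes below are normally obtained through the controldir format
# registry rather than constructed directly, e.g.:
#
#   from breezy import controldir
#   cd_format = controldir.format_registry.make_controldir('pack-0.92')
#   repo_format = cd_format.repository_format  # a RepositoryFormatKnitPack1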


class RepositoryFormatKnitPack1(RepositoryFormatPack):
    """A no-subtrees parameterized Pack repository.

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder

    @property
    def _serializer(self):
        return xml5.serializer_v5
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir('pack-0.92')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar pack repository format 1 (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits without subtree support"


class RepositoryFormatKnitPack3(RepositoryFormatPack):
    """A subtrees parameterized Pack repository.

    This repository format uses the xml7 serializer to get:
     - support for recording full info about the tree root
     - support for recording tree-references

    This format was introduced in 0.92.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True

    @property
    def _serializer(self):
        return xml7.serializer_v7
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            'pack-0.92-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with subtree support\n"


class RepositoryFormatKnitPack4(RepositoryFormatPack):
    """A rich-root, no subtrees parameterized Pack repository.

    This repository format uses the xml6 serializer to get:
     - support for recording full info about the tree root

    This format was introduced in 1.0.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    supports_tree_reference = False

    @property
    def _serializer(self):
        return xml6.serializer_v6
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            'rich-root-pack')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return (b"Bazaar pack repository format 1 with rich root"
                b" (needs bzr 1.0)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs containing knits with rich root support\n"


class RepositoryFormatKnitPack5(RepositoryFormatPack):
    """Repository that supports external references to allow stacking.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir('1.6')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar RepositoryFormatKnitPack5 (bzr 1.6)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 5 (adds stacking support, requires bzr 1.6)"


class RepositoryFormatKnitPack5RichRoot(RepositoryFormatPack):
    """A repository with rich roots and stacking.

    New in release 1.6.1.

    Supports stacking on other repositories, allowing data to be accessed
    without being stored locally.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    supports_tree_reference = False  # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            '1.6.1-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6.1)\n"

    def get_format_description(self):
        return "Packs 5 rich-root (adds stacking support, requires bzr 1.6.1)"


class RepositoryFormatKnitPack5RichRootBroken(RepositoryFormatPack):
    """A repository with rich roots and external references.

    New in release 1.6.

    Supports external lookups, which results in non-truncated ghosts after
    reconcile compared to pack-0.92 formats.

    This format was deprecated because the serializer it uses accidentally
    supported subtrees, when the format was not intended to. This meant that
    someone could accidentally fetch from an incorrect repository.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    supports_tree_reference = False  # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = InMemoryGraphIndex
    index_class = GraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        matching = controldir.format_registry.make_controldir(
            '1.6.1-rich-root')
        matching.repository_format = self
        return matching

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar RepositoryFormatKnitPack5RichRoot (bzr 1.6)\n"

    def get_format_description(self):
        return ("Packs 5 rich-root (adds stacking support, requires bzr 1.6)"
                " (deprecated)")

    def is_deprecated(self):
        return True


class RepositoryFormatKnitPack6(RepositoryFormatPack):
    """A repository with stacking and btree indexes,
    without rich roots or subtrees.

    This is equivalent to pack-1.6 with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml5.serializer_v5

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir('1.9')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar RepositoryFormatKnitPack6 (bzr 1.9)\n"

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return "Packs 6 (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatKnitPack6RichRoot(RepositoryFormatPack):
    """A repository with rich roots, no subtrees, stacking and btree indexes.

    1.6-rich-root with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    supports_tree_reference = False  # no subtrees
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml6.serializer_v6

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            '1.9-rich-root')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return b"Bazaar RepositoryFormatKnitPack6RichRoot (bzr 1.9)\n"

    def get_format_description(self):
        return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"


class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
    """A subtrees development repository.

    This format should be retained in 2.3, to provide an upgrade path from this
    to RepositoryFormat2aSubtree. It can be removed in later releases.

    1.6.1-subtree[as it might have been] with B+Tree indices.
    """

    repository_class = KnitPackRepository
    _commit_builder_class = PackCommitBuilder
    rich_root_data = True
    experimental = True
    supports_tree_reference = True
    supports_external_lookups = True
    # What index classes to use
    index_builder_class = btree_index.BTreeBuilder
    index_class = btree_index.BTreeGraphIndex

    @property
    def _serializer(self):
        return xml7.serializer_v7

    def _get_matching_bzrdir(self):
        return controldir.format_registry.make_controldir(
            'development5-subtree')

    def _ignore_setting_bzrdir(self, format):
        pass

    _matchingcontroldir = property(
        _get_matching_bzrdir, _ignore_setting_bzrdir)

    @classmethod
    def get_format_string(cls):
        """See RepositoryFormat.get_format_string()."""
        return (b"Bazaar development format 2 with subtree support "
                b"(needs bzr.dev from before 1.8)\n")

    def get_format_description(self):
        """See RepositoryFormat.get_format_description()."""
        return ("Development repository format, currently the same as "
                "1.6.1-subtree with B+Tree indices.\n")


class KnitPackStreamSource(StreamSource):
    """A StreamSource used to transfer data between same-format KnitPack repos.

    This source assumes:
        1) Same serialization format for all objects
        2) Same root information
        3) XML format inventories
        4) Atomic inserts (so we can stream inventory texts before text
           content)
        5) No chk_bytes
    """

    def __init__(self, from_repository, to_format):
        super(KnitPackStreamSource, self).__init__(from_repository, to_format)
        self._text_keys = None
        self._text_fetch_order = 'unordered'

    def _get_filtered_inv_stream(self, revision_ids):
        from_repo = self.from_repository
        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
        parent_keys = [(p,) for p in parent_ids]
        find_text_keys = from_repo._serializer._find_text_key_references
        parent_text_keys = set(find_text_keys(
            from_repo._inventory_xml_lines_for_keys(parent_keys)))
        content_text_keys = set()
        knit = KnitVersionedFiles(None, None)
        factory = KnitPlainFactory()

        def find_text_keys_from_content(record):
            if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
                raise ValueError("Unknown content storage kind for"
                                 " inventory text: %s" % (record.storage_kind,))
            # It's a knit record, it has a _raw_record field (even if it was
            # reconstituted from a network stream).
            raw_data = record._raw_record
            # read the entire thing
            revision_id = record.key[-1]
            content, _ = knit._parse_record(revision_id, raw_data)
            if record.storage_kind == 'knit-delta-gz':
                line_iterator = factory.get_linedelta_content(content)
            elif record.storage_kind == 'knit-ft-gz':
                line_iterator = factory.get_fulltext_content(content)
            content_text_keys.update(find_text_keys(
                [(line, revision_id) for line in line_iterator]))
        revision_keys = [(r,) for r in revision_ids]

        def _filtered_inv_stream():
            source_vf = from_repo.inventories
            stream = source_vf.get_record_stream(revision_keys,
                                                 'unordered', False)
            for record in stream:
                if record.storage_kind == 'absent':
                    raise errors.NoSuchRevision(from_repo, record.key)
                find_text_keys_from_content(record)
                yield record
            self._text_keys = content_text_keys - parent_text_keys
        return ('inventories', _filtered_inv_stream())
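
    # Note (editorial): _filtered_inv_stream above only fills in
    # self._text_keys once the stream has been fully consumed, so the
    # inventory substream must be drained before _get_text_stream() is used.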

    def _get_text_stream(self):
        # Note: We know we don't have to handle adding root keys, because both
        # the source and target are the identical network name.
        text_stream = self.from_repository.texts.get_record_stream(
            self._text_keys, self._text_fetch_order, False)
        return ('texts', text_stream)

    def get_stream(self, search):
        revision_ids = search.get_keys()
        for stream_info in self._fetch_revision_texts(revision_ids):
            yield stream_info
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
        yield self._get_filtered_inv_stream(revision_ids)
        yield self._get_text_stream()
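
    # Sketch of the substream order produced by get_stream() (editorial,
    # following the tuples returned above): the revision-level substreams
    # from _fetch_revision_texts come first, then the filtered 'inventories'
    # stream, and finally the 'texts' stream whose keys were computed as a
    # side effect while the inventories streamed past.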


class KnitPacker(Packer):
    """Packer that works with knit packs."""

    def __init__(self, pack_collection, packs, suffix, revision_ids=None,
                 reload_func=None):
        super(KnitPacker, self).__init__(pack_collection, packs, suffix,
                                         revision_ids=revision_ids,
                                         reload_func=reload_func)

    def _pack_map_and_index_list(self, index_attribute):
        """Convert a list of packs to an index pack map and index list.

        :param index_attribute: The attribute that the desired index is found
            on.
        :return: A tuple (map, list) where map contains the dict from
            index:pack_tuple, and list contains the indices in the preferred
            access order.
        """
        indices = []
        pack_map = {}
        for pack_obj in self.packs:
            index = getattr(pack_obj, index_attribute)
            indices.append(index)
            pack_map[index] = pack_obj
        return pack_map, indices

    def _index_contents(self, indices, key_filter=None):
        """Get an iterable of the index contents from a pack_map.

        :param indices: The list of indices to query
        :param key_filter: An optional filter to limit the keys returned.
        """
        all_index = CombinedGraphIndex(indices)
        if key_filter is None:
            return all_index.iter_all_entries()
        else:
            return all_index.iter_entries(key_filter)

    def _copy_nodes(self, nodes, index_map, writer, write_index,
                    output_lines=None):
        """Copy knit nodes between packs with no graph references.

        :param output_lines: Output full texts of copied items.
        """
        with ui.ui_factory.nested_progress_bar() as pb:
            return self._do_copy_nodes(nodes, index_map, writer,
                                       write_index, pb, output_lines=output_lines)

    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
                       output_lines=None):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # plan a readv on each source pack:
        # group by pack
        nodes = sorted(nodes)
        # how to map this into knit.py - or knit.py into this?
        # we don't want the typical knit logic, we want grouping by pack
        # at this point - perhaps a helper library for the following code
        # duplication points?
        request_groups = {}
        for index, key, value in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value))
        record_index = 0
        pb.update("Copied record", record_index, len(nodes))
        for index, items in viewitems(request_groups):
            pack_readv_requests = []
            for key, value in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(b' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append((offset, length, (key, value[0:1])))
            # linear scan up the pack
            pack_readv_requests.sort()
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path,
                                                [offset[0:2] for offset in pack_readv_requests])
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (_1, _2, (key, eol_flag)) in zip(
                    reader.iter_records(), pack_readv_requests):
                raw_data = read_func(None)
                # check the header only
                if output_lines is not None:
                    output_lines(knit._parse_record(key[-1], raw_data)[0])
                else:
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + b"%d %d" % (pos, size))
                pb.update("Copied record", record_index)
                record_index += 1

    def _copy_nodes_graph(self, index_map, writer, write_index,
                          readv_group_iter, total_items, output_lines=False):
        """Copy knit nodes between packs.

        :param output_lines: Return lines present in the copied data as
            an iterator of line,version_id.
        """
        with ui.ui_factory.nested_progress_bar() as pb:
            for result in self._do_copy_nodes_graph(index_map, writer,
                                                    write_index, output_lines, pb, readv_group_iter, total_items):
                yield result

    def _do_copy_nodes_graph(self, index_map, writer, write_index,
                             output_lines, pb, readv_group_iter, total_items):
        # for record verification
        knit = KnitVersionedFiles(None, None)
        # for line extraction when requested (inventories only)
        if output_lines:
            factory = KnitPlainFactory()
        record_index = 0
        pb.update("Copied record", record_index, total_items)
        for index, readv_vector, node_vector in readv_group_iter:
            # copy the data
            pack_obj = index_map[index]
            transport, path = pack_obj.access_tuple()
            try:
                reader = pack.make_readv_reader(transport, path, readv_vector)
            except errors.NoSuchFile:
                if self._reload_func is not None:
                    self._reload_func()
                raise
            for (names, read_func), (key, eol_flag, references) in zip(
                    reader.iter_records(), node_vector):
                raw_data = read_func(None)
                if output_lines:
                    # read the entire thing
                    content, _ = knit._parse_record(key[-1], raw_data)
                    if len(references[-1]) == 0:
                        line_iterator = factory.get_fulltext_content(content)
                    else:
                        line_iterator = factory.get_linedelta_content(content)
                    for line in line_iterator:
                        yield line, key
                else:
                    # check the header only
                    df, _ = knit._parse_record_header(key, raw_data)
                    df.close()
                pos, size = writer.add_bytes_record(raw_data, names)
                write_index.add_node(key, eol_flag + b"%d %d" %
                                     (pos, size), references)
                pb.update("Copied record", record_index)
                record_index += 1

    def _process_inventory_lines(self, inv_lines):
        """Use up the inv_lines generator and setup a text key filter."""
        repo = self._pack_collection.repo
        fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
            inv_lines, self.revision_keys)
        text_filter = []
        for fileid, file_revids in viewitems(fileid_revisions):
            text_filter.extend([(fileid, file_revid)
                                for file_revid in file_revids])
        self._text_filter = text_filter

    def _copy_inventory_texts(self):
        # select inventory keys
        inv_keys = self._revision_keys  # currently the same keyspace, and note that
        # querying for keys here could introduce a bug where an inventory item
        # is missed, so do not change it to query separately without cross
        # checking like the text key check below.
        inventory_index_map, inventory_indices = self._pack_map_and_index_list(
            'inventory_index')
        inv_nodes = self._index_contents(inventory_indices, inv_keys)
        # copy inventory keys and adjust values
        # XXX: Should be a helper function to allow different inv representation
        # at this point.
        self.pb.update("Copying inventory texts", 2)
        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
        # Only grab the output lines if we will be processing them
        output_lines = bool(self.revision_ids)
        inv_lines = self._copy_nodes_graph(inventory_index_map,
                                           self.new_pack._writer, self.new_pack.inventory_index,
                                           readv_group_iter, total_items, output_lines=output_lines)
        if self.revision_ids:
            self._process_inventory_lines(inv_lines)
        else:
            # eat the iterator to cause it to execute.
            list(inv_lines)
            self._text_filter = None
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
                         time.ctime(), self._pack_collection._upload_transport.base,
                         self.new_pack.random_name,
                         self.new_pack.inventory_index.key_count(),
                         time.time() - self.new_pack.start_time)

    def _update_pack_order(self, entries, index_to_pack_map):
        """Determine how we want our packs to be ordered.

        This changes the sort order of the self.packs list so that packs unused
        by 'entries' will be at the end of the list, so that future requests
        can avoid probing them. Used packs will be at the front of the
        self.packs list, in the order of their first use in 'entries'.

        :param entries: A list of (index, ...) tuples
        :param index_to_pack_map: A mapping from index objects to pack objects.
        """
        packs = []
        seen_indexes = set()
        for entry in entries:
            index = entry[0]
            if index not in seen_indexes:
                packs.append(index_to_pack_map[index])
                seen_indexes.add(index)
        if len(packs) == len(self.packs):
            if 'pack' in debug.debug_flags:
                trace.mutter('Not changing pack list, all packs used.')
            return
        seen_packs = set(packs)
        for pack in self.packs:
            if pack not in seen_packs:
                packs.append(pack)
                seen_packs.add(pack)
        if 'pack' in debug.debug_flags:
            old_names = [p.access_tuple()[1] for p in self.packs]
            new_names = [p.access_tuple()[1] for p in packs]
            trace.mutter('Reordering packs\nfrom: %s\n to: %s',
                         old_names, new_names)
        self.packs = packs
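
    # Worked example (editorial; the pack names are invented): if self.packs
    # is [pA, pB, pC] and 'entries' references only pC's index and then pA's,
    # the list becomes [pC, pA, pB] - used packs first, in first-use order,
    # with unused packs trailing.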

    def _copy_revision_texts(self):
        """Copy revision data to the new pack."""
        if self.revision_ids:
            revision_keys = [(revision_id,)
                             for revision_id in self.revision_ids]
        else:
            revision_keys = None
        # select revision keys
        revision_index_map, revision_indices = self._pack_map_and_index_list(
            'revision_index')
        revision_nodes = self._index_contents(revision_indices, revision_keys)
        revision_nodes = list(revision_nodes)
        self._update_pack_order(revision_nodes, revision_index_map)
        # copy revision keys and adjust values
        self.pb.update("Copying revision texts", 1)
        total_items, readv_group_iter = self._revision_node_readv(
            revision_nodes)
        list(self._copy_nodes_graph(revision_index_map, self.new_pack._writer,
                                    self.new_pack.revision_index, readv_group_iter, total_items))
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
                         time.ctime(), self._pack_collection._upload_transport.base,
                         self.new_pack.random_name,
                         self.new_pack.revision_index.key_count(),
                         time.time() - self.new_pack.start_time)
        self._revision_keys = revision_keys

    def _get_text_nodes(self):
        text_index_map, text_indices = self._pack_map_and_index_list(
            'text_index')
        return text_index_map, self._index_contents(text_indices,
                                                    self._text_filter)

    def _copy_text_texts(self):
        # select text keys
        text_index_map, text_nodes = self._get_text_nodes()
        if self._text_filter is not None:
            # We could return the keys copied as part of the return value from
            # _copy_nodes_graph but this doesn't work all that well with the
            # need to get line output too, so we check separately, and as we're
            # going to buffer everything anyway, we check beforehand, which
            # saves reading knit data over the wire when we know there are
            # missing records.
            text_nodes = set(text_nodes)
            present_text_keys = set(_node[1] for _node in text_nodes)
            missing_text_keys = set(self._text_filter) - present_text_keys
            if missing_text_keys:
                # TODO: raise a specific error that can handle many missing
                # keys.
                trace.mutter("missing keys during fetch: %r",
                             missing_text_keys)
                a_missing_key = missing_text_keys.pop()
                raise errors.RevisionNotPresent(a_missing_key[1],
                                                a_missing_key[0])
        # copy text keys and adjust values
        self.pb.update("Copying content texts", 3)
        total_items, readv_group_iter = self._least_readv_node_readv(
            text_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
                                    self.new_pack.text_index, readv_group_iter, total_items))
        self._log_copied_texts()

    def _create_pack_from_packs(self):
        self.pb.update("Opening pack", 0, 5)
        self.new_pack = self.open_pack()
        new_pack = self.new_pack
        # buffer data - we won't be reading-back during the pack creation and
        # this makes a significant difference on sftp pushes.
        new_pack.set_write_cache_size(1024 * 1024)
        if 'pack' in debug.debug_flags:
            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
                               for a_pack in self.packs]
            if self.revision_ids is not None:
                rev_count = len(self.revision_ids)
            else:
                rev_count = 'all'
            trace.mutter('%s: create_pack: creating pack from source packs: '
                         '%s%s %s revisions wanted %s t=0',
                         time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                         plain_pack_list, rev_count)
        self._copy_revision_texts()
        self._copy_inventory_texts()
        self._copy_text_texts()
        # select signature keys
        signature_filter = self._revision_keys  # same keyspace
        signature_index_map, signature_indices = self._pack_map_and_index_list(
            'signature_index')
        signature_nodes = self._index_contents(signature_indices,
                                               signature_filter)
        # copy signature keys and adjust values
        self.pb.update("Copying signature texts", 4)
        self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
                         new_pack.signature_index)
        if 'pack' in debug.debug_flags:
            trace.mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
                         time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
                         new_pack.signature_index.key_count(),
                         time.time() - new_pack.start_time)
        new_pack._check_references()
        if not self._use_pack(new_pack):
            new_pack.abort()
            return None
        self.pb.update("Finishing pack", 5)
        new_pack.finish()
        self._pack_collection.allocate(new_pack)
        return new_pack

    def _least_readv_node_readv(self, nodes):
        """Generate request groups for nodes using the least readv's.

        :param nodes: An iterable of graph index nodes.
        :return: Total node count and an iterator of the data needed to perform
            readvs to obtain the data for nodes. Each item yielded by the
            iterator is a tuple with:
            index, readv_vector, node_vector. readv_vector is a list ready to
            hand to the transport readv method, and node_vector is a list of
            (key, eol_flag, references) for the node retrieved by the
            matching readv_vector.
        """
        # group by pack so we do one readv per pack
        nodes = sorted(nodes)
        total = len(nodes)
        request_groups = {}
        for index, key, value, references in nodes:
            if index not in request_groups:
                request_groups[index] = []
            request_groups[index].append((key, value, references))
        result = []
        for index, items in viewitems(request_groups):
            pack_readv_requests = []
            for key, value, references in items:
                # ---- KnitGraphIndex.get_position
                bits = value[1:].split(b' ')
                offset, length = int(bits[0]), int(bits[1])
                pack_readv_requests.append(
                    ((offset, length), (key, value[0:1], references)))
            # linear scan up the pack to maximum range combining.
            pack_readv_requests.sort()
            # split out the readv and the node data.
            pack_readv = [readv for readv, node in pack_readv_requests]
            node_vector = [node for readv, node in pack_readv_requests]
            result.append((index, pack_readv, node_vector))
        return total, result

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        return self._least_readv_node_readv(revision_nodes)
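
    # Illustrative shape of one request group returned above (editorial; the
    # keys, flags and offsets are invented):
    #   (index,
    #    [(0, 123), (123, 88)],                # readv_vector
    #    [((b'rev-1',), b' ', ((),)),          # node_vector entries:
    #     ((b'rev-2',), b' ', (((b'rev-1',),),))])  # (key, eol_flag, refs)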


class KnitReconcilePacker(KnitPacker):
    """A packer which regenerates indices etc as it copies.

    This is used by ``brz reconcile`` to cause parent text pointers to be
    regenerated.
    """

    def __init__(self, *args, **kwargs):
        super(KnitReconcilePacker, self).__init__(*args, **kwargs)
        self._data_changed = False

    def _process_inventory_lines(self, inv_lines):
        """Generate a text key reference map for reconciling with."""
        repo = self._pack_collection.repo
        refs = repo._serializer._find_text_key_references(inv_lines)
        self._text_refs = refs
        # during reconcile we:
        #  - convert unreferenced texts to full texts
        #  - correct texts which reference a text not copied to be full texts
        #  - copy all others as-is but with corrected parents.
        #  - so at this point we don't know enough to decide what becomes a full
        #    text.
        self._text_filter = None

    def _copy_text_texts(self):
        """Generate what texts we should have and then copy."""
        self.pb.update("Copying content texts", 3)
        # we have three major tasks here:
        # 1) generate the ideal index
        repo = self._pack_collection.repo
        ancestors = dict([(key[0], tuple(ref[0] for ref in refs[0])) for
                          _1, key, _2, refs in
                          self.new_pack.revision_index.iter_all_entries()])
        ideal_index = repo._generate_text_key_index(self._text_refs, ancestors)
        # 2) generate a text_nodes list that contains all the deltas that can
        #    be used as-is, with corrected parents.
        ok_nodes = []
        bad_texts = []
        discarded_nodes = []
        NULL_REVISION = _mod_revision.NULL_REVISION
        text_index_map, text_nodes = self._get_text_nodes()
        for node in text_nodes:
            # 0 - index
            # 1 - key
            # 2 - value
            # 3 - refs
            try:
                ideal_parents = tuple(ideal_index[node[1]])
            except KeyError:
                discarded_nodes.append(node)
                self._data_changed = True
            else:
                if ideal_parents == (NULL_REVISION,):
                    ideal_parents = ()
                if ideal_parents == node[3][0]:
                    # no change needed.
                    ok_nodes.append(node)
                elif ideal_parents[0:1] == node[3][0][0:1]:
                    # the left most parent is the same, or there are no parents
                    # today. Either way, we can preserve the representation as
                    # long as we change the refs to be inserted.
                    self._data_changed = True
                    ok_nodes.append((node[0], node[1], node[2],
                                     (ideal_parents, node[3][1])))
                else:
                    # Reinsert this text completely
                    bad_texts.append((node[1], ideal_parents))
                    self._data_changed = True
        # we're finished with some data.
        del ideal_index
        del text_nodes
        # 3) bulk copy the ok data
        total_items, readv_group_iter = self._least_readv_node_readv(ok_nodes)
        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
                                    self.new_pack.text_index, readv_group_iter, total_items))
        # 4) adhoc copy all the other texts.
        # We have to topologically insert all texts otherwise we can fail to
        # reconcile when parts of a single delta chain are preserved intact,
        # and other parts are not. E.g. Discarded->d1->d2->d3. d1 will be
        # reinserted, and if d3 has incorrect parents it will also be
        # reinserted. If we insert d3 first, d2 is present (as it was bulk
        # copied), so we will try to delta, but d2 is not currently able to be
        # extracted because its basis d1 is not present. Topologically sorting
        # addresses this. The following generates a sort for all the texts that
        # are being inserted without having to reference the entire text key
        # space (we only topo sort the revisions, which is smaller).
        topo_order = tsort.topo_sort(ancestors)
        rev_order = dict(zip(topo_order, range(len(topo_order))))
        bad_texts.sort(key=lambda key: rev_order.get(key[0][1], 0))
        transaction = repo.get_transaction()
        file_id_index = GraphIndexPrefixAdapter(
            self.new_pack.text_index,
            ('blank', ), 1,
            add_nodes_callback=self.new_pack.text_index.add_nodes)
        data_access = _DirectPackAccess(
            {self.new_pack.text_index: self.new_pack.access_tuple()})
        data_access.set_writer(self.new_pack._writer, self.new_pack.text_index,
                               self.new_pack.access_tuple())
        output_texts = KnitVersionedFiles(
            _KnitGraphIndex(self.new_pack.text_index,
                            add_callback=self.new_pack.text_index.add_nodes,
                            deltas=True, parents=True, is_locked=repo.is_locked),
            data_access=data_access, max_delta_chain=200)
        for key, parent_keys in bad_texts:
            # We refer to the new pack to delta data being output.
            # A possible improvement would be to catch errors on short reads
            # and only flush then.
            self.new_pack.flush()
            parents = []
            for parent_key in parent_keys:
                if parent_key[0] != key[0]:
                    # Graph parents must match the fileid
                    raise errors.BzrError('Mismatched key parent %r:%r' %
                                          (key, parent_keys))
                parents.append(parent_key[1])
            text_lines = osutils.split_lines(next(repo.texts.get_record_stream(
                [key], 'unordered', True)).get_bytes_as('fulltext'))
            output_texts.add_lines(key, parent_keys, text_lines,
                                   random_id=True, check_content=False)
        # 5) check that nothing inserted has a reference outside the keyspace.
        missing_text_keys = self.new_pack.text_index._external_references()
        if missing_text_keys:
            raise errors.BzrCheckError('Reference to missing compression parents %r'
                                       % (missing_text_keys,))
        self._log_copied_texts()

    def _use_pack(self, new_pack):
        """Override _use_pack to check for reconcile having changed content."""
        # XXX: we might be better checking this at the copy time.
        original_inventory_keys = set()
        inv_index = self._pack_collection.inventory_index.combined_index
        for entry in inv_index.iter_all_entries():
            original_inventory_keys.add(entry[1])
        new_inventory_keys = set()
        for entry in new_pack.inventory_index.iter_all_entries():
            new_inventory_keys.add(entry[1])
        if new_inventory_keys != original_inventory_keys:
            self._data_changed = True
        return new_pack.data_inserted() and self._data_changed


class OptimisingKnitPacker(KnitPacker):
    """A packer which spends more time to create better disk layouts."""

    def _revision_node_readv(self, revision_nodes):
        """Return the total revisions and the readv's to issue.

        This sort places revisions in topological order with the ancestors
        after the children.

        :param revision_nodes: The revision index contents for the packs being
            incorporated into the new pack.
        :return: As per _least_readv_node_readv.
        """
        # build an ancestors dict
        ancestors = {}
        by_key = {}
        for index, key, value, references in revision_nodes:
            ancestors[key] = references[0]
            by_key[key] = (index, value, references)
        order = tsort.topo_sort(ancestors)
        total = len(order)
        # Single IO is pathological, but it will work as a starting point.
        requests = []
        for key in reversed(order):
            index, value, references = by_key[key]
            # ---- KnitGraphIndex.get_position
            bits = value[1:].split(b' ')
            offset, length = int(bits[0]), int(bits[1])
            requests.append(
                (index, [(offset, length)], [(key, value[0:1], references)]))
        # TODO: combine requests in the same index that are in ascending order.
        return total, requests

    def open_pack(self):
        """Open a pack for the pack we are creating."""
        new_pack = super(OptimisingKnitPacker, self).open_pack()
        # Turn on the optimization flags for all the index builders.
        new_pack.revision_index.set_optimize(for_size=True)
        new_pack.inventory_index.set_optimize(for_size=True)
        new_pack.text_index.set_optimize(for_size=True)
        new_pack.signature_index.set_optimize(for_size=True)
        return new_pack


class KnitRepositoryPackCollection(RepositoryPackCollection):
    """A knit pack collection."""

    pack_factory = NewPack
    resumed_pack_factory = ResumedPack
    normal_packer_class = KnitPacker
    optimising_packer_class = OptimisingKnitPacker