154
151
texts/deltas (via (fileid, revisionid) tuples).
155
152
:param signature_index: A GraphIndex for determining what signatures are
156
153
present in the Pack and accessing the locations of their texts.
157
:param chk_index: A GraphIndex for accessing content by CHK, if the
160
155
self.revision_index = revision_index
161
156
self.inventory_index = inventory_index
162
157
self.text_index = text_index
163
158
self.signature_index = signature_index
164
self.chk_index = chk_index
166
160
def access_tuple(self):
167
161
"""Return a tuple (transport, name) for the pack content."""
228
222
return self.index_name('text', name)
230
224
def _replace_index_with_readonly(self, index_type):
231
unlimited_cache = False
232
if index_type == 'chk':
233
unlimited_cache = True
234
225
setattr(self, index_type + '_index',
235
226
self.index_class(self.index_transport,
236
227
self.index_name(index_type, self.name),
237
self.index_sizes[self.index_offset(index_type)],
238
unlimited_cache=unlimited_cache))
228
self.index_sizes[self.index_offset(index_type)]))
241
231
class ExistingPack(Pack):
242
232
"""An in memory proxy for an existing .pack and its disk indices."""
244
234
def __init__(self, pack_transport, name, revision_index, inventory_index,
245
text_index, signature_index, chk_index=None):
235
text_index, signature_index):
246
236
"""Create an ExistingPack object.
248
238
:param pack_transport: The transport where the pack file resides.
249
239
:param name: The name of the pack on disk in the pack_transport.
251
241
Pack.__init__(self, revision_index, inventory_index, text_index,
252
signature_index, chk_index)
254
244
self.pack_transport = pack_transport
255
245
if None in (revision_index, inventory_index, text_index,
273
263
def __init__(self, name, revision_index, inventory_index, text_index,
274
264
signature_index, upload_transport, pack_transport, index_transport,
275
pack_collection, chk_index=None):
276
266
"""Create a ResumedPack object."""
277
267
ExistingPack.__init__(self, pack_transport, name, revision_index,
278
inventory_index, text_index, signature_index,
268
inventory_index, text_index, signature_index)
280
269
self.upload_transport = upload_transport
281
270
self.index_transport = index_transport
282
271
self.index_sizes = [None, None, None, None]
309
295
self.upload_transport.delete(self.file_name())
310
296
indices = [self.revision_index, self.inventory_index, self.text_index,
311
297
self.signature_index]
312
if self.chk_index is not None:
313
indices.append(self.chk_index)
314
298
for index in indices:
315
299
index._transport.delete(index._name)
317
301
def finish(self):
318
302
self._check_references()
319
index_types = ['revision', 'inventory', 'text', 'signature']
320
if self.chk_index is not None:
321
index_types.append('chk')
322
for index_type in index_types:
303
new_name = '../packs/' + self.file_name()
304
self.upload_transport.rename(self.file_name(), new_name)
305
for index_type in ['revision', 'inventory', 'text', 'signature']:
323
306
old_name = self.index_name(index_type, self.name)
324
307
new_name = '../indices/' + old_name
325
308
self.upload_transport.rename(old_name, new_name)
326
309
self._replace_index_with_readonly(index_type)
327
new_name = '../packs/' + self.file_name()
328
self.upload_transport.rename(self.file_name(), new_name)
329
310
self._state = 'finished'
331
312
def _get_external_refs(self, index):
332
"""Return compression parents for this index that are not present.
334
This returns any compression parents that are referenced by this index,
335
which are not contained *in* this index. They may be present elsewhere.
337
313
return index.external_references(1)
385
355
self._file_mode = file_mode
386
356
# tracks the content written to the .pack file.
387
357
self._hash = osutils.md5()
388
# a tuple with the length in bytes of the indices, once the pack
389
# is finalised. (rev, inv, text, sigs, chk_if_in_use)
358
# a four-tuple with the length in bytes of the indices, once the pack
359
# is finalised. (rev, inv, text, sigs)
390
360
self.index_sizes = None
391
361
# How much data to cache when writing packs. Note that this is not
392
362
# synchronised with reads, because it's not in the transport layer, so
455
423
return bool(self.get_revision_count() or
456
424
self.inventory_index.key_count() or
457
425
self.text_index.key_count() or
458
self.signature_index.key_count() or
459
(self.chk_index is not None and self.chk_index.key_count()))
461
def finish_content(self):
462
if self.name is not None:
466
self._write_data('', flush=True)
467
self.name = self._hash.hexdigest()
426
self.signature_index.key_count())
469
428
def finish(self, suspend=False):
470
429
"""Finish the new pack.
492
454
self._write_index('text', self.text_index, 'file texts', suspend)
493
455
self._write_index('signature', self.signature_index,
494
456
'revision signatures', suspend)
495
if self.chk_index is not None:
496
self.index_sizes.append(None)
497
self._write_index('chk', self.chk_index,
498
'content hash bytes', suspend)
499
457
self.write_stream.close()
500
458
# Note that this will clobber an existing pack with the same name,
501
459
# without checking for hash collisions. While this is undesirable this
589
547
flush_func=flush_func)
590
548
self.add_callback = None
550
def replace_indices(self, index_to_pack, indices):
551
"""Replace the current mappings with fresh ones.
553
This should probably not be used eventually, rather incremental add and
554
removal of indices. It has been added during refactoring of existing
557
:param index_to_pack: A mapping from index objects to
558
(transport, name) tuples for the pack file data.
559
:param indices: A list of indices.
561
# refresh the revision pack map dict without replacing the instance.
562
self.index_to_pack.clear()
563
self.index_to_pack.update(index_to_pack)
564
# XXX: API break - clearly a 'replace' method would be good?
565
self.combined_index._indices[:] = indices
566
# the current add nodes callback for the current writable index if
568
self.add_callback = None
592
570
def add_index(self, index, pack):
593
571
"""Add index to the aggregate, which is an index for Pack pack.
627
605
self.data_access.set_writer(None, None, (None, None))
628
606
self.index_to_pack.clear()
629
607
del self.combined_index._indices[:]
630
del self.combined_index._index_names[:]
631
608
self.add_callback = None
633
def remove_index(self, index):
610
def remove_index(self, index, pack):
634
611
"""Remove index from the indices used to answer queries.
636
613
:param index: An index from the pack parameter.
614
:param pack: A Pack instance.
638
616
del self.index_to_pack[index]
639
pos = self.combined_index._indices.index(index)
640
del self.combined_index._indices[pos]
641
del self.combined_index._index_names[pos]
617
self.combined_index._indices.remove(index)
642
618
if (self.add_callback is not None and
643
619
getattr(index, 'add_nodes', None) == self.add_callback):
644
620
self.add_callback = None
922
897
time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
923
898
new_pack.signature_index.key_count(),
924
899
time.time() - new_pack.start_time)
926
# NB XXX: how to check CHK references are present? perhaps by yielding
927
# the items? How should that interact with stacked repos?
928
if new_pack.chk_index is not None:
930
if 'pack' in debug.debug_flags:
931
mutter('%s: create_pack: chk content copied: %s%s %d items t+%6.3fs',
932
time.ctime(), self._pack_collection._upload_transport.base,
933
new_pack.random_name,
934
new_pack.chk_index.key_count(),
935
time.time() - new_pack.start_time)
936
900
new_pack._check_references()
937
901
if not self._use_pack(new_pack):
942
906
self._pack_collection.allocate(new_pack)
945
def _copy_chks(self, refs=None):
946
# XXX: Todo, recursive follow-pointers facility when fetching some
948
chk_index_map, chk_indices = self._pack_map_and_index_list(
950
chk_nodes = self._index_contents(chk_indices, refs)
952
# TODO: This isn't strictly tasteful as we are accessing some private
953
# variables (_serializer). Perhaps a better way would be to have
954
# Repository._deserialise_chk_node()
955
search_key_func = chk_map.search_key_registry.get(
956
self._pack_collection.repo._serializer.search_key_name)
957
def accumlate_refs(lines):
958
# XXX: move to a generic location
960
bytes = ''.join(lines)
961
node = chk_map._deserialise(bytes, ("unknown",), search_key_func)
962
new_refs.update(node.refs())
963
self._copy_nodes(chk_nodes, chk_index_map, self.new_pack._writer,
964
self.new_pack.chk_index, output_lines=accumlate_refs)
967
def _copy_nodes(self, nodes, index_map, writer, write_index,
969
"""Copy knit nodes between packs with no graph references.
971
:param output_lines: Output full texts of copied items.
909
def _copy_nodes(self, nodes, index_map, writer, write_index):
910
"""Copy knit nodes between packs with no graph references."""
973
911
pb = ui.ui_factory.nested_progress_bar()
975
913
return self._do_copy_nodes(nodes, index_map, writer,
976
write_index, pb, output_lines=output_lines)
980
def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
918
def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb):
982
919
# for record verification
983
920
knit = KnitVersionedFiles(None, None)
984
921
# plan a readv on each source pack:
1018
955
izip(reader.iter_records(), pack_readv_requests):
1019
956
raw_data = read_func(None)
1020
957
# check the header only
1021
if output_lines is not None:
1022
output_lines(knit._parse_record(key[-1], raw_data)[0])
1024
df, _ = knit._parse_record_header(key, raw_data)
958
df, _ = knit._parse_record_header(key, raw_data)
1026
960
pos, size = writer.add_bytes_record(raw_data, names)
1027
961
write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
1028
962
pb.update("Copied record", record_index)
1358
1292
:ivar _names: map of {pack_name: (index_size,)}
1361
pack_factory = NewPack
1362
resumed_pack_factory = ResumedPack
1364
1295
def __init__(self, repo, transport, index_transport, upload_transport,
1365
pack_transport, index_builder_class, index_class,
1296
pack_transport, index_builder_class, index_class):
1367
1297
"""Create a new RepositoryPackCollection.
1369
1299
:param transport: Addresses the repository base directory
1400
1328
self.inventory_index = AggregateIndex(self.reload_pack_names, flush)
1401
1329
self.text_index = AggregateIndex(self.reload_pack_names, flush)
1402
1330
self.signature_index = AggregateIndex(self.reload_pack_names, flush)
1403
all_indices = [self.revision_index, self.inventory_index,
1404
self.text_index, self.signature_index]
1406
self.chk_index = AggregateIndex(self.reload_pack_names, flush)
1407
all_indices.append(self.chk_index)
1409
# used to determine if we're using a chk_index elsewhere.
1410
self.chk_index = None
1411
# Tell all the CombinedGraphIndex objects about each other, so they can
1412
# share hints about which pack names to search first.
1413
all_combined = [agg_idx.combined_index for agg_idx in all_indices]
1414
for combined_idx in all_combined:
1415
combined_idx.set_sibling_indices(
1416
set(all_combined).difference([combined_idx]))
1417
1331
# resumed packs
1418
1332
self._resumed_packs = []
1421
return '%s(%r)' % (self.__class__.__name__, self.repo)
1423
1334
def add_pack_to_memory(self, pack):
1424
1335
"""Make a Pack object available to the repository to satisfy queries.
1506
1417
'containing %d revisions. Packing %d files into %d affecting %d'
1507
1418
' revisions', self, total_packs, total_revisions, num_old_packs,
1508
1419
num_new_packs, num_revs_affected)
1509
result = self._execute_pack_operations(pack_operations,
1420
self._execute_pack_operations(pack_operations,
1510
1421
reload_func=self._restart_autopack)
1511
mutter('Auto-packing repository %s completed', self)
1514
1424
def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
1515
1425
reload_func=None):
1539
1449
self._remove_pack_from_memory(pack)
1540
1450
# record the newly available packs and stop advertising the old
1542
to_be_obsoleted = []
1543
for _, packs in pack_operations:
1544
to_be_obsoleted.extend(packs)
1545
result = self._save_pack_names(clear_obsolete_packs=True,
1546
obsolete_packs=to_be_obsoleted)
1452
self._save_pack_names(clear_obsolete_packs=True)
1453
# Move the old packs out of the way now they are no longer referenced.
1454
for revision_count, packs in pack_operations:
1455
self._obsolete_packs(packs)
1549
1457
def _flush_new_pack(self):
1550
1458
if self._new_pack is not None:
1559
1467
self.repo.control_files.lock_write()
1561
def _already_packed(self):
1562
"""Is the collection already packed?"""
1563
return not (self.repo._format.pack_compresses or (len(self._names) > 1))
1565
def pack(self, hint=None, clean_obsolete_packs=False):
1566
1470
"""Pack the pack collection totally."""
1567
1471
self.ensure_loaded()
1568
1472
total_packs = len(self._names)
1569
if self._already_packed():
1474
# This is arguably wrong because we might not be optimal, but for
1475
# now lets leave it in. (e.g. reconcile -> one pack. But not
1571
1478
total_revisions = self.revision_index.combined_index.key_count()
1572
1479
# XXX: the following may want to be a class, to pack with a given
1574
1481
mutter('Packing repository %s, which has %d pack files, '
1575
'containing %d revisions with hint %r.', self, total_packs,
1576
total_revisions, hint)
1482
'containing %d revisions into 1 packs.', self, total_packs,
1577
1484
# determine which packs need changing
1485
pack_distribution = [1]
1578
1486
pack_operations = [[0, []]]
1579
1487
for pack in self.all_packs():
1580
if hint is None or pack.name in hint:
1581
# Either no hint was provided (so we are packing everything),
1582
# or this pack was included in the hint.
1583
pack_operations[-1][0] += pack.get_revision_count()
1584
pack_operations[-1][1].append(pack)
1488
pack_operations[-1][0] += pack.get_revision_count()
1489
pack_operations[-1][1].append(pack)
1585
1490
self._execute_pack_operations(pack_operations, OptimisingPacker)
1587
if clean_obsolete_packs:
1588
self._clear_obsolete_packs()
1590
1492
def plan_autopack_combinations(self, existing_packs, pack_distribution):
1591
1493
"""Plan a pack operation.
1679
1581
inv_index = self._make_index(name, '.iix')
1680
1582
txt_index = self._make_index(name, '.tix')
1681
1583
sig_index = self._make_index(name, '.six')
1682
if self.chk_index is not None:
1683
chk_index = self._make_index(name, '.cix', unlimited_cache=True)
1686
1584
result = ExistingPack(self._pack_transport, name, rev_index,
1687
inv_index, txt_index, sig_index, chk_index)
1585
inv_index, txt_index, sig_index)
1688
1586
self.add_pack_to_memory(result)
1704
1602
inv_index = self._make_index(name, '.iix', resume=True)
1705
1603
txt_index = self._make_index(name, '.tix', resume=True)
1706
1604
sig_index = self._make_index(name, '.six', resume=True)
1707
if self.chk_index is not None:
1708
chk_index = self._make_index(name, '.cix', resume=True,
1709
unlimited_cache=True)
1712
result = self.resumed_pack_factory(name, rev_index, inv_index,
1713
txt_index, sig_index, self._upload_transport,
1714
self._pack_transport, self._index_transport, self,
1715
chk_index=chk_index)
1605
result = ResumedPack(name, rev_index, inv_index, txt_index,
1606
sig_index, self._upload_transport, self._pack_transport,
1607
self._index_transport, self)
1716
1608
except errors.NoSuchFile, e:
1717
1609
raise errors.UnresumableWriteGroup(self.repo, [name], str(e))
1718
1610
self.add_pack_to_memory(result)
1786
1677
:return: None.
1788
1679
for pack in packs:
1790
pack.pack_transport.rename(pack.file_name(),
1791
'../obsolete_packs/' + pack.file_name())
1792
except (errors.PathError, errors.TransportError), e:
1793
# TODO: Should these be warnings or mutters?
1794
mutter("couldn't rename obsolete pack, skipping it:\n%s"
1680
pack.pack_transport.rename(pack.file_name(),
1681
'../obsolete_packs/' + pack.file_name())
1796
1682
# TODO: Probably needs to know all possible indices for this pack
1797
1683
# - or maybe list the directory and move all indices matching this
1798
1684
# name whether we recognize it or not?
1799
suffixes = ['.iix', '.six', '.tix', '.rix']
1800
if self.chk_index is not None:
1801
suffixes.append('.cix')
1802
for suffix in suffixes:
1804
self._index_transport.rename(pack.name + suffix,
1805
'../obsolete_packs/' + pack.name + suffix)
1806
except (errors.PathError, errors.TransportError), e:
1807
mutter("couldn't rename obsolete index, skipping it:\n%s"
1685
for suffix in ('.iix', '.six', '.tix', '.rix'):
1686
self._index_transport.rename(pack.name + suffix,
1687
'../obsolete_packs/' + pack.name + suffix)
1810
1689
def pack_distribution(self, total_revisions):
1811
1690
"""Generate a list of the number of revisions to put in each pack.
1837
1716
self._remove_pack_indices(pack)
1838
1717
self.packs.remove(pack)
1840
def _remove_pack_indices(self, pack, ignore_missing=False):
1841
"""Remove the indices for pack from the aggregated indices.
1843
:param ignore_missing: Suppress KeyErrors from calling remove_index.
1845
for index_type in Pack.index_definitions.keys():
1846
attr_name = index_type + '_index'
1847
aggregate_index = getattr(self, attr_name)
1848
if aggregate_index is not None:
1849
pack_index = getattr(pack, attr_name)
1851
aggregate_index.remove_index(pack_index)
1719
def _remove_pack_indices(self, pack):
1720
"""Remove the indices for pack from the aggregated indices."""
1721
self.revision_index.remove_index(pack.revision_index, pack)
1722
self.inventory_index.remove_index(pack.inventory_index, pack)
1723
self.text_index.remove_index(pack.text_index, pack)
1724
self.signature_index.remove_index(pack.signature_index, pack)
1857
1726
def reset(self):
1858
1727
"""Clear all cached data."""
1859
1728
# cached revision data
1729
self.repo._revision_knit = None
1860
1730
self.revision_index.clear()
1861
1731
# cached signature data
1732
self.repo._signature_knit = None
1862
1733
self.signature_index.clear()
1863
1734
# cached file text data
1864
1735
self.text_index.clear()
1736
self.repo._text_knit = None
1865
1737
# cached inventory data
1866
1738
self.inventory_index.clear()
1868
if self.chk_index is not None:
1869
self.chk_index.clear()
1870
1739
# remove the open pack
1871
1740
self._new_pack = None
1872
1741
# information about packs.
1967
1835
:param clear_obsolete_packs: If True, clear out the contents of the
1968
1836
obsolete_packs directory.
1969
:param obsolete_packs: Packs that are obsolete once the new pack-names
1970
file has been written.
1971
:return: A list of the names saved that were not previously on disk.
1973
already_obsolete = []
1974
1838
self.lock_names()
1976
1840
builder = self._index_builder_class()
1977
(disk_nodes, deleted_nodes, new_nodes,
1978
orig_disk_nodes) = self._diff_pack_names()
1841
disk_nodes, deleted_nodes, new_nodes = self._diff_pack_names()
1979
1842
# TODO: handle same-name, index-size-changes here -
1980
1843
# e.g. use the value from disk, not ours, *unless* we're the one
1983
1846
builder.add_node(key, value)
1984
1847
self.transport.put_file('pack-names', builder.finish(),
1985
1848
mode=self.repo.bzrdir._get_file_mode())
1849
# move the baseline forward
1986
1850
self._packs_at_load = disk_nodes
1987
1851
if clear_obsolete_packs:
1990
to_preserve = set([o.name for o in obsolete_packs])
1991
already_obsolete = self._clear_obsolete_packs(to_preserve)
1852
self._clear_obsolete_packs()
1993
1854
self._unlock_names()
1994
1855
# synchronise the memory packs list with what we just wrote:
1995
1856
self._syncronize_pack_names_from_disk_nodes(disk_nodes)
1997
# TODO: We could add one more condition here. "if o.name not in
1998
# orig_disk_nodes and o != the new_pack we haven't written to
1999
# disk yet. However, the new pack object is not easily
2000
# accessible here (it would have to be passed through the
2001
# autopacking code, etc.)
2002
obsolete_packs = [o for o in obsolete_packs
2003
if o.name not in already_obsolete]
2004
self._obsolete_packs(obsolete_packs)
2005
return [new_node[0][0] for new_node in new_nodes]
2007
1858
def reload_pack_names(self):
2008
1859
"""Sync our pack listing with what is present in the repository.
2024
1875
# out the new value.
2025
(disk_nodes, deleted_nodes, new_nodes,
2026
orig_disk_nodes) = self._diff_pack_names()
2027
# _packs_at_load is meant to be the explicit list of names in
2028
# 'pack-names' at the start. As such, it should not contain any
2029
# pending names that haven't been written out yet.
2030
self._packs_at_load = orig_disk_nodes
1876
disk_nodes, _, _ = self._diff_pack_names()
1877
self._packs_at_load = disk_nodes
2031
1878
(removed, added,
2032
1879
modified) = self._syncronize_pack_names_from_disk_nodes(disk_nodes)
2033
1880
if removed or added or modified:
2043
1890
raise errors.RetryAutopack(self.repo, False, sys.exc_info())
2045
def _clear_obsolete_packs(self, preserve=None):
1892
def _clear_obsolete_packs(self):
2046
1893
"""Delete everything from the obsolete-packs directory.
2048
:return: A list of pack identifiers (the filename without '.pack') that
2049
were found in obsolete_packs.
2052
1895
obsolete_pack_transport = self.transport.clone('obsolete_packs')
2053
if preserve is None:
2055
1896
for filename in obsolete_pack_transport.list_dir('.'):
2056
name, ext = osutils.splitext(filename)
2059
if name in preserve:
2062
1898
obsolete_pack_transport.delete(filename)
2063
1899
except (errors.PathError, errors.TransportError), e:
2064
warning("couldn't delete obsolete pack, skipping it:\n%s"
1900
warning("couldn't delete obsolete pack, skipping it:\n%s" % (e,))
2068
1902
def _start_write_group(self):
2069
1903
# Do not permit preparation for writing if we're not in a 'write lock'.
2070
1904
if not self.repo.is_write_locked():
2071
1905
raise errors.NotWriteLocked(self)
2072
self._new_pack = self.pack_factory(self, upload_suffix='.pack',
1906
self._new_pack = NewPack(self, upload_suffix='.pack',
2073
1907
file_mode=self.repo.bzrdir._get_file_mode())
2074
1908
# allow writing: queue writes to a new index
2075
1909
self.revision_index.add_writable_index(self._new_pack.revision_index,
2078
1912
self._new_pack)
2079
1913
self.text_index.add_writable_index(self._new_pack.text_index,
2080
1914
self._new_pack)
2081
self._new_pack.text_index.set_optimize(combine_backing_indices=False)
2082
1915
self.signature_index.add_writable_index(self._new_pack.signature_index,
2083
1916
self._new_pack)
2084
if self.chk_index is not None:
2085
self.chk_index.add_writable_index(self._new_pack.chk_index,
2087
self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback
2088
self._new_pack.chk_index.set_optimize(combine_backing_indices=False)
2090
1918
self.repo.inventories._index._add_callback = self.inventory_index.add_callback
2091
1919
self.repo.revisions._index._add_callback = self.revision_index.add_callback
2096
1924
# FIXME: just drop the transient index.
2097
1925
# forget what names there are
2098
1926
if self._new_pack is not None:
2099
operation = cleanup.OperationWithCleanups(self._new_pack.abort)
2100
operation.add_cleanup(setattr, self, '_new_pack', None)
2101
# If we aborted while in the middle of finishing the write
2102
# group, _remove_pack_indices could fail because the indexes are
2103
# already gone. If they're not there we shouldn't fail in this
2104
# case, so we pass ignore_missing=True.
2105
operation.add_cleanup(self._remove_pack_indices, self._new_pack,
2106
ignore_missing=True)
2107
operation.run_simple()
1928
self._new_pack.abort()
1930
# XXX: If we aborted while in the middle of finishing the write
1931
# group, _remove_pack_indices can fail because the indexes are
1932
# already gone. If they're not there we shouldn't fail in this
1933
# case. -- mbp 20081113
1934
self._remove_pack_indices(self._new_pack)
1935
self._new_pack = None
2108
1936
for resumed_pack in self._resumed_packs:
2109
operation = cleanup.OperationWithCleanups(resumed_pack.abort)
2110
# See comment in previous finally block.
2111
operation.add_cleanup(self._remove_pack_indices, resumed_pack,
2112
ignore_missing=True)
2113
operation.run_simple()
1938
resumed_pack.abort()
1940
# See comment in previous finally block.
1942
self._remove_pack_indices(resumed_pack)
2114
1945
del self._resumed_packs[:]
1946
self.repo._text_knit = None
2116
1948
def _remove_resumed_pack_indices(self):
2117
1949
for resumed_pack in self._resumed_packs:
2118
1950
self._remove_pack_indices(resumed_pack)
2119
1951
del self._resumed_packs[:]
2121
def _check_new_inventories(self):
2122
"""Detect missing inventories in this write group.
2124
:returns: list of strs, summarising any problems found. If the list is
2125
empty no problems were found.
2127
# The base implementation does no checks. GCRepositoryPackCollection
2131
1953
def _commit_write_group(self):
2132
1954
all_missing = set()
2133
1955
for prefix, versioned_file in (
2142
1964
raise errors.BzrCheckError(
2143
1965
"Repository %s has missing compression parent(s) %r "
2144
1966
% (self.repo, sorted(all_missing)))
2145
problems = self._check_new_inventories()
2147
problems_summary = '\n'.join(problems)
2148
raise errors.BzrCheckError(
2149
"Cannot add revision(s) to repository: " + problems_summary)
2150
1967
self._remove_pack_indices(self._new_pack)
2151
any_new_content = False
1968
should_autopack = False
2152
1969
if self._new_pack.data_inserted():
2153
1970
# get all the data to disk and ready to use
2154
1971
self._new_pack.finish()
2155
1972
self.allocate(self._new_pack)
2156
1973
self._new_pack = None
2157
any_new_content = True
1974
should_autopack = True
2159
1976
self._new_pack.abort()
2160
1977
self._new_pack = None
2242
2056
self.revisions = KnitVersionedFiles(
2243
2057
_KnitGraphIndex(self._pack_collection.revision_index.combined_index,
2244
2058
add_callback=self._pack_collection.revision_index.add_callback,
2245
deltas=False, parents=True, is_locked=self.is_locked,
2246
track_external_parent_refs=True),
2059
deltas=False, parents=True, is_locked=self.is_locked),
2247
2060
data_access=self._pack_collection.revision_index.data_access,
2248
2061
max_delta_chain=0)
2249
2062
self.signatures = KnitVersionedFiles(
2258
2071
deltas=True, parents=True, is_locked=self.is_locked),
2259
2072
data_access=self._pack_collection.text_index.data_access,
2260
2073
max_delta_chain=200)
2261
if _format.supports_chks:
2262
# No graph, no compression:- references from chks are between
2263
# different objects not temporal versions of the same; and without
2264
# some sort of temporal structure knit compression will just fail.
2265
self.chk_bytes = KnitVersionedFiles(
2266
_KnitGraphIndex(self._pack_collection.chk_index.combined_index,
2267
add_callback=self._pack_collection.chk_index.add_callback,
2268
deltas=False, parents=False, is_locked=self.is_locked),
2269
data_access=self._pack_collection.chk_index.data_access,
2272
self.chk_bytes = None
2273
2074
# True when the repository object is 'write locked' (as opposed to the
2274
2075
# physical lock only taken out around changes to the pack-names list.)
2275
2076
# Another way to represent this would be a decorator around the control
2282
2083
self._reconcile_fixes_text_parents = True
2283
2084
self._reconcile_backsup_inventory = False
2285
def _warn_if_deprecated(self, branch=None):
2086
def _warn_if_deprecated(self):
2286
2087
# This class isn't deprecated, but one sub-format is
2287
2088
if isinstance(self._format, RepositoryFormatKnitPack5RichRootBroken):
2288
super(KnitPackRepository, self)._warn_if_deprecated(branch)
2089
from bzrlib import repository
2090
if repository._deprecation_warning_done:
2092
repository._deprecation_warning_done = True
2093
warning("Format %s for %s is deprecated - please use"
2094
" 'bzr upgrade --1.6.1-rich-root'"
2095
% (self._format, self.bzrdir.transport.base))
2290
2097
def _abort_write_group(self):
2291
self.revisions._index._key_dependencies.clear()
2292
2098
self._pack_collection._abort_write_group()
2294
def _get_source(self, to_format):
    """Return the stream source to use when sending data to ``to_format``.

    :param to_format: The target repository format; only its
        ``network_name()`` is consulted here.
    :return: A ``KnitPackStreamSource`` when the target format's network
        name equals this repository's, otherwise whatever the superclass
        ``_get_source`` returns.
    """
    if to_format.network_name() == self._format.network_name():
        return KnitPackStreamSource(self, to_format)
    return super(KnitPackRepository, self)._get_source(to_format)
2100
def _find_inconsistent_revision_parents(self):
2101
"""Find revisions with incorrectly cached parents.
2103
:returns: an iterator yielding tuples of (revision-id, parents-in-index,
2104
parents-in-revision).
2106
if not self.is_locked():
2107
raise errors.ObjectNotLocked(self)
2108
pb = ui.ui_factory.nested_progress_bar()
2111
revision_nodes = self._pack_collection.revision_index \
2112
.combined_index.iter_all_entries()
2113
index_positions = []
2114
# Get the cached index values for all revisions, and also the location
2115
# in each index of the revision text so we can perform linear IO.
2116
for index, key, value, refs in revision_nodes:
2117
pos, length = value[1:].split(' ')
2118
index_positions.append((index, int(pos), key[0],
2119
tuple(parent[0] for parent in refs[0])))
2120
pb.update("Reading revision index", 0, 0)
2121
index_positions.sort()
2122
batch_count = len(index_positions) / 1000 + 1
2123
pb.update("Checking cached revision graph", 0, batch_count)
2124
for offset in xrange(batch_count):
2125
pb.update("Checking cached revision graph", offset)
2126
to_query = index_positions[offset * 1000:(offset + 1) * 1000]
2129
rev_ids = [item[2] for item in to_query]
2130
revs = self.get_revisions(rev_ids)
2131
for revision, item in zip(revs, to_query):
2132
index_parents = item[3]
2133
rev_parents = tuple(revision.parent_ids)
2134
if index_parents != rev_parents:
2135
result.append((revision.revision_id, index_parents, rev_parents))
2299
2140
def _make_parents_provider(self):
2300
2141
return graph.CachingParentsProvider(self)
2308
2149
self._pack_collection._start_write_group()
2310
2151
def _commit_write_group(self):
2311
hint = self._pack_collection._commit_write_group()
2312
self.revisions._index._key_dependencies.clear()
2152
return self._pack_collection._commit_write_group()
2315
2154
def suspend_write_group(self):
2316
2155
# XXX check self._write_group is self.get_transaction()?
2317
2156
tokens = self._pack_collection._suspend_write_group()
2318
self.revisions._index._key_dependencies.clear()
2319
2157
self._write_group = None
2322
2160
def _resume_write_group(self, tokens):
2323
2161
self._start_write_group()
2325
self._pack_collection._resume_write_group(tokens)
2326
except errors.UnresumableWriteGroup:
2327
self._abort_write_group()
2329
for pack in self._pack_collection._resumed_packs:
2330
self.revisions._index.scan_unvalidated_index(pack.revision_index)
2162
self._pack_collection._resume_write_group(tokens)
2332
2164
def get_transaction(self):
2333
2165
if self._write_lock_count:
2342
2174
return self._write_lock_count
2344
2176
def lock_write(self, token=None):
2345
"""Lock the repository for writes.
2347
:return: A bzrlib.repository.RepositoryWriteLockResult.
2349
2177
locked = self.is_locked()
2350
2178
if not self._write_lock_count and locked:
2351
2179
raise errors.ReadOnlyError(self)
2352
2180
self._write_lock_count += 1
2353
2181
if self._write_lock_count == 1:
2354
2182
self._transaction = transactions.WriteTransaction()
2356
if 'relock' in debug.debug_flags and self._prev_lock == 'w':
2357
note('%r was write locked again', self)
2358
self._prev_lock = 'w'
2359
2183
for repo in self._fallback_repositories:
2360
2184
# Writes don't affect fallback repos
2361
2185
repo.lock_read()
2362
2187
self._refresh_data()
2363
return RepositoryWriteLockResult(self.unlock, None)
2365
2189
def lock_read(self):
2366
"""Lock the repository for reads.
2368
:return: A bzrlib.lock.LogicalLockResult.
2370
2190
locked = self.is_locked()
2371
2191
if self._write_lock_count:
2372
2192
self._write_lock_count += 1
2374
2194
self.control_files.lock_read()
2376
if 'relock' in debug.debug_flags and self._prev_lock == 'r':
2377
note('%r was read locked again', self)
2378
self._prev_lock = 'r'
2379
2195
for repo in self._fallback_repositories:
2196
# Writes don't affect fallback repos
2380
2197
repo.lock_read()
2381
2199
self._refresh_data()
2382
return LogicalLockResult(self.unlock)
2384
2201
def leave_lock_in_place(self):
2385
2202
# not supported - raise an error
2425
2237
transaction = self._transaction
2426
2238
self._transaction = None
2427
2239
transaction.finish()
2240
for repo in self._fallback_repositories:
2429
2243
self.control_files.unlock()
2431
if not self.is_locked():
2432
2244
for repo in self._fallback_repositories:
2436
class KnitPackStreamSource(StreamSource):
2437
"""A StreamSource used to transfer data between same-format KnitPack repos.
2439
This source assumes:
2440
1) Same serialization format for all objects
2441
2) Same root information
2442
3) XML format inventories
2443
4) Atomic inserts (so we can stream inventory texts before text
2448
def __init__(self, from_repository, to_format):
2449
super(KnitPackStreamSource, self).__init__(from_repository, to_format)
2450
self._text_keys = None
2451
self._text_fetch_order = 'unordered'
2453
def _get_filtered_inv_stream(self, revision_ids):
2454
from_repo = self.from_repository
2455
parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
2456
parent_keys = [(p,) for p in parent_ids]
2457
find_text_keys = from_repo._find_text_key_references_from_xml_inventory_lines
2458
parent_text_keys = set(find_text_keys(
2459
from_repo._inventory_xml_lines_for_keys(parent_keys)))
2460
content_text_keys = set()
2461
knit = KnitVersionedFiles(None, None)
2462
factory = KnitPlainFactory()
2463
def find_text_keys_from_content(record):
2464
if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
2465
raise ValueError("Unknown content storage kind for"
2466
" inventory text: %s" % (record.storage_kind,))
2467
# It's a knit record, it has a _raw_record field (even if it was
2468
# reconstituted from a network stream).
2469
raw_data = record._raw_record
2470
# read the entire thing
2471
revision_id = record.key[-1]
2472
content, _ = knit._parse_record(revision_id, raw_data)
2473
if record.storage_kind == 'knit-delta-gz':
2474
line_iterator = factory.get_linedelta_content(content)
2475
elif record.storage_kind == 'knit-ft-gz':
2476
line_iterator = factory.get_fulltext_content(content)
2477
content_text_keys.update(find_text_keys(
2478
[(line, revision_id) for line in line_iterator]))
2479
revision_keys = [(r,) for r in revision_ids]
2480
def _filtered_inv_stream():
2481
source_vf = from_repo.inventories
2482
stream = source_vf.get_record_stream(revision_keys,
2484
for record in stream:
2485
if record.storage_kind == 'absent':
2486
raise errors.NoSuchRevision(from_repo, record.key)
2487
find_text_keys_from_content(record)
2489
self._text_keys = content_text_keys - parent_text_keys
2490
return ('inventories', _filtered_inv_stream())
2492
def _get_text_stream(self):
2493
# Note: We know we don't have to handle adding root keys, because both
2494
# the source and target are the identical network name.
2495
text_stream = self.from_repository.texts.get_record_stream(
2496
self._text_keys, self._text_fetch_order, False)
2497
return ('texts', text_stream)
2499
def get_stream(self, search):
2500
revision_ids = search.get_keys()
2501
for stream_info in self._fetch_revision_texts(revision_ids):
2503
self._revision_keys = [(rev_id,) for rev_id in revision_ids]
2504
yield self._get_filtered_inv_stream(revision_ids)
2505
yield self._get_text_stream()
2509
2248
class RepositoryFormatPack(MetaDirRepositoryFormat):
2510
2249
"""Format logic for pack structured repositories.
2558
2295
utf8_files = [('format', self.get_format_string())]
2560
2297
self._upload_blank_content(a_bzrdir, dirs, files, utf8_files, shared)
2561
repository = self.open(a_bzrdir=a_bzrdir, _found=True)
2562
self._run_post_repo_init_hooks(repository, a_bzrdir, shared)
2298
return self.open(a_bzrdir=a_bzrdir, _found=True)
2565
2300
def open(self, a_bzrdir, _found=False, _override_transport=None):
2566
2301
"""See RepositoryFormat.open().
2648
2385
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2387
def check_conversion_target(self, target_format):
2388
if not target_format.rich_root_data:
2389
raise errors.BadConversionTarget(
2390
'Does not support rich root data.', target_format)
2391
if not getattr(target_format, 'supports_tree_reference', False):
2392
raise errors.BadConversionTarget(
2393
'Does not support nested trees', target_format)
2650
2395
def get_format_string(self):
2651
2396
"""See RepositoryFormat.get_format_string()."""
2652
2397
return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"
2889
2660
return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"
2663
class RepositoryFormatPackDevelopment2(RepositoryFormatPack):
2664
"""A no-subtrees development repository.
2666
This format should be retained until the second release after bzr 1.7.
2668
This is pack-1.6.1 with B+Tree indices.
2671
repository_class = KnitPackRepository
2672
_commit_builder_class = PackCommitBuilder
2673
supports_external_lookups = True
2674
# What index classes to use
2675
index_builder_class = BTreeBuilder
2676
index_class = BTreeGraphIndex
2677
# Set to true to get the fast-commit code path tested until a really fast
2678
# format lands in trunk. Not actually fast in this format.
2682
def _serializer(self):
2683
return xml5.serializer_v5
2685
def _get_matching_bzrdir(self):
2686
return bzrdir.format_registry.make_bzrdir('development2')
2688
def _ignore_setting_bzrdir(self, format):
2691
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2693
def get_format_string(self):
2694
"""See RepositoryFormat.get_format_string()."""
2695
return "Bazaar development format 2 (needs bzr.dev from before 1.8)\n"
2697
def get_format_description(self):
2698
"""See RepositoryFormat.get_format_description()."""
2699
return ("Development repository format, currently the same as "
2700
"1.6.1 with B+Trees.\n")
2702
def check_conversion_target(self, target_format):
2892
2706
class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
2893
2707
"""A subtrees development repository.
2895
2709
This format should be retained until the second release after bzr 1.7.
2897
2711
1.6.1-subtree[as it might have been] with B+Tree indices.
2899
This is [now] retained until we have a CHK based subtree format in
2903
2714
repository_class = KnitPackRepository
2904
2715
_commit_builder_class = PackRootCommitBuilder
2905
2716
rich_root_data = True
2907
2717
supports_tree_reference = True
2908
2718
supports_external_lookups = True
2909
2719
# What index classes to use
2917
2727
def _get_matching_bzrdir(self):
2918
2728
return bzrdir.format_registry.make_bzrdir(
2919
'development-subtree')
2729
'development2-subtree')
2921
2731
def _ignore_setting_bzrdir(self, format):
2924
2734
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2736
def check_conversion_target(self, target_format):
2737
if not target_format.rich_root_data:
2738
raise errors.BadConversionTarget(
2739
'Does not support rich root data.', target_format)
2740
if not getattr(target_format, 'supports_tree_reference', False):
2741
raise errors.BadConversionTarget(
2742
'Does not support nested trees', target_format)
2926
2744
def get_format_string(self):
2927
2745
"""See RepositoryFormat.get_format_string()."""
2928
2746
return ("Bazaar development format 2 with subtree support "