--- a/bzrlib/repofmt/pack_repo.py
+++ b/bzrlib/repofmt/pack_repo.py
@@ -1 +1 @@
-# Copyright (C) 2005, 2006, 2007, 2008 Canonical Ltd
+# Copyright (C) 2007-2010 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -57 +59 @@
 from bzrlib.decorators import needs_write_lock, only_raises
-from bzrlib.btree_index import (
-    BTreeBuilder,
-    BTreeGraphIndex,
-    )
@@ -62 +60 @@
 from bzrlib.index import (
@@ -64 +62 @@
     InMemoryGraphIndex,
     )
+from bzrlib.lock import LogicalLockResult
 from bzrlib.repofmt.knitrepo import KnitRepository
 from bzrlib.repository import (
@@ -69 +68 @@
     MetaDirRepositoryFormat,
+    RepositoryWriteLockResult,
@@ -228 +228 @@
         unlimited_cache = False
         if index_type == 'chk':
             unlimited_cache = True
-        setattr(self, index_type + '_index',
-            self.index_class(self.index_transport,
-                self.index_name(index_type, self.name),
-                self.index_sizes[self.index_offset(index_type)],
-                unlimited_cache=unlimited_cache))
+        index = self.index_class(self.index_transport,
+                    self.index_name(index_type, self.name),
+                    self.index_sizes[self.index_offset(index_type)],
+                    unlimited_cache=unlimited_cache)
+        if index_type == 'chk':
+            index._leaf_factory = btree_index._gcchk_factory
+        setattr(self, index_type + '_index', index)
 
 
 class ExistingPack(Pack):
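
The hunk above routes the index through a local variable so the CHK case can both disable cache eviction and install the GC-CHK leaf parser before the attribute is published. A minimal standalone sketch of that dispatch pattern (FakeIndex, Owner and attach_index are hypothetical names, not bzrlib API):

    # Sketch: pick cache policy and leaf parser per index type before
    # publishing the index object on its owner.
    class FakeIndex(object):
        def __init__(self, name, unlimited_cache):
            self.name = name
            self.unlimited_cache = unlimited_cache
            self._leaf_factory = 'plain-leaf'

    class Owner(object):
        pass

    def attach_index(owner, index_type):
        # CHK pages are fetched by content hash in no useful order, so an
        # evicting cache would thrash; keep every page and swap in the
        # GC-CHK parser (assumed rationale, per the unlimited_cache flag).
        unlimited_cache = (index_type == 'chk')
        index = FakeIndex(index_type, unlimited_cache)
        if index_type == 'chk':
            index._leaf_factory = 'gc-chk-leaf'
        setattr(owner, index_type + '_index', index)

    owner = Owner()
    for index_type in ('revision', 'inventory', 'text', 'signature', 'chk'):
        attach_index(owner, index_type)
    assert owner.chk_index.unlimited_cache
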
@@ -586 +588 @@
             flush_func=flush_func)
         self.add_callback = None
 
-    def replace_indices(self, index_to_pack, indices):
-        """Replace the current mappings with fresh ones.
-
-        This should probably not be used eventually, rather incremental add and
-        removal of indices. It has been added during refactoring of existing
-        code.
-
-        :param index_to_pack: A mapping from index objects to
-            (transport, name) tuples for the pack file data.
-        :param indices: A list of indices.
-        """
-        # refresh the revision pack map dict without replacing the instance.
-        self.index_to_pack.clear()
-        self.index_to_pack.update(index_to_pack)
-        # XXX: API break - clearly a 'replace' method would be good?
-        self.combined_index._indices[:] = indices
-        # the current add nodes callback for the current writable index if
-        # there is one.
-        self.add_callback = None
-
     def add_index(self, index, pack):
         """Add index to the aggregate, which is an index for Pack pack.
@@ -618 +600 @@
         # expose it to the index map
         self.index_to_pack[index] = pack.access_tuple()
         # put it at the front of the linear index list
-        self.combined_index.insert_index(0, index)
+        self.combined_index.insert_index(0, index, pack.name)
 
     def add_writable_index(self, index, pack):
         """Add an index which is able to have data added to it.
@@ -644 +626 @@
         self.data_access.set_writer(None, None, (None, None))
         self.index_to_pack.clear()
         del self.combined_index._indices[:]
+        del self.combined_index._index_names[:]
         self.add_callback = None
 
-    def remove_index(self, index, pack):
+    def remove_index(self, index):
         """Remove index from the indices used to answer queries.
 
         :param index: An index from the pack parameter.
-        :param pack: A Pack instance.
         """
         del self.index_to_pack[index]
-        self.combined_index._indices.remove(index)
+        pos = self.combined_index._indices.index(index)
+        del self.combined_index._indices[pos]
+        del self.combined_index._index_names[pos]
         if (self.add_callback is not None and
             getattr(index, 'add_nodes', None) == self.add_callback):
             self.add_callback = None
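
remove_index now resolves the position once and deletes from both parallel lists, since removing by value from only one of _indices and _index_names would let them drift out of alignment. The invariant in isolation (toy class, not the real CombinedGraphIndex):

    # Sketch: keep two parallel lists in lock-step on insert and remove.
    class Combined(object):
        def __init__(self):
            self._indices = []
            self._index_names = []

        def insert_index(self, pos, index, name=None):
            self._indices.insert(pos, index)
            self._index_names.insert(pos, name)

        def remove_index(self, index):
            # Find the position once, then delete from both lists so
            # they cannot drift out of alignment.
            pos = self._indices.index(index)
            del self._indices[pos]
            del self._index_names[pos]

    c = Combined()
    c.insert_index(0, 'idx-a', 'pack-a')
    c.insert_index(0, 'idx-b', 'pack-b')
    c.remove_index('idx-a')
    assert c._index_names == ['pack-b']
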
@@ -738 +722 @@
         :return: A Pack object, or None if nothing was copied.
         """
         # open a pack - using the same name as the last temporary file
-        # - which has already been flushed, so its safe.
+        # - which has already been flushed, so it's safe.
         # XXX: - duplicate code warning with start_write_group; fix before
         #      considering 'done'.
         if self._pack_collection._new_pack is not None:
@@ -1308 +1292 @@
         # reinserted, and if d3 has incorrect parents it will also be
         # reinserted. If we insert d3 first, d2 is present (as it was bulk
         # copied), so we will try to delta, but d2 is not currently able to be
-        # extracted because it's basis d1 is not present. Topologically sorting
+        # extracted because its basis d1 is not present. Topologically sorting
         # addresses this. The following generates a sort for all the texts that
         # are being inserted without having to reference the entire text key
         # space (we only topo sort the revisions, which is smaller).
@@ -1415 +1399 @@
         self.inventory_index = AggregateIndex(self.reload_pack_names, flush)
         self.text_index = AggregateIndex(self.reload_pack_names, flush)
         self.signature_index = AggregateIndex(self.reload_pack_names, flush)
+        all_indices = [self.revision_index, self.inventory_index,
+                self.text_index, self.signature_index]
         if use_chk_index:
             self.chk_index = AggregateIndex(self.reload_pack_names, flush)
+            all_indices.append(self.chk_index)
         else:
             # used to determine if we're using a chk_index elsewhere.
             self.chk_index = None
+        # Tell all the CombinedGraphIndex objects about each other, so they can
+        # share hints about which pack names to search first.
+        all_combined = [agg_idx.combined_index for agg_idx in all_indices]
+        for combined_idx in all_combined:
+            combined_idx.set_sibling_indices(
+                set(all_combined).difference([combined_idx]))
         # resumed packs
         self._resumed_packs = []
 
+    def __repr__(self):
+        return '%s(%r)' % (self.__class__.__name__, self.repo)
+
     def add_pack_to_memory(self, pack):
         """Make a Pack object available to the repository to satisfy queries.
@@ -1542 +1538 @@
             self._remove_pack_from_memory(pack)
         # record the newly available packs and stop advertising the old
         # packs
-        result = self._save_pack_names(clear_obsolete_packs=True)
-        # Move the old packs out of the way now they are no longer referenced.
-        for revision_count, packs in pack_operations:
-            self._obsolete_packs(packs)
+        to_be_obsoleted = []
+        for _, packs in pack_operations:
+            to_be_obsoleted.extend(packs)
+        result = self._save_pack_names(clear_obsolete_packs=True,
+                                       obsolete_packs=to_be_obsoleted)
         return result
 
     def _flush_new_pack(self):
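
The rewritten tail of _execute_pack_operations gathers the packs to obsolete up front and hands them to _save_pack_names, so the renames into obsolete_packs can happen only after the new pack-names file is on disk. Roughly this ordering, with stubbed-out helpers (all names here are hypothetical):

    def save_pack_names(clear_obsolete_packs=True, obsolete_packs=None):
        # Stub: write 'pack-names' first, then report which candidates
        # still need moving; pretend none were already obsolete.
        return list(obsolete_packs or [])

    def obsolete(packs):
        print('moving to obsolete_packs: %r' % (packs,))

    def finish_repack(pack_operations):
        to_be_obsoleted = []
        for _, packs in pack_operations:
            to_be_obsoleted.extend(packs)
        # A crash between writing pack-names and the renames now leaves
        # the old packs in place rather than unreferenced.
        remaining = save_pack_names(clear_obsolete_packs=True,
                                    obsolete_packs=to_be_obsoleted)
        obsolete(remaining)

    finish_repack([(3, ['pack-a', 'pack-b']), (1, ['pack-c'])])
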
@@ -1564 +1561 @@
         """Is the collection already packed?"""
         return not (self.repo._format.pack_compresses or (len(self._names) > 1))
 
-    def pack(self, hint=None):
+    def pack(self, hint=None, clean_obsolete_packs=False):
         """Pack the pack collection totally."""
         self.ensure_loaded()
         total_packs = len(self._names)
@@ -1586 +1583 @@
             pack_operations[-1][1].append(pack)
         self._execute_pack_operations(pack_operations, OptimisingPacker)
+        if clean_obsolete_packs:
+            self._clear_obsolete_packs()
@@ -1589 +1589 @@
     def plan_autopack_combinations(self, existing_packs, pack_distribution):
         """Plan a pack operation.
@@ -1600 +1600 @@
         pack_operations = [[0, []]]
         # plan out what packs to keep, and what to reorganise
         while len(existing_packs):
-            # take the largest pack, and if its less than the head of the
+            # take the largest pack, and if it's less than the head of the
             # distribution chart we will include its contents in the new pack
-            # for that position. If its larger, we remove its size from the
+            # for that position. If it's larger, we remove its size from the
             # distribution chart
             next_pack_rev_count, next_pack = existing_packs.pop(0)
             if next_pack_rev_count >= pack_distribution[0]:
@@ -1644 +1644 @@
         :return: True if the disk names had not been previously read.
         """
-        # NB: if you see an assertion error here, its probably access against
+        # NB: if you see an assertion error here, it's probably access against
         # an unlocked repo. Naughty.
         if not self.repo.is_locked():
             raise errors.ObjectNotLocked(self.repo)
@@ -1679 +1679 @@
             txt_index = self._make_index(name, '.tix')
             sig_index = self._make_index(name, '.six')
             if self.chk_index is not None:
-                chk_index = self._make_index(name, '.cix', unlimited_cache=True)
+                chk_index = self._make_index(name, '.cix', is_chk=True)
             else:
                 chk_index = None
             result = ExistingPack(self._pack_transport, name, rev_index,
@@ -1705 +1705 @@
             sig_index = self._make_index(name, '.six', resume=True)
             if self.chk_index is not None:
                 chk_index = self._make_index(name, '.cix', resume=True,
-                                             unlimited_cache=True)
+                                             is_chk=True)
             else:
                 chk_index = None
             result = self.resumed_pack_factory(name, rev_index, inv_index,
@@ -1741 +1741 @@
         return self._index_class(self.transport, 'pack-names', None
                 ).iter_all_entries()
 
-    def _make_index(self, name, suffix, resume=False, unlimited_cache=False):
+    def _make_index(self, name, suffix, resume=False, is_chk=False):
         size_offset = self._suffix_offsets[suffix]
         index_name = name + suffix
@@ -1751 +1751 @@
         transport = self._index_transport
         index_size = self._names[name][size_offset]
-        return self._index_class(transport, index_name, index_size,
-                                 unlimited_cache=unlimited_cache)
+        index = self._index_class(transport, index_name, index_size,
+                                  unlimited_cache=is_chk)
+        if is_chk and self._index_class is btree_index.BTreeGraphIndex:
+            index._leaf_factory = btree_index._gcchk_factory
+        return index
 
     def _max_pack_count(self, total_revisions):
         """Return the maximum number of packs to use for total revisions.
@@ -1785 +1788 @@
         :param return: None.
         """
         for pack in packs:
-            pack.pack_transport.rename(pack.file_name(),
-                '../obsolete_packs/' + pack.file_name())
+            try:
+                pack.pack_transport.rename(pack.file_name(),
+                    '../obsolete_packs/' + pack.file_name())
+            except (errors.PathError, errors.TransportError), e:
+                # TODO: Should these be warnings or mutters?
+                mutter("couldn't rename obsolete pack, skipping it:\n%s"
+                       % (e,))
             # TODO: Probably needs to know all possible indices for this pack
             # - or maybe list the directory and move all indices matching this
             # name whether we recognize it or not?
@@ -1794 +1802 @@
             if self.chk_index is not None:
                 suffixes.append('.cix')
             for suffix in suffixes:
-                self._index_transport.rename(pack.name + suffix,
-                    '../obsolete_packs/' + pack.name + suffix)
+                try:
+                    self._index_transport.rename(pack.name + suffix,
+                        '../obsolete_packs/' + pack.name + suffix)
+                except (errors.PathError, errors.TransportError), e:
+                    mutter("couldn't rename obsolete index, skipping it:\n%s"
+                           % (e,))
@@ -1800 +1812 @@
     def pack_distribution(self, total_revisions):
         """Generate a list of the number of revisions to put in each pack.
@@ -1827 +1839 @@
         self._remove_pack_indices(pack)
         self.packs.remove(pack)
 
-    def _remove_pack_indices(self, pack):
-        """Remove the indices for pack from the aggregated indices."""
-        self.revision_index.remove_index(pack.revision_index, pack)
-        self.inventory_index.remove_index(pack.inventory_index, pack)
-        self.text_index.remove_index(pack.text_index, pack)
-        self.signature_index.remove_index(pack.signature_index, pack)
-        if self.chk_index is not None:
-            self.chk_index.remove_index(pack.chk_index, pack)
+    def _remove_pack_indices(self, pack, ignore_missing=False):
+        """Remove the indices for pack from the aggregated indices.
+
+        :param ignore_missing: Suppress KeyErrors from calling remove_index.
+        """
+        for index_type in Pack.index_definitions.keys():
+            attr_name = index_type + '_index'
+            aggregate_index = getattr(self, attr_name)
+            if aggregate_index is not None:
+                pack_index = getattr(pack, attr_name)
+                try:
+                    aggregate_index.remove_index(pack_index)
@@ -1839 +1859 @@
     def reset(self):
         """Clear all cached data."""
@@ -1873 +1893 @@
         disk_nodes = set()
         for index, key, value in self._iter_disk_pack_index():
             disk_nodes.add((key, value))
+        orig_disk_nodes = set(disk_nodes)
 
         # do a two-way diff against our original content
         current_nodes = set()
@@ -1891 +1912 @@
         disk_nodes.difference_update(deleted_nodes)
         disk_nodes.update(new_nodes)
 
-        return disk_nodes, deleted_nodes, new_nodes
+        return disk_nodes, deleted_nodes, new_nodes, orig_disk_nodes
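
_diff_pack_names now also returns the untouched on-disk baseline (orig_disk_nodes) next to the merged view. The two-way set diff at its core, on toy data:

    # What pack-names said on disk versus what we currently believe.
    orig_disk_nodes = set(['pack-a', 'pack-b'])
    current_nodes = set(['pack-b', 'pack-c'])
    deleted_nodes = orig_disk_nodes - current_nodes   # we dropped pack-a
    new_nodes = current_nodes - orig_disk_nodes       # we created pack-c
    disk_nodes = set(orig_disk_nodes)
    disk_nodes.difference_update(deleted_nodes)
    disk_nodes.update(new_nodes)
    assert disk_nodes == set(['pack-b', 'pack-c'])
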
@@ -1896 +1917 @@
     def _syncronize_pack_names_from_disk_nodes(self, disk_nodes):
         """Given the correct set of pack files, update our saved info.
@@ -1925 +1946 @@
                     # disk index because the set values are the same, unless
                     # the only index shows up as deleted by the set difference
                     # - which it may. Until there is a specific test for this,
-                    # assume its broken. RBC 20071017.
+                    # assume it's broken. RBC 20071017.
                     self._remove_pack_from_memory(self.get_pack_by_name(name))
                     self._names[name] = sizes
                     self.get_pack_by_name(name)
@@ -1937 +1958 @@
                 added.append(name)
         return removed, added, modified
@@ -1940 +1961 @@
-    def _save_pack_names(self, clear_obsolete_packs=False):
+    def _save_pack_names(self, clear_obsolete_packs=False, obsolete_packs=None):
         """Save the list of packs.
 
         This will take out the mutex around the pack names list for the
@@ -1948 +1969 @@
         :param clear_obsolete_packs: If True, clear out the contents of the
             obsolete_packs directory.
+        :param obsolete_packs: Packs that are obsolete once the new pack-names
+            file has been written.
         :return: A list of the names saved that were not previously on disk.
         """
+        already_obsolete = []
         self.lock_names()
         try:
             builder = self._index_builder_class()
-            disk_nodes, deleted_nodes, new_nodes = self._diff_pack_names()
+            (disk_nodes, deleted_nodes, new_nodes,
+             orig_disk_nodes) = self._diff_pack_names()
             # TODO: handle same-name, index-size-changes here -
             # e.g. use the value from disk, not ours, *unless* we're the one
@@ -1960 +1985 @@
                 builder.add_node(key, value)
             self.transport.put_file('pack-names', builder.finish(),
                 mode=self.repo.bzrdir._get_file_mode())
-            # move the baseline forward
             self._packs_at_load = disk_nodes
             if clear_obsolete_packs:
-                self._clear_obsolete_packs()
+                to_preserve = set([o.name for o in obsolete_packs])
+                already_obsolete = self._clear_obsolete_packs(to_preserve)
         finally:
             self._unlock_names()
         # synchronise the memory packs list with what we just wrote:
         self._syncronize_pack_names_from_disk_nodes(disk_nodes)
+        if obsolete_packs:
+            # TODO: We could add one more condition here. "if o.name not in
+            #       orig_disk_nodes and o != the new_pack we haven't written to
+            #       disk yet. However, the new pack object is not easily
+            #       accessible here (it would have to be passed through the
+            #       autopacking code, etc.)
+            obsolete_packs = [o for o in obsolete_packs
+                              if o.name not in already_obsolete]
+            self._obsolete_packs(obsolete_packs)
         return [new_node[0][0] for new_node in new_nodes]
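
After pack-names is written, the caller's obsolete_packs list is filtered against what _clear_obsolete_packs already found, so no pack is renamed into obsolete_packs twice. The filter on its own:

    class FakePack(object):
        def __init__(self, name):
            self.name = name

    obsolete_packs = [FakePack('pack-a'), FakePack('pack-b')]
    already_obsolete = ['pack-a']   # found in obsolete_packs already
    obsolete_packs = [o for o in obsolete_packs
                      if o.name not in already_obsolete]
    assert [o.name for o in obsolete_packs] == ['pack-b']
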
@@ -1973 +2009 @@
     def reload_pack_names(self):
@@ -1982 +2018 @@
         # The ensure_loaded call is to handle the case where the first call
         # made involving the collection was to reload_pack_names, where we
-        # don't have a view of disk contents. Its a bit of a bandaid, and
-        # causes two reads of pack-names, but its a rare corner case not struck
-        # with regular push/pull etc.
+        # don't have a view of disk contents. It's a bit of a bandaid, and
+        # causes two reads of pack-names, but it's a rare corner case not
+        # struck with regular push/pull etc.
         first_read = self.ensure_loaded()
@@ -1990 +2026 @@
         # out the new value.
-        disk_nodes, _, _ = self._diff_pack_names()
-        self._packs_at_load = disk_nodes
+        (disk_nodes, deleted_nodes, new_nodes,
+         orig_disk_nodes) = self._diff_pack_names()
+        # _packs_at_load is meant to be the explicit list of names in
+        # 'pack-names' at the start. As such, it should not contain any
+        # pending names that haven't been written out yet.
+        self._packs_at_load = orig_disk_nodes
         (removed, added,
          modified) = self._syncronize_pack_names_from_disk_nodes(disk_nodes)
         if removed or added or modified:
@@ -2005 +2045 @@
             raise errors.RetryAutopack(self.repo, False, sys.exc_info())
@@ -2007 +2047 @@
-    def _clear_obsolete_packs(self):
+    def _clear_obsolete_packs(self, preserve=None):
         """Delete everything from the obsolete-packs directory.
+
+        :return: A list of pack identifiers (the filename without '.pack') that
+            were found in obsolete_packs.
         """
+        found = []
         obsolete_pack_transport = self.transport.clone('obsolete_packs')
+        if preserve is None:
+            preserve = set()
         for filename in obsolete_pack_transport.list_dir('.'):
+            name, ext = osutils.splitext(filename)
+            if ext == '.pack':
+                found.append(name)
+            if name in preserve:
+                continue
             try:
                 obsolete_pack_transport.delete(filename)
             except (errors.PathError, errors.TransportError), e:
-                warning("couldn't delete obsolete pack, skipping it:\n%s" % (e,))
+                warning("couldn't delete obsolete pack, skipping it:\n%s"
+                        % (e,))
+        return found
@@ -2017 +2070 @@
     def _start_write_group(self):
         # Do not permit preparation for writing if we're not in a 'write lock'.
@@ -2045 +2098 @@
         # FIXME: just drop the transient index.
         # forget what names there are
         if self._new_pack is not None:
-            try:
-                self._new_pack.abort()
-            finally:
-                # XXX: If we aborted while in the middle of finishing the write
-                # group, _remove_pack_indices can fail because the indexes are
-                # already gone. If they're not there we shouldn't fail in this
-                # case. -- mbp 20081113
-                self._remove_pack_indices(self._new_pack)
-                self._new_pack = None
+            operation = cleanup.OperationWithCleanups(self._new_pack.abort)
+            operation.add_cleanup(setattr, self, '_new_pack', None)
+            # If we aborted while in the middle of finishing the write
+            # group, _remove_pack_indices could fail because the indexes are
+            # already gone. But if they're not there we shouldn't fail in this
+            # case, so we pass ignore_missing=True.
+            operation.add_cleanup(self._remove_pack_indices, self._new_pack,
+                ignore_missing=True)
+            operation.run_simple()
         for resumed_pack in self._resumed_packs:
-            try:
-                resumed_pack.abort()
-            finally:
-                # See comment in previous finally block.
-                try:
-                    self._remove_pack_indices(resumed_pack)
-                except KeyError:
-                    pass
+            operation = cleanup.OperationWithCleanups(resumed_pack.abort)
+            # See comment in previous finally block.
+            operation.add_cleanup(self._remove_pack_indices, resumed_pack,
+                ignore_missing=True)
+            operation.run_simple()
         del self._resumed_packs[:]
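
cleanup.OperationWithCleanups runs its main callable and then every registered cleanup, so a failure in abort no longer skips the index teardown, and a cleanup that raises cannot mask the original error. Its effect is roughly this (sketch of the behaviour, not the real bzrlib.cleanup implementation):

    def run_with_cleanups(main, cleanups):
        # Run `main`, then always run each cleanup; swallow cleanup
        # errors so they cannot mask an exception from `main`.
        try:
            return main()
        finally:
            for func, args, kwargs in cleanups:
                try:
                    func(*args, **kwargs)
                except Exception, e:
                    print('cleanup failed: %s' % (e,))

    class Box(object):
        value = 'pack'

    box = Box()
    run_with_cleanups(lambda: 'aborted',
                      [(setattr, (box, 'value', None), {})])
    assert box.value is None
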
@@ -2068 +2118 @@
     def _remove_resumed_pack_indices(self):
@@ -2294 +2344 @@
         return self._write_lock_count
 
     def lock_write(self, token=None):
+        """Lock the repository for writes.
+
+        :return: A bzrlib.repository.RepositoryWriteLockResult.
+        """
         locked = self.is_locked()
         if not self._write_lock_count and locked:
             raise errors.ReadOnlyError(self)
@@ -2308 +2362 @@
                 # Writes don't affect fallback repos
                 repo.lock_read()
             self._refresh_data()
+        return RepositoryWriteLockResult(self.unlock, None)
 
     def lock_read(self):
+        """Lock the repository for reads.
+
+        :return: A bzrlib.lock.LogicalLockResult.
+        """
         locked = self.is_locked()
         if self._write_lock_count:
             self._write_lock_count += 1
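
lock_write and lock_read now hand back result objects (RepositoryWriteLockResult, LogicalLockResult) rather than a bare token or None, so a caller can hold one thing that knows how to release the lock. Assumed caller shape (the unlock attribute follows from the constructors used above):

    def with_write_lock(repo, work):
        lock_result = repo.lock_write()   # a RepositoryWriteLockResult
        try:
            return work(repo)
        finally:
            lock_result.unlock()
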
@@ -2332 +2392 @@
             raise NotImplementedError(self.dont_leave_lock_in_place)
 
     @needs_write_lock
-    def pack(self, hint=None):
+    def pack(self, hint=None, clean_obsolete_packs=False):
         """Compress the data within the repository.
 
         This will pack all the data to a single pack. In future it may
         recompress deltas or do other such expensive operations.
         """
-        self._pack_collection.pack(hint=hint)
+        self._pack_collection.pack(hint=hint, clean_obsolete_packs=clean_obsolete_packs)
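
The new flag threads straight through to RepositoryPackCollection.pack, so callers can repack and empty obsolete_packs in one operation. Assumed usage:

    def compact(repo):
        # Fully repack and also clear out obsolete_packs (flag name
        # taken from the signature above).
        repo.lock_write()
        try:
            repo.pack(clean_obsolete_packs=True)
        finally:
            repo.unlock()
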
@@ -2343 +2403 @@
     @needs_write_lock
     def reconcile(self, other=None, thorough=False):
@@ -2500 +2560 @@
         utf8_files = [('format', self.get_format_string())]
 
         self._upload_blank_content(a_bzrdir, dirs, files, utf8_files, shared)
-        return self.open(a_bzrdir=a_bzrdir, _found=True)
+        repository = self.open(a_bzrdir=a_bzrdir, _found=True)
+        self._run_post_repo_init_hooks(repository, a_bzrdir, shared)
+        return repository
 
     def open(self, a_bzrdir, _found=False, _override_transport=None):
         """See RepositoryFormat.open().
@@ -2768 +2831 @@
     _commit_builder_class = PackCommitBuilder
     supports_external_lookups = True
     # What index classes to use
-    index_builder_class = BTreeBuilder
-    index_class = BTreeGraphIndex
+    index_builder_class = btree_index.BTreeBuilder
+    index_class = btree_index.BTreeGraphIndex
 
     @property
     def _serializer(self):
@@ -2804 +2867 @@
     supports_tree_reference = False # no subtrees
     supports_external_lookups = True
     # What index classes to use
-    index_builder_class = BTreeBuilder
-    index_class = BTreeGraphIndex
+    index_builder_class = btree_index.BTreeBuilder
+    index_class = btree_index.BTreeGraphIndex
 
     @property
     def _serializer(self):
@@ -2842 +2905 @@
     repository_class = KnitPackRepository
     _commit_builder_class = PackRootCommitBuilder
     rich_root_data = True
@@ -2845 +2909 @@
     supports_tree_reference = True
     supports_external_lookups = True
     # What index classes to use
-    index_builder_class = BTreeBuilder
-    index_class = BTreeGraphIndex
+    index_builder_class = btree_index.BTreeBuilder
+    index_class = btree_index.BTreeGraphIndex
 
     @property
     def _serializer(self):