154
151
texts/deltas (via (fileid, revisionid) tuples).
155
152
:param signature_index: A GraphIndex for determining what signatures are
156
153
present in the Pack and accessing the locations of their texts.
157
:param chk_index: A GraphIndex for accessing content by CHK, if the
160
155
self.revision_index = revision_index
161
156
self.inventory_index = inventory_index
162
157
self.text_index = text_index
163
158
self.signature_index = signature_index
164
self.chk_index = chk_index
166
160
def access_tuple(self):
167
161
"""Return a tuple (transport, name) for the pack content."""
228
222
return self.index_name('text', name)
230
224
def _replace_index_with_readonly(self, index_type):
231
unlimited_cache = False
232
if index_type == 'chk':
233
unlimited_cache = True
234
225
setattr(self, index_type + '_index',
235
226
self.index_class(self.index_transport,
236
227
self.index_name(index_type, self.name),
237
self.index_sizes[self.index_offset(index_type)],
238
unlimited_cache=unlimited_cache))
228
self.index_sizes[self.index_offset(index_type)]))
241
231
class ExistingPack(Pack):
242
232
"""An in memory proxy for an existing .pack and its disk indices."""
244
234
def __init__(self, pack_transport, name, revision_index, inventory_index,
245
text_index, signature_index, chk_index=None):
235
text_index, signature_index):
246
236
"""Create an ExistingPack object.
248
238
:param pack_transport: The transport where the pack file resides.
249
239
:param name: The name of the pack on disk in the pack_transport.
251
241
Pack.__init__(self, revision_index, inventory_index, text_index,
252
signature_index, chk_index)
254
244
self.pack_transport = pack_transport
255
245
if None in (revision_index, inventory_index, text_index,
273
263
def __init__(self, name, revision_index, inventory_index, text_index,
274
264
signature_index, upload_transport, pack_transport, index_transport,
275
pack_collection, chk_index=None):
276
266
"""Create a ResumedPack object."""
277
267
ExistingPack.__init__(self, pack_transport, name, revision_index,
278
inventory_index, text_index, signature_index,
268
inventory_index, text_index, signature_index)
280
269
self.upload_transport = upload_transport
281
270
self.index_transport = index_transport
282
271
self.index_sizes = [None, None, None, None]
309
295
self.upload_transport.delete(self.file_name())
310
296
indices = [self.revision_index, self.inventory_index, self.text_index,
311
297
self.signature_index]
312
if self.chk_index is not None:
313
indices.append(self.chk_index)
314
298
for index in indices:
315
299
index._transport.delete(index._name)
317
301
def finish(self):
318
302
self._check_references()
319
index_types = ['revision', 'inventory', 'text', 'signature']
320
if self.chk_index is not None:
321
index_types.append('chk')
322
for index_type in index_types:
303
new_name = '../packs/' + self.file_name()
304
self.upload_transport.rename(self.file_name(), new_name)
305
for index_type in ['revision', 'inventory', 'text', 'signature']:
323
306
old_name = self.index_name(index_type, self.name)
324
307
new_name = '../indices/' + old_name
325
308
self.upload_transport.rename(old_name, new_name)
326
309
self._replace_index_with_readonly(index_type)
327
new_name = '../packs/' + self.file_name()
328
self.upload_transport.rename(self.file_name(), new_name)
329
310
self._state = 'finished'
331
312
def _get_external_refs(self, index):
332
"""Return compression parents for this index that are not present.
334
This returns any compression parents that are referenced by this index,
335
which are not contained *in* this index. They may be present elsewhere.
337
313
return index.external_references(1)
385
355
self._file_mode = file_mode
386
356
# tracks the content written to the .pack file.
387
357
self._hash = osutils.md5()
388
# a tuple with the length in bytes of the indices, once the pack
389
# is finalised. (rev, inv, text, sigs, chk_if_in_use)
358
# a four-tuple with the length in bytes of the indices, once the pack
359
# is finalised. (rev, inv, text, sigs)
390
360
self.index_sizes = None
391
361
# How much data to cache when writing packs. Note that this is not
392
362
# synchronised with reads, because it's not in the transport layer, so
455
423
return bool(self.get_revision_count() or
456
424
self.inventory_index.key_count() or
457
425
self.text_index.key_count() or
458
self.signature_index.key_count() or
459
(self.chk_index is not None and self.chk_index.key_count()))
461
def finish_content(self):
462
if self.name is not None:
466
self._write_data('', flush=True)
467
self.name = self._hash.hexdigest()
426
self.signature_index.key_count())
469
428
def finish(self, suspend=False):
470
429
"""Finish the new pack.
492
454
self._write_index('text', self.text_index, 'file texts', suspend)
493
455
self._write_index('signature', self.signature_index,
494
456
'revision signatures', suspend)
495
if self.chk_index is not None:
496
self.index_sizes.append(None)
497
self._write_index('chk', self.chk_index,
498
'content hash bytes', suspend)
499
457
self.write_stream.close()
500
458
# Note that this will clobber an existing pack with the same name,
501
459
# without checking for hash collisions. While this is undesirable this
585
543
self.index_to_pack = {}
586
544
self.combined_index = CombinedGraphIndex([], reload_func=reload_func)
587
545
self.data_access = _DirectPackAccess(self.index_to_pack,
588
reload_func=reload_func,
589
flush_func=flush_func)
546
reload_func=reload_func)
547
self.add_callback = None
549
def replace_indices(self, index_to_pack, indices):
550
"""Replace the current mappings with fresh ones.
552
This should probably not be used eventually, rather incremental add and
553
removal of indices. It has been added during refactoring of existing
556
:param index_to_pack: A mapping from index objects to
557
(transport, name) tuples for the pack file data.
558
:param indices: A list of indices.
560
# refresh the revision pack map dict without replacing the instance.
561
self.index_to_pack.clear()
562
self.index_to_pack.update(index_to_pack)
563
# XXX: API break - clearly a 'replace' method would be good?
564
self.combined_index._indices[:] = indices
565
# the current add nodes callback for the current writable index if
590
567
self.add_callback = None
592
569
def add_index(self, index, pack):
627
604
self.data_access.set_writer(None, None, (None, None))
628
605
self.index_to_pack.clear()
629
606
del self.combined_index._indices[:]
630
del self.combined_index._index_names[:]
631
607
self.add_callback = None
633
def remove_index(self, index):
609
def remove_index(self, index, pack):
634
610
"""Remove index from the indices used to answer queries.
636
612
:param index: An index from the pack parameter.
613
:param pack: A Pack instance.
638
615
del self.index_to_pack[index]
639
pos = self.combined_index._indices.index(index)
640
del self.combined_index._indices[pos]
641
del self.combined_index._index_names[pos]
616
self.combined_index._indices.remove(index)
642
617
if (self.add_callback is not None and
643
618
getattr(index, 'add_nodes', None) == self.add_callback):
644
619
self.add_callback = None
922
896
time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
923
897
new_pack.signature_index.key_count(),
924
898
time.time() - new_pack.start_time)
926
# NB XXX: how to check CHK references are present? perhaps by yielding
927
# the items? How should that interact with stacked repos?
928
if new_pack.chk_index is not None:
930
if 'pack' in debug.debug_flags:
931
mutter('%s: create_pack: chk content copied: %s%s %d items t+%6.3fs',
932
time.ctime(), self._pack_collection._upload_transport.base,
933
new_pack.random_name,
934
new_pack.chk_index.key_count(),
935
time.time() - new_pack.start_time)
936
899
new_pack._check_references()
937
900
if not self._use_pack(new_pack):
942
905
self._pack_collection.allocate(new_pack)
945
def _copy_chks(self, refs=None):
946
# XXX: Todo, recursive follow-pointers facility when fetching some
948
chk_index_map, chk_indices = self._pack_map_and_index_list(
950
chk_nodes = self._index_contents(chk_indices, refs)
952
# TODO: This isn't strictly tasteful as we are accessing some private
953
# variables (_serializer). Perhaps a better way would be to have
954
# Repository._deserialise_chk_node()
955
search_key_func = chk_map.search_key_registry.get(
956
self._pack_collection.repo._serializer.search_key_name)
957
def accumlate_refs(lines):
958
# XXX: move to a generic location
960
bytes = ''.join(lines)
961
node = chk_map._deserialise(bytes, ("unknown",), search_key_func)
962
new_refs.update(node.refs())
963
self._copy_nodes(chk_nodes, chk_index_map, self.new_pack._writer,
964
self.new_pack.chk_index, output_lines=accumlate_refs)
967
def _copy_nodes(self, nodes, index_map, writer, write_index,
969
"""Copy knit nodes between packs with no graph references.
971
:param output_lines: Output full texts of copied items.
908
def _copy_nodes(self, nodes, index_map, writer, write_index):
909
"""Copy knit nodes between packs with no graph references."""
973
910
pb = ui.ui_factory.nested_progress_bar()
975
912
return self._do_copy_nodes(nodes, index_map, writer,
976
write_index, pb, output_lines=output_lines)
980
def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
917
def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb):
982
918
# for record verification
983
919
knit = KnitVersionedFiles(None, None)
984
920
# plan a readv on each source pack:
1018
954
izip(reader.iter_records(), pack_readv_requests):
1019
955
raw_data = read_func(None)
1020
956
# check the header only
1021
if output_lines is not None:
1022
output_lines(knit._parse_record(key[-1], raw_data)[0])
1024
df, _ = knit._parse_record_header(key, raw_data)
957
df, _ = knit._parse_record_header(key, raw_data)
1026
959
pos, size = writer.add_bytes_record(raw_data, names)
1027
960
write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
1028
961
pb.update("Copied record", record_index)
1358
1291
:ivar _names: map of {pack_name: (index_size,)}
1361
pack_factory = NewPack
1362
resumed_pack_factory = ResumedPack
1364
1294
def __init__(self, repo, transport, index_transport, upload_transport,
1365
pack_transport, index_builder_class, index_class,
1295
pack_transport, index_builder_class, index_class):
1367
1296
"""Create a new RepositoryPackCollection.
1369
1298
:param transport: Addresses the repository base directory
1395
1322
# when a pack is being created by this object, the state of that pack.
1396
1323
self._new_pack = None
1397
1324
# aggregated revision index data
1398
flush = self._flush_new_pack
1399
self.revision_index = AggregateIndex(self.reload_pack_names, flush)
1400
self.inventory_index = AggregateIndex(self.reload_pack_names, flush)
1401
self.text_index = AggregateIndex(self.reload_pack_names, flush)
1402
self.signature_index = AggregateIndex(self.reload_pack_names, flush)
1403
all_indices = [self.revision_index, self.inventory_index,
1404
self.text_index, self.signature_index]
1406
self.chk_index = AggregateIndex(self.reload_pack_names, flush)
1407
all_indices.append(self.chk_index)
1409
# used to determine if we're using a chk_index elsewhere.
1410
self.chk_index = None
1411
# Tell all the CombinedGraphIndex objects about each other, so they can
1412
# share hints about which pack names to search first.
1413
all_combined = [agg_idx.combined_index for agg_idx in all_indices]
1414
for combined_idx in all_combined:
1415
combined_idx.set_sibling_indices(
1416
set(all_combined).difference([combined_idx]))
1325
self.revision_index = AggregateIndex(self.reload_pack_names)
1326
self.inventory_index = AggregateIndex(self.reload_pack_names)
1327
self.text_index = AggregateIndex(self.reload_pack_names)
1328
self.signature_index = AggregateIndex(self.reload_pack_names)
1417
1329
# resumed packs
1418
1330
self._resumed_packs = []
1421
return '%s(%r)' % (self.__class__.__name__, self.repo)
1423
1332
def add_pack_to_memory(self, pack):
1424
1333
"""Make a Pack object available to the repository to satisfy queries.
1506
1415
'containing %d revisions. Packing %d files into %d affecting %d'
1507
1416
' revisions', self, total_packs, total_revisions, num_old_packs,
1508
1417
num_new_packs, num_revs_affected)
1509
result = self._execute_pack_operations(pack_operations,
1418
self._execute_pack_operations(pack_operations,
1510
1419
reload_func=self._restart_autopack)
1511
mutter('Auto-packing repository %s completed', self)
1514
1422
def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
1515
1423
reload_func=None):
1539
1447
self._remove_pack_from_memory(pack)
1540
1448
# record the newly available packs and stop advertising the old
1542
to_be_obsoleted = []
1543
for _, packs in pack_operations:
1544
to_be_obsoleted.extend(packs)
1545
result = self._save_pack_names(clear_obsolete_packs=True,
1546
obsolete_packs=to_be_obsoleted)
1549
def _flush_new_pack(self):
1550
if self._new_pack is not None:
1551
self._new_pack.flush()
1450
self._save_pack_names(clear_obsolete_packs=True)
1451
# Move the old packs out of the way now they are no longer referenced.
1452
for revision_count, packs in pack_operations:
1453
self._obsolete_packs(packs)
1553
1455
def lock_names(self):
1554
1456
"""Acquire the mutex around the pack-names index.
1559
1461
self.repo.control_files.lock_write()
1561
def _already_packed(self):
1562
"""Is the collection already packed?"""
1563
return not (self.repo._format.pack_compresses or (len(self._names) > 1))
1565
def pack(self, hint=None, clean_obsolete_packs=False):
1566
1464
"""Pack the pack collection totally."""
1567
1465
self.ensure_loaded()
1568
1466
total_packs = len(self._names)
1569
if self._already_packed():
1468
# This is arguably wrong because we might not be optimal, but for
1469
# now lets leave it in. (e.g. reconcile -> one pack. But not
1571
1472
total_revisions = self.revision_index.combined_index.key_count()
1572
1473
# XXX: the following may want to be a class, to pack with a given
1574
1475
mutter('Packing repository %s, which has %d pack files, '
1575
'containing %d revisions with hint %r.', self, total_packs,
1576
total_revisions, hint)
1476
'containing %d revisions into 1 packs.', self, total_packs,
1577
1478
# determine which packs need changing
1479
pack_distribution = [1]
1578
1480
pack_operations = [[0, []]]
1579
1481
for pack in self.all_packs():
1580
if hint is None or pack.name in hint:
1581
# Either no hint was provided (so we are packing everything),
1582
# or this pack was included in the hint.
1583
pack_operations[-1][0] += pack.get_revision_count()
1584
pack_operations[-1][1].append(pack)
1482
pack_operations[-1][0] += pack.get_revision_count()
1483
pack_operations[-1][1].append(pack)
1585
1484
self._execute_pack_operations(pack_operations, OptimisingPacker)
1587
if clean_obsolete_packs:
1588
self._clear_obsolete_packs()
1590
1486
def plan_autopack_combinations(self, existing_packs, pack_distribution):
1591
1487
"""Plan a pack operation.
1679
1575
inv_index = self._make_index(name, '.iix')
1680
1576
txt_index = self._make_index(name, '.tix')
1681
1577
sig_index = self._make_index(name, '.six')
1682
if self.chk_index is not None:
1683
chk_index = self._make_index(name, '.cix', unlimited_cache=True)
1686
1578
result = ExistingPack(self._pack_transport, name, rev_index,
1687
inv_index, txt_index, sig_index, chk_index)
1579
inv_index, txt_index, sig_index)
1688
1580
self.add_pack_to_memory(result)
1704
1596
inv_index = self._make_index(name, '.iix', resume=True)
1705
1597
txt_index = self._make_index(name, '.tix', resume=True)
1706
1598
sig_index = self._make_index(name, '.six', resume=True)
1707
if self.chk_index is not None:
1708
chk_index = self._make_index(name, '.cix', resume=True,
1709
unlimited_cache=True)
1712
result = self.resumed_pack_factory(name, rev_index, inv_index,
1713
txt_index, sig_index, self._upload_transport,
1714
self._pack_transport, self._index_transport, self,
1715
chk_index=chk_index)
1599
result = ResumedPack(name, rev_index, inv_index, txt_index,
1600
sig_index, self._upload_transport, self._pack_transport,
1601
self._index_transport, self)
1716
1602
except errors.NoSuchFile, e:
1717
1603
raise errors.UnresumableWriteGroup(self.repo, [name], str(e))
1718
1604
self.add_pack_to_memory(result)
1786
1671
:param return: None.
1788
1673
for pack in packs:
1790
pack.pack_transport.rename(pack.file_name(),
1791
'../obsolete_packs/' + pack.file_name())
1792
except (errors.PathError, errors.TransportError), e:
1793
# TODO: Should these be warnings or mutters?
1794
mutter("couldn't rename obsolete pack, skipping it:\n%s"
1674
pack.pack_transport.rename(pack.file_name(),
1675
'../obsolete_packs/' + pack.file_name())
1796
1676
# TODO: Probably needs to know all possible indices for this pack
1797
1677
# - or maybe list the directory and move all indices matching this
1798
1678
# name whether we recognize it or not?
1799
suffixes = ['.iix', '.six', '.tix', '.rix']
1800
if self.chk_index is not None:
1801
suffixes.append('.cix')
1802
for suffix in suffixes:
1804
self._index_transport.rename(pack.name + suffix,
1805
'../obsolete_packs/' + pack.name + suffix)
1806
except (errors.PathError, errors.TransportError), e:
1807
mutter("couldn't rename obsolete index, skipping it:\n%s"
1679
for suffix in ('.iix', '.six', '.tix', '.rix'):
1680
self._index_transport.rename(pack.name + suffix,
1681
'../obsolete_packs/' + pack.name + suffix)
1810
1683
def pack_distribution(self, total_revisions):
1811
1684
"""Generate a list of the number of revisions to put in each pack.
1837
1710
self._remove_pack_indices(pack)
1838
1711
self.packs.remove(pack)
1840
def _remove_pack_indices(self, pack, ignore_missing=False):
1841
"""Remove the indices for pack from the aggregated indices.
1843
:param ignore_missing: Suppress KeyErrors from calling remove_index.
1845
for index_type in Pack.index_definitions.keys():
1846
attr_name = index_type + '_index'
1847
aggregate_index = getattr(self, attr_name)
1848
if aggregate_index is not None:
1849
pack_index = getattr(pack, attr_name)
1851
aggregate_index.remove_index(pack_index)
1713
def _remove_pack_indices(self, pack):
1714
"""Remove the indices for pack from the aggregated indices."""
1715
self.revision_index.remove_index(pack.revision_index, pack)
1716
self.inventory_index.remove_index(pack.inventory_index, pack)
1717
self.text_index.remove_index(pack.text_index, pack)
1718
self.signature_index.remove_index(pack.signature_index, pack)
1857
1720
def reset(self):
1858
1721
"""Clear all cached data."""
1859
1722
# cached revision data
1723
self.repo._revision_knit = None
1860
1724
self.revision_index.clear()
1861
1725
# cached signature data
1726
self.repo._signature_knit = None
1862
1727
self.signature_index.clear()
1863
1728
# cached file text data
1864
1729
self.text_index.clear()
1730
self.repo._text_knit = None
1865
1731
# cached inventory data
1866
1732
self.inventory_index.clear()
1868
if self.chk_index is not None:
1869
self.chk_index.clear()
1870
1733
# remove the open pack
1871
1734
self._new_pack = None
1872
1735
# information about packs.
1967
1829
:param clear_obsolete_packs: If True, clear out the contents of the
1968
1830
obsolete_packs directory.
1969
:param obsolete_packs: Packs that are obsolete once the new pack-names
1970
file has been written.
1971
:return: A list of the names saved that were not previously on disk.
1973
already_obsolete = []
1974
1832
self.lock_names()
1976
1834
builder = self._index_builder_class()
1977
(disk_nodes, deleted_nodes, new_nodes,
1978
orig_disk_nodes) = self._diff_pack_names()
1835
disk_nodes, deleted_nodes, new_nodes = self._diff_pack_names()
1979
1836
# TODO: handle same-name, index-size-changes here -
1980
1837
# e.g. use the value from disk, not ours, *unless* we're the one
1983
1840
builder.add_node(key, value)
1984
1841
self.transport.put_file('pack-names', builder.finish(),
1985
1842
mode=self.repo.bzrdir._get_file_mode())
1843
# move the baseline forward
1986
1844
self._packs_at_load = disk_nodes
1987
1845
if clear_obsolete_packs:
1990
to_preserve = set([o.name for o in obsolete_packs])
1991
already_obsolete = self._clear_obsolete_packs(to_preserve)
1846
self._clear_obsolete_packs()
1993
1848
self._unlock_names()
1994
1849
# synchronise the memory packs list with what we just wrote:
1995
1850
self._syncronize_pack_names_from_disk_nodes(disk_nodes)
1997
# TODO: We could add one more condition here. "if o.name not in
1998
# orig_disk_nodes and o != the new_pack we haven't written to
1999
# disk yet. However, the new pack object is not easily
2000
# accessible here (it would have to be passed through the
2001
# autopacking code, etc.)
2002
obsolete_packs = [o for o in obsolete_packs
2003
if o.name not in already_obsolete]
2004
self._obsolete_packs(obsolete_packs)
2005
return [new_node[0][0] for new_node in new_nodes]
2007
1852
def reload_pack_names(self):
2008
1853
"""Sync our pack listing with what is present in the repository.
2024
1869
# out the new value.
2025
(disk_nodes, deleted_nodes, new_nodes,
2026
orig_disk_nodes) = self._diff_pack_names()
2027
# _packs_at_load is meant to be the explicit list of names in
2028
# 'pack-names' at then start. As such, it should not contain any
2029
# pending names that haven't been written out yet.
2030
self._packs_at_load = orig_disk_nodes
1870
disk_nodes, _, _ = self._diff_pack_names()
1871
self._packs_at_load = disk_nodes
2031
1872
(removed, added,
2032
1873
modified) = self._syncronize_pack_names_from_disk_nodes(disk_nodes)
2033
1874
if removed or added or modified:
2043
1884
raise errors.RetryAutopack(self.repo, False, sys.exc_info())
2045
def _clear_obsolete_packs(self, preserve=None):
1886
def _clear_obsolete_packs(self):
2046
1887
"""Delete everything from the obsolete-packs directory.
2048
:return: A list of pack identifiers (the filename without '.pack') that
2049
were found in obsolete_packs.
2052
1889
obsolete_pack_transport = self.transport.clone('obsolete_packs')
2053
if preserve is None:
2055
1890
for filename in obsolete_pack_transport.list_dir('.'):
2056
name, ext = osutils.splitext(filename)
2059
if name in preserve:
2062
1892
obsolete_pack_transport.delete(filename)
2063
1893
except (errors.PathError, errors.TransportError), e:
2064
warning("couldn't delete obsolete pack, skipping it:\n%s"
1894
warning("couldn't delete obsolete pack, skipping it:\n%s" % (e,))
2068
1896
def _start_write_group(self):
2069
1897
# Do not permit preparation for writing if we're not in a 'write lock'.
2070
1898
if not self.repo.is_write_locked():
2071
1899
raise errors.NotWriteLocked(self)
2072
self._new_pack = self.pack_factory(self, upload_suffix='.pack',
1900
self._new_pack = NewPack(self, upload_suffix='.pack',
2073
1901
file_mode=self.repo.bzrdir._get_file_mode())
2074
1902
# allow writing: queue writes to a new index
2075
1903
self.revision_index.add_writable_index(self._new_pack.revision_index,
2078
1906
self._new_pack)
2079
1907
self.text_index.add_writable_index(self._new_pack.text_index,
2080
1908
self._new_pack)
2081
self._new_pack.text_index.set_optimize(combine_backing_indices=False)
2082
1909
self.signature_index.add_writable_index(self._new_pack.signature_index,
2083
1910
self._new_pack)
2084
if self.chk_index is not None:
2085
self.chk_index.add_writable_index(self._new_pack.chk_index,
2087
self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback
2088
self._new_pack.chk_index.set_optimize(combine_backing_indices=False)
2090
1912
self.repo.inventories._index._add_callback = self.inventory_index.add_callback
2091
1913
self.repo.revisions._index._add_callback = self.revision_index.add_callback
2096
1918
# FIXME: just drop the transient index.
2097
1919
# forget what names there are
2098
1920
if self._new_pack is not None:
2099
operation = cleanup.OperationWithCleanups(self._new_pack.abort)
2100
operation.add_cleanup(setattr, self, '_new_pack', None)
2101
# If we aborted while in the middle of finishing the write
2102
# group, _remove_pack_indices could fail because the indexes are
2103
# already gone. But they're not there we shouldn't fail in this
2104
# case, so we pass ignore_missing=True.
2105
operation.add_cleanup(self._remove_pack_indices, self._new_pack,
2106
ignore_missing=True)
2107
operation.run_simple()
1922
self._new_pack.abort()
1924
# XXX: If we aborted while in the middle of finishing the write
1925
# group, _remove_pack_indices can fail because the indexes are
1926
# already gone. If they're not there we shouldn't fail in this
1927
# case. -- mbp 20081113
1928
self._remove_pack_indices(self._new_pack)
1929
self._new_pack = None
2108
1930
for resumed_pack in self._resumed_packs:
2109
operation = cleanup.OperationWithCleanups(resumed_pack.abort)
2110
# See comment in previous finally block.
2111
operation.add_cleanup(self._remove_pack_indices, resumed_pack,
2112
ignore_missing=True)
2113
operation.run_simple()
1932
resumed_pack.abort()
1934
# See comment in previous finally block.
1936
self._remove_pack_indices(resumed_pack)
2114
1939
del self._resumed_packs[:]
1940
self.repo._text_knit = None
2116
1942
def _remove_resumed_pack_indices(self):
2117
1943
for resumed_pack in self._resumed_packs:
2118
1944
self._remove_pack_indices(resumed_pack)
2119
1945
del self._resumed_packs[:]
2121
def _check_new_inventories(self):
2122
"""Detect missing inventories in this write group.
2124
:returns: list of strs, summarising any problems found. If the list is
2125
empty no problems were found.
2127
# The base implementation does no checks. GCRepositoryPackCollection
2131
1947
def _commit_write_group(self):
2132
1948
all_missing = set()
2133
1949
for prefix, versioned_file in (
2142
1958
raise errors.BzrCheckError(
2143
1959
"Repository %s has missing compression parent(s) %r "
2144
1960
% (self.repo, sorted(all_missing)))
2145
problems = self._check_new_inventories()
2147
problems_summary = '\n'.join(problems)
2148
raise errors.BzrCheckError(
2149
"Cannot add revision(s) to repository: " + problems_summary)
2150
1961
self._remove_pack_indices(self._new_pack)
2151
any_new_content = False
1962
should_autopack = False
2152
1963
if self._new_pack.data_inserted():
2153
1964
# get all the data to disk and read to use
2154
1965
self._new_pack.finish()
2155
1966
self.allocate(self._new_pack)
2156
1967
self._new_pack = None
2157
any_new_content = True
1968
should_autopack = True
2159
1970
self._new_pack.abort()
2160
1971
self._new_pack = None
2242
2050
self.revisions = KnitVersionedFiles(
2243
2051
_KnitGraphIndex(self._pack_collection.revision_index.combined_index,
2244
2052
add_callback=self._pack_collection.revision_index.add_callback,
2245
deltas=False, parents=True, is_locked=self.is_locked,
2246
track_external_parent_refs=True),
2053
deltas=False, parents=True, is_locked=self.is_locked),
2247
2054
data_access=self._pack_collection.revision_index.data_access,
2248
2055
max_delta_chain=0)
2249
2056
self.signatures = KnitVersionedFiles(
2258
2065
deltas=True, parents=True, is_locked=self.is_locked),
2259
2066
data_access=self._pack_collection.text_index.data_access,
2260
2067
max_delta_chain=200)
2261
if _format.supports_chks:
2262
# No graph, no compression:- references from chks are between
2263
# different objects not temporal versions of the same; and without
2264
# some sort of temporal structure knit compression will just fail.
2265
self.chk_bytes = KnitVersionedFiles(
2266
_KnitGraphIndex(self._pack_collection.chk_index.combined_index,
2267
add_callback=self._pack_collection.chk_index.add_callback,
2268
deltas=False, parents=False, is_locked=self.is_locked),
2269
data_access=self._pack_collection.chk_index.data_access,
2272
self.chk_bytes = None
2273
2068
# True when the repository object is 'write locked' (as opposed to the
2274
2069
# physical lock only taken out around changes to the pack-names list.)
2275
2070
# Another way to represent this would be a decorator around the control
2282
2077
self._reconcile_fixes_text_parents = True
2283
2078
self._reconcile_backsup_inventory = False
2285
def _warn_if_deprecated(self, branch=None):
2080
def _warn_if_deprecated(self):
2286
2081
# This class isn't deprecated, but one sub-format is
2287
2082
if isinstance(self._format, RepositoryFormatKnitPack5RichRootBroken):
2288
super(KnitPackRepository, self)._warn_if_deprecated(branch)
2083
from bzrlib import repository
2084
if repository._deprecation_warning_done:
2086
repository._deprecation_warning_done = True
2087
warning("Format %s for %s is deprecated - please use"
2088
" 'bzr upgrade --1.6.1-rich-root'"
2089
% (self._format, self.bzrdir.transport.base))
2290
2091
def _abort_write_group(self):
2291
self.revisions._index._key_dependencies.clear()
2292
2092
self._pack_collection._abort_write_group()
2294
def _get_source(self, to_format):
2295
if to_format.network_name() == self._format.network_name():
2296
return KnitPackStreamSource(self, to_format)
2297
return super(KnitPackRepository, self)._get_source(to_format)
2094
def _find_inconsistent_revision_parents(self):
2095
"""Find revisions with incorrectly cached parents.
2097
:returns: an iterator yielding tuples of (revison-id, parents-in-index,
2098
parents-in-revision).
2100
if not self.is_locked():
2101
raise errors.ObjectNotLocked(self)
2102
pb = ui.ui_factory.nested_progress_bar()
2105
revision_nodes = self._pack_collection.revision_index \
2106
.combined_index.iter_all_entries()
2107
index_positions = []
2108
# Get the cached index values for all revisions, and also the location
2109
# in each index of the revision text so we can perform linear IO.
2110
for index, key, value, refs in revision_nodes:
2111
pos, length = value[1:].split(' ')
2112
index_positions.append((index, int(pos), key[0],
2113
tuple(parent[0] for parent in refs[0])))
2114
pb.update("Reading revision index", 0, 0)
2115
index_positions.sort()
2116
batch_count = len(index_positions) / 1000 + 1
2117
pb.update("Checking cached revision graph", 0, batch_count)
2118
for offset in xrange(batch_count):
2119
pb.update("Checking cached revision graph", offset)
2120
to_query = index_positions[offset * 1000:(offset + 1) * 1000]
2123
rev_ids = [item[2] for item in to_query]
2124
revs = self.get_revisions(rev_ids)
2125
for revision, item in zip(revs, to_query):
2126
index_parents = item[3]
2127
rev_parents = tuple(revision.parent_ids)
2128
if index_parents != rev_parents:
2129
result.append((revision.revision_id, index_parents, rev_parents))
2299
2134
def _make_parents_provider(self):
2300
2135
return graph.CachingParentsProvider(self)
2308
2143
self._pack_collection._start_write_group()
2310
2145
def _commit_write_group(self):
2311
hint = self._pack_collection._commit_write_group()
2312
self.revisions._index._key_dependencies.clear()
2146
return self._pack_collection._commit_write_group()
2315
2148
def suspend_write_group(self):
2316
2149
# XXX check self._write_group is self.get_transaction()?
2317
2150
tokens = self._pack_collection._suspend_write_group()
2318
self.revisions._index._key_dependencies.clear()
2319
2151
self._write_group = None
2322
2154
def _resume_write_group(self, tokens):
2323
2155
self._start_write_group()
2325
self._pack_collection._resume_write_group(tokens)
2326
except errors.UnresumableWriteGroup:
2327
self._abort_write_group()
2329
for pack in self._pack_collection._resumed_packs:
2330
self.revisions._index.scan_unvalidated_index(pack.revision_index)
2156
self._pack_collection._resume_write_group(tokens)
2332
2158
def get_transaction(self):
2333
2159
if self._write_lock_count:
2342
2168
return self._write_lock_count
2344
2170
def lock_write(self, token=None):
2345
"""Lock the repository for writes.
2347
:return: A bzrlib.repository.RepositoryWriteLockResult.
2349
2171
locked = self.is_locked()
2350
2172
if not self._write_lock_count and locked:
2351
2173
raise errors.ReadOnlyError(self)
2352
2174
self._write_lock_count += 1
2353
2175
if self._write_lock_count == 1:
2354
2176
self._transaction = transactions.WriteTransaction()
2356
if 'relock' in debug.debug_flags and self._prev_lock == 'w':
2357
note('%r was write locked again', self)
2358
self._prev_lock = 'w'
2359
2177
for repo in self._fallback_repositories:
2360
2178
# Writes don't affect fallback repos
2361
2179
repo.lock_read()
2362
2181
self._refresh_data()
2363
return RepositoryWriteLockResult(self.unlock, None)
2365
2183
def lock_read(self):
2366
"""Lock the repository for reads.
2368
:return: A bzrlib.lock.LogicalLockResult.
2370
2184
locked = self.is_locked()
2371
2185
if self._write_lock_count:
2372
2186
self._write_lock_count += 1
2374
2188
self.control_files.lock_read()
2376
if 'relock' in debug.debug_flags and self._prev_lock == 'r':
2377
note('%r was read locked again', self)
2378
self._prev_lock = 'r'
2379
2189
for repo in self._fallback_repositories:
2190
# Writes don't affect fallback repos
2380
2191
repo.lock_read()
2381
2193
self._refresh_data()
2382
return LogicalLockResult(self.unlock)
2384
2195
def leave_lock_in_place(self):
2385
2196
# not supported - raise an error
2425
2231
transaction = self._transaction
2426
2232
self._transaction = None
2427
2233
transaction.finish()
2234
for repo in self._fallback_repositories:
2429
2237
self.control_files.unlock()
2431
if not self.is_locked():
2432
2238
for repo in self._fallback_repositories:
2436
class KnitPackStreamSource(StreamSource):
2437
"""A StreamSource used to transfer data between same-format KnitPack repos.
2439
This source assumes:
2440
1) Same serialization format for all objects
2441
2) Same root information
2442
3) XML format inventories
2443
4) Atomic inserts (so we can stream inventory texts before text
2448
def __init__(self, from_repository, to_format):
2449
super(KnitPackStreamSource, self).__init__(from_repository, to_format)
2450
self._text_keys = None
2451
self._text_fetch_order = 'unordered'
2453
def _get_filtered_inv_stream(self, revision_ids):
2454
from_repo = self.from_repository
2455
parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
2456
parent_keys = [(p,) for p in parent_ids]
2457
find_text_keys = from_repo._find_text_key_references_from_xml_inventory_lines
2458
parent_text_keys = set(find_text_keys(
2459
from_repo._inventory_xml_lines_for_keys(parent_keys)))
2460
content_text_keys = set()
2461
knit = KnitVersionedFiles(None, None)
2462
factory = KnitPlainFactory()
2463
def find_text_keys_from_content(record):
2464
if record.storage_kind not in ('knit-delta-gz', 'knit-ft-gz'):
2465
raise ValueError("Unknown content storage kind for"
2466
" inventory text: %s" % (record.storage_kind,))
2467
# It's a knit record, it has a _raw_record field (even if it was
2468
# reconstituted from a network stream).
2469
raw_data = record._raw_record
2470
# read the entire thing
2471
revision_id = record.key[-1]
2472
content, _ = knit._parse_record(revision_id, raw_data)
2473
if record.storage_kind == 'knit-delta-gz':
2474
line_iterator = factory.get_linedelta_content(content)
2475
elif record.storage_kind == 'knit-ft-gz':
2476
line_iterator = factory.get_fulltext_content(content)
2477
content_text_keys.update(find_text_keys(
2478
[(line, revision_id) for line in line_iterator]))
2479
revision_keys = [(r,) for r in revision_ids]
2480
def _filtered_inv_stream():
2481
source_vf = from_repo.inventories
2482
stream = source_vf.get_record_stream(revision_keys,
2484
for record in stream:
2485
if record.storage_kind == 'absent':
2486
raise errors.NoSuchRevision(from_repo, record.key)
2487
find_text_keys_from_content(record)
2489
self._text_keys = content_text_keys - parent_text_keys
2490
return ('inventories', _filtered_inv_stream())
2492
def _get_text_stream(self):
2493
# Note: We know we don't have to handle adding root keys, because both
2494
# the source and target are the identical network name.
2495
text_stream = self.from_repository.texts.get_record_stream(
2496
self._text_keys, self._text_fetch_order, False)
2497
return ('texts', text_stream)
2499
def get_stream(self, search):
2500
revision_ids = search.get_keys()
2501
for stream_info in self._fetch_revision_texts(revision_ids):
2503
self._revision_keys = [(rev_id,) for rev_id in revision_ids]
2504
yield self._get_filtered_inv_stream(revision_ids)
2505
yield self._get_text_stream()
2509
2242
class RepositoryFormatPack(MetaDirRepositoryFormat):
2510
2243
"""Format logic for pack structured repositories.
2558
2289
utf8_files = [('format', self.get_format_string())]
2560
2291
self._upload_blank_content(a_bzrdir, dirs, files, utf8_files, shared)
2561
repository = self.open(a_bzrdir=a_bzrdir, _found=True)
2562
self._run_post_repo_init_hooks(repository, a_bzrdir, shared)
2292
return self.open(a_bzrdir=a_bzrdir, _found=True)
2565
2294
def open(self, a_bzrdir, _found=False, _override_transport=None):
2566
2295
"""See RepositoryFormat.open().
2648
2379
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2381
def check_conversion_target(self, target_format):
2382
if not target_format.rich_root_data:
2383
raise errors.BadConversionTarget(
2384
'Does not support rich root data.', target_format)
2385
if not getattr(target_format, 'supports_tree_reference', False):
2386
raise errors.BadConversionTarget(
2387
'Does not support nested trees', target_format)
2650
2389
def get_format_string(self):
2651
2390
"""See RepositoryFormat.get_format_string()."""
2652
2391
return "Bazaar pack repository format 1 with subtree support (needs bzr 0.92)\n"
2889
2654
return "Packs 6 rich-root (uses btree indexes, requires bzr 1.9)"
2657
class RepositoryFormatPackDevelopment2(RepositoryFormatPack):
2658
"""A no-subtrees development repository.
2660
This format should be retained until the second release after bzr 1.7.
2662
This is pack-1.6.1 with B+Tree indices.
2665
repository_class = KnitPackRepository
2666
_commit_builder_class = PackCommitBuilder
2667
supports_external_lookups = True
2668
# What index classes to use
2669
index_builder_class = BTreeBuilder
2670
index_class = BTreeGraphIndex
2671
# Set to true to get the fast-commit code path tested until a really fast
2672
# format lands in trunk. Not actually fast in this format.
2676
def _serializer(self):
2677
return xml5.serializer_v5
2679
def _get_matching_bzrdir(self):
2680
return bzrdir.format_registry.make_bzrdir('development2')
2682
def _ignore_setting_bzrdir(self, format):
2685
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2687
def get_format_string(self):
2688
"""See RepositoryFormat.get_format_string()."""
2689
return "Bazaar development format 2 (needs bzr.dev from before 1.8)\n"
2691
def get_format_description(self):
2692
"""See RepositoryFormat.get_format_description()."""
2693
return ("Development repository format, currently the same as "
2694
"1.6.1 with B+Trees.\n")
2696
def check_conversion_target(self, target_format):
2892
2700
class RepositoryFormatPackDevelopment2Subtree(RepositoryFormatPack):
2893
2701
"""A subtrees development repository.
2895
2703
This format should be retained until the second release after bzr 1.7.
2897
2705
1.6.1-subtree[as it might have been] with B+Tree indices.
2899
This is [now] retained until we have a CHK based subtree format in
2903
2708
repository_class = KnitPackRepository
2904
2709
_commit_builder_class = PackRootCommitBuilder
2905
2710
rich_root_data = True
2907
2711
supports_tree_reference = True
2908
2712
supports_external_lookups = True
2909
2713
# What index classes to use
2917
2721
def _get_matching_bzrdir(self):
2918
2722
return bzrdir.format_registry.make_bzrdir(
2919
'development-subtree')
2723
'development2-subtree')
2921
2725
def _ignore_setting_bzrdir(self, format):
2924
2728
_matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
2730
def check_conversion_target(self, target_format):
2731
if not target_format.rich_root_data:
2732
raise errors.BadConversionTarget(
2733
'Does not support rich root data.', target_format)
2734
if not getattr(target_format, 'supports_tree_reference', False):
2735
raise errors.BadConversionTarget(
2736
'Does not support nested trees', target_format)
2926
2738
def get_format_string(self):
2927
2739
"""See RepositoryFormat.get_format_string()."""
2928
2740
return ("Bazaar development format 2 with subtree support "