1384
1397
raise KnitCorrupt(self, "Cannot do delta compression without "
1385
1398
"parent tracking.")
1387
def _get_entries(self, version_ids, check_present=False):
1388
"""Get the entries for version_ids."""
1389
version_ids = set(version_ids)
1400
def _get_entries(self, keys, check_present=False):
1401
"""Get the entries for keys.
1403
:param keys: An iterable of index keys, - 1-tuples.
1390
1406
found_keys = set()
1391
1407
if self._parents:
1392
for node in self._graph_index.iter_entries(version_ids):
1408
for node in self._graph_index.iter_entries(keys):
1394
found_keys.add(node[0])
1410
found_keys.add(node[1])
1396
1412
# adapt parentless index to the rest of the code.
1397
for node in self._graph_index.iter_entries(version_ids):
1398
yield node[0], node[1], ()
1399
found_keys.add(node[0])
1413
for node in self._graph_index.iter_entries(keys):
1414
yield node[0], node[1], node[2], ()
1415
found_keys.add(node[1])
1400
1416
if check_present:
1401
missing_keys = version_ids.difference(found_keys)
1417
missing_keys = keys.difference(found_keys)
1402
1418
if missing_keys:
1403
1419
raise RevisionNotPresent(missing_keys.pop(), self)
1405
1421
def _present_keys(self, version_ids):
1407
node[0] for node in self._get_entries(version_ids)])
1423
node[1] for node in self._get_entries(version_ids)])
1409
1425
def _parentless_ancestry(self, versions):
1410
1426
"""Honour the get_ancestry API for parentless knit indices."""
1411
present_keys = self._present_keys(versions)
1412
missing = set(versions).difference(present_keys)
1427
wanted_keys = self._version_ids_to_keys(versions)
1428
present_keys = self._present_keys(wanted_keys)
1429
missing = set(wanted_keys).difference(present_keys)
1414
1431
raise RevisionNotPresent(missing.pop(), self)
1415
return list(present_keys)
1432
return list(self._keys_to_version_ids(present_keys))
1417
1434
def get_ancestry(self, versions, topo_sorted=True):
1418
1435
"""See VersionedFile.get_ancestry."""
1521
1550
def get_versions(self):
1522
1551
"""Get all the versions in the file. not topologically sorted."""
1523
return [node[0] for node in self._graph_index.iter_all_entries()]
1552
return [node[1][0] for node in self._graph_index.iter_all_entries()]
1525
1554
def has_version(self, version_id):
1526
1555
"""True if the version is in the index."""
1527
return len(self._present_keys([version_id])) == 1
1556
return len(self._present_keys(self._version_ids_to_keys([version_id]))) == 1
1558
def _keys_to_version_ids(self, keys):
1559
return tuple(key[0] for key in keys)
1529
1561
def get_position(self, version_id):
1530
"""Return data position and size of specified version."""
1531
bits = self._get_node(version_id)[1][1:].split(' ')
1532
return int(bits[0]), int(bits[1])
1562
"""Return details needed to access the version.
1564
:return: a tuple (index, data position, size) to hand to the access
1565
logic to get the record.
1567
node = self._get_node(version_id)
1568
bits = node[2][1:].split(' ')
1569
return node[0], int(bits[0]), int(bits[1])
1534
1571
def get_method(self, version_id):
1535
1572
"""Return compression method of specified version."""
1536
1573
if not self._deltas:
1537
1574
return 'fulltext'
1538
return self._parent_compression(self._get_node(version_id)[2][1])
1575
return self._parent_compression(self._get_node(version_id)[3][1])
1540
1577
def _parent_compression(self, reference_list):
1541
1578
# use the second reference list to decide if this is delta'd or not.
1572
1609
def get_parents_with_ghosts(self, version_id):
1573
1610
"""Return parents of specified version with ghosts."""
1574
nodes = list(self._get_entries([version_id], check_present=True))
1611
nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),
1612
check_present=True))
1575
1613
if not self._parents:
1577
return nodes[0][2][0]
1615
return self._keys_to_version_ids(nodes[0][3][0])
1579
1617
def check_versions_present(self, version_ids):
1580
1618
"""Check that all specified versions are present."""
1581
version_ids = set(version_ids)
1582
present = self._present_keys(version_ids)
1583
missing = version_ids.difference(present)
1619
keys = self._version_ids_to_keys(version_ids)
1620
present = self._present_keys(keys)
1621
missing = keys.difference(present)
1585
1623
raise RevisionNotPresent(missing.pop(), self)
1587
def add_version(self, version_id, options, pos, size, parents):
1625
def add_version(self, version_id, options, access_memo, parents):
1588
1626
"""Add a version record to the index."""
1589
return self.add_versions(((version_id, options, pos, size, parents),))
1627
return self.add_versions(((version_id, options, access_memo, parents),))
1591
1629
def add_versions(self, versions):
1592
1630
"""Add multiple versions to the index.
1644
1685
result.append((key, value))
1645
1686
self._add_callback(result)
1648
class _KnitData(_KnitComponentFile):
1649
"""Contents of the knit data file"""
1651
def __init__(self, transport, filename, mode, create=False, file_mode=None,
1652
create_parent_dir=False, delay_create=False,
1654
_KnitComponentFile.__init__(self, transport, filename, mode,
1655
file_mode=file_mode,
1656
create_parent_dir=create_parent_dir,
1688
def _version_ids_to_keys(self, version_ids):
1689
return set((version_id, ) for version_id in version_ids)
1692
class _KnitAccess(object):
1693
"""Access to knit records in a .knit file."""
1695
def __init__(self, transport, filename, _file_mode, _dir_mode,
1696
_need_to_create, _create_parent_dir):
1697
"""Create a _KnitAccess for accessing and inserting data.
1699
:param transport: The transport the .knit is located on.
1700
:param filename: The filename of the .knit.
1702
self._transport = transport
1703
self._filename = filename
1704
self._file_mode = _file_mode
1705
self._dir_mode = _dir_mode
1706
self._need_to_create = _need_to_create
1707
self._create_parent_dir = _create_parent_dir
1709
def add_raw_records(self, sizes, raw_data):
1710
"""Add raw knit bytes to a storage area.
1712
The data is spooled to whereever the access method is storing data.
1714
:param sizes: An iterable containing the size of each raw data segment.
1715
:param raw_data: A bytestring containing the data.
1716
:return: A list of memos to retrieve the record later. Each memo is a
1717
tuple - (index, pos, length), where the index field is always None
1718
for the .knit access method.
1720
assert type(raw_data) == str, \
1721
'data must be plain bytes was %s' % type(raw_data)
1722
if not self._need_to_create:
1723
base = self._transport.append_bytes(self._filename, raw_data)
1725
self._transport.put_bytes_non_atomic(self._filename, raw_data,
1726
create_parent_dir=self._create_parent_dir,
1727
mode=self._file_mode,
1728
dir_mode=self._dir_mode)
1729
self._need_to_create = False
1733
result.append((None, base, size))
1738
"""IFF this data access has its own storage area, initialise it.
1742
self._transport.put_bytes_non_atomic(self._filename, '',
1743
mode=self._file_mode)
1745
def open_file(self):
1746
"""IFF this data access can be represented as a single file, open it.
1748
For knits that are not mapped to a single file on disk this will
1751
:return: None or a file handle.
1754
return self._transport.get(self._filename)
1759
def get_raw_records(self, memos_for_retrieval):
1760
"""Get the raw bytes for a records.
1762
:param memos_for_retrieval: An iterable containing the (index, pos,
1763
length) memo for retrieving the bytes. The .knit method ignores
1764
the index as there is always only a single file.
1765
:return: An iterator over the bytes of the records.
1767
read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]
1768
for pos, data in self._transport.readv(self._filename, read_vector):
1772
class _PackAccess(object):
1773
"""Access to knit records via a collection of packs."""
1775
def __init__(self, index_to_packs, writer=None):
1776
"""Create a _PackAccess object.
1778
:param index_to_packs: A dict mapping index objects to the transport
1779
and file names for obtaining data.
1780
:param writer: A tuple (pack.ContainerWriter, write_index) which
1781
contains the pack to write, and the index that reads from it will
1785
self.container_writer = writer[0]
1786
self.write_index = writer[1]
1788
self.container_writer = None
1789
self.write_index = None
1790
self.indices = index_to_packs
1792
def add_raw_records(self, sizes, raw_data):
1793
"""Add raw knit bytes to a storage area.
1795
The data is spooled to the container writer in one bytes-record per
1798
:param sizes: An iterable containing the size of each raw data segment.
1799
:param raw_data: A bytestring containing the data.
1800
:return: A list of memos to retrieve the record later. Each memo is a
1801
tuple - (index, pos, length), where the index field is the
1802
write_index object supplied to the PackAccess object.
1804
assert type(raw_data) == str, \
1805
'data must be plain bytes was %s' % type(raw_data)
1809
p_offset, p_length = self.container_writer.add_bytes_record(
1810
raw_data[offset:offset+size], [])
1812
result.append((self.write_index, p_offset, p_length))
1816
"""Pack based knits do not get individually created."""
1818
def get_raw_records(self, memos_for_retrieval):
1819
"""Get the raw bytes for a records.
1821
:param memos_for_retrieval: An iterable containing the (index, pos,
1822
length) memo for retrieving the bytes. The Pack access method
1823
looks up the pack to use for a given record in its index_to_pack
1825
:return: An iterator over the bytes of the records.
1827
# first pass, group into same-index requests
1829
current_index = None
1830
for (index, offset, length) in memos_for_retrieval:
1831
if current_index == index:
1832
current_list.append((offset, length))
1834
if current_index is not None:
1835
request_lists.append((current_index, current_list))
1836
current_index = index
1837
current_list = [(offset, length)]
1838
# handle the last entry
1839
if current_index is not None:
1840
request_lists.append((current_index, current_list))
1841
for index, offsets in request_lists:
1842
transport, path = self.indices[index]
1843
reader = pack.make_readv_reader(transport, path, offsets)
1844
for names, read_func in reader.iter_records():
1845
yield read_func(None)
1847
def open_file(self):
1848
"""Pack based knits have no single file."""
1851
def set_writer(self, writer, index, (transport, packname)):
1852
"""Set a writer to use for adding data."""
1853
self.indices[index] = (transport, packname)
1854
self.container_writer = writer
1855
self.write_index = index
1858
class _KnitData(object):
1859
"""Manage extraction of data from a KnitAccess, caching and decompressing.
1861
The KnitData class provides the logic for parsing and using knit records,
1862
making use of an access method for the low level read and write operations.
1865
def __init__(self, access):
1866
"""Create a KnitData object.
1868
:param access: The access method to use. Access methods such as
1869
_KnitAccess manage the insertion of raw records and the subsequent
1870
retrieval of the same.
1872
self._access = access
1658
1873
self._checked = False
1659
1874
# TODO: jam 20060713 conceptually, this could spill to disk
1660
1875
# if the cached size gets larger than a certain amount
1707
1912
return length, sio
1709
def add_raw_record(self, raw_data):
1914
def add_raw_records(self, sizes, raw_data):
1710
1915
"""Append a prepared record to the data file.
1712
:return: the offset in the data file raw_data was written.
1917
:param sizes: An iterable containing the size of each raw data segment.
1918
:param raw_data: A bytestring containing the data.
1919
:return: a list of index data for the way the data was stored.
1920
See the access method add_raw_records documentation for more
1714
assert isinstance(raw_data, str), 'data must be plain bytes'
1715
if not self._need_to_create:
1716
return self._transport.append_bytes(self._filename, raw_data)
1718
self._transport.put_bytes_non_atomic(self._filename, raw_data,
1719
create_parent_dir=self._create_parent_dir,
1720
mode=self._file_mode,
1721
dir_mode=self._dir_mode)
1722
self._need_to_create = False
1923
return self._access.add_raw_records(sizes, raw_data)
1725
1925
def add_record(self, version_id, digest, lines):
1726
"""Write new text record to disk. Returns the position in the
1727
file where it was written."""
1926
"""Write new text record to disk.
1928
Returns index data for retrieving it later, as per add_raw_records.
1728
1930
size, sio = self._record_to_data(version_id, digest, lines)
1730
if not self._need_to_create:
1731
start_pos = self._transport.append_file(self._filename, sio)
1733
self._transport.put_file_non_atomic(self._filename, sio,
1734
create_parent_dir=self._create_parent_dir,
1735
mode=self._file_mode,
1736
dir_mode=self._dir_mode)
1737
self._need_to_create = False
1931
result = self.add_raw_records([size], sio.getvalue())
1739
1932
if self._do_cache:
1740
1933
self._cache[version_id] = sio.getvalue()
1741
return start_pos, size
1743
1936
def _parse_record_header(self, version_id, raw_data):
1744
1937
"""Parse a record header for consistency.