    __contains__ = has_version

    def _merge_annotations(self, content, parents, parent_texts={},
                           delta=None, annotated=None,
                           left_matching_blocks=None):
        """Merge annotations for content.  This is done by comparing
        the annotations based on changes to the text.
        """
        if left_matching_blocks is not None:
            delta_seq = diff._PrematchedMatcher(left_matching_blocks)
        else:
            delta_seq = None
        if annotated:
            for parent_id in parents:
                merge_content = self._get_content(parent_id, parent_texts)
                if (parent_id == parents[0] and delta_seq is not None):
                    # Reuse the matching blocks the caller supplied for the
                    # leftmost parent instead of rediffing.
                    seq = delta_seq
                else:
                    seq = patiencediff.PatienceSequenceMatcher(
                        None, merge_content.text(), content.text())
                for i, j, n in seq.get_matching_blocks():
                    # this appears to copy (origin, text) pairs across to the
                    # new content for any line that matches the last-checked
                    # parent.
                    content._lines[j:j+n] = merge_content._lines[i:i+n]
        if delta:
            if delta_seq is None:
                reference_content = self._get_content(parents[0],
                                                      parent_texts)
                new_texts = content.text()
                old_texts = reference_content.text()
                delta_seq = patiencediff.PatienceSequenceMatcher(
                    None, old_texts, new_texts)

    def _version_ids_to_keys(self, version_ids):
        return set((version_id,) for version_id in version_ids)
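

# Illustrative sketch (not part of the original module): the core move in
# _merge_annotations above, isolated.  Lines that match a parent keep that
# parent's (origin, text) pairs; only genuinely new lines need a fresh
# origin.  Assumes a patiencediff module providing PatienceSequenceMatcher
# (bzrlib ships one); _copy_annotations is a hypothetical helper, not
# bzrlib API.
import patiencediff

def _copy_annotations(parent_pairs, new_lines, default_origin):
    parent_lines = [line for origin, line in parent_pairs]
    result = [(default_origin, line) for line in new_lines]
    seq = patiencediff.PatienceSequenceMatcher(None, parent_lines, new_lines)
    for i, j, n in seq.get_matching_blocks():
        # new_lines[j:j+n] == parent_lines[i:i+n], so the parent's origins
        # still apply to those lines.
        result[j:j+n] = parent_pairs[i:i+n]
    return result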


class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
                 _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        """
        self._transport = transport
        self._filename = filename
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to wherever the access method is storing data.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is always None
            for the .knit access method.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        if not self._need_to_create:
            base = self._transport.append_bytes(self._filename, raw_data)
        else:
            self._transport.put_bytes_non_atomic(self._filename, raw_data,
                                   create_parent_dir=self._create_parent_dir,
                                   mode=self._file_mode,
                                   dir_mode=self._dir_mode)
            self._need_to_create = False
            base = 0
        result = []
        for size in sizes:
            result.append((None, base, size))
            base += size
        return result
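
    # Illustrative note (not part of the original module): with 100 bytes
    # already in the file, add_raw_records([10, 20], data) appends 30 bytes
    # of data and returns the memos [(None, 100, 10), (None, 110, 20)].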

    def create(self):
        """IFF this data access has its own storage area, initialise it."""
        self._transport.put_bytes_non_atomic(self._filename, '',
                                             mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        return None.

        :return: None or a file handle.
        """
        return self._transport.get(self._filename)

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for the records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]
        for pos, data in self._transport.readv(self._filename, read_vector):
            yield data
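

# Illustrative sketch (not part of the original module): what the readv-based
# retrieval above amounts to against a single plain file.  _plain_readv is a
# hypothetical helper; a real transport may coalesce or reorder these reads.
def _plain_readv(path, read_vector):
    f = open(path, 'rb')
    try:
        results = []
        for pos, size in read_vector:
            f.seek(pos)
            results.append((pos, f.read(size)))
        return results
    finally:
        f.close()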


class _PackAccess(object):
    """Access to knit records via a collection of packs."""

    def __init__(self, index_to_packs, writer=None):
        """Create a _PackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param writer: A tuple (pack.ContainerWriter, write_index) which
            contains the pack to write, and the index that reads from it will
            be associated with.
        """
        if writer:
            self.container_writer = writer[0]
            self.write_index = writer[1]
        else:
            self.container_writer = None
            self.write_index = None
        self.indices = index_to_packs

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes-record per
        raw data segment.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is the
            write_index object supplied to the PackAccess object.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        result = []
        offset = 0
        for size in sizes:
            p_offset, p_length = self.container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self.write_index, p_offset, p_length))
        return result

    def create(self):
        """Pack based knits do not get individually created."""

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for the records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group into same-index requests
        request_lists = []
        current_index = None
        for (index, offset, length) in memos_for_retrieval:
            if current_index == index:
                current_list.append((offset, length))
            else:
                if current_index is not None:
                    request_lists.append((current_index, current_list))
                current_index = index
                current_list = [(offset, length)]
        # handle the last entry
        if current_index is not None:
            request_lists.append((current_index, current_list))
        for index, offsets in request_lists:
            transport, path = self.indices[index]
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)

    def open_file(self):
        """Pack based knits have no single file."""
        return None

    def set_writer(self, writer, index, (transport, packname)):
        """Set a writer to use for adding data."""
        self.indices[index] = (transport, packname)
        self.container_writer = writer
        self.write_index = index
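

# Illustrative sketch (not part of the original module): the grouping pass in
# _PackAccess.get_raw_records above, isolated so it can be exercised with
# plain tuples.  _group_by_index is a hypothetical helper, not bzrlib API.
def _group_by_index(memos):
    request_lists = []
    current_index = None
    current_list = []
    for index, offset, length in memos:
        if current_index == index:
            current_list.append((offset, length))
        else:
            if current_index is not None:
                request_lists.append((current_index, current_list))
            current_index = index
            current_list = [(offset, length)]
    if current_index is not None:
        request_lists.append((current_index, current_list))
    return request_lists

# Only adjacent memos sharing an index coalesce, preserving read order:
# _group_by_index([('a', 0, 5), ('a', 5, 5), ('b', 0, 2)])
# => [('a', [(0, 5), (5, 5)]), ('b', [(0, 2)])]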


class _KnitData(object):
    """Manage extraction of data from a KnitAccess, caching and decompressing.

    The KnitData class provides the logic for parsing and using knit records,
    making use of an access method for the low level read and write operations.
    """

    def __init__(self, access):
        """Create a KnitData object.

        :param access: The access method to use. Access methods such as
            _KnitAccess manage the insertion of raw records and the subsequent
            retrieval of the same.
        """
        self._access = access
        self._checked = False
        # TODO: jam 20060713 conceptually, this could spill to disk
        #       if the cached size gets larger than a certain amount
        self._cache = {}
        self._do_cache = False

    def _record_to_data(self, version_id, digest, lines):
        # Body elided in this excerpt: it renders the record for version_id
        # (header, digest and the gzipped text lines) into a StringIO,
        # returning its length and the buffer.
        return length, sio

    def add_raw_records(self, sizes, raw_data):
        """Append a prepared record to the data file.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: a list of index data for the way the data was stored.
            See the access method add_raw_records documentation for more
            details.
        """
        return self._access.add_raw_records(sizes, raw_data)
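
    # Illustrative note (not part of the original module): the same _KnitData
    # front end works over either access method; the wiring below is
    # schematic and the argument values are assumptions.
    #
    #   data = _KnitData(_KnitAccess(transport, 'foo.knit', None, None,
    #                                False, False))
    #   data = _KnitData(_PackAccess({index: (transport, 'pack-0')}, writer))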

    def add_record(self, version_id, digest, lines):
        """Write new text record to disk.

        Returns index data for retrieving it later, as per add_raw_records.
        """
        size, sio = self._record_to_data(version_id, digest, lines)
        result = self.add_raw_records([size], sio.getvalue())
        if self._do_cache:
            self._cache[version_id] = sio.getvalue()
        return result[0]

    def _parse_record_header(self, version_id, raw_data):
        """Parse a record header for consistency."""