287
314
self.storage_kind)
317
class LazyKnitContentFactory(ContentFactory):
318
"""A ContentFactory which can either generate full text or a wire form.
320
:seealso ContentFactory:
323
def __init__(self, key, parents, generator, first):
324
"""Create a LazyKnitContentFactory.
326
:param key: The key of the record.
327
:param parents: The parents of the record.
328
:param generator: A _ContentMapGenerator containing the record for this
330
:param first: Is this the first content object returned from generator?
331
if it is, its storage kind is knit-delta-closure, otherwise it is
332
knit-delta-closure-ref
335
self.parents = parents
337
self._generator = generator
338
self.storage_kind = "knit-delta-closure"
340
self.storage_kind = self.storage_kind + "-ref"
343
def get_bytes_as(self, storage_kind):
344
if storage_kind == self.storage_kind:
346
return self._generator._wire_bytes()
348
# all the keys etc are contained in the bytes returned in the
351
if storage_kind in ('chunked', 'fulltext'):
352
chunks = self._generator._get_one_work(self.key).text()
353
if storage_kind == 'chunked':
356
return ''.join(chunks)
357
raise errors.UnavailableRepresentation(self.key, storage_kind,
361
def knit_delta_closure_to_records(storage_kind, bytes, line_end):
362
"""Convert a network record to an iterator over stream records.
364
:param storage_kind: The storage kind of the record.
365
Must be 'knit-delta-closure'.
366
:param bytes: The bytes of the record on the network.
368
generator = _NetworkContentMapGenerator(bytes, line_end)
369
return generator.get_record_stream()
372
def knit_network_to_record(storage_kind, bytes, line_end):
373
"""Convert a network record to a record object.
375
:param storage_kind: The storage kind of the record.
376
:param bytes: The bytes of the record on the network.
379
line_end = bytes.find('\n', start)
380
key = tuple(bytes[start:line_end].split('\x00'))
382
line_end = bytes.find('\n', start)
383
parent_line = bytes[start:line_end]
384
if parent_line == 'None:':
388
[tuple(segment.split('\x00')) for segment in parent_line.split('\t')
391
noeol = bytes[start] == 'N'
392
if 'ft' in storage_kind:
395
method = 'line-delta'
396
build_details = (method, noeol)
398
raw_record = bytes[start:]
399
annotated = 'annotated' in storage_kind
400
return [KnitContentFactory(key, parents, build_details, None, raw_record,
401
annotated, network_bytes=bytes)]
290
404
class KnitContent(object):
291
405
"""Content of a knit version to which deltas can be applied.
986
1100
if not self.get_parent_map([key]):
987
1101
raise RevisionNotPresent(key, self)
988
1102
return cached_version
989
text_map, contents_map = self._get_content_maps([key])
990
return contents_map[key]
992
def _get_content_maps(self, keys, nonlocal_keys=None):
993
"""Produce maps of text and KnitContents
995
:param keys: The keys to produce content maps for.
996
:param nonlocal_keys: An iterable of keys(possibly intersecting keys)
997
which are known to not be in this knit, but rather in one of the
999
:return: (text_map, content_map) where text_map contains the texts for
1000
the requested versions and content_map contains the KnitContents.
1002
# FUTURE: This function could be improved for the 'extract many' case
1003
# by tracking each component and only doing the copy when the number of
1004
# children that need to apply deltas to it is > 1 or it is part of the
1007
multiple_versions = len(keys) != 1
1008
record_map = self._get_record_map(keys, allow_missing=True)
1013
if nonlocal_keys is None:
1014
nonlocal_keys = set()
1016
nonlocal_keys = frozenset(nonlocal_keys)
1017
missing_keys = set(nonlocal_keys)
1018
for source in self._fallback_vfs:
1019
if not missing_keys:
1021
for record in source.get_record_stream(missing_keys,
1023
if record.storage_kind == 'absent':
1025
missing_keys.remove(record.key)
1026
lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
1027
text_map[record.key] = lines
1028
content_map[record.key] = PlainKnitContent(lines, record.key)
1029
if record.key in keys:
1030
final_content[record.key] = content_map[record.key]
1032
if key in nonlocal_keys:
1037
while cursor is not None:
1039
record, record_details, digest, next = record_map[cursor]
1041
raise RevisionNotPresent(cursor, self)
1042
components.append((cursor, record, record_details, digest))
1044
if cursor in content_map:
1045
# no need to plan further back
1046
components.append((cursor, None, None, None))
1050
for (component_id, record, record_details,
1051
digest) in reversed(components):
1052
if component_id in content_map:
1053
content = content_map[component_id]
1055
content, delta = self._factory.parse_record(key[-1],
1056
record, record_details, content,
1057
copy_base_content=multiple_versions)
1058
if multiple_versions:
1059
content_map[component_id] = content
1061
final_content[key] = content
1063
# digest here is the digest from the last applied component.
1064
text = content.text()
1065
actual_sha = sha_strings(text)
1066
if actual_sha != digest:
1067
raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)
1068
text_map[key] = text
1069
return text_map, final_content
1103
generator = _VFContentMapGenerator(self, [key])
1104
return generator._get_content(key)
1071
1106
def get_parent_map(self, keys):
1072
1107
"""Get a map of the graph parents of keys.
1854
class _ContentMapGenerator(object):
1855
"""Generate texts or expose raw deltas for a set of texts."""
1857
def _get_content(self, key):
1858
"""Get the content object for key."""
1859
# Note that _get_content is only called when the _ContentMapGenerator
1860
# has been constructed with just one key requested for reconstruction.
1861
if key in self.nonlocal_keys:
1862
record = self.get_record_stream().next()
1863
# Create a content object on the fly
1864
lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
1865
return PlainKnitContent(lines, record.key)
1867
# local keys we can ask for directly
1868
return self._get_one_work(key)
1870
def get_record_stream(self):
1871
"""Get a record stream for the keys requested during __init__."""
1872
for record in self._work():
1876
"""Produce maps of text and KnitContents as dicts.
1878
:return: (text_map, content_map) where text_map contains the texts for
1879
the requested versions and content_map contains the KnitContents.
1881
# NB: By definition we never need to read remote sources unless texts
1882
# are requested from them: we don't delta across stores - and we
1883
# explicitly do not want to, to prevent data loss situations.
1884
if self.global_map is None:
1885
self.global_map = self.vf.get_parent_map(self.keys)
1886
nonlocal_keys = self.nonlocal_keys
1888
missing_keys = set(nonlocal_keys)
1889
# Read from remote versioned file instances and provide to our caller.
1890
for source in self.vf._fallback_vfs:
1891
if not missing_keys:
1893
# Loop over fallback repositories asking them for texts - ignore
1894
# any missing from a particular fallback.
1895
for record in source.get_record_stream(missing_keys,
1897
if record.storage_kind == 'absent':
1898
# Not in this particular stream, may be in one of the
1899
# other fallback vfs objects.
1901
missing_keys.remove(record.key)
1904
self._raw_record_map = self.vf._get_record_map_unparsed(self.keys,
1907
for key in self.keys:
1908
if key in self.nonlocal_keys:
1910
yield LazyKnitContentFactory(key, self.global_map[key], self, first)
1913
def _get_one_work(self, requested_key):
1914
# Now, if we have calculated everything already, just return the
1916
if requested_key in self._contents_map:
1917
return self._contents_map[requested_key]
1918
# To simplify things, parse everything at once - code that wants one text
1919
# probably wants them all.
1920
# FUTURE: This function could be improved for the 'extract many' case
1921
# by tracking each component and only doing the copy when the number of
1922
# children that need to apply deltas to it is > 1 or it is part of the
1924
multiple_versions = len(self.keys) != 1
1925
if self._record_map is None:
1926
self._record_map = self.vf._raw_map_to_record_map(
1927
self._raw_record_map)
1928
record_map = self._record_map
1929
# raw_record_map is key:
1930
# Have read and parsed records at this point.
1931
for key in self.keys:
1932
if key in self.nonlocal_keys:
1937
while cursor is not None:
1939
record, record_details, digest, next = record_map[cursor]
1941
raise RevisionNotPresent(cursor, self)
1942
components.append((cursor, record, record_details, digest))
1944
if cursor in self._contents_map:
1945
# no need to plan further back
1946
components.append((cursor, None, None, None))
1950
for (component_id, record, record_details,
1951
digest) in reversed(components):
1952
if component_id in self._contents_map:
1953
content = self._contents_map[component_id]
1955
content, delta = self._factory.parse_record(key[-1],
1956
record, record_details, content,
1957
copy_base_content=multiple_versions)
1958
if multiple_versions:
1959
self._contents_map[component_id] = content
1961
# digest here is the digest from the last applied component.
1962
text = content.text()
1963
actual_sha = sha_strings(text)
1964
if actual_sha != digest:
1965
raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)
1966
if multiple_versions:
1967
return self._contents_map[requested_key]
1971
def _wire_bytes(self):
1972
"""Get the bytes to put on the wire for 'key'.
1974
The first collection of bytes asked for returns the serialised
1975
raw_record_map and the additional details (key, parent) for key.
1976
Subsequent calls return just the additional details (key, parent).
1977
The wire storage_kind given for the first key is 'knit-delta-closure',
1978
For subsequent keys it is 'knit-delta-closure-ref'.
1980
:param key: A key from the content generator.
1981
:return: Bytes to put on the wire.
1984
# kind marker for dispatch on the far side,
1985
lines.append('knit-delta-closure')
1987
if self.vf._factory.annotated:
1988
lines.append('annotated')
1991
# then the list of keys
1992
lines.append('\t'.join(['\x00'.join(key) for key in self.keys
1993
if key not in self.nonlocal_keys]))
1994
# then the _raw_record_map in serialised form:
1996
# for each item in the map:
1998
# 1 line with parents if the key is to be yielded (None: for None, '' for ())
1999
# one line with method
2000
# one line with noeol
2001
# one line with next ('' for None)
2002
# one line with byte count of the record bytes
2004
for key, (record_bytes, (method, noeol), next) in \
2005
self._raw_record_map.iteritems():
2006
key_bytes = '\x00'.join(key)
2007
parents = self.global_map.get(key, None)
2009
parent_bytes = 'None:'
2011
parent_bytes = '\t'.join('\x00'.join(key) for key in parents)
2012
method_bytes = method
2018
next_bytes = '\x00'.join(next)
2021
map_byte_list.append('%s\n%s\n%s\n%s\n%s\n%d\n%s' % (
2022
key_bytes, parent_bytes, method_bytes, noeol_bytes, next_bytes,
2023
len(record_bytes), record_bytes))
2024
map_bytes = ''.join(map_byte_list)
2025
lines.append(map_bytes)
2026
bytes = '\n'.join(lines)
2030
class _VFContentMapGenerator(_ContentMapGenerator):
2031
"""Content map generator reading from a VersionedFiles object."""
2033
def __init__(self, versioned_files, keys, nonlocal_keys=None,
2034
global_map=None, raw_record_map=None):
2035
"""Create a _ContentMapGenerator.
2037
:param versioned_files: The versioned files that the texts are being
2039
:param keys: The keys to produce content maps for.
2040
:param nonlocal_keys: An iterable of keys(possibly intersecting keys)
2041
which are known to not be in this knit, but rather in one of the
2043
:param global_map: The result of get_parent_map(keys) (or a supermap).
2044
This is required if get_record_stream() is to be used.
2045
:param raw_record_map: An unparsed raw record map to use for answering
2048
# The vf to source data from
2049
self.vf = versioned_files
2051
self.keys = list(keys)
2052
# Keys known to be in fallback vfs objects
2053
if nonlocal_keys is None:
2054
self.nonlocal_keys = set()
2056
self.nonlocal_keys = frozenset(nonlocal_keys)
2057
# Parents data for keys to be returned in get_record_stream
2058
self.global_map = global_map
2059
# The chunked lists for self.keys in text form
2061
# A cache of KnitContent objects used in extracting texts.
2062
self._contents_map = {}
2063
# All the knit records needed to assemble the requested keys as full
2065
self._record_map = None
2066
if raw_record_map is None:
2067
self._raw_record_map = self.vf._get_record_map_unparsed(keys,
2070
self._raw_record_map = raw_record_map
2071
# the factory for parsing records
2072
self._factory = self.vf._factory
2075
class _NetworkContentMapGenerator(_ContentMapGenerator):
2076
"""Content map generator sourced from a network stream."""
2078
def __init__(self, bytes, line_end):
2079
"""Construct a _NetworkContentMapGenerator from a bytes block."""
2081
self.global_map = {}
2082
self._raw_record_map = {}
2083
self._contents_map = {}
2084
self._record_map = None
2085
self.nonlocal_keys = []
2086
# Get access to record parsing facilities
2087
self.vf = KnitVersionedFiles(None, None)
2090
line_end = bytes.find('\n', start)
2091
line = bytes[start:line_end]
2092
start = line_end + 1
2093
if line == 'annotated':
2094
self._factory = KnitAnnotateFactory()
2096
self._factory = KnitPlainFactory()
2097
# list of keys to emit in get_record_stream
2098
line_end = bytes.find('\n', start)
2099
line = bytes[start:line_end]
2100
start = line_end + 1
2102
tuple(segment.split('\x00')) for segment in line.split('\t')
2104
# now a loop until the end. XXX: It would be nice if this was just a
2105
# bunch of the same records as get_record_stream(..., False) gives, but
2106
# there is a decent sized gap stopping that at the moment.
2110
line_end = bytes.find('\n', start)
2111
key = tuple(bytes[start:line_end].split('\x00'))
2112
start = line_end + 1
2113
# 1 line with parents (None: for None, '' for ())
2114
line_end = bytes.find('\n', start)
2115
line = bytes[start:line_end]
2120
[tuple(segment.split('\x00')) for segment in line.split('\t')
2122
self.global_map[key] = parents
2123
start = line_end + 1
2124
# one line with method
2125
line_end = bytes.find('\n', start)
2126
line = bytes[start:line_end]
2128
start = line_end + 1
2129
# one line with noeol
2130
line_end = bytes.find('\n', start)
2131
line = bytes[start:line_end]
2133
start = line_end + 1
2134
# one line with next ('' for None)
2135
line_end = bytes.find('\n', start)
2136
line = bytes[start:line_end]
2140
next = tuple(bytes[start:line_end].split('\x00'))
2141
start = line_end + 1
2142
# one line with byte count of the record bytes
2143
line_end = bytes.find('\n', start)
2144
line = bytes[start:line_end]
2146
start = line_end + 1
2148
record_bytes = bytes[start:start+count]
2149
start = start + count
2151
self._raw_record_map[key] = (record_bytes, (method, noeol), next)
2153
def get_record_stream(self):
2154
"""Get a record stream for the keys requested by the bytestream."""
2156
for key in self.keys:
2157
yield LazyKnitContentFactory(key, self.global_map[key], self, first)
2160
def _wire_bytes(self):
1769
2164
class _KndxIndex(object):
1770
2165
"""Manages knit index files