101
101
RevisionNotPresent,
102
102
RevisionAlreadyPresent,
104
from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip
104
from bzrlib.graph import Graph
105
105
from bzrlib.osutils import (
106
106
contains_whitespace,
107
107
contains_linebreaks,
111
from bzrlib.symbol_versioning import DEPRECATED_PARAMETER, deprecated_passed
111
from bzrlib.symbol_versioning import (
112
DEPRECATED_PARAMETER,
112
117
from bzrlib.tsort import topo_sort
118
from bzrlib.tuned_gzip import GzipFile, bytes_to_gzip
120
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
114
121
import bzrlib.weave
115
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
118
124
# TODO: Split out code specific to this format into an associated object.
626
632
# move the copied index into place
627
633
transport.move(name + INDEX_SUFFIX + '.tmp', name + INDEX_SUFFIX)
629
def create_empty(self, name, transport, mode=None):
630
return KnitVersionedFile(name, transport, factory=self.factory,
631
delta=self.delta, create=True)
633
635
def get_data_stream(self, required_versions):
634
636
"""Get a data stream for the specified versions.
720
722
def get_delta(self, version_id):
721
723
"""Get a delta for constructing version from some other version."""
722
724
self.check_not_reserved_id(version_id)
723
parents = self.get_parents(version_id)
725
parents = self.get_parent_map([version_id])[version_id]
725
727
parent = parents[0]
751
753
annotated_part = "plain"
752
754
return "knit-%s" % (annotated_part,)
756
@deprecated_method(one_four)
754
757
def get_graph_with_ghosts(self):
755
758
"""See VersionedFile.get_graph_with_ghosts()."""
756
graph_items = self._index.get_graph()
757
return dict(graph_items)
759
return self.get_parent_map(self.versions())
759
761
def get_sha1(self, version_id):
760
762
return self.get_sha1s([version_id])[0]
770
772
"""See VersionedFile.get_suffixes()."""
771
773
return [DATA_SUFFIX, INDEX_SUFFIX]
775
@deprecated_method(one_four)
773
776
def has_ghost(self, version_id):
774
777
"""True if there is a ghost reference in the file to version_id."""
775
778
# maybe we have it
776
779
if self.has_version(version_id):
778
781
# optimisable if needed by memoising the _ghosts set.
779
items = self._index.get_graph()
780
for node, parents in items:
782
items = self.get_parent_map(self.versions())
783
for parents in items.itervalues():
781
784
for parent in parents:
782
if parent not in self._index._cache:
783
if parent == version_id:
785
if parent == version_id and parent not in items:
787
789
def insert_data_stream(self, (format, data_list, reader_callable)):
977
979
self._index.check_versions_present(version_ids)
979
981
def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
980
nostore_sha, random_id, check_content):
982
nostore_sha, random_id, check_content, left_matching_blocks):
981
983
"""See VersionedFile.add_lines_with_ghosts()."""
982
984
self._check_add(version_id, lines, random_id, check_content)
983
985
return self._add(version_id, lines, parents, self.delta,
984
parent_texts, None, nostore_sha, random_id)
986
parent_texts, left_matching_blocks, nostore_sha, random_id)
986
988
def _add_lines(self, version_id, parents, lines, parent_texts,
987
989
left_matching_blocks, nostore_sha, random_id, check_content):
1258
1260
"""See VersionedFile.annotate_iter."""
1259
1261
return self.factory.annotate_iter(self, version_id)
1261
def get_parents(self, version_id):
1262
"""See VersionedFile.get_parents."""
1265
# 52554 calls in 1264 872 internal down from 3674
1267
return self._index.get_parents(version_id)
1269
raise RevisionNotPresent(version_id, self.filename)
1271
def get_parents_with_ghosts(self, version_id):
1272
"""See VersionedFile.get_parents."""
1274
return self._index.get_parents_with_ghosts(version_id)
1276
raise RevisionNotPresent(version_id, self.filename)
1263
def get_parent_map(self, version_ids):
1264
"""See VersionedFile.get_parent_map."""
1265
return self._index.get_parent_map(version_ids)
1278
1267
def get_ancestry(self, versions, topo_sorted=True):
1279
1268
"""See VersionedFile.get_ancestry."""
1440
1429
self._transport.put_bytes_non_atomic(
1441
1430
self._filename, self.HEADER, mode=self._file_mode)
1443
def get_graph(self):
1444
"""Return a list of the node:parents lists from this knit index."""
1445
return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]
1447
1432
def get_ancestry(self, versions, topo_sorted=True):
1448
1433
"""See VersionedFile.get_ancestry."""
1449
1434
# get a graph of all the mentioned versions:
1529
1514
The order is undefined, allowing for different optimisations in
1530
1515
the underlying implementation.
1532
for version_id in version_ids:
1534
yield version_id, tuple(self.get_parents(version_id))
1517
parent_map = self.get_parent_map(version_ids)
1518
parent_map_set = set(parent_map)
1519
unknown_existence = set()
1520
for parents in parent_map.itervalues():
1521
unknown_existence.update(parents)
1522
unknown_existence.difference_update(parent_map_set)
1523
present_parents = set(self.get_parent_map(unknown_existence))
1524
present_parents.update(parent_map_set)
1525
for version_id, parents in parent_map.iteritems():
1526
parents = tuple(parent for parent in parents
1527
if parent in present_parents)
1528
yield version_id, parents
1538
1530
def num_versions(self):
1539
1531
return len(self._history)
1582
1574
assert isinstance(line, str), \
1583
1575
'content must be utf-8 encoded: %r' % (line,)
1584
1576
lines.append(line)
1585
self._cache_version(version_id, options, pos, size, parents)
1577
self._cache_version(version_id, options, pos, size, tuple(parents))
1586
1578
if not self._need_to_create:
1587
1579
self._transport.append_bytes(self._filename, ''.join(lines))
1638
1630
return self._cache[version_id][1]
1640
def get_parents(self, version_id):
1641
"""Return parents of specified version ignoring ghosts."""
1642
return [parent for parent in self._cache[version_id][4]
1643
if parent in self._cache]
1632
def get_parent_map(self, version_ids):
1633
"""Called as a pass-through by KnitVersionedFile.get_parent_map."""
1635
for version_id in version_ids:
1637
result[version_id] = tuple(self._cache[version_id][4])
1645
1642
def get_parents_with_ghosts(self, version_id):
1646
1643
"""Return parents of specified version with ghosts."""
1647
return self._cache[version_id][4]
1645
return self.get_parent_map([version_id])[version_id]
1647
raise RevisionNotPresent(version_id, self)
1649
1649
def check_versions_present(self, version_ids):
1650
1650
"""Check that all specified versions are present."""
1847
1847
return 'fulltext'
1849
def get_graph(self):
1850
"""Return a list of the node:parents lists from this knit index."""
1851
if not self._parents:
1852
return [(key, ()) for key in self.get_versions()]
1854
for index, key, value, refs in self._graph_index.iter_all_entries():
1855
result.append((key[0], tuple([ref[0] for ref in refs[0]])))
1858
1849
def iter_parents(self, version_ids):
1859
1850
"""Iterate through the parents for many version ids.
1935
1926
options.append('no-eol')
1938
def get_parents(self, version_id):
1939
"""Return parents of specified version ignoring ghosts."""
1940
parents = list(self.iter_parents([version_id]))
1943
raise errors.RevisionNotPresent(version_id, self)
1944
return parents[0][1]
1929
def get_parent_map(self, version_ids):
1930
"""Called as a pass-through by KnitVersionedFile.get_parent_map."""
1931
nodes = self._get_entries(self._version_ids_to_keys(version_ids))
1935
result[node[1][0]] = self._keys_to_version_ids(node[3][0])
1938
result[node[1][0]] = ()
1946
1941
def get_parents_with_ghosts(self, version_id):
1947
1942
"""Return parents of specified version with ghosts."""
1948
nodes = list(self._get_entries(self._version_ids_to_keys([version_id]),
1949
check_present=True))
1950
if not self._parents:
1952
return self._keys_to_version_ids(nodes[0][3][0])
1944
return self.get_parent_map([version_id])[version_id]
1946
raise RevisionNotPresent(version_id, self)
1954
1948
def check_versions_present(self, version_ids):
1955
1949
"""Check that all specified versions are present."""
2231
2225
def get_raw_records(self, memos_for_retrieval):
2232
2226
"""Get the raw bytes for the given records.
2234
:param memos_for_retrieval: An iterable containing the (thunk_flag,
2235
index, start, end) memo for retrieving the bytes.
2236
:return: An iterator over the bytes of the records.
2228
:param memos_for_retrieval: An iterable of memos from the
2229
_StreamIndex object identifying bytes to read; for these classes
2230
they are (from_backing_knit, index, start, end) and can point to
2231
either the backing knit or streamed data.
2232
:return: An iterator yielding a byte string for each record in
2233
memos_for_retrieval.
2238
2235
# use a generator for memory friendliness
2239
for thunk_flag, version_id, start, end in memos_for_retrieval:
2240
if version_id is self.stream_index:
2236
for from_backing_knit, version_id, start, end in memos_for_retrieval:
2237
if not from_backing_knit:
2238
assert version_id is self.stream_index
2241
2239
yield self.data[start:end]
2243
2241
# we have been asked to thunk. This thunking only occurs when
2248
2246
# as desired. However, for now, this is sufficient.
2249
2247
if self.orig_factory.__class__ != KnitPlainFactory:
2250
2248
raise errors.KnitCorrupt(
2251
self, 'Bad thunk request %r' % version_id)
2249
self, 'Bad thunk request %r cannot be backed by %r' %
2250
(version_id, self.orig_factory))
2252
2251
lines = self.backing_knit.get_lines(version_id)
2253
2252
line_bytes = ''.join(lines)
2254
2253
digest = sha_string(line_bytes)
2254
# the packed form of the fulltext always has a trailing newline,
2255
# even if the actual text does not, unless the file is empty. the
2256
# record options including the noeol flag are passed through by
2257
# _StreamIndex, so this is safe.
2256
2259
if lines[-1][-1] != '\n':
2257
2260
lines[-1] = lines[-1] + '\n'
2258
2261
line_bytes += '\n'
2259
orig_options = list(self.backing_knit._index.get_options(version_id))
2260
if 'fulltext' not in orig_options:
2261
if 'line-delta' not in orig_options:
2262
raise errors.KnitCorrupt(self,
2263
'Unknown compression method %r' % orig_options)
2264
orig_options.remove('line-delta')
2265
orig_options.append('fulltext')
2266
2262
# We want plain data, because we expect to thunk only to allow text
2268
2264
size, bytes = self.backing_knit._data._record_to_data(version_id,
2320
2316
:return: A dict of version_id:(index_memo, compression_parent,
2321
2317
parents, record_details).
2323
opaque structure to pass to read_records to extract the raw
2319
opaque memo that can be passed to _StreamAccess.read_records
2320
to extract the raw data; for these classes it is
2321
(from_backing_knit, index, start, end)
2325
2322
compression_parent
2326
2323
Content that this record is built upon, may be None
2340
2337
parent_ids = self.get_parents_with_ghosts(version_id)
2341
2338
noeol = ('no-eol' in self.get_options(version_id))
2339
index_memo = self.get_position(version_id)
2340
from_backing_knit = index_memo[0]
2341
if from_backing_knit:
2342
# texts retrieved from the backing knit are always full texts
2342
2344
if method == 'fulltext':
2343
2345
compression_parent = None
2345
2347
compression_parent = parent_ids[0]
2346
index_memo = self.get_position(version_id)
2347
2348
result[version_id] = (index_memo, compression_parent,
2348
2349
parent_ids, (method, noeol))
2351
2352
def get_method(self, version_id):
2352
2353
"""Return compression method of specified version."""
2354
options = self._by_version[version_id][0]
2356
# Strictly speaking this should check in the backing knit, but
2357
# until we have a test to discriminate, this will do.
2358
return self.backing_index.get_method(version_id)
2354
options = self.get_options(version_id)
2359
2355
if 'fulltext' in options:
2360
2356
return 'fulltext'
2361
2357
elif 'line-delta' in options:
2372
2368
return self._by_version[version_id][0]
2373
2369
except KeyError:
2374
return self.backing_index.get_options(version_id)
2370
options = list(self.backing_index.get_options(version_id))
2371
if 'fulltext' in options:
2373
elif 'line-delta' in options:
2374
# Texts from the backing knit are always returned from the stream
2376
options.remove('line-delta')
2377
options.append('fulltext')
2379
raise errors.KnitIndexUnknownMethod(self, options)
2380
return tuple(options)
2382
def get_parent_map(self, version_ids):
2383
"""Called as a pass-through by KnitVersionedFile.get_parent_map."""
2386
for version_id in version_ids:
2388
result[version_id] = self._by_version[version_id][2]
2390
pending_ids.add(version_id)
2391
result.update(self.backing_index.get_parent_map(pending_ids))
2376
2394
def get_parents_with_ghosts(self, version_id):
2377
2395
"""Return parents of specified version with ghosts."""
2379
return self._by_version[version_id][2]
2397
return self.get_parent_map([version_id])[version_id]
2380
2398
except KeyError:
2381
return self.backing_index.get_parents_with_ghosts(version_id)
2399
raise RevisionNotPresent(version_id, self)
2383
2401
def get_position(self, version_id):
2384
2402
"""Return details needed to access the version.
2387
2405
coordinates into that (as index_memo's are opaque outside the
2388
2406
index and matching access class).
2390
:return: a tuple (thunk_flag, index, start, end). If thunk_flag is
2391
False, index will be self, otherwise it will be a version id.
2408
:return: a tuple (from_backing_knit, index, start, end) that can
2409
be passed e.g. to get_raw_records.
2410
If from_backing_knit is False, index will be self, otherwise it
2411
will be a version id.
2394
2414
start, end = self._by_version[version_id][1]
2656
2676
see join() for the parameter definitions.
2658
2678
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
2659
graph = self.source.get_graph(version_ids)
2660
order = topo_sort(graph.items())
2679
# --- the below is factorable out with VersionedFile.join, but wait for
2680
# VersionedFiles, it may all be simpler then.
2681
graph = Graph(self.source)
2682
search = graph._make_breadth_first_searcher(version_ids)
2683
transitive_ids = set()
2684
map(transitive_ids.update, list(search))
2685
parent_map = self.source.get_parent_map(transitive_ids)
2686
order = topo_sort(parent_map.items())
2662
2688
def size_of_content(content):
2663
2689
return sum(len(line) for line in content.text())
2836
2864
total = len(version_list)
2865
parent_map = self.source.get_parent_map(version_list)
2837
2866
for version_id in version_list:
2838
2867
pb.update("Converting to knit", count, total)
2839
parents = self.source.get_parents(version_id)
2868
parents = parent_map[version_id]
2840
2869
# check that it will be a consistent copy:
2841
2870
for parent in parents:
2842
2871
# if source has the parent, we must already have it