253
254
def parse_line_delta_iter(self, lines, version_id=None):
    """Return an iterator over the parsed form of a line-based delta.

    This is a thin wrapper that parses the delta with parse_line_delta()
    and wraps the result in an iterator.

    :param lines: The line-based delta to parse.
    :param version_id: Version id forwarded to parse_line_delta(). It
        defaults to None so the previous one-argument call form is still
        accepted.
    :return: An iterator over whatever parse_line_delta() returns.
    """
    # parse_line_delta() declares version_id as a required positional
    # argument; calling it with only `lines` raised TypeError.
    return iter(self.parse_line_delta(lines, version_id))
256
def parse_line_delta(self, lines, version_id):
257
def parse_line_delta(self, lines, version_id, plain=False):
257
258
"""Convert a line based delta into internal representation.
259
260
line delta is in the form of:
262
263
revid(utf8) newline\n
263
264
internal representation is
264
265
(start, end, count, [1..count tuples (revid, newline)])
267
:param plain: If True, the lines are returned as a plain
268
list, not as a list of tuples, i.e.
269
(start, end, count, [1..count newline])
267
272
lines = iter(lines)
273
278
return cache.setdefault(origin, origin), text
275
280
# walk through the lines parsing.
277
start, end, count = [int(n) for n in header.split(',')]
278
contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
279
result.append((start, end, count, contents))
281
# Note that the plain test is explicitly pulled out of the
282
# loop to minimise any performance impact
285
start, end, count = [int(n) for n in header.split(',')]
286
contents = [next().split(' ', 1)[1] for i in xrange(count)]
287
result.append((start, end, count, contents))
290
start, end, count = [int(n) for n in header.split(',')]
291
contents = [tuple(next().split(' ', 1)) for i in xrange(count)]
292
result.append((start, end, count, contents))
282
295
def get_fulltext_content(self, lines):
820
833
"""See VersionedFile.add_lines_with_ghosts()."""
821
834
self._check_add(version_id, lines, random_id, check_content)
822
835
return self._add(version_id, lines, parents, self.delta,
823
parent_texts, None, nostore_sha)
836
parent_texts, None, nostore_sha, random_id)
825
838
def _add_lines(self, version_id, parents, lines, parent_texts,
826
839
left_matching_blocks, nostore_sha, random_id, check_content):
828
841
self._check_add(version_id, lines, random_id, check_content)
829
842
self._check_versions_present(parents)
830
843
return self._add(version_id, lines[:], parents, self.delta,
831
parent_texts, left_matching_blocks, nostore_sha)
844
parent_texts, left_matching_blocks, nostore_sha, random_id)
833
846
def _check_add(self, version_id, lines, random_id, check_content):
834
847
"""check that version_id and lines are safe to add."""
846
859
self._check_lines_are_lines(lines)
848
861
def _add(self, version_id, lines, parents, delta, parent_texts,
849
left_matching_blocks, nostore_sha):
862
left_matching_blocks, nostore_sha, random_id):
850
863
"""Add a set of lines on top of version specified by parents.
852
865
If delta is true, compress the text as a line-delta against
855
868
Any versions not present will be converted into ghosts.
857
# 461 0 6546.0390 43.9100 bzrlib.knit:489(_add)
858
# +400 0 889.4890 418.9790 +bzrlib.knit:192(lower_fulltext)
859
# +461 0 1364.8070 108.8030 +bzrlib.knit:996(add_record)
860
# +461 0 193.3940 41.5720 +bzrlib.knit:898(add_version)
861
# +461 0 134.0590 18.3810 +bzrlib.osutils:361(sha_strings)
862
# +461 0 36.3420 15.4540 +bzrlib.knit:146(make)
863
# +1383 0 8.0370 8.0370 +<len>
864
# +61 0 13.5770 7.9190 +bzrlib.knit:199(lower_line_delta)
865
# +61 0 963.3470 7.8740 +bzrlib.knit:427(_get_content)
866
# +61 0 973.9950 5.2950 +bzrlib.knit:136(line_delta)
867
# +61 0 1918.1800 5.2640 +bzrlib.knit:359(_merge_annotations)
870
# first thing, if the content is something we don't need to store, find
872
line_bytes = ''.join(lines)
873
digest = sha_string(line_bytes)
874
if nostore_sha == digest:
875
raise errors.ExistingContent
869
877
present_parents = []
870
878
if parent_texts is None:
879
887
present_parents[0] != parents[0])):
882
digest = sha_strings(lines)
883
if nostore_sha == digest:
884
raise errors.ExistingContent
885
text_length = sum(map(len, lines))
890
text_length = len(line_bytes)
888
893
if lines[-1][-1] != '\n':
909
914
options.append('line-delta')
910
915
store_lines = self.factory.lower_line_delta(delta_hunks)
916
size, bytes = self._data._record_to_data(version_id, digest,
912
919
options.append('fulltext')
920
# get mixed annotation + content and feed it into the
913
922
store_lines = self.factory.lower_fulltext(content)
923
size, bytes = self._data._record_to_data(version_id, digest,
915
access_memo = self._data.add_record(version_id, digest, store_lines)
916
self._index.add_version(version_id, options, access_memo, parents)
926
access_memo = self._data.add_raw_records([size], bytes)[0]
927
self._index.add_versions(
928
((version_id, options, access_memo, parents),),
917
930
return digest, text_length, content
919
932
def check(self, progress_bar=None):
1369
1382
"""Add a version record to the index."""
1370
1383
self.add_versions(((version_id, options, index_memo, parents),))
1372
def add_versions(self, versions):
1385
def add_versions(self, versions, random_id=False):
1373
1386
"""Add multiple versions to the index.
1375
1388
:param versions: a list of tuples:
1376
1389
(version_id, options, index_memo, parents).
1390
:param random_id: If True the ids being added were randomly generated
1391
and no check for existence will be performed.
1379
1394
orig_history = self._history[:]
1709
1724
"""Add a version record to the index."""
1710
1725
return self.add_versions(((version_id, options, access_memo, parents),))
1712
def add_versions(self, versions):
1727
def add_versions(self, versions, random_id=False):
1713
1728
"""Add multiple versions to the index.
1715
1730
This function does not insert data into the Immutable GraphIndex
1720
1735
:param versions: a list of tuples:
1721
1736
(version_id, options, access_memo, parents).
1737
:param random_id: If True the ids being added were randomly generated
1738
and no check for existence will be performed.
1723
1740
if not self._add_callback:
1724
1741
raise errors.ReadOnlyError(self)
1753
1770
"in parentless index.")
1755
1772
keys[key] = (value, node_refs)
1756
present_nodes = self._get_entries(keys)
1757
for (index, key, value, node_refs) in present_nodes:
1758
if (value, node_refs) != keys[key]:
1759
raise KnitCorrupt(self, "inconsistent details in add_versions"
1760
": %s %s" % ((value, node_refs), keys[key]))
1774
present_nodes = self._get_entries(keys)
1775
for (index, key, value, node_refs) in present_nodes:
1776
if (value, node_refs) != keys[key]:
1777
raise KnitCorrupt(self, "inconsistent details in add_versions"
1778
": %s %s" % ((value, node_refs), keys[key]))
1763
1781
if self._parents:
1764
1782
for key, (value, node_refs) in keys.iteritems():
2000
2018
return self._access.add_raw_records(sizes, raw_data)
2002
def add_record(self, version_id, digest, lines):
2003
"""Write new text record to disk.
2005
Returns index data for retrieving it later, as per add_raw_records.
2007
size, bytes = self._record_to_data(version_id, digest, lines)
2008
result = self.add_raw_records([size], bytes)
2010
self._cache[version_id] = bytes
2013
2020
def _parse_record_header(self, version_id, raw_data):
2014
2021
"""Parse a record header for consistency.
2176
2183
assert isinstance(self.source, KnitVersionedFile)
2177
2184
assert isinstance(self.target, KnitVersionedFile)
2186
# If the source and target are mismatched w.r.t. annotations vs
2187
# plain, the data needs to be converted accordingly
2188
if self.source.factory.annotated == self.target.factory.annotated:
2190
elif self.source.factory.annotated:
2191
converter = self._anno_to_plain_converter
2193
# We're converting from a plain to an annotated knit. This requires
2194
# building the annotations from scratch. The generic join code
2195
# handles this implicitly so we delegate to it.
2196
return super(InterKnit, self).join(pb, msg, version_ids,
2179
2199
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
2181
2200
if not version_ids:
2235
2254
assert version_id == version_id2, 'logic error, inconsistent results'
2236
2255
count = count + 1
2237
2256
pb.update("Joining knit", count, total)
2238
raw_records.append((version_id, options, parents, len(raw_data)))
2258
size, raw_data = converter(raw_data, version_id, options,
2261
size = len(raw_data)
2262
raw_records.append((version_id, options, parents, size))
2239
2263
raw_datum.append(raw_data)
2240
2264
self.target._add_raw_records(raw_records, ''.join(raw_datum))
2269
def _anno_to_plain_converter(self, raw_data, version_id, options,
2271
"""Convert annotated content to plain content."""
2272
data, digest = self.source._data._parse_record(version_id, raw_data)
2273
if 'fulltext' in options:
2274
content = self.source.factory.parse_fulltext(data, version_id)
2275
lines = self.target.factory.lower_fulltext(content)
2277
delta = self.source.factory.parse_line_delta(data, version_id,
2279
lines = self.target.factory.lower_line_delta(delta)
2280
return self.target._data._record_to_data(version_id, digest, lines)
2246
2283
InterVersionedFile.register_optimiser(InterKnit)