69
71
from copy import copy
70
72
from cStringIO import StringIO
73
from bzrlib.lazy_import import lazy_import
74
lazy_import(globals(), """
75
from bzrlib import tsort
77
78
from bzrlib import (
81
from bzrlib.trace import mutter
81
82
from bzrlib.errors import (WeaveError, WeaveFormatError, WeaveParentMismatch,
82
83
RevisionAlreadyPresent,
83
84
RevisionNotPresent,
84
UnavailableRepresentation,
85
WeaveRevisionAlreadyPresent,
86
WeaveRevisionNotPresent,
86
from bzrlib.osutils import dirname, sha, sha_strings, split_lines
88
import bzrlib.errors as errors
89
from bzrlib.osutils import sha_strings
87
90
import bzrlib.patiencediff
88
from bzrlib.revision import NULL_REVISION
89
from bzrlib.symbol_versioning import *
90
from bzrlib.trace import mutter
91
from bzrlib.versionedfile import (
91
from bzrlib.tsort import topo_sort
92
from bzrlib.versionedfile import VersionedFile, InterVersionedFile
98
93
from bzrlib.weavefile import _read_weave_v5, write_weave_v5
101
class WeaveContentFactory(ContentFactory):
102
"""Content factory for streaming from weaves.
104
:seealso ContentFactory:
107
def __init__(self, version, weave):
108
"""Create a WeaveContentFactory for version from weave."""
109
ContentFactory.__init__(self)
110
self.sha1 = weave.get_sha1s([version])[version]
111
self.key = (version,)
112
parents = weave.get_parent_map([version])[version]
113
self.parents = tuple((parent,) for parent in parents)
114
self.storage_kind = 'fulltext'
117
def get_bytes_as(self, storage_kind):
118
if storage_kind == 'fulltext':
119
return self._weave.get_text(self.key[-1])
120
elif storage_kind == 'chunked':
121
return self._weave.get_lines(self.key[-1])
123
raise UnavailableRepresentation(self.key, storage_kind, 'fulltext')
126
96
class Weave(VersionedFile):
127
97
"""weave - versioned text file storage.
129
99
A Weave manages versions of line-based text files, keeping track
130
100
of the originating version for each line.
297
246
__contains__ = has_version
299
def get_record_stream(self, versions, ordering, include_delta_closure):
300
"""Get a stream of records for versions.
302
:param versions: The versions to include. Each version is a tuple
304
:param ordering: Either 'unordered' or 'topological'. A topologically
305
sorted stream has compression parents strictly before their
307
:param include_delta_closure: If True then the closure across any
308
compression parents will be included (in the opaque data).
309
:return: An iterator of ContentFactory objects, each of which is only
310
valid until the iterator is advanced.
312
versions = [version[-1] for version in versions]
313
if ordering == 'topological':
314
parents = self.get_parent_map(versions)
315
new_versions = tsort.topo_sort(parents)
316
new_versions.extend(set(versions).difference(set(parents)))
317
versions = new_versions
318
elif ordering == 'groupcompress':
319
parents = self.get_parent_map(versions)
320
new_versions = sort_groupcompress(parents)
321
new_versions.extend(set(versions).difference(set(parents)))
322
versions = new_versions
323
for version in versions:
325
yield WeaveContentFactory(version, self)
327
yield AbsentContentFactory((version,))
329
def get_parent_map(self, version_ids):
330
"""See VersionedFile.get_parent_map."""
248
def get_delta(self, version_id):
249
"""See VersionedFile.get_delta."""
250
return self.get_deltas([version_id])[version_id]
252
def get_deltas(self, version_ids):
253
"""See VersionedFile.get_deltas."""
254
version_ids = self.get_ancestry(version_ids)
332
255
for version_id in version_ids:
333
if version_id == NULL_REVISION:
338
map(self._idx_to_name,
339
self._parents[self._lookup(version_id)]))
340
except RevisionNotPresent:
256
if not self.has_version(version_id):
257
raise RevisionNotPresent(version_id, self)
258
# try extracting all versions; parallel extraction is used
259
nv = self.num_versions()
265
last_parent_lines = {}
267
parent_inclusions = {}
272
# its simplest to generate a full set of prepared variables.
274
name = self._names[i]
275
sha1s[name] = self.get_sha1(name)
276
parents_list = self.get_parents(name)
278
parent = parents_list[0]
279
parents[name] = parent
280
parent_inclusions[name] = inclusions[parent]
283
parent_inclusions[name] = set()
284
# we want to emit start, finish, replacement_length, replacement_lines tuples.
285
diff_hunks[name] = []
286
current_hunks[name] = [0, 0, 0, []] # #start, finish, repl_length, repl_tuples
287
parent_linenums[name] = 0
289
parent_noeols[name] = False
290
last_parent_lines[name] = None
291
new_inc = set([name])
292
for p in self._parents[i]:
293
new_inc.update(inclusions[self._idx_to_name(p)])
294
# debug only, known good so far.
295
#assert set(new_inc) == set(self.get_ancestry(name)), \
296
# 'failed %s != %s' % (set(new_inc), set(self.get_ancestry(name)))
297
inclusions[name] = new_inc
299
nlines = len(self._weave)
301
for lineno, inserted, deletes, line in self._walk_internal():
302
# a line is active in a version if:
303
# insert is in the versions inclusions
305
# deleteset & the versions inclusions is an empty set.
306
# so - if we have a included by mapping - version is included by
307
# children, we get a list of children to examine for deletes affect
308
# ing them, which is less than the entire set of children.
309
for version_id in version_ids:
310
# The active inclusion must be an ancestor,
311
# and no ancestors must have deleted this line,
312
# because we don't support resurrection.
313
parent_inclusion = parent_inclusions[version_id]
314
inclusion = inclusions[version_id]
315
parent_active = inserted in parent_inclusion and not (deletes & parent_inclusion)
316
version_active = inserted in inclusion and not (deletes & inclusion)
317
if not parent_active and not version_active:
318
# unrelated line of ancestry
342
result[version_id] = parents
320
elif parent_active and version_active:
322
parent_linenum = parent_linenums[version_id]
323
if current_hunks[version_id] != [parent_linenum, parent_linenum, 0, []]:
324
diff_hunks[version_id].append(tuple(current_hunks[version_id]))
326
current_hunks[version_id] = [parent_linenum, parent_linenum, 0, []]
327
parent_linenums[version_id] = parent_linenum
330
noeols[version_id] = True
333
elif parent_active and not version_active:
335
current_hunks[version_id][1] += 1
336
parent_linenums[version_id] += 1
337
last_parent_lines[version_id] = line
338
elif not parent_active and version_active:
340
# noeol only occurs at the end of a file because we
341
# diff linewise. We want to show noeol changes as a
342
# empty diff unless the actual eol-less content changed.
345
if last_parent_lines[version_id][-1] != '\n':
346
parent_noeols[version_id] = True
347
except (TypeError, IndexError):
350
if theline[-1] != '\n':
351
noeols[version_id] = True
355
parent_should_go = False
357
if parent_noeols[version_id] == noeols[version_id]:
358
# no noeol toggle, so trust the weaves statement
359
# that this line is changed.
361
if parent_noeols[version_id]:
362
theline = theline + '\n'
363
elif parent_noeols[version_id]:
364
# parent has no eol, we do:
365
# our line is new, report as such..
367
elif noeols[version_id]:
368
# append a eol so that it looks like
370
theline = theline + '\n'
371
if parents[version_id] is not None:
372
#if last_parent_lines[version_id] is not None:
373
parent_should_go = True
374
if last_parent_lines[version_id] != theline:
377
#parent_should_go = False
379
current_hunks[version_id][2] += 1
380
current_hunks[version_id][3].append((inserted, theline))
382
# last hunk last parent line is not eaten
383
current_hunks[version_id][1] -= 1
384
if current_hunks[version_id][1] < 0:
385
current_hunks[version_id][1] = 0
386
# import pdb;pdb.set_trace()
387
# assert current_hunks[version_id][1] >= 0
391
version = self._idx_to_name(i)
392
if current_hunks[version] != [0, 0, 0, []]:
393
diff_hunks[version].append(tuple(current_hunks[version]))
395
for version_id in version_ids:
396
result[version_id] = (
400
diff_hunks[version_id],
345
def get_parents_with_ghosts(self, version_id):
346
raise NotImplementedError(self.get_parents_with_ghosts)
348
def insert_record_stream(self, stream):
349
"""Insert a record stream into this versioned file.
351
:param stream: A stream of records to insert.
353
:seealso VersionedFile.get_record_stream:
356
for record in stream:
357
# Raise an error when a record is missing.
358
if record.storage_kind == 'absent':
359
raise RevisionNotPresent([record.key[0]], self)
360
# adapt to non-tuple interface
361
parents = [parent[0] for parent in record.parents]
362
if (record.storage_kind == 'fulltext'
363
or record.storage_kind == 'chunked'):
364
self.add_lines(record.key[0], parents,
365
osutils.chunks_to_lines(record.get_bytes_as('chunked')))
367
adapter_key = record.storage_kind, 'fulltext'
369
adapter = adapters[adapter_key]
371
adapter_factory = adapter_registry.get(adapter_key)
372
adapter = adapter_factory(self)
373
adapters[adapter_key] = adapter
374
lines = split_lines(adapter.get_bytes(record))
376
self.add_lines(record.key[0], parents, lines)
377
except RevisionAlreadyPresent:
404
def get_parents(self, version_id):
405
"""See VersionedFile.get_parent."""
406
return map(self._idx_to_name, self._parents[self._lookup(version_id)])
380
408
def _check_repeated_add(self, name, parents, text, sha1):
381
409
"""Check that a duplicated add is OK.
391
419
def _add_lines(self, version_id, parents, lines, parent_texts,
392
left_matching_blocks, nostore_sha, random_id, check_content):
420
left_matching_blocks=None):
393
421
"""See VersionedFile.add_lines."""
394
idx = self._add(version_id, lines, map(self._lookup, parents),
395
nostore_sha=nostore_sha)
422
idx = self._add(version_id, lines, map(self._lookup, parents))
396
423
return sha_strings(lines), sum(map(len, lines)), idx
398
def _add(self, version_id, lines, parents, sha1=None, nostore_sha=None):
425
def _add(self, version_id, lines, parents, sha1=None):
399
426
"""Add a single text on top of the weave.
401
428
Returns the index number of the newly added version.
404
431
Symbolic name for this version.
405
432
(Typically the revision-id of the revision that added it.)
406
If None, a name will be allocated based on the hash. (sha1:SHAHASH)
409
435
List or set of direct parent version numbers.
412
438
Sequence of lines to be added in the new version.
414
:param nostore_sha: See VersionedFile.add_lines.
441
assert isinstance(version_id, basestring)
416
442
self._check_lines_not_unicode(lines)
417
443
self._check_lines_are_lines(lines)
419
445
sha1 = sha_strings(lines)
420
if sha1 == nostore_sha:
421
raise errors.ExistingContent
422
if version_id is None:
423
version_id = "sha1:" + sha1
424
446
if version_id in self._name_map:
425
447
return self._check_repeated_add(version_id, parents, lines, sha1)
848
892
# no lines outside of insertion blocks, that deletions are
849
893
# properly paired, etc.
895
def _join(self, other, pb, msg, version_ids, ignore_missing):
896
"""Worker routine for join()."""
897
if not other.versions():
898
return # nothing to update, easy
901
# versions is never none, InterWeave checks this.
904
# two loops so that we do not change ourselves before verifying it
906
# work through in index order to make sure we get all dependencies
909
# get the selected versions only that are in other.versions.
910
version_ids = set(other.versions()).intersection(set(version_ids))
911
# pull in the referenced graph.
912
version_ids = other.get_ancestry(version_ids)
913
pending_graph = [(version, other.get_parents(version)) for
914
version in version_ids]
915
for name in topo_sort(pending_graph):
916
other_idx = other._name_map[name]
917
# returns True if we have it, False if we need it.
918
if not self._check_version_consistent(other, other_idx, name):
919
names_to_join.append((other_idx, name))
928
for other_idx, name in names_to_join:
929
# TODO: If all the parents of the other version are already
930
# present then we can avoid some work by just taking the delta
931
# and adjusting the offsets.
932
new_parents = self._imported_parents(other, other_idx)
933
sha1 = other._sha1s[other_idx]
938
pb.update(msg, merged, len(names_to_join))
940
lines = other.get_lines(other_idx)
941
self._add(name, lines, new_parents, sha1)
943
mutter("merged = %d, processed = %d, file_id=%s; deltat=%d"%(
944
merged, processed, self._weave_name, time.time()-time0))
851
946
def _imported_parents(self, other, other_idx):
852
947
"""Return list of parents in self corresponding to indexes in other."""
946
1044
transport.put_file(name + WeaveFile.WEAVE_SUFFIX, sio, self._filemode)
1046
def create_empty(self, name, transport, filemode=None):
1047
return WeaveFile(name, transport, filemode, create=True)
948
1049
def _save(self):
949
1050
"""Save the weave."""
950
1051
self._check_write_ok()
951
1052
sio = StringIO()
952
1053
write_weave_v5(self, sio)
954
bytes = sio.getvalue()
955
path = self._weave_name + WeaveFile.WEAVE_SUFFIX
957
self._transport.put_bytes(path, bytes, self._filemode)
958
except errors.NoSuchFile:
959
self._transport.mkdir(dirname(path))
960
self._transport.put_bytes(path, bytes, self._filemode)
1055
self._transport.put_file(self._weave_name + WeaveFile.WEAVE_SUFFIX,
963
1060
def get_suffixes():
964
1061
"""See VersionedFile.get_suffixes()."""
965
1062
return [WeaveFile.WEAVE_SUFFIX]
967
def insert_record_stream(self, stream):
968
super(WeaveFile, self).insert_record_stream(stream)
1064
def join(self, other, pb=None, msg=None, version_ids=None,
1065
ignore_missing=False):
1066
"""Join other into self and save."""
1067
super(WeaveFile, self).join(other, pb, msg, version_ids, ignore_missing)
972
1071
def _reweave(wa, wb, pb=None, msg=None):
973
1072
"""Combine two weaves and return the result.
975
This works even if a revision R has different parents in
1074
This works even if a revision R has different parents in
976
1075
wa and wb. In the resulting weave all the parents are given.
978
This is done by just building up a new weave, maintaining ordering
1077
This is done by just building up a new weave, maintaining ordering
979
1078
of the versions in the two inputs. More efficient approaches
980
might be possible but it should only be necessary to do
981
this operation rarely, when a new previously ghost version is
1079
might be possible but it should only be necessary to do
1080
this operation rarely, when a new previously ghost version is
984
1083
:param pb: An optional progress bar, indicating how far done we are
1029
1127
p = combined.setdefault(name, set())
1030
1128
p.update(map(weave._idx_to_name, weave._parents[idx]))
1031
1129
return combined
1133
"""Show the weave's table-of-contents"""
1134
print '%6s %50s %10s %10s' % ('ver', 'name', 'sha1', 'parents')
1135
for i in (6, 50, 10, 10):
1138
for i in range(w.num_versions()):
1141
parent_str = ' '.join(map(str, w._parents[i]))
1142
print '%6d %-50.50s %10.10s %s' % (i, name, sha1, parent_str)
1146
def weave_stats(weave_file, pb):
1147
from bzrlib.weavefile import read_weave
1149
wf = file(weave_file, 'rb')
1151
# FIXME: doesn't work on pipes
1152
weave_size = wf.tell()
1156
for i in range(vers):
1157
pb.update('checking sizes', i, vers)
1158
for origin, lineno, line in w._extract([i]):
1163
print 'versions %9d' % vers
1164
print 'weave file %9d bytes' % weave_size
1165
print 'total contents %9d bytes' % total
1166
print 'compression ratio %9.2fx' % (float(total) / float(weave_size))
1169
print 'average size %9d bytes' % avg
1170
print 'relative size %9.2fx' % (float(weave_size) / float(avg))
1174
print """bzr weave tool
1176
Experimental tool for weave algorithm.
1179
weave init WEAVEFILE
1180
Create an empty weave file
1181
weave get WEAVEFILE VERSION
1182
Write out specified version.
1183
weave check WEAVEFILE
1184
Check consistency of all versions.
1186
Display table of contents.
1187
weave add WEAVEFILE NAME [BASE...] < NEWTEXT
1188
Add NEWTEXT, with specified parent versions.
1189
weave annotate WEAVEFILE VERSION
1190
Display origin of each line.
1191
weave merge WEAVEFILE VERSION1 VERSION2 > OUT
1192
Auto-merge two versions and display conflicts.
1193
weave diff WEAVEFILE VERSION1 VERSION2
1194
Show differences between two versions.
1198
% weave init foo.weave
1200
% weave add foo.weave ver0 < foo.txt
1203
(create updated version)
1205
% weave get foo.weave 0 | diff -u - foo.txt
1206
% weave add foo.weave ver1 0 < foo.txt
1209
% weave get foo.weave 0 > foo.txt (create forked version)
1211
% weave add foo.weave ver2 0 < foo.txt
1214
% weave merge foo.weave 1 2 > foo.txt (merge them)
1215
% vi foo.txt (resolve conflicts)
1216
% weave add foo.weave merged 1 2 < foo.txt (commit merged version)
1228
# in case we're run directly from the subdirectory
1229
sys.path.append('..')
1231
from bzrlib.weavefile import write_weave, read_weave
1232
from bzrlib.progress import ProgressBar
1247
return read_weave(file(argv[2], 'rb'))
1253
# at the moment, based on everything in the file
1255
parents = map(int, argv[4:])
1256
lines = sys.stdin.readlines()
1257
ver = w.add(name, parents, lines)
1258
write_weave(w, file(argv[2], 'wb'))
1259
print 'added version %r %d' % (name, ver)
1262
if os.path.exists(fn):
1263
raise IOError("file exists")
1265
write_weave(w, file(fn, 'wb'))
1266
elif cmd == 'get': # get one version
1268
sys.stdout.writelines(w.get_iter(int(argv[3])))
1273
v1, v2 = map(int, argv[3:5])
1276
diff_gen = bzrlib.patiencediff.unified_diff(lines1, lines2,
1277
'%s version %d' % (fn, v1),
1278
'%s version %d' % (fn, v2))
1279
sys.stdout.writelines(diff_gen)
1281
elif cmd == 'annotate':
1283
# newline is added to all lines regardless; too hard to get
1284
# reasonable formatting otherwise
1286
for origin, text in w.annotate(int(argv[3])):
1287
text = text.rstrip('\r\n')
1289
print ' | %s' % (text)
1291
print '%5d | %s' % (origin, text)
1297
elif cmd == 'stats':
1298
weave_stats(argv[2], ProgressBar())
1300
elif cmd == 'check':
1305
print '%d versions ok' % w.num_versions()
1307
elif cmd == 'inclusions':
1309
print ' '.join(map(str, w.inclusions([int(argv[3])])))
1311
elif cmd == 'parents':
1313
print ' '.join(map(str, w._parents[int(argv[3])]))
1315
elif cmd == 'plan-merge':
1316
# replaced by 'bzr weave-plan-merge'
1318
for state, line in w.plan_merge(int(argv[3]), int(argv[4])):
1320
print '%14s | %s' % (state, line),
1321
elif cmd == 'merge':
1322
# replaced by 'bzr weave-merge-text'
1324
p = w.plan_merge(int(argv[3]), int(argv[4]))
1325
sys.stdout.writelines(w.weave_merge(p))
1327
raise ValueError('unknown command %r' % cmd)
1330
if __name__ == '__main__':
1332
sys.exit(main(sys.argv))
1335
class InterWeave(InterVersionedFile):
1336
"""Optimised code paths for weave to weave operations."""
1338
_matching_file_from_factory = staticmethod(WeaveFile)
1339
_matching_file_to_factory = staticmethod(WeaveFile)
1342
def is_compatible(source, target):
1343
"""Be compatible with weaves."""
1345
return (isinstance(source, Weave) and
1346
isinstance(target, Weave))
1347
except AttributeError:
1350
def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
1351
"""See InterVersionedFile.join."""
1352
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
1353
if self.target.versions() == [] and version_ids is None:
1354
self.target._copy_weave_content(self.source)
1357
self.target._join(self.source, pb, msg, version_ids, ignore_missing)
1358
except errors.WeaveParentMismatch:
1359
self.target._reweave(self.source, pb, msg)
1362
InterVersionedFile.register_optimiser(InterWeave)