285
286
self.delta = delta
287
288
self._index = _KnitIndex(transport, relpath + INDEX_SUFFIX,
288
access_mode, create=create)
289
access_mode, create=create, file_mode=file_mode)
289
290
self._data = _KnitData(transport, relpath + DATA_SUFFIX,
290
access_mode, create=not len(self.versions()))
291
access_mode, create=create and not len(self), file_mode=file_mode)
294
return '%s(%s)' % (self.__class__.__name__,
295
self.transport.abspath(self.filename))
292
297
def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
293
298
"""See VersionedFile._add_delta()."""
294
299
self._check_add(version_id, []) # should we check the lines ?
351
356
where, size = self._data.add_record(version_id, digest, store_lines)
352
357
self._index.add_version(version_id, options, where, size, parents)
359
def _add_raw_records(self, records, data):
360
"""Add all the records 'records' with data pre-joined in 'data'.
362
:param records: A list of tuples(version_id, options, parents, size).
363
:param data: The data for the records. When it is written, the records
364
are adjusted to have pos pointing into data by the sum of
365
the preceding records' sizes.
368
pos = self._data.add_raw_record(data)
370
for (version_id, options, parents, size) in records:
371
index_entries.append((version_id, options, pos, size, parents))
373
self._index.add_versions(index_entries)
354
375
def clear_cache(self):
355
376
"""Clear the data cache only."""
356
377
self._data.clear_cache()
606
633
assert self.writable, "knit is not opened for write"
607
634
### FIXME escape. RBC 20060228
608
635
if contains_whitespace(version_id):
609
raise InvalidRevisionId(version_id)
636
raise InvalidRevisionId(version_id, self.filename)
610
637
if self.has_version(version_id):
611
638
raise RevisionAlreadyPresent(version_id, self.filename)
613
if False or __debug__:
615
assert '\n' not in l[:-1]
639
self._check_lines_not_unicode(lines)
640
self._check_lines_are_lines(lines)
617
642
def _add(self, version_id, lines, parents, delta, parent_texts):
618
643
"""Add a set of lines on top of version specified by parents.
816
841
for lineno, insert_id, dset, line in w.walk(version_ids):
817
842
yield lineno, insert_id, dset, line
844
def plan_merge(self, ver_a, ver_b):
845
"""See VersionedFile.plan_merge."""
846
ancestors_b = set(self.get_ancestry(ver_b))
847
def status_a(revision, text):
848
if revision in ancestors_b:
849
return 'killed-b', text
853
ancestors_a = set(self.get_ancestry(ver_a))
854
def status_b(revision, text):
855
if revision in ancestors_a:
856
return 'killed-a', text
860
annotated_a = self.annotate(ver_a)
861
annotated_b = self.annotate(ver_b)
862
plain_a = [t for (a, t) in annotated_a]
863
plain_b = [t for (a, t) in annotated_b]
864
blocks = SequenceMatcher(None, plain_a, plain_b).get_matching_blocks()
867
for ai, bi, l in blocks:
868
# process all mismatched sections
869
# (last mismatched section is handled because blocks always
870
# includes a 0-length last block)
871
for revision, text in annotated_a[a_cur:ai]:
872
yield status_a(revision, text)
873
for revision, text in annotated_b[b_cur:bi]:
874
yield status_b(revision, text)
876
# and now the matched section
879
for text_a, text_b in zip(plain_a[ai:a_cur], plain_b[bi:b_cur]):
880
assert text_a == text_b
881
yield "unchanged", text_a
820
884
class _KnitComponentFile(object):
821
885
"""One of the files used to implement a knit database"""
823
def __init__(self, transport, filename, mode):
887
def __init__(self, transport, filename, mode, file_mode=None):
824
888
self._transport = transport
825
889
self._filename = filename
826
890
self._mode = mode
891
self._file_mode=file_mode
828
893
def write_header(self):
829
if self._transport.append(self._filename, StringIO(self.HEADER)):
894
if self._transport.append(self._filename, StringIO(self.HEADER),
895
mode=self._file_mode):
830
896
raise KnitCorrupt(self._filename, 'misaligned after writing header')
832
898
def check_header(self, fp):
833
line = fp.read(len(self.HEADER))
834
900
if line != self.HEADER:
835
901
raise KnitHeaderError(badline=line)
863
929
this allows updates to correct version and parent information.
864
930
Note that the two entries may share the delta, and that successive
865
931
annotations and references MUST point to the first entry.
933
The index file on disc contains a header, followed by one line per knit
934
record. The same revision can be present in an index file more than once.
935
The first occurrence gets assigned a sequence number starting from 0.
937
The format of a single line is
938
REVISION_ID FLAGS BYTE_OFFSET LENGTH( PARENT_ID|PARENT_SEQUENCE_ID)* :\n
939
REVISION_ID is a utf8-encoded revision id
940
FLAGS is a comma separated list of flags about the record. Values include
941
no-eol, line-delta, fulltext.
942
BYTE_OFFSET is the ascii representation of the byte offset in the data file
943
that the compressed data starts at.
944
LENGTH is the ascii representation of the length of the data file.
945
PARENT_ID a utf-8 revision id prefixed by a '.' that is a parent of
947
PARENT_SEQUENCE_ID the ascii representation of the sequence number of a
948
revision id already in the knit that is a parent of REVISION_ID.
949
The ' :' marker is the end of record marker.
952
when a write is interrupted to the index file, it will result in a line that
953
does not end in ' :'. If the ' :' is not present at the end of a line, or at
954
the end of the file, then the record that is missing it will be ignored by
957
When writing new records to the index file, the data is preceded by '\n'
958
to ensure that records always start on new lines even if the last write was
959
interrupted. As a result it is normal for the last line in the index to be
960
missing a trailing newline. One can be added with no harmful effects.
868
HEADER = "# bzr knit index 7\n"
963
HEADER = "# bzr knit index 8\n"
870
965
# speed of knit parsing went from 280 ms to 280 ms with slots addition.
871
966
# __slots__ = ['_cache', '_history', '_transport', '_filename']
894
def __init__(self, transport, filename, mode, create=False):
895
_KnitComponentFile.__init__(self, transport, filename, mode)
989
def __init__(self, transport, filename, mode, create=False, file_mode=None):
990
_KnitComponentFile.__init__(self, transport, filename, mode, file_mode)
897
992
# position in _history is the 'official' index for a revision
898
993
# but the values may have come from a newer entry.
1060
1161
def add_version(self, version_id, options, pos, size, parents):
1061
1162
"""Add a version record to the index."""
1062
self._cache_version(version_id, options, pos, size, parents)
1064
content = "%s %s %s %s %s\n" % (version_id.encode('utf-8'),
1068
self._version_list_to_index(parents))
1069
assert isinstance(content, str), 'content must be utf-8 encoded'
1070
self._transport.append(self._filename, StringIO(content))
1163
self.add_versions(((version_id, options, pos, size, parents),))
1165
def add_versions(self, versions):
1166
"""Add multiple versions to the index.
1168
:param versions: a list of tuples:
1169
(version_id, options, pos, size, parents).
1172
for version_id, options, pos, size, parents in versions:
1173
line = "\n%s %s %s %s %s :" % (version_id.encode('utf-8'),
1177
self._version_list_to_index(parents))
1178
assert isinstance(line, str), \
1179
'content must be utf-8 encoded: %r' % (line,)
1181
self._transport.append(self._filename, StringIO(''.join(lines)))
1182
# cache after writing, so that a failed write leads to missing cache
1183
# entries not extra ones. XXX TODO: RBC 20060502 in the event of a
1184
# failure, reload the index or flush it or some such, to prevent
1185
# writing records that did complete twice.
1186
for version_id, options, pos, size, parents in versions:
1187
self._cache_version(version_id, options, pos, size, parents)
1072
1189
def has_version(self, version_id):
1073
1190
"""True if the version is in the index."""
1074
1191
return self._cache.has_key(version_id)
1112
1229
class _KnitData(_KnitComponentFile):
1113
1230
"""Contents of the knit data file"""
1115
HEADER = "# bzr knit data 7\n"
1232
HEADER = "# bzr knit data 8\n"
1117
def __init__(self, transport, filename, mode, create=False):
1234
def __init__(self, transport, filename, mode, create=False, file_mode=None):
1118
1235
_KnitComponentFile.__init__(self, transport, filename, mode)
1119
1236
self._file = None
1120
1237
self._checked = False
1122
self._transport.put(self._filename, StringIO(''))
1239
self._transport.put(self._filename, StringIO(''), mode=file_mode)
1123
1240
self._records = {}
1125
1242
def clear_cache(self):
1154
1271
return length, sio
1156
1273
def add_raw_record(self, raw_data):
1157
"""Append a prepared record to the data file."""
1274
"""Append a prepared record to the data file.
1276
:return: the offset in the data file raw_data was written.
1158
1278
assert isinstance(raw_data, str), 'data must be plain bytes'
1159
start_pos = self._transport.append(self._filename, StringIO(raw_data))
1160
return start_pos, len(raw_data)
1279
return self._transport.append(self._filename, StringIO(raw_data))
1162
1281
def add_record(self, version_id, digest, lines):
1163
1282
"""Write new text record to disk. Returns the position in the
1368
1481
# data suck the join:
1370
1483
total = len(version_list)
1371
# we want the raw gzip for bulk copying, but the record validated
1372
# just enough to be sure it's the right one.
1373
# TODO: consider writev or write combining to reduce
1374
# death of a thousand cuts feeling.
1375
1486
for (version_id, raw_data), \
1376
1487
(version_id2, options, parents) in \
1377
1488
izip(self.source._data.read_records_iter_raw(copy_queue_records),
1379
1490
assert version_id == version_id2, 'logic error, inconsistent results'
1380
1491
count = count + 1
1381
1492
pb.update("Joining knit", count, total)
1382
pos, size = self.target._data.add_raw_record(raw_data)
1383
self.target._index.add_version(version_id, options, pos, size, parents)
1493
raw_records.append((version_id, options, parents, len(raw_data)))
1494
raw_datum.append(raw_data)
1495
self.target._add_raw_records(raw_records, ''.join(raw_datum))
1385
1497
for version in mismatched_versions:
1386
1498
# FIXME RBC 20060309 is this needed?
1398
1510
InterVersionedFile.register_optimiser(InterKnit)
1513
class WeaveToKnit(InterVersionedFile):
1514
"""Optimised code paths for weave to knit operations."""
1516
_matching_file_from_factory = bzrlib.weave.WeaveFile
1517
_matching_file_to_factory = KnitVersionedFile
1520
def is_compatible(source, target):
1521
"""Be compatible with weaves to knits."""
1523
return (isinstance(source, bzrlib.weave.Weave) and
1524
isinstance(target, KnitVersionedFile))
1525
except AttributeError:
1528
def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
1529
"""See InterVersionedFile.join."""
1530
assert isinstance(self.source, bzrlib.weave.Weave)
1531
assert isinstance(self.target, KnitVersionedFile)
1533
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
1538
pb = bzrlib.ui.ui_factory.nested_progress_bar()
1540
version_ids = list(version_ids)
1542
self.source_ancestry = set(self.source.get_ancestry(version_ids))
1543
this_versions = set(self.target._index.get_versions())
1544
needed_versions = self.source_ancestry - this_versions
1545
cross_check_versions = self.source_ancestry.intersection(this_versions)
1546
mismatched_versions = set()
1547
for version in cross_check_versions:
1548
# scan to include needed parents.
1549
n1 = set(self.target.get_parents_with_ghosts(version))
1550
n2 = set(self.source.get_parents(version))
1551
# if all of n2's parents are in n1, then its fine.
1552
if n2.difference(n1):
1553
# FIXME TEST this check for cycles being introduced works
1554
# the logic is we have a cycle if in our graph we are an
1555
# ancestor of any of the n2 revisions.
1561
parent_ancestors = self.source.get_ancestry(parent)
1562
if version in parent_ancestors:
1563
raise errors.GraphCycleError([parent, version])
1564
# ensure this parent will be available later.
1565
new_parents = n2.difference(n1)
1566
needed_versions.update(new_parents.difference(this_versions))
1567
mismatched_versions.add(version)
1569
if not needed_versions and not mismatched_versions:
1571
full_list = topo_sort(self.source.get_graph())
1573
version_list = [i for i in full_list if (not self.target.has_version(i)
1574
and i in needed_versions)]
1578
total = len(version_list)
1579
for version_id in version_list:
1580
pb.update("Converting to knit", count, total)
1581
parents = self.source.get_parents(version_id)
1582
# check that it will be a consistent copy:
1583
for parent in parents:
1584
# if source has the parent, we must already have it
1585
assert (self.target.has_version(parent))
1586
self.target.add_lines(
1587
version_id, parents, self.source.get_lines(version_id))
1590
for version in mismatched_versions:
1591
# FIXME RBC 20060309 is this needed?
1592
n1 = set(self.target.get_parents_with_ghosts(version))
1593
n2 = set(self.source.get_parents(version))
1594
# write a combined record to our history preserving the current
1595
# parents as first in the list
1596
new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
1597
self.target.fix_parents(version, new_parents)
1603
InterVersionedFile.register_optimiser(WeaveToKnit)
1401
1606
class SequenceMatcher(difflib.SequenceMatcher):
1402
1607
"""Knit tuned sequence matcher.