1
# Copyright (C) 2005, 2006 Canonical Ltd
4
# Johan Rydberg <jrydberg@gnu.org>
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
11
# This program is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
"""Versioned text file storage api."""
22
from bzrlib.lazy_import import lazy_import
23
lazy_import(globals(), """
24
from copy import deepcopy
34
from bzrlib.transport.memory import MemoryTransport
37
from bzrlib.inter import InterObject
38
from bzrlib.textmerge import TextMerge
39
from bzrlib.symbol_versioning import (deprecated_function,
45
class VersionedFile(object):
46
"""Versioned text file storage.
48
A versioned file manages versions of line-based text files,
49
keeping track of the originating version for each line.
51
To clients the "lines" of the file are represented as a list of
52
strings. These strings will typically have terminal newline
53
characters, but this is not required. In particular files commonly
54
do not have a newline at the end of the file.
56
Texts are identified by a version-id string.
59
def __init__(self, access_mode):
61
self._access_mode = access_mode
64
def check_not_reserved_id(version_id):
65
revision.check_not_reserved_id(version_id)
67
def copy_to(self, name, transport):
68
"""Copy this versioned file to name on transport."""
69
raise NotImplementedError(self.copy_to)
71
@deprecated_method(zero_eight)
73
"""Return a list of all the versions in this versioned file.
75
Please use versionedfile.versions() now.
77
return self.versions()
80
"""Return a unsorted list of versions."""
81
raise NotImplementedError(self.versions)
83
def has_ghost(self, version_id):
84
"""Returns whether version is present as a ghost."""
85
raise NotImplementedError(self.has_ghost)
87
def has_version(self, version_id):
88
"""Returns whether version is present."""
89
raise NotImplementedError(self.has_version)
91
def add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
92
"""Add a text to the versioned file via a pregenerated delta.
94
:param version_id: The version id being added.
95
:param parents: The parents of the version_id.
96
:param delta_parent: The parent this delta was created against.
97
:param sha1: The sha1 of the full text.
98
:param delta: The delta instructions. See get_delta for details.
100
version_id = osutils.safe_revision_id(version_id)
101
parents = [osutils.safe_revision_id(v) for v in parents]
102
self._check_write_ok()
103
if self.has_version(version_id):
104
raise errors.RevisionAlreadyPresent(version_id, self)
105
return self._add_delta(version_id, parents, delta_parent, sha1, noeol, delta)
107
def _add_delta(self, version_id, parents, delta_parent, sha1, noeol, delta):
108
"""Class specific routine to add a delta.
110
This generic version simply applies the delta to the delta_parent and
113
# strip annotation from delta
115
for start, stop, delta_len, delta_lines in delta:
116
new_delta.append((start, stop, delta_len, [text for origin, text in delta_lines]))
117
if delta_parent is not None:
118
parent_full = self.get_lines(delta_parent)
121
new_full = self._apply_delta(parent_full, new_delta)
122
# its impossible to have noeol on an empty file
123
if noeol and new_full[-1][-1] == '\n':
124
new_full[-1] = new_full[-1][:-1]
125
self.add_lines(version_id, parents, new_full)
127
def add_lines(self, version_id, parents, lines, parent_texts=None):
128
"""Add a single text on top of the versioned file.
130
Must raise RevisionAlreadyPresent if the new version is
131
already present in file history.
133
Must raise RevisionNotPresent if any of the given parents are
134
not present in file history.
135
:param parent_texts: An optional dictionary containing the opaque
136
representations of some or all of the parents of
137
version_id to allow delta optimisations.
138
VERY IMPORTANT: the texts must be those returned
139
by add_lines or data corruption can be caused.
140
:return: An opaque representation of the inserted version which can be
141
provided back to future add_lines calls in the parent_texts
144
version_id = osutils.safe_revision_id(version_id)
145
parents = [osutils.safe_revision_id(v) for v in parents]
146
self._check_write_ok()
147
return self._add_lines(version_id, parents, lines, parent_texts)
149
def _add_lines(self, version_id, parents, lines, parent_texts):
150
"""Helper to do the class specific add_lines."""
151
raise NotImplementedError(self.add_lines)
153
def add_lines_with_ghosts(self, version_id, parents, lines,
155
"""Add lines to the versioned file, allowing ghosts to be present.
157
This takes the same parameters as add_lines.
159
version_id = osutils.safe_revision_id(version_id)
160
parents = [osutils.safe_revision_id(v) for v in parents]
161
self._check_write_ok()
162
return self._add_lines_with_ghosts(version_id, parents, lines,
165
def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts):
166
"""Helper to do class specific add_lines_with_ghosts."""
167
raise NotImplementedError(self.add_lines_with_ghosts)
169
def check(self, progress_bar=None):
170
"""Check the versioned file for integrity."""
171
raise NotImplementedError(self.check)
173
def _check_lines_not_unicode(self, lines):
174
"""Check that lines being added to a versioned file are not unicode."""
176
if line.__class__ is not str:
177
raise errors.BzrBadParameterUnicode("lines")
179
def _check_lines_are_lines(self, lines):
180
"""Check that the lines really are full lines without inline EOL."""
182
if '\n' in line[:-1]:
183
raise errors.BzrBadParameterContainsNewline("lines")
185
def _check_write_ok(self):
186
"""Is the versioned file marked as 'finished' ? Raise if it is."""
188
raise errors.OutSideTransaction()
189
if self._access_mode != 'w':
190
raise errors.ReadOnlyObjectDirtiedError(self)
192
def enable_cache(self):
193
"""Tell this versioned file that it should cache any data it reads.
195
This is advisory, implementations do not have to support caching.
199
def clear_cache(self):
200
"""Remove any data cached in the versioned file object.
202
This only needs to be supported if caches are supported
206
def clone_text(self, new_version_id, old_version_id, parents):
207
"""Add an identical text to old_version_id as new_version_id.
209
Must raise RevisionNotPresent if the old version or any of the
210
parents are not present in file history.
212
Must raise RevisionAlreadyPresent if the new version is
213
already present in file history."""
214
new_version_id = osutils.safe_revision_id(new_version_id)
215
old_version_id = osutils.safe_revision_id(old_version_id)
216
self._check_write_ok()
217
return self._clone_text(new_version_id, old_version_id, parents)
219
def _clone_text(self, new_version_id, old_version_id, parents):
220
"""Helper function to do the _clone_text work."""
221
raise NotImplementedError(self.clone_text)
223
def create_empty(self, name, transport, mode=None):
224
"""Create a new versioned file of this exact type.
226
:param name: the file name
227
:param transport: the transport
228
:param mode: optional file mode.
230
raise NotImplementedError(self.create_empty)
232
def fix_parents(self, version_id, new_parents):
233
"""Fix the parents list for version.
235
This is done by appending a new version to the index
236
with identical data except for the parents list.
237
the parents list must be a superset of the current
240
version_id = osutils.safe_revision_id(version_id)
241
new_parents = [osutils.safe_revision_id(p) for p in new_parents]
242
self._check_write_ok()
243
return self._fix_parents(version_id, new_parents)
245
def _fix_parents(self, version_id, new_parents):
246
"""Helper for fix_parents."""
247
raise NotImplementedError(self.fix_parents)
249
def get_delta(self, version):
250
"""Get a delta for constructing version from some other version.
252
:return: (delta_parent, sha1, noeol, delta)
253
Where delta_parent is a version id or None to indicate no parent.
255
raise NotImplementedError(self.get_delta)
257
def get_deltas(self, version_ids):
258
"""Get multiple deltas at once for constructing versions.
260
:return: dict(version_id:(delta_parent, sha1, noeol, delta))
261
Where delta_parent is a version id or None to indicate no parent, and
262
version_id is the version_id created by that delta.
265
for version_id in version_ids:
266
result[version_id] = self.get_delta(version_id)
269
def get_sha1(self, version_id):
270
"""Get the stored sha1 sum for the given revision.
272
:param name: The name of the version to lookup
274
raise NotImplementedError(self.get_sha1)
276
def get_suffixes(self):
277
"""Return the file suffixes associated with this versioned file."""
278
raise NotImplementedError(self.get_suffixes)
280
def get_text(self, version_id):
281
"""Return version contents as a text string.
283
Raises RevisionNotPresent if version is not present in
286
return ''.join(self.get_lines(version_id))
287
get_string = get_text
289
def get_texts(self, version_ids):
290
"""Return the texts of listed versions as a list of strings.
292
Raises RevisionNotPresent if version is not present in
295
return [''.join(self.get_lines(v)) for v in version_ids]
297
def get_lines(self, version_id):
298
"""Return version contents as a sequence of lines.
300
Raises RevisionNotPresent if version is not present in
303
raise NotImplementedError(self.get_lines)
305
def get_ancestry(self, version_ids, topo_sorted=True):
306
"""Return a list of all ancestors of given version(s). This
307
will not include the null revision.
309
This list will not be topologically sorted if sorted=False is passed.
311
Must raise RevisionNotPresent if any of the given versions are
312
not present in file history."""
313
if isinstance(version_ids, basestring):
314
version_ids = [version_ids]
315
raise NotImplementedError(self.get_ancestry)
317
def get_ancestry_with_ghosts(self, version_ids):
318
"""Return a list of all ancestors of given version(s). This
319
will not include the null revision.
321
Must raise RevisionNotPresent if any of the given versions are
322
not present in file history.
324
Ghosts that are known about will be included in ancestry list,
325
but are not explicitly marked.
327
raise NotImplementedError(self.get_ancestry_with_ghosts)
329
def get_graph(self, version_ids=None):
330
"""Return a graph from the versioned file.
332
Ghosts are not listed or referenced in the graph.
333
:param version_ids: Versions to select.
334
None means retrieve all versions.
337
if version_ids is None:
338
for version in self.versions():
339
result[version] = self.get_parents(version)
341
pending = set(osutils.safe_revision_id(v) for v in version_ids)
343
version = pending.pop()
344
if version in result:
346
parents = self.get_parents(version)
347
for parent in parents:
351
result[version] = parents
354
def get_graph_with_ghosts(self):
355
"""Return a graph for the entire versioned file.
357
Ghosts are referenced in parents list but are not
360
raise NotImplementedError(self.get_graph_with_ghosts)
362
@deprecated_method(zero_eight)
363
def parent_names(self, version):
364
"""Return version names for parents of a version.
366
See get_parents for the current api.
368
return self.get_parents(version)
370
def get_parents(self, version_id):
371
"""Return version names for parents of a version.
373
Must raise RevisionNotPresent if version is not present in
376
raise NotImplementedError(self.get_parents)
378
def get_parents_with_ghosts(self, version_id):
379
"""Return version names for parents of version_id.
381
Will raise RevisionNotPresent if version_id is not present
384
Ghosts that are known about will be included in the parent list,
385
but are not explicitly marked.
387
raise NotImplementedError(self.get_parents_with_ghosts)
389
def annotate_iter(self, version_id):
390
"""Yield list of (version-id, line) pairs for the specified
393
Must raise RevisionNotPresent if any of the given versions are
394
not present in file history.
396
raise NotImplementedError(self.annotate_iter)
398
def annotate(self, version_id):
399
return list(self.annotate_iter(version_id))
401
def _apply_delta(self, lines, delta):
402
"""Apply delta to lines."""
405
for start, end, count, delta_lines in delta:
406
lines[offset+start:offset+end] = delta_lines
407
offset = offset + (start - end) + count
410
def join(self, other, pb=None, msg=None, version_ids=None,
411
ignore_missing=False):
412
"""Integrate versions from other into this versioned file.
414
If version_ids is None all versions from other should be
415
incorporated into this versioned file.
417
Must raise RevisionNotPresent if any of the specified versions
418
are not present in the other files history unless ignore_missing
419
is supplied when they are silently skipped.
421
self._check_write_ok()
422
return InterVersionedFile.get(other, self).join(
428
def iter_lines_added_or_present_in_versions(self, version_ids=None,
430
"""Iterate over the lines in the versioned file from version_ids.
432
This may return lines from other versions, and does not return the
433
specific version marker at this point. The api may be changed
434
during development to include the version that the versioned file
435
thinks is relevant, but given that such hints are just guesses,
436
its better not to have it if we don't need it.
438
If a progress bar is supplied, it may be used to indicate progress.
439
The caller is responsible for cleaning up progress bars (because this
442
NOTES: Lines are normalised: they will all have \n terminators.
443
Lines are returned in arbitrary order.
445
raise NotImplementedError(self.iter_lines_added_or_present_in_versions)
447
def transaction_finished(self):
448
"""The transaction that this file was opened in has finished.
450
This records self.finished = True and should cause all mutating
455
@deprecated_method(zero_eight)
456
def walk(self, version_ids=None):
457
"""Walk the versioned file as a weave-like structure, for
458
versions relative to version_ids. Yields sequence of (lineno,
459
insert, deletes, text) for each relevant line.
461
Must raise RevisionNotPresent if any of the specified versions
462
are not present in the file history.
464
:param version_ids: the version_ids to walk with respect to. If not
465
supplied the entire weave-like structure is walked.
467
walk is deprecated in favour of iter_lines_added_or_present_in_versions
469
raise NotImplementedError(self.walk)
471
@deprecated_method(zero_eight)
472
def iter_names(self):
473
"""Walk the names list."""
474
return iter(self.versions())
476
def plan_merge(self, ver_a, ver_b):
477
"""Return pseudo-annotation indicating how the two versions merge.
479
This is computed between versions a and b and their common
482
Weave lines present in none of them are skipped entirely.
485
killed-base Dead in base revision
486
killed-both Killed in each revision
489
unchanged Alive in both a and b (possibly created in both)
492
ghost-a Killed in a, unborn in b
493
ghost-b Killed in b, unborn in a
494
irrelevant Not in either revision
496
raise NotImplementedError(VersionedFile.plan_merge)
498
def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
499
b_marker=TextMerge.B_MARKER):
500
return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
503
class PlanWeaveMerge(TextMerge):
504
"""Weave merge that takes a plan as its input.
506
This exists so that VersionedFile.plan_merge is implementable.
507
Most callers will want to use WeaveMerge instead.
510
def __init__(self, plan, a_marker=TextMerge.A_MARKER,
511
b_marker=TextMerge.B_MARKER):
512
TextMerge.__init__(self, a_marker, b_marker)
515
def _merge_struct(self):
520
def outstanding_struct():
521
if not lines_a and not lines_b:
523
elif ch_a and not ch_b:
526
elif ch_b and not ch_a:
528
elif lines_a == lines_b:
531
yield (lines_a, lines_b)
533
# We previously considered either 'unchanged' or 'killed-both' lines
534
# to be possible places to resynchronize. However, assuming agreement
535
# on killed-both lines may be too aggressive. -- mbp 20060324
536
for state, line in self.plan:
537
if state == 'unchanged':
538
# resync and flush queued conflicts changes if any
539
for struct in outstanding_struct():
545
if state == 'unchanged':
548
elif state == 'killed-a':
551
elif state == 'killed-b':
554
elif state == 'new-a':
557
elif state == 'new-b':
561
assert state in ('irrelevant', 'ghost-a', 'ghost-b',
562
'killed-base', 'killed-both'), state
563
for struct in outstanding_struct():
567
class WeaveMerge(PlanWeaveMerge):
568
"""Weave merge that takes a VersionedFile and two versions as its input"""
570
def __init__(self, versionedfile, ver_a, ver_b,
571
a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
572
plan = versionedfile.plan_merge(ver_a, ver_b)
573
PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
576
class InterVersionedFile(InterObject):
577
"""This class represents operations taking place between two versionedfiles..
579
Its instances have methods like join, and contain
580
references to the source and target versionedfiles these operations can be
583
Often we will provide convenience methods on 'versionedfile' which carry out
584
operations with another versionedfile - they will always forward to
585
InterVersionedFile.get(other).method_name(parameters).
589
"""The available optimised InterVersionedFile types."""
591
def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
592
"""Integrate versions from self.source into self.target.
594
If version_ids is None all versions from source should be
595
incorporated into this versioned file.
597
Must raise RevisionNotPresent if any of the specified versions
598
are not present in the other files history unless ignore_missing is
599
supplied when they are silently skipped.
602
# - if the target is empty, just add all the versions from
603
# source to target, otherwise:
604
# - make a temporary versioned file of type target
605
# - insert the source content into it one at a time
607
if not self.target.versions():
610
# Make a new target-format versioned file.
611
temp_source = self.target.create_empty("temp", MemoryTransport())
613
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
614
graph = self.source.get_graph(version_ids)
615
order = tsort.topo_sort(graph.items())
616
pb = ui.ui_factory.nested_progress_bar()
619
# TODO for incremental cross-format work:
620
# make a versioned file with the following content:
621
# all revisions we have been asked to join
622
# all their ancestors that are *not* in target already.
623
# the immediate parents of the above two sets, with
624
# empty parent lists - these versions are in target already
625
# and the incorrect version data will be ignored.
626
# TODO: for all ancestors that are present in target already,
627
# check them for consistent data, this requires moving sha1 from
629
# TODO: remove parent texts when they are not relevant any more for
630
# memory pressure reduction. RBC 20060313
631
# pb.update('Converting versioned data', 0, len(order))
632
# deltas = self.source.get_deltas(order)
633
for index, version in enumerate(order):
634
pb.update('Converting versioned data', index, len(order))
635
parent_text = target.add_lines(version,
636
self.source.get_parents(version),
637
self.source.get_lines(version),
638
parent_texts=parent_texts)
639
parent_texts[version] = parent_text
640
#delta_parent, sha1, noeol, delta = deltas[version]
641
#target.add_delta(version,
642
# self.source.get_parents(version),
647
#target.get_lines(version)
649
# this should hit the native code path for target
650
if target is not self.target:
651
return self.target.join(temp_source,
659
def _get_source_version_ids(self, version_ids, ignore_missing):
660
"""Determine the version ids to be used from self.source.
662
:param version_ids: The caller-supplied version ids to check. (None
663
for all). If None is in version_ids, it is stripped.
664
:param ignore_missing: if True, remove missing ids from the version
665
list. If False, raise RevisionNotPresent on
666
a missing version id.
667
:return: A set of version ids.
669
if version_ids is None:
670
# None cannot be in source.versions
671
return set(self.source.versions())
673
version_ids = [osutils.safe_revision_id(v) for v in version_ids]
675
return set(self.source.versions()).intersection(set(version_ids))
677
new_version_ids = set()
678
for version in version_ids:
681
if not self.source.has_version(version):
682
raise errors.RevisionNotPresent(version, str(self.source))
684
new_version_ids.add(version)
685
return new_version_ids
688
class InterVersionedFileTestProviderAdapter(object):
689
"""A tool to generate a suite testing multiple inter versioned-file classes.
691
This is done by copying the test once for each InterVersionedFile provider
692
and injecting the transport_server, transport_readonly_server,
693
versionedfile_factory and versionedfile_factory_to classes into each copy.
694
Each copy is also given a new id() to make it easy to identify.
697
def __init__(self, transport_server, transport_readonly_server, formats):
698
self._transport_server = transport_server
699
self._transport_readonly_server = transport_readonly_server
700
self._formats = formats
702
def adapt(self, test):
703
result = unittest.TestSuite()
704
for (interversionedfile_class,
705
versionedfile_factory,
706
versionedfile_factory_to) in self._formats:
707
new_test = deepcopy(test)
708
new_test.transport_server = self._transport_server
709
new_test.transport_readonly_server = self._transport_readonly_server
710
new_test.interversionedfile_class = interversionedfile_class
711
new_test.versionedfile_factory = versionedfile_factory
712
new_test.versionedfile_factory_to = versionedfile_factory_to
713
def make_new_test_id():
714
new_id = "%s(%s)" % (new_test.id(), interversionedfile_class.__name__)
715
return lambda: new_id
716
new_test.id = make_new_test_id()
717
result.addTest(new_test)
721
def default_test_list():
722
"""Generate the default list of interversionedfile permutations to test."""
723
from bzrlib.weave import WeaveFile
724
from bzrlib.knit import KnitVersionedFile
726
# test the fallback InterVersionedFile from annotated knits to weave
727
result.append((InterVersionedFile,
730
for optimiser in InterVersionedFile._optimisers:
731
result.append((optimiser,
732
optimiser._matching_file_from_factory,
733
optimiser._matching_file_to_factory
735
# if there are specific combinations we want to use, we can add them