# Copyright (C) 2005, 2006 Canonical Ltd
# Johan Rydberg <jrydberg@gnu.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Versioned text file storage api."""

from cStringIO import StringIO

from zlib import adler32

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
import os
import urllib

from bzrlib import (
    errors,
    multiparent,
    osutils,
    revision,
    )
from bzrlib.graph import DictParentsProvider, Graph, _StackedParentsProvider
from bzrlib.transport.memory import MemoryTransport
""")

from bzrlib.inter import InterObject
from bzrlib.registry import Registry
from bzrlib.symbol_versioning import *
from bzrlib.textmerge import TextMerge

adapter_registry = Registry()
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
    'DeltaPlainToFullText')
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
    'FTPlainToFullText')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
    'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
    'bzrlib.knit', 'DeltaAnnotatedToFullText')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
    'bzrlib.knit', 'FTAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
    'bzrlib.knit', 'FTAnnotatedToFullText')
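
# Illustrative usage sketch (not part of the original file): adapters are
# looked up by a (source storage kind, target storage kind) pair and used to
# re-encode a record's bytes.  The `record` name below is a hypothetical
# ContentFactory whose storage_kind is 'knit-ft-gz'; the adapter classes in
# bzrlib.knit take a basis versioned file, which plain fulltext conversion
# does not need.
#
#   adapter_factory = adapter_registry.get(('knit-ft-gz', 'fulltext'))
#   adapter = adapter_factory(None)
#   fulltext = adapter.get_bytes(record, record.get_bytes_as('knit-ft-gz'))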


class ContentFactory(object):
    """Abstract interface for insertion and retrieval from a VersionedFile.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. One of
        'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
        'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
        'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = None
        self.key = None
        self.parents = None


class FulltextContentFactory(ContentFactory):
    """Static data content factory.

    This takes a fulltext when created and just returns that during
    get_bytes_as('fulltext').

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'fulltext'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, text):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.storage_kind = 'fulltext'
        self.key = key
        self.parents = parents
        self._text = text

    def get_bytes_as(self, storage_kind):
        if storage_kind == self.storage_kind:
            return self._text
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)


class AbsentContentFactory(ContentFactory):
    """A placeholder content factory for unavailable texts.

    :ivar sha1: None.
    :ivar storage_kind: 'absent'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: None.
    """

    def __init__(self, key):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = 'absent'
        self.key = key
        self.parents = None


class AdapterFactory(ContentFactory):
    """A content factory to adapt between key prefixes."""

    def __init__(self, key, parents, adapted):
        """Create an adapter factory instance."""
        self.key = key
        self.parents = parents
        self._adapted = adapted

    def __getattr__(self, attr):
        """Return a member from the adapted object."""
        if attr in ('key', 'parents'):
            return self.__dict__[attr]
        else:
            return getattr(self._adapted, attr)


def filter_absent(record_stream):
    """Adapt a record stream to remove absent records."""
    for record in record_stream:
        if record.storage_kind != 'absent':
            yield record
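
# Illustrative usage sketch (assumes `vf` is a populated VersionedFiles
# instance and `keys` a list of key tuples): consume a record stream while
# silently skipping texts that are not present.
#
#   for record in filter_absent(vf.get_record_stream(keys, 'unordered', True)):
#       text = record.get_bytes_as('fulltext')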


class VersionedFile(object):
    """Versioned text file storage.

    A versioned file manages versions of line-based text files,
    keeping track of the originating version for each line.

    To clients the "lines" of the file are represented as a list of
    strings. These strings will typically have terminal newline
    characters, but this is not required.  In particular files commonly
    do not have a newline at the end of the file.

    Texts are identified by a version-id string.
    """

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def copy_to(self, name, transport):
        """Copy this versioned file to name on transport."""
        raise NotImplementedError(self.copy_to)

    def get_record_stream(self, versions, ordering, include_delta_closure):
        """Get a stream of records for versions.

        :param versions: The versions to include. Each version is a tuple
            (version,).
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the data content of the
            stream, not in the emitted records). This guarantees that
            'fulltext' can be used successfully on every record.
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        raise NotImplementedError(self.get_record_stream)

    def has_version(self, version_id):
        """Returns whether version is present."""
        raise NotImplementedError(self.has_version)

    def insert_record_stream(self, stream):
        """Insert a record stream into this versioned file.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError

    def add_lines(self, version_id, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """Add a single text on top of the versioned file.

        Must raise RevisionAlreadyPresent if the new version is
        already present in file history.

        Must raise RevisionNotPresent if any of the given parents are
        not present in file history.

        :param lines: A list of lines. Each line must be a bytestring. And all
            of them except the last must be terminated with \n and contain no
            other \n's. The last line may either contain no \n's or a single
            terminating \n. If the lines list does not meet this constraint
            the add routine may error or may succeed - but you will be unable
            to read the data back accurately. (Checking the lines have been
            split correctly is expensive and extremely unlikely to catch bugs
            so it is not done at runtime unless check_content is True.)
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations. VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent. The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future add_lines calls in the parent_texts dictionary.
        """
        self._check_write_ok()
        return self._add_lines(version_id, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content)
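
    # Illustrative usage sketch (assumes `vf` is a concrete VersionedFile
    # implementation such as a weave or knit): build a short two-version
    # history and read the newest text back.
    #
    #   vf.add_lines('rev-1', [], ['hello\n'])
    #   vf.add_lines('rev-2', ['rev-1'], ['hello\n', 'world\n'])
    #   assert vf.get_lines('rev-2') == ['hello\n', 'world\n']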

    def _add_lines(self, version_id, parents, lines, parent_texts,
        left_matching_blocks, nostore_sha, random_id, check_content):
        """Helper to do the class specific add_lines."""
        raise NotImplementedError(self.add_lines)

    def add_lines_with_ghosts(self, version_id, parents, lines,
        parent_texts=None, nostore_sha=None, random_id=False,
        check_content=True, left_matching_blocks=None):
        """Add lines to the versioned file, allowing ghosts to be present.

        This takes the same parameters as add_lines and returns the same.
        """
        self._check_write_ok()
        return self._add_lines_with_ghosts(version_id, parents, lines,
            parent_texts, nostore_sha, random_id, check_content,
            left_matching_blocks)

    def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
        nostore_sha, random_id, check_content, left_matching_blocks):
        """Helper to do class specific add_lines_with_ghosts."""
        raise NotImplementedError(self.add_lines_with_ghosts)

    def check(self, progress_bar=None):
        """Check the versioned file for integrity."""
        raise NotImplementedError(self.check)

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

    def _check_lines_are_lines(self, lines):
        """Check that the lines really are full lines without inline EOL."""
        for line in lines:
            if '\n' in line[:-1]:
                raise errors.BzrBadParameterContainsNewline("lines")

    def get_format_signature(self):
        """Get a text description of the data encoding in this file."""
        raise NotImplementedError(self.get_format_signature)

    def make_mpdiffs(self, version_ids):
        """Create multiparent diffs for specified versions."""
        knit_versions = set()
        knit_versions.update(version_ids)
        parent_map = self.get_parent_map(version_ids)
        for version_id in version_ids:
            try:
                knit_versions.update(parent_map[version_id])
            except KeyError:
                raise errors.RevisionNotPresent(version_id, self)
        # We need to filter out ghosts, because we can't diff against them.
        knit_versions = set(self.get_parent_map(knit_versions).keys())
        lines = dict(zip(knit_versions,
            self._get_lf_split_line_list(knit_versions)))
        diffs = []
        for version_id in version_ids:
            target = lines[version_id]
            try:
                parents = [lines[p] for p in parent_map[version_id] if p in
                    knit_versions]
            except KeyError:
                # I don't know how this could ever trigger.
                # parent_map[version_id] was already triggered in the previous
                # for loop, and lines[p] has the 'if p in knit_versions' check,
                # so we again won't have a KeyError.
                raise errors.RevisionNotPresent(version_id, self)
            if len(parents) > 0:
                left_parent_blocks = self._extract_blocks(version_id,
                    parents[0], target)
            else:
                left_parent_blocks = None
            diffs.append(multiparent.MultiParent.from_lines(target, parents,
                left_parent_blocks))
        return diffs

    def _extract_blocks(self, version_id, source, target):
        return None

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """
        # Does this need to call self._check_write_ok()? (IanC 20070919)
        vf_parents = {}
        mpvf = multiparent.MultiMemoryVersionedFile()
        versions = []
        for version, parent_ids, expected_sha1, mpdiff in records:
            versions.append(version)
            mpvf.add_diff(mpdiff, version, parent_ids)
        needed_parents = set()
        for version, parent_ids, expected_sha1, mpdiff in records:
            needed_parents.update(p for p in parent_ids
                                  if not mpvf.has_version(p))
        present_parents = set(self.get_parent_map(needed_parents).keys())
        for parent_id, lines in zip(present_parents,
            self._get_lf_split_line_list(present_parents)):
            mpvf.add_version(lines, parent_id, [])
        for (version, parent_ids, expected_sha1, mpdiff), lines in\
            zip(records, mpvf.get_line_list(versions)):
            if len(parent_ids) == 1:
                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                    mpvf.get_diff(parent_ids[0]).num_lines()))
            else:
                left_matching_blocks = None
            try:
                _, _, version_text = self.add_lines_with_ghosts(version,
                    parent_ids, lines, vf_parents,
                    left_matching_blocks=left_matching_blocks)
            except NotImplementedError:
                # The vf can't handle ghosts, so add lines normally, which will
                # (reasonably) fail if there are ghosts in the data.
                _, _, version_text = self.add_lines(version,
                    parent_ids, lines, vf_parents,
                    left_matching_blocks=left_matching_blocks)
            vf_parents[version] = version_text
        for (version, parent_ids, expected_sha1, mpdiff), sha1 in\
            zip(records, self.get_sha1s(versions)):
            if expected_sha1 != sha1:
                raise errors.VersionedFileInvalidChecksum(version)

    def get_sha1s(self, version_ids):
        """Get the stored sha1 sums for the given revisions.

        :param version_ids: The names of the versions to lookup
        :return: a list of sha1s in order according to the version_ids
        """
        raise NotImplementedError(self.get_sha1s)

    def get_text(self, version_id):
        """Return version contents as a text string.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        return ''.join(self.get_lines(version_id))
    get_string = get_text

    def get_texts(self, version_ids):
        """Return the texts of listed versions as a list of strings.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        return [''.join(self.get_lines(v)) for v in version_ids]

    def get_lines(self, version_id):
        """Return version contents as a sequence of lines.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        raise NotImplementedError(self.get_lines)

    def _get_lf_split_line_list(self, version_ids):
        return [StringIO(t).readlines() for t in self.get_texts(version_ids)]

    def get_ancestry(self, version_ids, topo_sorted=True):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        This list will not be topologically sorted if topo_sorted=False is
        passed.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history."""
        if isinstance(version_ids, basestring):
            version_ids = [version_ids]
        raise NotImplementedError(self.get_ancestry)

    def get_ancestry_with_ghosts(self, version_ids):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history.

        Ghosts that are known about will be included in ancestry list,
        but are not explicitly marked.
        """
        raise NotImplementedError(self.get_ancestry_with_ghosts)

    def get_parent_map(self, version_ids):
        """Get a map of the parents of version_ids.

        :param version_ids: The version ids to look up parents for.
        :return: A mapping from version id to parents.
        """
        raise NotImplementedError(self.get_parent_map)

    def get_parents_with_ghosts(self, version_id):
        """Return version names for parents of version_id.

        Will raise RevisionNotPresent if version_id is not present
        in the history.

        Ghosts that are known about will be included in the parent list,
        but are not explicitly marked.
        """
        try:
            return list(self.get_parent_map([version_id])[version_id])
        except KeyError:
            raise errors.RevisionNotPresent(version_id, self)

    def annotate(self, version_id):
        """Return a list of (version-id, line) tuples for version_id.

        :raise RevisionNotPresent: If the given version is
            not present in file history.
        """
        raise NotImplementedError(self.annotate)

    def iter_lines_added_or_present_in_versions(self, version_ids=None,
        pb=None):
        """Iterate over the lines in the versioned file from version_ids.

        This may return lines from other versions. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES: Lines are normalised: they will all have \n terminators.
               Lines are returned in arbitrary order.

        :return: An iterator over (line, version_id).
        """
        raise NotImplementedError(self.iter_lines_added_or_present_in_versions)

    def plan_merge(self, ver_a, ver_b):
        """Return pseudo-annotation indicating how the two versions merge.

        This is computed between versions a and b and their common
        base.

        Weave lines present in none of them are skipped entirely.

        Legend:
        killed-base Dead in base revision
        killed-both Killed in each revision
        killed-a    Killed in a
        killed-b    Killed in b
        unchanged   Alive in both a and b (possibly created in both)
        new-a       Created in a
        new-b       Created in b
        ghost-a     Killed in a, unborn in b
        ghost-b     Killed in b, unborn in a
        irrelevant  Not in either revision
        """
        raise NotImplementedError(VersionedFile.plan_merge)

    def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
                    b_marker=TextMerge.B_MARKER):
        return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
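
    # Illustrative usage sketch (assumes `vf` holds both versions): plan a
    # merge between two versions, then render the merged lines; conflicted
    # regions are wrapped in the TextMerge A/B markers.
    #
    #   plan = vf.plan_merge('rev-a', 'rev-b')
    #   merged_lines = vf.weave_merge(plan)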


class RecordingVersionedFilesDecorator(object):
    """A minimal versioned files that records calls made on it.

    Only enough methods have been added to support tests using it to date.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to calls.
    """

    def __init__(self, backing_vf):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        """
        self._backing_vf = backing_vf
        self.calls = []

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", keys, sort_order,
            include_delta_closure))
        return self._backing_vf.get_record_stream(keys, sort_order,
            include_delta_closure)


class KeyMapper(object):
    """KeyMappers map between keys and underlying partitioned storage."""

    def map(self, key):
        """Map key to an underlying storage identifier.

        :param key: A key tuple e.g. ('file-id', 'revision-id').
        :return: An underlying storage identifier, specific to the partitioning
            mechanism.
        """
        raise NotImplementedError(self.map)

    def unmap(self, partition_id):
        """Map a partitioned storage id back to a key prefix.

        :param partition_id: The underlying partition id.
        :return: As much of a key (or prefix) as is derivable from the
            partition id.
        """
        raise NotImplementedError(self.unmap)


class ConstantMapper(KeyMapper):
    """A key mapper that maps to a constant result."""

    def __init__(self, result):
        """Create a ConstantMapper which will return result for all maps."""
        self._result = result

    def map(self, key):
        """See KeyMapper.map()."""
        return self._result


class URLEscapeMapper(KeyMapper):
    """Base class for use with transport backed storage.

    This provides a map and unmap wrapper that respectively url escape and
    unescape their outputs and inputs.
    """

    def map(self, key):
        """See KeyMapper.map()."""
        return urllib.quote(self._map(key))

    def unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return self._unmap(urllib.unquote(partition_id))


class PrefixMapper(URLEscapeMapper):
    """A key mapper that extracts the first component of a key.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        return key[0]

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (partition_id,)


class HashPrefixMapper(URLEscapeMapper):
    """A key mapper that combines the first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        prefix = self._escape(key[0])
        return "%02x/%s" % (adler32(prefix) & 0xff, prefix)

    def _escape(self, prefix):
        """No escaping needed here."""
        return prefix

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (self._unescape(osutils.basename(partition_id)),)

    def _unescape(self, basename):
        """No unescaping needed for HashPrefixMapper."""
        return basename


class HashEscapedPrefixMapper(HashPrefixMapper):
    """Combines the escaped first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    _safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."

    def _escape(self, prefix):
        """Turn a key element into a filesystem safe string.

        This is similar to a plain urllib.quote, except
        it uses specific safe characters, so that it doesn't
        have to translate a lot of valid file ids.
        """
        # @ does not get escaped. This is because it is a valid
        # filesystem character we use all the time, and it looks
        # a lot better than seeing %40 all the time.
        r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
             for c in prefix]
        return ''.join(r)

    def _unescape(self, basename):
        """Escaped names are easily unescaped by urllib."""
        return urllib.unquote(basename)


def make_versioned_files_factory(versioned_file_factory, mapper):
    """Create a ThunkedVersionedFiles factory.

    This will create a callable which when called creates a
    ThunkedVersionedFiles on a transport, using mapper to access individual
    versioned files, and versioned_file_factory to create each individual file.
    """
    def factory(transport):
        return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
            lambda:True)
    return factory
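
# Illustrative usage sketch (the transport name is hypothetical): wire a
# weave file factory and a prefix mapper into a VersionedFiles-shaped store.
#
#   from bzrlib.weave import WeaveFile
#   factory = make_versioned_files_factory(WeaveFile, PrefixMapper())
#   texts = factory(a_transport)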


class VersionedFiles(object):
    """Storage for many versioned files.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.

    The keyspace is expressed via simple tuples. Any instance of VersionedFiles
    may have a different length key-size, but that size will be constant for
    all texts added to or retrieved from it. For instance, bzrlib uses
    instances with a key-size of 2 for storing user files in a repository, with
    the first element the fileid, and the second the version of that file.

    The use of tuples allows a single code base to support several different
    uses with only the mapping logic changing from instance to instance.
    """

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """Add a text to the store.

        :param key: The key tuple of the text to add.
        :param parents: The parents key tuples of the text to add.
        :param lines: A list of lines. Each line must be a bytestring. And all
            of them except the last must be terminated with \n and contain no
            other \n's. The last line may either contain no \n's or a single
            terminating \n. If the lines list does not meet this constraint
            the add routine may error or may succeed - but you will be unable
            to read the data back accurately. (Checking the lines have been
            split correctly is expensive and extremely unlikely to catch bugs
            so it is not done at runtime unless check_content is True.)
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations. VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent. The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future add_lines calls in the parent_texts dictionary.
        """
        raise NotImplementedError(self.add_lines)

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """
        vf_parents = {}
        mpvf = multiparent.MultiMemoryVersionedFile()
        versions = []
        for version, parent_ids, expected_sha1, mpdiff in records:
            versions.append(version)
            mpvf.add_diff(mpdiff, version, parent_ids)
        needed_parents = set()
        for version, parent_ids, expected_sha1, mpdiff in records:
            needed_parents.update(p for p in parent_ids
                                  if not mpvf.has_version(p))
        # It seems likely that adding all the present parents as fulltexts can
        # easily exhaust memory.
        present_parents = set(self.get_parent_map(needed_parents).keys())
        split_lines = osutils.split_lines
        for record in self.get_record_stream(present_parents, 'unordered',
            True):
            mpvf.add_version(split_lines(record.get_bytes_as('fulltext')),
                record.key, [])
        for (key, parent_keys, expected_sha1, mpdiff), lines in\
            zip(records, mpvf.get_line_list(versions)):
            if len(parent_keys) == 1:
                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                    mpvf.get_diff(parent_keys[0]).num_lines()))
            else:
                left_matching_blocks = None
            version_sha1, _, version_text = self.add_lines(key,
                parent_keys, lines, vf_parents,
                left_matching_blocks=left_matching_blocks)
            if version_sha1 != expected_sha1:
                raise errors.VersionedFileInvalidChecksum(key)
            vf_parents[key] = version_text

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        raise NotImplementedError(self.annotate)

    def check(self, progress_bar=None):
        """Check this object for integrity."""
        raise NotImplementedError(self.check)

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

    def _check_lines_are_lines(self, lines):
        """Check that the lines really are full lines without inline EOL."""
        for line in lines:
            if '\n' in line[:-1]:
                raise errors.BzrBadParameterContainsNewline("lines")

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        raise NotImplementedError(self.get_parent_map)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """Get a stream of records for keys.

        :param keys: The keys to include.
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the opaque data).
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        raise NotImplementedError(self.get_record_stream)
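
    # Illustrative usage sketch (assumes a concrete VersionedFiles instance
    # `vf`): the usual way to consume a record stream is to check each record
    # for absence before asking for a usable representation of its bytes.
    #
    #   for record in vf.get_record_stream(keys, 'topological', True):
    #       if record.storage_kind == 'absent':
    #           raise errors.RevisionNotPresent(record.key, vf)
    #       text = record.get_bytes_as('fulltext')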

    def get_sha1s(self, keys):
        """Get the sha1's of the texts for the given keys.

        :param keys: The names of the keys to lookup
        :return: a list of sha1s matching keys.
        """
        raise NotImplementedError(self.get_sha1s)

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        raise NotImplementedError(self.iter_lines_added_or_present_in_keys)

    def keys(self):
        """Return an iterable of the keys for all the contained texts."""
        raise NotImplementedError(self.keys)

    def make_mpdiffs(self, keys):
        """Create multiparent diffs for specified keys."""
        keys_order = tuple(keys)
        keys = frozenset(keys)
        knit_keys = set(keys)
        parent_map = self.get_parent_map(keys)
        for parent_keys in parent_map.itervalues():
            if parent_keys:
                knit_keys.update(parent_keys)
        missing_keys = keys - set(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(missing_keys.pop(), self)
        # We need to filter out ghosts, because we can't diff against them.
        maybe_ghosts = knit_keys - keys
        ghosts = maybe_ghosts - set(self.get_parent_map(maybe_ghosts))
        knit_keys.difference_update(ghosts)
        lines = {}
        split_lines = osutils.split_lines
        for record in self.get_record_stream(knit_keys, 'topological', True):
            lines[record.key] = split_lines(record.get_bytes_as('fulltext'))
            # line_block_dict = {}
            # for parent, blocks in record.extract_line_blocks():
            #   line_blocks[parent] = blocks
            # line_blocks[record.key] = line_block_dict
        diffs = []
        for key in keys_order:
            target = lines[key]
            parents = parent_map[key] or []
            # Note that filtering knit_keys can lead to a parent difference
            # between the creation and the application of the mpdiff.
            parent_lines = [lines[p] for p in parents if p in knit_keys]
            if len(parent_lines) > 0:
                left_parent_blocks = self._extract_blocks(key, parent_lines[0],
                    target)
            else:
                left_parent_blocks = None
            diffs.append(multiparent.MultiParent.from_lines(target,
                parent_lines, left_parent_blocks))
        return diffs

    def _extract_blocks(self, version_id, source, target):
        return None


class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.
    """

    def __init__(self, transport, file_factory, mapper, is_locked):
        """Create a ThunkedVersionedFiles."""
        self._transport = transport
        self._file_factory = file_factory
        self._mapper = mapper
        self._is_locked = is_locked

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """See VersionedFiles.add_lines()."""
        path = self._mapper.map(key)
        version_id = key[-1]
        parents = [parent[-1] for parent in parents]
        vf = self._get_vf(path)
        try:
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
        except errors.NoSuchFile:
            # parent directory may be missing, try again.
            self._transport.mkdir(osutils.dirname(path))
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        prefix = key[:-1]
        path = self._mapper.map(prefix)
        vf = self._get_vf(path)
        origins = vf.annotate(key[-1])
        result = []
        for origin, line in origins:
            result.append((prefix + (origin,), line))
        return result

    def check(self, progress_bar=None):
        """See VersionedFiles.check()."""
        for prefix, vf in self._iter_all_components():
            vf.check()

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        prefixes = self._partition_keys(keys)
        result = {}
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            parent_map = vf.get_parent_map(suffixes)
            for key, parents in parent_map.items():
                result[prefix + (key,)] = tuple(
                    prefix + (parent,) for parent in parents)
        return result

    def _get_vf(self, path):
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)
        return self._file_factory(path, self._transport, create=True,
            get_scope=lambda:None)

    def _partition_keys(self, keys):
        """Turn keys into a dict of prefix:suffix_list."""
        result = {}
        for key in keys:
            prefix_keys = result.setdefault(key[:-1], [])
            prefix_keys.append(key[-1])
        return result
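
    # Illustrative sketch: _partition_keys groups a flat list of key tuples
    # by prefix, e.g. [('f1', 'a'), ('f1', 'b'), ('f2', 'a')] becomes
    # {('f1',): ['a', 'b'], ('f2',): ['a']}.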

    def _get_all_prefixes(self):
        # Identify all key prefixes.
        # XXX: A bit hacky, needs polish.
        if type(self._mapper) == ConstantMapper:
            paths = [self._mapper.map(())]
            prefixes = [()]
        else:
            relpaths = set()
            for quoted_relpath in self._transport.iter_files_recursive():
                path, ext = os.path.splitext(quoted_relpath)
                relpaths.add(path)
            paths = list(relpaths)
            prefixes = [self._mapper.unmap(path) for path in paths]
        return zip(paths, prefixes)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream()."""
        # Ordering will be taken care of by each partitioned store; group keys
        # by partition.
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            suffixes = [(suffix,) for suffix in suffixes]
            for record in vf.get_record_stream(suffixes, ordering,
                include_delta_closure):
                if record.parents is not None:
                    record.parents = tuple(
                        prefix + parent for parent in record.parents)
                record.key = prefix + record.key
                yield record

    def _iter_keys_vf(self, keys):
        prefixes = self._partition_keys(keys)
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            yield prefix, suffixes, vf

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s()."""
        sha1s = {}
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            vf_sha1s = vf.get_sha1s(suffixes)
            for suffix, sha1 in zip(suffixes, vf_sha1s):
                sha1s[prefix + (suffix,)] = sha1
        return [sha1s[key] for key in keys]

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        for record in stream:
            prefix = record.key[:-1]
            key = record.key[-1:]
            if record.parents is not None:
                parents = [parent[-1:] for parent in record.parents]
            else:
                parents = None
            thunk_record = AdapterFactory(key, parents, record)
            path = self._mapper.map(prefix)
            # Note that this parses the file many times; we can do better but
            # as this only impacts weaves in terms of performance, it is
            # reasonable.
            vf = self._get_vf(path)
            vf.insert_record_stream([thunk_record])

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
                yield line, prefix + (version,)

    def _iter_all_components(self):
        for path, prefix in self._get_all_prefixes():
            yield prefix, self._get_vf(path)

    def keys(self):
        """See VersionedFiles.keys()."""
        result = set()
        for prefix, vf in self._iter_all_components():
            for suffix in vf.versions():
                result.add(prefix + (suffix,))
        return result


class _PlanMergeVersionedFile(VersionedFiles):
    """A VersionedFile for uncommitted and committed texts.

    It is intended to allow merges to be planned with working tree texts.
    It implements only the small part of the VersionedFiles interface used by
    PlanMerge. It falls back to multiple versionedfiles for data not stored in
    _PlanMergeVersionedFile itself.

    :ivar fallback_versionedfiles: a list of VersionedFiles objects that can
        be queried for missing texts.
    """

    def __init__(self, file_id):
        """Create a _PlanMergeVersionedFile.

        :param file_id: Used with _PlanMerge code which is not yet fully
            tuple-keyspace aware.
        """
        self._file_id = file_id
        # fallback locations
        self.fallback_versionedfiles = []
        # Parents for locally held keys.
        self._parents = {}
        # line data for locally held keys.
        self._lines = {}
        # key lookup providers
        self._providers = [DictParentsProvider(self._parents)]

    def plan_merge(self, ver_a, ver_b, base=None):
        """See VersionedFile.plan_merge"""
        from bzrlib.merge import _PlanMerge
        if base is None:
            return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
        old_plan = list(_PlanMerge(ver_a, base, self, (self._file_id,)).plan_merge())
        new_plan = list(_PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge())
        return _PlanMerge._subtract_plans(old_plan, new_plan)

    def plan_lca_merge(self, ver_a, ver_b, base=None):
        from bzrlib.merge import _PlanLCAMerge
        graph = Graph(self)
        new_plan = _PlanLCAMerge(ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
        if base is None:
            return new_plan
        old_plan = _PlanLCAMerge(ver_a, base, self, (self._file_id,), graph).plan_merge()
        return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))

    def add_lines(self, key, parents, lines):
        """See VersionedFiles.add_lines

        Lines are added locally, not to fallback versionedfiles. Also, ghosts
        are permitted. Only reserved ids are permitted.
        """
        if type(key) is not tuple:
            raise TypeError(key)
        if not revision.is_reserved_id(key[-1]):
            raise ValueError('Only reserved ids may be used')
        if parents is None:
            raise ValueError('Parents may not be None')
        if lines is None:
            raise ValueError('Lines may not be None')
        self._parents[key] = tuple(parents)
        self._lines[key] = lines
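
    # Illustrative usage sketch: stage an uncommitted working tree text under
    # a reserved id (reserved revision ids end with ':') so a merge can be
    # planned against committed texts found via the fallbacks.  The
    # `repository_texts` name is a hypothetical fallback VersionedFiles.
    #
    #   planning_vf = _PlanMergeVersionedFile('file-id')
    #   planning_vf.fallback_versionedfiles.append(repository_texts)
    #   planning_vf.add_lines(('file-id', 'current:'),
    #       [('file-id', 'rev-1')], ['working tree content\n'])
    #   plan = planning_vf.plan_merge('current:', 'rev-1')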

    def get_record_stream(self, keys, ordering, include_delta_closure):
        pending = set(keys)
        for key in keys:
            if key in self._lines:
                lines = self._lines[key]
                parents = self._parents[key]
                pending.remove(key)
                yield FulltextContentFactory(key, parents, None,
                    ''.join(lines))
        for versionedfile in self.fallback_versionedfiles:
            for record in versionedfile.get_record_stream(
                pending, 'unordered', True):
                if record.storage_kind == 'absent':
                    continue
                else:
                    pending.remove(record.key)
                    yield record
            if not pending:
                return
        # report absent entries
        for key in pending:
            yield AbsentContentFactory(key)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map"""
        # We create a new provider because a fallback may have been added.
        # If we make fallbacks private we can update a stack list and avoid
        # object creation thrashing.
        keys = set(keys)
        result = {}
        if revision.NULL_REVISION in keys:
            keys.remove(revision.NULL_REVISION)
            result[revision.NULL_REVISION] = ()
        self._providers = self._providers[:1] + self.fallback_versionedfiles
        result.update(
            _StackedParentsProvider(self._providers).get_parent_map(keys))
        for key, parents in result.iteritems():
            if parents == ():
                result[key] = (revision.NULL_REVISION,)
        return result


class PlanWeaveMerge(TextMerge):
    """Weave merge that takes a plan as its input.

    This exists so that VersionedFile.plan_merge is implementable.
    Most callers will want to use WeaveMerge instead.
    """

    def __init__(self, plan, a_marker=TextMerge.A_MARKER,
                 b_marker=TextMerge.B_MARKER):
        TextMerge.__init__(self, a_marker, b_marker)
        self.plan = plan

    def _merge_struct(self):
        lines_a = []
        lines_b = []
        ch_a = ch_b = False

        def outstanding_struct():
            if not lines_a and not lines_b:
                return
            elif ch_a and not ch_b:
                # one-sided change:
                yield (lines_a,)
            elif ch_b and not ch_a:
                yield (lines_b,)
            elif lines_a == lines_b:
                yield (lines_a,)
            else:
                yield (lines_a, lines_b)

        # We previously considered either 'unchanged' or 'killed-both' lines
        # to be possible places to resynchronize. However, assuming agreement
        # on killed-both lines may be too aggressive. -- mbp 20060324
        for state, line in self.plan:
            if state == 'unchanged':
                # resync and flush queued conflicts changes if any
                for struct in outstanding_struct():
                    yield struct
                lines_a = []
                lines_b = []
                ch_a = ch_b = False

            if state == 'unchanged':
                if line:
                    yield ([line],)
            elif state == 'killed-a':
                ch_a = True
                lines_b.append(line)
            elif state == 'killed-b':
                ch_b = True
                lines_a.append(line)
            elif state == 'new-a':
                ch_a = True
                lines_a.append(line)
            elif state == 'new-b':
                ch_b = True
                lines_b.append(line)
            elif state == 'conflicted-a':
                ch_b = ch_a = True
                lines_a.append(line)
            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                                 'killed-base', 'killed-both'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct


class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)