# Copyright (C) 2005, 2006 Canonical Ltd
# Johan Rydberg <jrydberg@gnu.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Versioned text file storage api."""

from cStringIO import StringIO
import os
import urllib
from zlib import adler32

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
from bzrlib import (
    errors,
    multiparent,
    osutils,
    revision,
    )
from bzrlib.graph import DictParentsProvider, Graph, _StackedParentsProvider
from bzrlib.transport.memory import MemoryTransport
""")
from bzrlib.inter import InterObject
from bzrlib.registry import Registry
from bzrlib.symbol_versioning import *
from bzrlib.textmerge import TextMerge


adapter_registry = Registry()
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
    'DeltaPlainToFullText')
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
    'FTPlainToFullText')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
    'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
    'bzrlib.knit', 'DeltaAnnotatedToFullText')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
    'bzrlib.knit', 'FTAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
    'bzrlib.knit', 'FTAnnotatedToFullText')
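

# A hedged usage sketch, not part of the original module: converting a record
# from its native storage kind to a fulltext via the registry.  The adapter
# construction argument and the get_bytes() calling convention are assumptions
# based on how bzrlib.knit adapters are used; treat this as illustrative only.
def _example_adapt_to_fulltext(record):
    adapter_factory = adapter_registry.get((record.storage_kind, 'fulltext'))
    adapter = adapter_factory(None)  # no basis VersionedFile assumed here
    native_bytes = record.get_bytes_as(record.storage_kind)
    return adapter.get_bytes(record, native_bytes)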


class ContentFactory(object):
    """Abstract interface for insertion and retrieval from a VersionedFile.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. One of
        'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
        'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
        'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = None
        self.key = None
        self.parents = None


class FulltextContentFactory(ContentFactory):
    """Static data content factory.

    This takes a fulltext when created and just returns that during
    get_bytes_as('fulltext').

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'fulltext'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, text):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.storage_kind = 'fulltext'
        self.key = key
        self.parents = parents
        self._text = text

    def get_bytes_as(self, storage_kind):
        if storage_kind == self.storage_kind:
            return self._text
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)


class AbsentContentFactory(ContentFactory):
    """A placeholder content factory for unavailable texts.

    :ivar sha1: None.
    :ivar storage_kind: 'absent'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: None.
    """

    def __init__(self, key):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = 'absent'
        self.key = key
        self.parents = None


class AdapterFactory(ContentFactory):
    """A content factory to adapt between key prefixes."""

    def __init__(self, key, parents, adapted):
        """Create an adapter factory instance."""
        self.key = key
        self.parents = parents
        self._adapted = adapted

    def __getattr__(self, attr):
        """Return a member from the adapted object."""
        if attr in ('key', 'parents'):
            return self.__dict__[attr]
        else:
            return getattr(self._adapted, attr)


def filter_absent(record_stream):
    """Adapt a record stream to remove absent records."""
    for record in record_stream:
        if record.storage_kind != 'absent':
            yield record
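

# A hedged consumption sketch (not in the original file): draining a record
# stream to fulltexts, skipping unavailable records via filter_absent().
# 'vf' is any object with the get_record_stream() API documented below.
def _example_collect_fulltexts(vf, versions):
    texts = {}
    stream = vf.get_record_stream(versions, 'topological', True)
    for record in filter_absent(stream):
        # include_delta_closure=True guarantees 'fulltext' is available
        texts[record.key] = record.get_bytes_as('fulltext')
    return texts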


class VersionedFile(object):
    """Versioned text file storage.

    A versioned file manages versions of line-based text files,
    keeping track of the originating version for each line.

    To clients the "lines" of the file are represented as a list of
    strings. These strings will typically have terminal newline
    characters, but this is not required.  In particular files commonly
    do not have a newline at the end of the file.

    Texts are identified by a version-id string.
    """

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def copy_to(self, name, transport):
        """Copy this versioned file to name on transport."""
        raise NotImplementedError(self.copy_to)

    def get_record_stream(self, versions, ordering, include_delta_closure):
        """Get a stream of records for versions.

        :param versions: The versions to include. Each version is a tuple
            (version,).
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the data content of the
            stream, not in the emitted records). This guarantees that
            'fulltext' can be used successfully on every record.
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        raise NotImplementedError(self.get_record_stream)

    def has_version(self, version_id):
        """Returns whether version is present."""
        raise NotImplementedError(self.has_version)

    def insert_record_stream(self, stream):
        """Insert a record stream into this versioned file.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError

    def add_lines(self, version_id, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """Add a single text on top of the versioned file.

        Must raise RevisionAlreadyPresent if the new version is
        already present in file history.

        Must raise RevisionNotPresent if any of the given parents are
        not present in file history.

        :param lines: A list of lines. Each line must be a bytestring. And all
            of them except the last must be terminated with \n and contain no
            other \n's. The last line may either contain no \n's or a single
            terminating \n. If the lines list does not meet this constraint the
            add routine may error or may succeed - but you will be unable to
            read the data back accurately. (Checking the lines have been split
            correctly is expensive and extremely unlikely to catch bugs so it
            is not done at runtime unless check_content is True.)
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations. VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent. The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future add_lines calls in the parent_texts dictionary.
        """
        self._check_write_ok()
        return self._add_lines(version_id, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content)

    def _add_lines(self, version_id, parents, lines, parent_texts,
        left_matching_blocks, nostore_sha, random_id, check_content):
        """Helper to do the class specific add_lines."""
        raise NotImplementedError(self.add_lines)
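
    # A hedged helper sketch (not part of the original class): producing a
    # lines list that satisfies the add_lines() constraint from a raw text,
    # using osutils.split_lines() which preserves all newline bytes.
    def _example_add_text(self, version_id, parents, text):
        lines = osutils.split_lines(text)
        return self.add_lines(version_id, parents, lines)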

    def add_lines_with_ghosts(self, version_id, parents, lines,
        parent_texts=None, nostore_sha=None, random_id=False,
        check_content=True, left_matching_blocks=None):
        """Add lines to the versioned file, allowing ghosts to be present.

        This takes the same parameters as add_lines and returns the same.
        """
        self._check_write_ok()
        return self._add_lines_with_ghosts(version_id, parents, lines,
            parent_texts, nostore_sha, random_id, check_content, left_matching_blocks)

    def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
        nostore_sha, random_id, check_content, left_matching_blocks):
        """Helper to do class specific add_lines_with_ghosts."""
        raise NotImplementedError(self.add_lines_with_ghosts)

    def check(self, progress_bar=None):
        """Check the versioned file for integrity."""
        raise NotImplementedError(self.check)

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

    def _check_lines_are_lines(self, lines):
        """Check that the lines really are full lines without inline EOL."""
        for line in lines:
            if '\n' in line[:-1]:
                raise errors.BzrBadParameterContainsNewline("lines")

    def get_format_signature(self):
        """Get a text description of the data encoding in this file.

        :since: 0.90
        """
        raise NotImplementedError(self.get_format_signature)

    def make_mpdiffs(self, version_ids):
        """Create multiparent diffs for specified versions."""
        knit_versions = set()
        knit_versions.update(version_ids)
        parent_map = self.get_parent_map(version_ids)
        for version_id in version_ids:
            try:
                knit_versions.update(parent_map[version_id])
            except KeyError:
                raise errors.RevisionNotPresent(version_id, self)
        # We need to filter out ghosts, because we can't diff against them.
        knit_versions = set(self.get_parent_map(knit_versions).keys())
        lines = dict(zip(knit_versions,
            self._get_lf_split_line_list(knit_versions)))
        diffs = []
        for version_id in version_ids:
            target = lines[version_id]
            try:
                parents = [lines[p] for p in parent_map[version_id] if p in
                    knit_versions]
            except KeyError:
                # I don't know how this could ever trigger.
                # parent_map[version_id] was already triggered in the previous
                # for loop, and lines[p] has the 'if p in knit_versions' check,
                # so we again won't have a KeyError.
                raise errors.RevisionNotPresent(version_id, self)
            if len(parents) > 0:
                left_parent_blocks = self._extract_blocks(version_id,
                                                          parents[0], target)
            else:
                left_parent_blocks = None
            diffs.append(multiparent.MultiParent.from_lines(target, parents,
                         left_parent_blocks))
        return diffs

    def _extract_blocks(self, version_id, source, target):
        return None
def add_mpdiffs(self, records):
337
"""Add mpdiffs to this VersionedFile.
339
Records should be iterables of version, parents, expected_sha1,
340
mpdiff. mpdiff should be a MultiParent instance.
342
# Does this need to call self._check_write_ok()? (IanC 20070919)
344
mpvf = multiparent.MultiMemoryVersionedFile()
346
for version, parent_ids, expected_sha1, mpdiff in records:
347
versions.append(version)
348
mpvf.add_diff(mpdiff, version, parent_ids)
349
needed_parents = set()
350
for version, parent_ids, expected_sha1, mpdiff in records:
351
needed_parents.update(p for p in parent_ids
352
if not mpvf.has_version(p))
353
present_parents = set(self.get_parent_map(needed_parents).keys())
354
for parent_id, lines in zip(present_parents,
355
self._get_lf_split_line_list(present_parents)):
356
mpvf.add_version(lines, parent_id, [])
357
for (version, parent_ids, expected_sha1, mpdiff), lines in\
358
zip(records, mpvf.get_line_list(versions)):
359
if len(parent_ids) == 1:
360
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
361
mpvf.get_diff(parent_ids[0]).num_lines()))
363
left_matching_blocks = None
365
_, _, version_text = self.add_lines_with_ghosts(version,
366
parent_ids, lines, vf_parents,
367
left_matching_blocks=left_matching_blocks)
368
except NotImplementedError:
369
# The vf can't handle ghosts, so add lines normally, which will
370
# (reasonably) fail if there are ghosts in the data.
371
_, _, version_text = self.add_lines(version,
372
parent_ids, lines, vf_parents,
373
left_matching_blocks=left_matching_blocks)
374
vf_parents[version] = version_text
375
for (version, parent_ids, expected_sha1, mpdiff), sha1 in\
376
zip(records, self.get_sha1s(versions)):
377
if expected_sha1 != sha1:
378
raise errors.VersionedFileInvalidChecksum(version)
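
    # A hedged round-trip sketch (not original code): replaying this file's
    # mpdiffs into another VersionedFile.  The pairing matches the record
    # layout add_mpdiffs() documents: (version, parents, expected_sha1, mpdiff).
    def _example_copy_mpdiffs_to(self, target, version_ids):
        parent_map = self.get_parent_map(version_ids)
        triples = zip(version_ids, self.get_sha1s(version_ids),
                      self.make_mpdiffs(version_ids))
        target.add_mpdiffs([(v, parent_map[v], sha1, diff)
                            for v, sha1, diff in triples])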

    def get_sha1s(self, version_ids):
        """Get the stored sha1 sums for the given revisions.

        :param version_ids: The names of the versions to lookup
        :return: a list of sha1s in order according to the version_ids
        """
        raise NotImplementedError(self.get_sha1s)

    def get_text(self, version_id):
        """Return version contents as a text string.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        return ''.join(self.get_lines(version_id))
    get_string = get_text

    def get_texts(self, version_ids):
        """Return the texts of listed versions as a list of strings.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        return [''.join(self.get_lines(v)) for v in version_ids]

    def get_lines(self, version_id):
        """Return version contents as a sequence of lines.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        raise NotImplementedError(self.get_lines)

    def _get_lf_split_line_list(self, version_ids):
        return [StringIO(t).readlines() for t in self.get_texts(version_ids)]

    def get_ancestry(self, version_ids, topo_sorted=True):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        This list will not be topologically sorted if topo_sorted=False is
        passed.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history."""
        if isinstance(version_ids, basestring):
            version_ids = [version_ids]
        raise NotImplementedError(self.get_ancestry)

    def get_ancestry_with_ghosts(self, version_ids):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history.

        Ghosts that are known about will be included in ancestry list,
        but are not explicitly marked.
        """
        raise NotImplementedError(self.get_ancestry_with_ghosts)

    def get_parent_map(self, version_ids):
        """Get a map of the parents of version_ids.

        :param version_ids: The version ids to look up parents for.
        :return: A mapping from version id to parents.
        """
        raise NotImplementedError(self.get_parent_map)

    def get_parents_with_ghosts(self, version_id):
        """Return version names for parents of version_id.

        Will raise RevisionNotPresent if version_id is not present
        in the history.

        Ghosts that are known about will be included in the parent list,
        but are not explicitly marked.
        """
        try:
            return list(self.get_parent_map([version_id])[version_id])
        except KeyError:
            raise errors.RevisionNotPresent(version_id, self)

    def annotate(self, version_id):
        """Return a list of (version-id, line) tuples for version_id.

        :raise RevisionNotPresent: If the given version is
            not present in file history.
        """
        raise NotImplementedError(self.annotate)

    def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                                pb=None):
        """Iterate over the lines in the versioned file from version_ids.

        This may return lines from other versions. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES: Lines are normalised: they will all have \n terminators.
               Lines are returned in arbitrary order.

        :return: An iterator over (line, version_id).
        """
        raise NotImplementedError(self.iter_lines_added_or_present_in_versions)

    def plan_merge(self, ver_a, ver_b):
        """Return pseudo-annotation indicating how the two versions merge.

        This is computed between versions a and b and their common
        base.

        Weave lines present in none of them are skipped entirely.

        Legend:
        killed-base Dead in base revision
        killed-both Killed in each revision
        killed-a    Killed in a
        killed-b    Killed in b
        unchanged   Alive in both a and b (possibly created in both)
        new-a       Created in a
        new-b       Created in b
        ghost-a     Killed in a, unborn in b
        ghost-b     Killed in b, unborn in a
        irrelevant  Not in either revision
        """
        raise NotImplementedError(VersionedFile.plan_merge)

    def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
                    b_marker=TextMerge.B_MARKER):
        return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
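
    # A hedged sketch (not original code): the typical plan/merge round trip
    # on a versioned file that already stores both versions.
    def _example_merge_two_versions(self, ver_a, ver_b):
        plan = self.plan_merge(ver_a, ver_b)
        # merged lines, with conflict regions wrapped in the default markers
        return self.weave_merge(plan)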


class RecordingVersionedFilesDecorator(object):
    """A minimal versioned files that records calls made on it.

    Only enough methods have been added to support tests using it to date.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to it.
    """

    def __init__(self, backing_vf):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        """
        self._backing_vf = backing_vf
        self.calls = []

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", keys, sort_order,
            include_delta_closure))
        return self._backing_vf.get_record_stream(keys, sort_order,
            include_delta_closure)


class KeyMapper(object):
    """KeyMappers map between keys and underlying partitioned storage."""

    def map(self, key):
        """Map key to an underlying storage identifier.

        :param key: A key tuple e.g. ('file-id', 'revision-id').
        :return: An underlying storage identifier, specific to the partitioning
            mechanism.
        """
        raise NotImplementedError(self.map)

    def unmap(self, partition_id):
        """Map a partitioned storage id back to a key prefix.

        :param partition_id: The underlying partition id.
        :return: As much of a key (or prefix) as is derivable from the
            partition id.
        """
        raise NotImplementedError(self.unmap)


class ConstantMapper(KeyMapper):
    """A key mapper that maps to a constant result."""

    def __init__(self, result):
        """Create a ConstantMapper which will return result for all maps."""
        self._result = result

    def map(self, key):
        """See KeyMapper.map()."""
        return self._result


class URLEscapeMapper(KeyMapper):
    """Base class for use with transport backed storage.

    This provides a map and unmap wrapper that respectively url escape and
    unescape their outputs and inputs.
    """

    def map(self, key):
        """See KeyMapper.map()."""
        return urllib.quote(self._map(key))

    def unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return self._unmap(urllib.unquote(partition_id))


class PrefixMapper(URLEscapeMapper):
    """A key mapper that extracts the first component of a key.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        return key[0]

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (partition_id,)


class HashPrefixMapper(URLEscapeMapper):
    """A key mapper that combines the first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        prefix = self._escape(key[0])
        return "%02x/%s" % (adler32(prefix) & 0xff, prefix)

    def _escape(self, prefix):
        """No escaping needed here."""
        return prefix

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (self._unescape(osutils.basename(partition_id)),)

    def _unescape(self, basename):
        """No unescaping needed for HashPrefixMapper."""
        return basename


class HashEscapedPrefixMapper(HashPrefixMapper):
    """Combines the escaped first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    _safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."

    def _escape(self, prefix):
        """Turn a key element into a filesystem safe string.

        This is similar to a plain urllib.quote, except
        it uses specific safe characters, so that it doesn't
        have to translate a lot of valid file ids.
        """
        # @ does not get escaped. This is because it is a valid
        # filesystem character we use all the time, and it looks
        # a lot better than seeing %40 all the time.
        r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
             for c in prefix]
        return ''.join(r)

    def _unescape(self, basename):
        """Escaped names are easily unescaped by urlutils."""
        return urllib.unquote(basename)
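

# A hedged round-trip illustration (not in the original file): keys map to
# hash-bucketed paths like '<xx>/<escaped-file-id>', and unmap() recovers the
# key prefix only, never the revision part.
def _example_mapper_roundtrip():
    mapper = HashEscapedPrefixMapper()
    partition_id = mapper.map(('file-id', 'rev-1'))  # e.g. 'xx/file-id'
    # Only the prefix survives the round trip.
    return mapper.unmap(partition_id) == ('file-id',)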


def make_versioned_files_factory(versioned_file_factory, mapper):
    """Create a ThunkedVersionedFiles factory.

    This will create a callable which when called creates a
    ThunkedVersionedFiles on a transport, using mapper to access individual
    versioned files, and versioned_file_factory to create each individual file.
    """
    def factory(transport):
        return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
            lambda:True)
    return factory
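

# A hedged wiring sketch (not part of the original module): using the factory
# with an in-memory transport.  'per_file_factory' stands in for any concrete
# VersionedFile file factory with the (path, transport, ...) signature that
# ThunkedVersionedFiles._get_vf() expects.
def _example_make_thunked_files(per_file_factory):
    vf_factory = make_versioned_files_factory(per_file_factory, PrefixMapper())
    return vf_factory(MemoryTransport())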


class VersionedFiles(object):
    """Storage for many versioned files.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.
    """

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """Add a text to the store.

        :param key: The key tuple of the text to add.
        :param parents: The parents key tuples of the text to add.
        :param lines: A list of lines. Each line must be a bytestring. And all
            of them except the last must be terminated with \n and contain no
            other \n's. The last line may either contain no \n's or a single
            terminating \n. If the lines list does not meet this constraint the
            add routine may error or may succeed - but you will be unable to
            read the data back accurately. (Checking the lines have been split
            correctly is expensive and extremely unlikely to catch bugs so it
            is not done at runtime unless check_content is True.)
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations. VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent. The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future add_lines calls in the parent_texts dictionary.
        """
        raise NotImplementedError(self.add_lines)
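
    # A hedged sketch (not original code): keys here are tuples, so adding a
    # new text for file id 'f' at revision 'rev-2' descended from 'rev-1'
    # looks like this on any concrete implementation.
    def _example_add_one_text(self):
        return self.add_lines(('f', 'rev-2'), [('f', 'rev-1')],
            ['line one\n', 'line two\n'])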

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """
        vf_parents = {}
        mpvf = multiparent.MultiMemoryVersionedFile()
        versions = []
        for version, parent_ids, expected_sha1, mpdiff in records:
            versions.append(version)
            mpvf.add_diff(mpdiff, version, parent_ids)
        needed_parents = set()
        for version, parent_ids, expected_sha1, mpdiff in records:
            needed_parents.update(p for p in parent_ids
                                  if not mpvf.has_version(p))
        # It seems likely that adding all the present parents as fulltexts can
        # easily exhaust memory.
        present_parents = set(self.get_parent_map(needed_parents).keys())
        split_lines = osutils.split_lines
        for record in self.get_record_stream(present_parents, 'unordered',
            True):
            mpvf.add_version(split_lines(record.get_bytes_as('fulltext')),
                record.key, [])
        for (key, parent_keys, expected_sha1, mpdiff), lines in\
            zip(records, mpvf.get_line_list(versions)):
            if len(parent_keys) == 1:
                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                    mpvf.get_diff(parent_keys[0]).num_lines()))
            else:
                left_matching_blocks = None
            version_sha1, _, version_text = self.add_lines(key,
                parent_keys, lines, vf_parents,
                left_matching_blocks=left_matching_blocks)
            if version_sha1 != expected_sha1:
                raise errors.VersionedFileInvalidChecksum(key)
            vf_parents[key] = version_text

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        raise NotImplementedError(self.annotate)

    def check(self, progress_bar=None):
        """Check this object for integrity."""
        raise NotImplementedError(self.check)

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

    def _check_lines_are_lines(self, lines):
        """Check that the lines really are full lines without inline EOL."""
        for line in lines:
            if '\n' in line[:-1]:
                raise errors.BzrBadParameterContainsNewline("lines")

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        raise NotImplementedError(self.get_parent_map)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """Get a stream of records for keys.

        :param keys: The keys to include.
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the opaque data).
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        raise NotImplementedError(self.get_record_stream)

    def get_sha1s(self, keys):
        """Get the sha1's of the texts for the given keys.

        :param keys: The names of the keys to lookup
        :return: a list of sha1s matching keys.
        """
        raise NotImplementedError(self.get_sha1s)

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        raise NotImplementedError(self.iter_lines_added_or_present_in_keys)

    def keys(self):
        """Return an iterable of the keys for all the contained texts."""
        raise NotImplementedError(self.keys)

    def make_mpdiffs(self, keys):
        """Create multiparent diffs for specified keys."""
        keys_order = tuple(keys)
        keys = frozenset(keys)
        knit_keys = set(keys)
        parent_map = self.get_parent_map(keys)
        for parent_keys in parent_map.itervalues():
            if parent_keys:
                knit_keys.update(parent_keys)
        missing_keys = keys - set(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(missing_keys.pop(), self)
        # We need to filter out ghosts, because we can't diff against them.
        maybe_ghosts = knit_keys - keys
        ghosts = maybe_ghosts - set(self.get_parent_map(maybe_ghosts))
        knit_keys.difference_update(ghosts)
        lines = {}
        split_lines = osutils.split_lines
        for record in self.get_record_stream(knit_keys, 'topological', True):
            lines[record.key] = split_lines(record.get_bytes_as('fulltext'))
            # line_block_dict = {}
            # for parent, blocks in record.extract_line_blocks():
            #   line_blocks[parent] = blocks
            # line_blocks[record.key] = line_block_dict
        diffs = []
        for key in keys_order:
            target = lines[key]
            parents = parent_map[key] or []
            # Note that filtering knit_keys can lead to a parent difference
            # between the creation and the application of the mpdiff.
            parent_lines = [lines[p] for p in parents if p in knit_keys]
            if len(parent_lines) > 0:
                left_parent_blocks = self._extract_blocks(key, parent_lines[0],
                    target)
            else:
                left_parent_blocks = None
            diffs.append(multiparent.MultiParent.from_lines(target,
                parent_lines, left_parent_blocks))
        return diffs

    def _extract_blocks(self, version_id, source, target):
        return None


class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.
    """

    def __init__(self, transport, file_factory, mapper, is_locked):
        """Create a ThunkedVersionedFiles."""
        self._transport = transport
        self._file_factory = file_factory
        self._mapper = mapper
        self._is_locked = is_locked

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """See VersionedFiles.add_lines()."""
        path = self._mapper.map(key)
        version_id = key[-1]
        parents = [parent[-1] for parent in parents]
        vf = self._get_vf(path)
        try:
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
        except errors.NoSuchFile:
            # parent directory may be missing, try again.
            self._transport.mkdir(osutils.dirname(path))
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        prefix = key[:-1]
        path = self._mapper.map(prefix)
        vf = self._get_vf(path)
        origins = vf.annotate(key[-1])
        result = []
        for origin, line in origins:
            result.append((prefix + (origin,), line))
        return result

    def check(self, progress_bar=None):
        """See VersionedFiles.check()."""
        for prefix, vf in self._iter_all_components():
            vf.check()

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        prefixes = self._partition_keys(keys)
        result = {}
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            parent_map = vf.get_parent_map(suffixes)
            for key, parents in parent_map.items():
                result[prefix + (key,)] = tuple(
                    prefix + (parent,) for parent in parents)
        return result

    def _get_vf(self, path):
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)
        return self._file_factory(path, self._transport, create=True,
            get_scope=lambda:None)

    def _partition_keys(self, keys):
        """Turn keys into a dict of prefix:suffix_list."""
        result = {}
        for key in keys:
            prefix_keys = result.setdefault(key[:-1], [])
            prefix_keys.append(key[-1])
        return result

    def _get_all_prefixes(self):
        # Identify all key prefixes.
        # XXX: A bit hacky, needs polish.
        if type(self._mapper) == ConstantMapper:
            paths = [self._mapper.map(())]
            prefixes = [()]
        else:
            relpaths = set()
            for quoted_relpath in self._transport.iter_files_recursive():
                path, ext = os.path.splitext(quoted_relpath)
                relpaths.add(path)
            paths = list(relpaths)
            prefixes = [self._mapper.unmap(path) for path in paths]
        return zip(paths, prefixes)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream()."""
        # Ordering will be taken care of by each partitioned store; group keys
        # by partition.
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            suffixes = [(suffix,) for suffix in suffixes]
            for record in vf.get_record_stream(suffixes, ordering,
                include_delta_closure):
                if record.parents is not None:
                    record.parents = tuple(
                        prefix + parent for parent in record.parents)
                record.key = prefix + record.key
                yield record

    def _iter_keys_vf(self, keys):
        prefixes = self._partition_keys(keys)
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            yield prefix, suffixes, vf

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s()."""
        sha1s = {}
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            vf_sha1s = vf.get_sha1s(suffixes)
            for suffix, sha1 in zip(suffixes, vf_sha1s):
                sha1s[prefix + (suffix,)] = sha1
        return [sha1s[key] for key in keys]

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        for record in stream:
            prefix = record.key[:-1]
            key = record.key[-1:]
            if record.parents is not None:
                parents = [parent[-1:] for parent in record.parents]
            else:
                parents = None
            thunk_record = AdapterFactory(key, parents, record)
            path = self._mapper.map(prefix)
            # Note that this parses the file many times; we can do better but
            # as this only impacts weaves in terms of performance, it is
            # acceptable.
            vf = self._get_vf(path)
            vf.insert_record_stream([thunk_record])

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
                yield line, prefix + (version,)

    def _iter_all_components(self):
        for path, prefix in self._get_all_prefixes():
            yield prefix, self._get_vf(path)

    def keys(self):
        """See VersionedFiles.keys()."""
        result = set()
        for prefix, vf in self._iter_all_components():
            for suffix in vf.versions():
                result.add(prefix + (suffix,))
        return result


class _PlanMergeVersionedFile(VersionedFiles):
    """A VersionedFile for uncommitted and committed texts.

    It is intended to allow merges to be planned with working tree texts.
    It implements only the small part of the VersionedFiles interface used by
    PlanMerge. It falls back to multiple versionedfiles for data not stored in
    _PlanMergeVersionedFile itself.

    :ivar: fallback_versionedfiles a list of VersionedFiles objects that can be
        queried for missing texts.
    """

    def __init__(self, file_id):
        """Create a _PlanMergeVersionedFile.

        :param file_id: Used with _PlanMerge code which is not yet fully
            tuple-keyspace aware.
        """
        self._file_id = file_id
        # fallback locations
        self.fallback_versionedfiles = []
        # Parents for locally held keys.
        self._parents = {}
        # line data for locally held keys.
        self._lines = {}
        # key lookup providers
        self._providers = [DictParentsProvider(self._parents)]

    def plan_merge(self, ver_a, ver_b, base=None):
        """See VersionedFile.plan_merge"""
        from bzrlib.merge import _PlanMerge
        if base is None:
            return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
        old_plan = list(_PlanMerge(ver_a, base, self, (self._file_id,)).plan_merge())
        new_plan = list(_PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge())
        return _PlanMerge._subtract_plans(old_plan, new_plan)

    def plan_lca_merge(self, ver_a, ver_b, base=None):
        from bzrlib.merge import _PlanLCAMerge
        graph = Graph(self)
        new_plan = _PlanLCAMerge(ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
        if base is None:
            return new_plan
        old_plan = _PlanLCAMerge(ver_a, base, self, (self._file_id,), graph).plan_merge()
        return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))

    def add_lines(self, key, parents, lines):
        """See VersionedFiles.add_lines

        Lines are added locally, not to fallback versionedfiles. Also, ghosts
        are permitted. Only reserved ids are permitted.
        """
        if type(key) != tuple:
            raise TypeError('key must be a tuple, not %r' % (key,))
        if not revision.is_reserved_id(key[-1]):
            raise ValueError('Only reserved ids may be used')
        if parents is None:
            raise ValueError('Parents may not be None')
        if lines is None:
            raise ValueError('Lines may not be None')
        self._parents[key] = tuple(parents)
        self._lines[key] = lines
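
    # A hedged usage sketch (not original code): callers such as _PlanMerge
    # insert uncommitted texts under reserved ids (ids ending in ':', e.g.
    # 'current:'); ghosts are permitted, so an empty parent list is fine.
    def _example_add_working_text(self, lines):
        self.add_lines((self._file_id, 'current:'), [], lines)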

    def get_record_stream(self, keys, ordering, include_delta_closure):
        pending = set(keys)
        for key in keys:
            if key in self._lines:
                lines = self._lines[key]
                parents = self._parents[key]
                pending.remove(key)
                yield FulltextContentFactory(key, parents, None,
                    ''.join(lines))
        for versionedfile in self.fallback_versionedfiles:
            for record in versionedfile.get_record_stream(
                pending, 'unordered', True):
                if record.storage_kind == 'absent':
                    continue
                else:
                    pending.remove(record.key)
                    yield record
            if not pending:
                return
        # report absent entries
        for key in pending:
            yield AbsentContentFactory(key)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map"""
        # We create a new provider because a fallback may have been added.
        # If we make fallbacks private we can update a stack list and avoid
        # object creation thrashing.
        self._providers = self._providers[:1] + self.fallback_versionedfiles
        result = _StackedParentsProvider(self._providers).get_parent_map(keys)
        for key, parents in result.iteritems():
            if parents == ():
                result[key] = (revision.NULL_REVISION,)
        return result


class PlanWeaveMerge(TextMerge):
    """Weave merge that takes a plan as its input.

    This exists so that VersionedFile.plan_merge is implementable.
    Most callers will want to use WeaveMerge instead.
    """

    def __init__(self, plan, a_marker=TextMerge.A_MARKER,
                 b_marker=TextMerge.B_MARKER):
        TextMerge.__init__(self, a_marker, b_marker)
        self.plan = plan

    def _merge_struct(self):
        lines_a = []
        lines_b = []
        ch_a = ch_b = False

        def outstanding_struct():
            if not lines_a and not lines_b:
                return
            elif ch_a and not ch_b:
                # one-sided change:
                yield (lines_a,)
            elif ch_b and not ch_a:
                yield (lines_b,)
            elif lines_a == lines_b:
                yield (lines_a,)
            else:
                yield (lines_a, lines_b)

        # We previously considered either 'unchanged' or 'killed-both' lines
        # to be possible places to resynchronize. However, assuming agreement
        # on killed-both lines may be too aggressive. -- mbp 20060324
        for state, line in self.plan:
            if state == 'unchanged':
                # resync and flush queued conflicts changes if any
                for struct in outstanding_struct():
                    yield struct
                lines_a = []
                lines_b = []
                ch_a = ch_b = False

            if state == 'unchanged':
                if line:
                    yield ([line],)
            elif state == 'killed-a':
                ch_a = True
                lines_b.append(line)
            elif state == 'killed-b':
                ch_b = True
                lines_a.append(line)
            elif state == 'new-a':
                ch_a = True
                lines_a.append(line)
            elif state == 'new-b':
                ch_b = True
                lines_b.append(line)
            elif state == 'conflicted-a':
                ch_b = ch_a = True
                lines_a.append(line)
            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                        'killed-base', 'killed-both'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct
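

# A hedged, minimal illustration (not part of the original module): feeding a
# hand-written plan straight to PlanWeaveMerge.  State names follow the legend
# in VersionedFile.plan_merge(); merge_lines()[0] is the merged line iterator,
# here with a conflict between the new-a and new-b lines.
def _example_plan_weave_merge():
    plan = [('unchanged', 'a\n'), ('new-a', 'b\n'), ('new-b', 'c\n')]
    return list(PlanWeaveMerge(plan).merge_lines()[0])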


class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)