# Copyright (C) 2005, 2006 Canonical Ltd
# Johan Rydberg <jrydberg@gnu.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Versioned text file storage api."""
from cStringIO import StringIO
import os
import urllib
from zlib import adler32

from bzrlib.lazy_import import lazy_import
# NOTE(review): the body and closing ``""")`` of this lazy_import call were
# dropped by extraction, leaving an unterminated string; restored so the
# module is importable.  The names errors/multiparent/osutils/revision are
# all referenced later in this file.
lazy_import(globals(), """
from bzrlib import (
    errors,
    multiparent,
    osutils,
    revision,
    ui,
    )
from bzrlib.graph import DictParentsProvider, Graph, _StackedParentsProvider
from bzrlib.transport.memory import MemoryTransport
""")
from bzrlib.inter import InterObject
from bzrlib.registry import Registry
from bzrlib.symbol_versioning import *
from bzrlib.textmerge import TextMerge
# Registry of adapters between record storage kinds.  Keys are
# (source storage kind, target storage kind) tuples; values are lazily
# loaded adapter classes living in bzrlib.knit.
adapter_registry = Registry()
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
    'DeltaPlainToFullText')
# NOTE(review): the target class name for this registration was dropped by
# extraction; 'FTPlainToFullText' restored by symmetry with the annotated
# variants below -- confirm against bzrlib.knit.
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
    'FTPlainToFullText')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
    'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
    'bzrlib.knit', 'DeltaAnnotatedToFullText')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
    'bzrlib.knit', 'FTAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
    'bzrlib.knit', 'FTAnnotatedToFullText')
class ContentFactory(object):
    """Abstract interface for insertion and retrieval from a VersionedFile.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. One of
        'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
        'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
        'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self):
        """Create a ContentFactory."""
        # All attributes default to None; concrete subclasses fill them in.
        self.sha1 = None
        self.storage_kind = None
        self.key = None
        self.parents = None
class FulltextContentFactory(ContentFactory):
    """Static data content factory.

    This takes a fulltext when created and just returns that during
    get_bytes_as('fulltext').

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'fulltext'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, text):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.storage_kind = 'fulltext'
        self.key = key
        self.parents = parents
        self._text = text

    def get_bytes_as(self, storage_kind):
        """Return the text in the requested representation.

        :raises errors.UnavailableRepresentation: for any storage_kind
            other than 'fulltext'.
        """
        if storage_kind == self.storage_kind:
            return self._text
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)
class AbsentContentFactory(ContentFactory):
    """A placeholder content factory for unavailable texts.

    :ivar sha1: None.
    :ivar storage_kind: 'absent'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: None.
    """

    def __init__(self, key):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = 'absent'
        self.key = key
        self.parents = None
class AdapterFactory(ContentFactory):
    """A content factory to adapt between key prefix's."""

    def __init__(self, key, parents, adapted):
        """Create an adapter factory instance.

        :param key: The key for this content.
        :param parents: The parent keys for this content.
        :param adapted: The wrapped factory all other attribute access is
            delegated to.
        """
        # self.key is required: __getattr__ serves 'key' from __dict__.
        self.key = key
        self.parents = parents
        self._adapted = adapted

    def __getattr__(self, attr):
        """Return a member from the adapted object."""
        if attr in ('key', 'parents'):
            return self.__dict__[attr]
        return getattr(self._adapted, attr)
def filter_absent(record_stream):
    """Adapt a record stream to remove absent records.

    :param record_stream: An iterable of record objects, each exposing a
        storage_kind attribute.
    :return: A generator yielding every record whose storage_kind is not
        'absent'.
    """
    for record in record_stream:
        if record.storage_kind != 'absent':
            yield record
class VersionedFile(object):
159
"""Versioned text file storage.
161
A versioned file manages versions of line-based text files,
162
keeping track of the originating version for each line.
164
To clients the "lines" of the file are represented as a list of
165
strings. These strings will typically have terminal newline
166
characters, but this is not required. In particular files commonly
167
do not have a newline at the end of the file.
169
Texts are identified by a version-id string.
173
def check_not_reserved_id(version_id):
174
revision.check_not_reserved_id(version_id)
176
def copy_to(self, name, transport):
177
"""Copy this versioned file to name on transport."""
178
raise NotImplementedError(self.copy_to)
180
def get_record_stream(self, versions, ordering, include_delta_closure):
181
"""Get a stream of records for versions.
183
:param versions: The versions to include. Each version is a tuple
185
:param ordering: Either 'unordered' or 'topological'. A topologically
186
sorted stream has compression parents strictly before their
188
:param include_delta_closure: If True then the closure across any
189
compression parents will be included (in the data content of the
190
stream, not in the emitted records). This guarantees that
191
'fulltext' can be used successfully on every record.
192
:return: An iterator of ContentFactory objects, each of which is only
193
valid until the iterator is advanced.
195
raise NotImplementedError(self.get_record_stream)
197
def has_version(self, version_id):
198
"""Returns whether version is present."""
199
raise NotImplementedError(self.has_version)
201
def insert_record_stream(self, stream):
202
"""Insert a record stream into this versioned file.
204
:param stream: A stream of records to insert.
206
:seealso VersionedFile.get_record_stream:
208
raise NotImplementedError
210
def add_lines(self, version_id, parents, lines, parent_texts=None,
211
left_matching_blocks=None, nostore_sha=None, random_id=False,
213
"""Add a single text on top of the versioned file.
215
Must raise RevisionAlreadyPresent if the new version is
216
already present in file history.
218
Must raise RevisionNotPresent if any of the given parents are
219
not present in file history.
221
:param lines: A list of lines. Each line must be a bytestring. And all
222
of them except the last must be terminated with \n and contain no
223
other \n's. The last line may either contain no \n's or a single
224
terminated \n. If the lines list does meet this constraint the add
225
routine may error or may succeed - but you will be unable to read
226
the data back accurately. (Checking the lines have been split
227
correctly is expensive and extremely unlikely to catch bugs so it
228
is not done at runtime unless check_content is True.)
229
:param parent_texts: An optional dictionary containing the opaque
230
representations of some or all of the parents of version_id to
231
allow delta optimisations. VERY IMPORTANT: the texts must be those
232
returned by add_lines or data corruption can be caused.
233
:param left_matching_blocks: a hint about which areas are common
234
between the text and its left-hand-parent. The format is
235
the SequenceMatcher.get_matching_blocks format.
236
:param nostore_sha: Raise ExistingContent and do not add the lines to
237
the versioned file if the digest of the lines matches this.
238
:param random_id: If True a random id has been selected rather than
239
an id determined by some deterministic process such as a converter
240
from a foreign VCS. When True the backend may choose not to check
241
for uniqueness of the resulting key within the versioned file, so
242
this should only be done when the result is expected to be unique
244
:param check_content: If True, the lines supplied are verified to be
245
bytestrings that are correctly formed lines.
246
:return: The text sha1, the number of bytes in the text, and an opaque
247
representation of the inserted version which can be provided
248
back to future add_lines calls in the parent_texts dictionary.
250
self._check_write_ok()
251
return self._add_lines(version_id, parents, lines, parent_texts,
252
left_matching_blocks, nostore_sha, random_id, check_content)
254
def _add_lines(self, version_id, parents, lines, parent_texts,
255
left_matching_blocks, nostore_sha, random_id, check_content):
256
"""Helper to do the class specific add_lines."""
257
raise NotImplementedError(self.add_lines)
259
def add_lines_with_ghosts(self, version_id, parents, lines,
260
parent_texts=None, nostore_sha=None, random_id=False,
261
check_content=True, left_matching_blocks=None):
262
"""Add lines to the versioned file, allowing ghosts to be present.
264
This takes the same parameters as add_lines and returns the same.
266
self._check_write_ok()
267
return self._add_lines_with_ghosts(version_id, parents, lines,
268
parent_texts, nostore_sha, random_id, check_content, left_matching_blocks)
270
def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
271
nostore_sha, random_id, check_content, left_matching_blocks):
272
"""Helper to do class specific add_lines_with_ghosts."""
273
raise NotImplementedError(self.add_lines_with_ghosts)
275
def check(self, progress_bar=None):
276
"""Check the versioned file for integrity."""
277
raise NotImplementedError(self.check)
279
def _check_lines_not_unicode(self, lines):
280
"""Check that lines being added to a versioned file are not unicode."""
282
if line.__class__ is not str:
283
raise errors.BzrBadParameterUnicode("lines")
285
def _check_lines_are_lines(self, lines):
286
"""Check that the lines really are full lines without inline EOL."""
288
if '\n' in line[:-1]:
289
raise errors.BzrBadParameterContainsNewline("lines")
291
def get_format_signature(self):
292
"""Get a text description of the data encoding in this file.
296
raise NotImplementedError(self.get_format_signature)
298
def make_mpdiffs(self, version_ids):
299
"""Create multiparent diffs for specified versions."""
300
knit_versions = set()
301
knit_versions.update(version_ids)
302
parent_map = self.get_parent_map(version_ids)
303
for version_id in version_ids:
305
knit_versions.update(parent_map[version_id])
307
raise RevisionNotPresent(version_id, self)
308
# We need to filter out ghosts, because we can't diff against them.
309
knit_versions = set(self.get_parent_map(knit_versions).keys())
310
lines = dict(zip(knit_versions,
311
self._get_lf_split_line_list(knit_versions)))
313
for version_id in version_ids:
314
target = lines[version_id]
316
parents = [lines[p] for p in parent_map[version_id] if p in
319
raise RevisionNotPresent(version_id, self)
321
left_parent_blocks = self._extract_blocks(version_id,
324
left_parent_blocks = None
325
diffs.append(multiparent.MultiParent.from_lines(target, parents,
329
def _extract_blocks(self, version_id, source, target):
332
def add_mpdiffs(self, records):
333
"""Add mpdiffs to this VersionedFile.
335
Records should be iterables of version, parents, expected_sha1,
336
mpdiff. mpdiff should be a MultiParent instance.
338
# Does this need to call self._check_write_ok()? (IanC 20070919)
340
mpvf = multiparent.MultiMemoryVersionedFile()
342
for version, parent_ids, expected_sha1, mpdiff in records:
343
versions.append(version)
344
mpvf.add_diff(mpdiff, version, parent_ids)
345
needed_parents = set()
346
for version, parent_ids, expected_sha1, mpdiff in records:
347
needed_parents.update(p for p in parent_ids
348
if not mpvf.has_version(p))
349
present_parents = set(self.get_parent_map(needed_parents).keys())
350
for parent_id, lines in zip(present_parents,
351
self._get_lf_split_line_list(present_parents)):
352
mpvf.add_version(lines, parent_id, [])
353
for (version, parent_ids, expected_sha1, mpdiff), lines in\
354
zip(records, mpvf.get_line_list(versions)):
355
if len(parent_ids) == 1:
356
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
357
mpvf.get_diff(parent_ids[0]).num_lines()))
359
left_matching_blocks = None
361
_, _, version_text = self.add_lines_with_ghosts(version,
362
parent_ids, lines, vf_parents,
363
left_matching_blocks=left_matching_blocks)
364
except NotImplementedError:
365
# The vf can't handle ghosts, so add lines normally, which will
366
# (reasonably) fail if there are ghosts in the data.
367
_, _, version_text = self.add_lines(version,
368
parent_ids, lines, vf_parents,
369
left_matching_blocks=left_matching_blocks)
370
vf_parents[version] = version_text
371
for (version, parent_ids, expected_sha1, mpdiff), sha1 in\
372
zip(records, self.get_sha1s(versions)):
373
if expected_sha1 != sha1:
374
raise errors.VersionedFileInvalidChecksum(version)
376
def get_sha1s(self, version_ids):
377
"""Get the stored sha1 sums for the given revisions.
379
:param version_ids: The names of the versions to lookup
380
:return: a list of sha1s in order according to the version_ids
382
raise NotImplementedError(self.get_sha1s)
384
def get_text(self, version_id):
385
"""Return version contents as a text string.
387
Raises RevisionNotPresent if version is not present in
390
return ''.join(self.get_lines(version_id))
391
get_string = get_text
393
def get_texts(self, version_ids):
394
"""Return the texts of listed versions as a list of strings.
396
Raises RevisionNotPresent if version is not present in
399
return [''.join(self.get_lines(v)) for v in version_ids]
401
def get_lines(self, version_id):
402
"""Return version contents as a sequence of lines.
404
Raises RevisionNotPresent if version is not present in
407
raise NotImplementedError(self.get_lines)
409
def _get_lf_split_line_list(self, version_ids):
410
return [StringIO(t).readlines() for t in self.get_texts(version_ids)]
412
def get_ancestry(self, version_ids, topo_sorted=True):
413
"""Return a list of all ancestors of given version(s). This
414
will not include the null revision.
416
This list will not be topologically sorted if topo_sorted=False is
419
Must raise RevisionNotPresent if any of the given versions are
420
not present in file history."""
421
if isinstance(version_ids, basestring):
422
version_ids = [version_ids]
423
raise NotImplementedError(self.get_ancestry)
425
def get_ancestry_with_ghosts(self, version_ids):
426
"""Return a list of all ancestors of given version(s). This
427
will not include the null revision.
429
Must raise RevisionNotPresent if any of the given versions are
430
not present in file history.
432
Ghosts that are known about will be included in ancestry list,
433
but are not explicitly marked.
435
raise NotImplementedError(self.get_ancestry_with_ghosts)
437
def get_parent_map(self, version_ids):
438
"""Get a map of the parents of version_ids.
440
:param version_ids: The version ids to look up parents for.
441
:return: A mapping from version id to parents.
443
raise NotImplementedError(self.get_parent_map)
445
def get_parents_with_ghosts(self, version_id):
446
"""Return version names for parents of version_id.
448
Will raise RevisionNotPresent if version_id is not present
451
Ghosts that are known about will be included in the parent list,
452
but are not explicitly marked.
455
return list(self.get_parent_map([version_id])[version_id])
457
raise errors.RevisionNotPresent(version_id, self)
459
def annotate(self, version_id):
460
"""Return a list of (version-id, line) tuples for version_id.
462
:raise RevisionNotPresent: If the given version is
463
not present in file history.
465
raise NotImplementedError(self.annotate)
467
def iter_lines_added_or_present_in_versions(self, version_ids=None,
469
"""Iterate over the lines in the versioned file from version_ids.
471
This may return lines from other versions. Each item the returned
472
iterator yields is a tuple of a line and a text version that that line
473
is present in (not introduced in).
475
Ordering of results is in whatever order is most suitable for the
476
underlying storage format.
478
If a progress bar is supplied, it may be used to indicate progress.
479
The caller is responsible for cleaning up progress bars (because this
482
NOTES: Lines are normalised: they will all have \n terminators.
483
Lines are returned in arbitrary order.
485
:return: An iterator over (line, version_id).
487
raise NotImplementedError(self.iter_lines_added_or_present_in_versions)
489
def plan_merge(self, ver_a, ver_b):
490
"""Return pseudo-annotation indicating how the two versions merge.
492
This is computed between versions a and b and their common
495
Weave lines present in none of them are skipped entirely.
498
killed-base Dead in base revision
499
killed-both Killed in each revision
502
unchanged Alive in both a and b (possibly created in both)
505
ghost-a Killed in a, unborn in b
506
ghost-b Killed in b, unborn in a
507
irrelevant Not in either revision
509
raise NotImplementedError(VersionedFile.plan_merge)
511
def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
512
b_marker=TextMerge.B_MARKER):
513
return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
516
class RecordingVersionedFilesDecorator(object):
    """A minimal versioned files that records calls made on it.

    Only enough methods have been added to support tests using it to date.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to it.
    """

    def __init__(self, backing_vf):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        """
        self._backing_vf = backing_vf
        # Must exist before any recorded method is called.
        self.calls = []

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        """Record the call, then delegate to the backing versioned file."""
        self.calls.append(("get_record_stream", keys, sort_order,
            include_delta_closure))
        return self._backing_vf.get_record_stream(keys, sort_order,
            include_delta_closure)
class KeyMapper(object):
    """KeyMappers map between keys and underlying partitioned storage."""

    def map(self, key):
        """Map key to an underlying storage identifier.

        :param key: A key tuple e.g. ('file-id', 'revision-id').
        :return: An underlying storage identifier, specific to the partitioning
            mechanism.
        """
        raise NotImplementedError(self.map)

    def unmap(self, partition_id):
        """Map a partitioned storage id back to a key prefix.

        :param partition_id: The underlying partition id.
        :return: As much of a key (or prefix) as is derivable from the
            partition id.
        """
        raise NotImplementedError(self.unmap)
class ConstantMapper(KeyMapper):
    """A key mapper that maps to a constant result."""

    def __init__(self, result):
        """Create a ConstantMapper which will return result for all maps."""
        self._result = result

    def map(self, key):
        """See KeyMapper.map()."""
        # The key is deliberately ignored: every key maps to the one result.
        return self._result
class URLEscapeMapper(KeyMapper):
    """Base class for use with transport backed storage.

    This provides a map and unmap wrapper that respectively url escape and
    unescape their outputs and inputs.  Subclasses implement _map/_unmap.
    """

    def map(self, key):
        """See KeyMapper.map()."""
        return urllib.quote(self._map(key))

    def unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return self._unmap(urllib.unquote(partition_id))
class PrefixMapper(URLEscapeMapper):
    """A key mapper that extracts the first component of a key.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        return key[0]

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (partition_id,)
class HashPrefixMapper(URLEscapeMapper):
    """A key mapper that combines the first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        prefix = self._escape(key[0])
        # One hex byte of adler32 spreads prefixes over 256 directories.
        return "%02x/%s" % (adler32(prefix) & 0xff, prefix)

    def _escape(self, prefix):
        """No escaping needed here."""
        return prefix

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        # Drop the leading hash directory; it is derivable from the prefix.
        return (self._unescape(osutils.basename(partition_id)),)

    def _unescape(self, basename):
        """No unescaping needed for HashPrefixMapper."""
        return basename
class HashEscapedPrefixMapper(HashPrefixMapper):
    """Combines the escaped first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    # Characters passed through unescaped by _escape.
    _safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."

    def _escape(self, prefix):
        """Turn a key element into a filesystem safe string.

        This is similar to a plain urllib.quote, except
        it uses specific safe characters, so that it doesn't
        have to translate a lot of valid file ids.
        """
        # @ does not get escaped. This is because it is a valid
        # filesystem character we use all the time, and it looks
        # a lot better than seeing %40 all the time.
        r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
             for c in prefix]
        return ''.join(r)

    def _unescape(self, basename):
        """Escaped names are easily unescaped by urlutils."""
        return urllib.unquote(basename)
def make_versioned_files_factory(versioned_file_factory, mapper):
    """Create a ThunkedVersionedFiles factory.

    This will create a callable which when called creates a
    ThunkedVersionedFiles on a transport, using mapper to access individual
    versioned files, and versioned_file_factory to create each individual file.
    """
    def factory(transport):
        # NOTE(review): the final is_locked argument line was dropped by
        # extraction; a factory-made store has no lock to consult, so
        # always-locked (lambda:True) is restored -- confirm against callers.
        return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
            lambda:True)
    return factory
class VersionedFiles(object):
670
"""Storage for many versioned files.
672
This object allows a single keyspace for accessing the history graph and
673
contents of named bytestrings.
675
Currently no implementation allows the graph of different key prefixes to
676
intersect, but the API does allow such implementations in the future.
679
def add_lines(self, key, parents, lines, parent_texts=None,
680
left_matching_blocks=None, nostore_sha=None, random_id=False,
682
"""Add a text to the store.
684
:param key: The key tuple of the text to add.
685
:param parents: The parents key tuples of the text to add.
686
:param lines: A list of lines. Each line must be a bytestring. And all
687
of them except the last must be terminated with \n and contain no
688
other \n's. The last line may either contain no \n's or a single
689
terminating \n. If the lines list does meet this constraint the add
690
routine may error or may succeed - but you will be unable to read
691
the data back accurately. (Checking the lines have been split
692
correctly is expensive and extremely unlikely to catch bugs so it
693
is not done at runtime unless check_content is True.)
694
:param parent_texts: An optional dictionary containing the opaque
695
representations of some or all of the parents of version_id to
696
allow delta optimisations. VERY IMPORTANT: the texts must be those
697
returned by add_lines or data corruption can be caused.
698
:param left_matching_blocks: a hint about which areas are common
699
between the text and its left-hand-parent. The format is
700
the SequenceMatcher.get_matching_blocks format.
701
:param nostore_sha: Raise ExistingContent and do not add the lines to
702
the versioned file if the digest of the lines matches this.
703
:param random_id: If True a random id has been selected rather than
704
an id determined by some deterministic process such as a converter
705
from a foreign VCS. When True the backend may choose not to check
706
for uniqueness of the resulting key within the versioned file, so
707
this should only be done when the result is expected to be unique
709
:param check_content: If True, the lines supplied are verified to be
710
bytestrings that are correctly formed lines.
711
:return: The text sha1, the number of bytes in the text, and an opaque
712
representation of the inserted version which can be provided
713
back to future add_lines calls in the parent_texts dictionary.
715
raise NotImplementedError(self.add_lines)
717
def add_mpdiffs(self, records):
718
"""Add mpdiffs to this VersionedFile.
720
Records should be iterables of version, parents, expected_sha1,
721
mpdiff. mpdiff should be a MultiParent instance.
724
mpvf = multiparent.MultiMemoryVersionedFile()
726
for version, parent_ids, expected_sha1, mpdiff in records:
727
versions.append(version)
728
mpvf.add_diff(mpdiff, version, parent_ids)
729
needed_parents = set()
730
for version, parent_ids, expected_sha1, mpdiff in records:
731
needed_parents.update(p for p in parent_ids
732
if not mpvf.has_version(p))
733
# It seems likely that adding all the present parents as fulltexts can
734
# easily exhaust memory.
735
present_parents = set(self.get_parent_map(needed_parents).keys())
736
split_lines = osutils.split_lines
737
for record in self.get_record_stream(present_parents, 'unordered',
739
mpvf.add_version(split_lines(record.get_bytes_as('fulltext')),
741
for (key, parent_keys, expected_sha1, mpdiff), lines in\
742
zip(records, mpvf.get_line_list(versions)):
743
if len(parent_keys) == 1:
744
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
745
mpvf.get_diff(parent_keys[0]).num_lines()))
747
left_matching_blocks = None
748
version_sha1, _, version_text = self.add_lines(key,
749
parent_keys, lines, vf_parents,
750
left_matching_blocks=left_matching_blocks)
751
if version_sha1 != expected_sha1:
752
raise errors.VersionedFileInvalidChecksum(version)
753
vf_parents[key] = version_text
755
def annotate(self, key):
756
"""Return a list of (version-key, line) tuples for the text of key.
758
:raise RevisionNotPresent: If the key is not present.
760
raise NotImplementedError(self.annotate)
762
def check(self, progress_bar=None):
763
"""Check this object for integrity."""
764
raise NotImplementedError(self.check)
767
def check_not_reserved_id(version_id):
768
revision.check_not_reserved_id(version_id)
770
def _check_lines_not_unicode(self, lines):
771
"""Check that lines being added to a versioned file are not unicode."""
773
if line.__class__ is not str:
774
raise errors.BzrBadParameterUnicode("lines")
776
def _check_lines_are_lines(self, lines):
777
"""Check that the lines really are full lines without inline EOL."""
779
if '\n' in line[:-1]:
780
raise errors.BzrBadParameterContainsNewline("lines")
782
def get_parent_map(self, keys):
783
"""Get a map of the parents of keys.
785
:param keys: The keys to look up parents for.
786
:return: A mapping from keys to parents. Absent keys are absent from
789
raise NotImplementedError(self.get_parent_map)
791
def get_record_stream(self, keys, ordering, include_delta_closure):
792
"""Get a stream of records for keys.
794
:param keys: The keys to include.
795
:param ordering: Either 'unordered' or 'topological'. A topologically
796
sorted stream has compression parents strictly before their
798
:param include_delta_closure: If True then the closure across any
799
compression parents will be included (in the opaque data).
800
:return: An iterator of ContentFactory objects, each of which is only
801
valid until the iterator is advanced.
803
raise NotImplementedError(self.get_record_stream)
805
def get_sha1s(self, keys):
806
"""Get the sha1's of the texts for the given keys.
808
:param keys: The names of the keys to lookup
809
:return: a list of sha1s matching keys.
811
raise NotImplementedError(self.get_sha1s)
813
def insert_record_stream(self, stream):
814
"""Insert a record stream into this container.
816
:param stream: A stream of records to insert.
818
:seealso VersionedFile.get_record_stream:
820
raise NotImplementedError
822
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
823
"""Iterate over the lines in the versioned files from keys.
825
This may return lines from other keys. Each item the returned
826
iterator yields is a tuple of a line and a text version that that line
827
is present in (not introduced in).
829
Ordering of results is in whatever order is most suitable for the
830
underlying storage format.
832
If a progress bar is supplied, it may be used to indicate progress.
833
The caller is responsible for cleaning up progress bars (because this
837
* Lines are normalised by the underlying store: they will all have \n
839
* Lines are returned in arbitrary order.
841
:return: An iterator over (line, key).
843
raise NotImplementedError(self.iter_lines_added_or_present_in_keys)
846
"""Return a iterable of the keys for all the contained texts."""
847
raise NotImplementedError(self.keys)
849
def make_mpdiffs(self, keys):
850
"""Create multiparent diffs for specified keys."""
851
keys_order = tuple(keys)
852
keys = frozenset(keys)
853
knit_keys = set(keys)
854
parent_map = self.get_parent_map(keys)
855
for parent_keys in parent_map.itervalues():
857
knit_keys.update(parent_keys)
858
missing_keys = keys - set(parent_map)
860
raise errors.RevisionNotPresent(missing_keys.pop(), self)
861
# We need to filter out ghosts, because we can't diff against them.
862
maybe_ghosts = knit_keys - keys
863
ghosts = maybe_ghosts - set(self.get_parent_map(maybe_ghosts))
864
knit_keys.difference_update(ghosts)
866
split_lines = osutils.split_lines
867
for record in self.get_record_stream(knit_keys, 'topological', True):
868
lines[record.key] = split_lines(record.get_bytes_as('fulltext'))
869
# line_block_dict = {}
870
# for parent, blocks in record.extract_line_blocks():
871
# line_blocks[parent] = blocks
872
# line_blocks[record.key] = line_block_dict
874
for key in keys_order:
876
parents = parent_map[key] or []
877
# Note that filtering knit_keys can lead to a parent difference
878
# between the creation and the application of the mpdiff.
879
parent_lines = [lines[p] for p in parents if p in knit_keys]
880
if len(parent_lines) > 0:
881
left_parent_blocks = self._extract_blocks(key, parent_lines[0],
884
left_parent_blocks = None
885
diffs.append(multiparent.MultiParent.from_lines(target,
886
parent_lines, left_parent_blocks))
889
def _extract_blocks(self, version_id, source, target):
893
class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.
    """

    def __init__(self, transport, file_factory, mapper, is_locked):
        """Create a ThunkedVersionedFiles."""
        self._transport = transport
        self._file_factory = file_factory
        self._mapper = mapper
        self._is_locked = is_locked

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """See VersionedFiles.add_lines()."""
        path = self._mapper.map(key)
        version_id = key[-1]
        parents = [parent[-1] for parent in parents]
        vf = self._get_vf(path)
        try:
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
        except errors.NoSuchFile:
            # parent directory may be missing, try again.
            self._transport.mkdir(osutils.dirname(path))
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        prefix = key[:-1]
        path = self._mapper.map(prefix)
        vf = self._get_vf(path)
        origins = vf.annotate(key[-1])
        result = []
        for origin, line in origins:
            result.append((prefix + (origin,), line))
        return result

    def check(self, progress_bar=None):
        """See VersionedFiles.check()."""
        for prefix, vf in self._iter_all_components():
            vf.check()

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        prefixes = self._partition_keys(keys)
        result = {}
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            parent_map = vf.get_parent_map(suffixes)
            for key, parents in parent_map.items():
                result[prefix + (key,)] = tuple(
                    prefix + (parent,) for parent in parents)
        return result

    def _get_vf(self, path):
        # All access must happen while the containing object is locked.
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)
        return self._file_factory(path, self._transport, create=True,
            get_scope=lambda:None)

    def _partition_keys(self, keys):
        """Turn keys into a dict of prefix:suffix_list."""
        result = {}
        for key in keys:
            prefix_keys = result.setdefault(key[:-1], [])
            prefix_keys.append(key[-1])
        return result

    def _get_all_prefixes(self):
        # Identify all key prefixes.
        # XXX: A bit hacky, needs polish.
        if type(self._mapper) == ConstantMapper:
            paths = [self._mapper.map(())]
            prefixes = [()]
        else:
            relpaths = set()
            for quoted_relpath in self._transport.iter_files_recursive():
                path, ext = os.path.splitext(quoted_relpath)
                relpaths.add(path)
            paths = list(relpaths)
            prefixes = [self._mapper.unmap(path) for path in paths]
        return zip(paths, prefixes)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream()."""
        # Ordering will be taken care of by each partitioned store; group keys
        # into parts for efficiency of processing.
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            suffixes = [(suffix,) for suffix in suffixes]
            for record in vf.get_record_stream(suffixes, ordering,
                include_delta_closure):
                # Re-add the prefix so callers see full tuple keys.
                if record.parents is not None:
                    record.parents = tuple(
                        prefix + parent for parent in record.parents)
                record.key = prefix + record.key
                yield record

    def _iter_keys_vf(self, keys):
        # Yield (prefix, suffixes, vf) for each distinct key prefix.
        prefixes = self._partition_keys(keys)
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            yield prefix, suffixes, vf

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s()."""
        sha1s = {}
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            # Query each component once; reuse the result rather than calling
            # get_sha1s a second time inside the zip.
            vf_sha1s = vf.get_sha1s(suffixes)
            for suffix, sha1 in zip(suffixes, vf_sha1s):
                sha1s[prefix + (suffix,)] = sha1
        return [sha1s[key] for key in keys]

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        for record in stream:
            # Strip the prefix off each key so the per-prefix VersionedFile
            # sees only the suffix part.
            prefix = record.key[:-1]
            key = record.key[-1:]
            if record.parents is not None:
                parents = [parent[-1:] for parent in record.parents]
            else:
                parents = None
            thunk_record = AdapterFactory(key, parents, record)
            path = self._mapper.map(prefix)
            # Note that this parses the file many times; we can do better but
            # as this only impacts weaves in terms of performance, it is
            # reasonable.
            vf = self._get_vf(path)
            vf.insert_record_stream([thunk_record])

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
                yield line, prefix + (version,)

    def _iter_all_components(self):
        # Yield (prefix, vf) for every component versioned file on disk.
        for path, prefix in self._get_all_prefixes():
            yield prefix, self._get_vf(path)

    def keys(self):
        """See VersionedFiles.keys()."""
        result = set()
        for prefix, vf in self._iter_all_components():
            for suffix in vf.versions():
                result.add(prefix + (suffix,))
        return result


class _PlanMergeVersionedFile(VersionedFiles):
    """A VersionedFile for uncommitted and committed texts.

    It is intended to allow merges to be planned with working tree texts.
    It implements only the small part of the VersionedFiles interface used by
    PlanMerge. It falls back to multiple versionedfiles for data not stored in
    _PlanMergeVersionedFile itself.

    :ivar: fallback_versionedfiles a list of VersionedFiles objects that can be
        queried for missing texts.
    """

    def __init__(self, file_id):
        """Create a _PlanMergeVersionedFile.

        :param file_id: Used with _PlanMerge code which is not yet fully
            tuple-keyspace aware.
        """
        self._file_id = file_id
        # fallback locations
        self.fallback_versionedfiles = []
        # Parents for locally held keys.
        self._parents = {}
        # line data for locally held keys.
        self._lines = {}
        # key lookup providers
        self._providers = [DictParentsProvider(self._parents)]

    def plan_merge(self, ver_a, ver_b, base=None):
        """See VersionedFile.plan_merge"""
        from bzrlib.merge import _PlanMerge
        if base is None:
            return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
        old_plan = list(_PlanMerge(ver_a, base, self, (self._file_id,)).plan_merge())
        new_plan = list(_PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge())
        return _PlanMerge._subtract_plans(old_plan, new_plan)

    def plan_lca_merge(self, ver_a, ver_b, base=None):
        """See VersionedFile.plan_lca_merge"""
        from bzrlib.merge import _PlanLCAMerge
        graph = Graph(self)
        new_plan = _PlanLCAMerge(ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
        if base is None:
            return new_plan
        old_plan = _PlanLCAMerge(ver_a, base, self, (self._file_id,), graph).plan_merge()
        return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))

    def add_lines(self, key, parents, lines):
        """See VersionedFiles.add_lines

        Lines are added locally, not to fallback versionedfiles. Also, ghosts
        are permitted. Only reserved ids are permitted.
        """
        # Was a leftover debugger breakpoint (pdb.set_trace); raise instead.
        if type(key) != tuple:
            raise TypeError(key)
        if not revision.is_reserved_id(key[-1]):
            raise ValueError('Only reserved ids may be used')
        if parents is None:
            raise ValueError('Parents may not be None')
        if lines is None:
            raise ValueError('Lines may not be None')
        self._parents[key] = tuple(parents)
        self._lines[key] = lines

    def get_record_stream(self, keys, ordering, include_delta_closure):
        pending = set(keys)
        for key in keys:
            if key in self._lines:
                lines = self._lines[key]
                parents = self._parents[key]
                pending.remove(key)
                yield FulltextContentFactory(key, parents, None,
                    ''.join(lines))
        for versionedfile in self.fallback_versionedfiles:
            for record in versionedfile.get_record_stream(
                pending, 'unordered', True):
                if record.storage_kind == 'absent':
                    continue
                else:
                    pending.remove(record.key)
                    yield record
            if not pending:
                return
        # report absent entries
        for key in pending:
            yield AbsentContentFactory(key)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map"""
        # We create a new provider because a fallback may have been added.
        # If we make fallbacks private we can update a stack list and avoid
        # object creation thrashing.
        self._providers = self._providers[:1] + self.fallback_versionedfiles
        return _StackedParentsProvider(self._providers).get_parent_map(keys)


class PlanWeaveMerge(TextMerge):
    """Weave merge that takes a plan as its input.

    This exists so that VersionedFile.plan_merge is implementable.
    Most callers will want to use WeaveMerge instead.
    """

    def __init__(self, plan, a_marker=TextMerge.A_MARKER,
                 b_marker=TextMerge.B_MARKER):
        TextMerge.__init__(self, a_marker, b_marker)
        self.plan = plan

    def _merge_struct(self):
        # Queued lines for each side since the last resync point, plus flags
        # recording whether either side actually changed.
        lines_a = []
        lines_b = []
        ch_a = ch_b = False

        def outstanding_struct():
            # Flush the queued lines as a merge structure: a one-tuple for
            # clean text, a two-tuple for a conflict region.
            if not lines_a and not lines_b:
                return
            elif ch_a and not ch_b:
                # one-sided change: made in a only
                yield (lines_a,)
            elif ch_b and not ch_a:
                yield (lines_b,)
            elif lines_a == lines_b:
                yield (lines_a,)
            else:
                yield (lines_a, lines_b)

        # We previously considered either 'unchanged' or 'killed-both' lines
        # to be possible places to resynchronize.  However, assuming agreement
        # on killed-both lines may be too aggressive. -- mbp 20060324
        for state, line in self.plan:
            if state == 'unchanged':
                # resync and flush queued conflicts changes if any
                for struct in outstanding_struct():
                    yield struct
                lines_a = []
                lines_b = []
                ch_a = ch_b = False

            if state == 'unchanged':
                if line:
                    yield ([line],)
            elif state == 'killed-a':
                ch_a = True
                lines_b.append(line)
            elif state == 'killed-b':
                ch_b = True
                lines_a.append(line)
            elif state == 'new-a':
                ch_a = True
                lines_a.append(line)
            elif state == 'new-b':
                ch_b = True
                lines_b.append(line)
            elif state == 'conflicted-a':
                ch_b = ch_a = True
                lines_a.append(line)
            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                        'killed-base', 'killed-both'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct


class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        # Build the merge plan from the versioned file, then delegate all
        # merging work to PlanWeaveMerge.
        merge_plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, merge_plan, a_marker, b_marker)