# Copyright (C) 2006-2011 Canonical Ltd
# Copyright (C) 2005 by Canonical Ltd
# Johan Rydberg <jrydberg@gnu.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

# TODO: figure out whether get_graph should return a simple
# map, or a graph object of some kind.

"""Versioned text file storage api."""
19
from __future__ import absolute_import
25
from zlib import adler32
27
from ..lazy_import import lazy_import
28
lazy_import(globals(), """
39
from breezy.bzr import (
48
from ..registry import Registry
49
from ..sixish import (
55
from ..textmerge import TextMerge
58


adapter_registry = Registry()
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
                               'breezy.bzr.knit', 'DeltaAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
                               'breezy.bzr.knit', 'FTAnnotatedToUnannotated')
for target_storage_kind in ('fulltext', 'chunked', 'lines'):
    adapter_registry.register_lazy(('knit-delta-gz', target_storage_kind),
                                   'breezy.bzr.knit', 'DeltaPlainToFullText')
    adapter_registry.register_lazy(('knit-ft-gz', target_storage_kind),
                                   'breezy.bzr.knit', 'FTPlainToFullText')
    adapter_registry.register_lazy(('knit-annotated-ft-gz', target_storage_kind),
                                   'breezy.bzr.knit', 'FTAnnotatedToFullText')
    adapter_registry.register_lazy(('knit-annotated-delta-gz', target_storage_kind),
                                   'breezy.bzr.knit', 'DeltaAnnotatedToFullText')
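
# Illustrative sketch (names other than adapter_registry are assumptions):
# an adapter is looked up by (source storage kind, target storage kind) and
# constructed with the VersionedFiles that can supply compression parents:
#
#   adapter_factory = adapter_registry.get(('knit-delta-gz', 'fulltext'))
#   adapter = adapter_factory(source_vf)
#   fulltext = adapter.get_bytes(record)  # record: a ContentFactory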


class ContentFactory(object):
    """Abstract interface for insertion and retrieval from a VersionedFile.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar size: None, or the size of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. One of
        'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
        'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
        'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self):
        """Create a ContentFactory."""
        self.sha1 = None
        self.size = None
        self.storage_kind = None
        self.key = None
        self.parents = None


class ChunkedContentFactory(ContentFactory):
    """Static data content factory.

    This takes a 'chunked' list of strings. The only requirement on 'chunked' is
    that ''.join(lines) becomes a valid fulltext. A tuple of a single string
    satisfies this, as does a list of lines.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar size: None, or the size of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'chunked'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    :ivar chunks_are_lines: Whether chunks are lines.
    """

    def __init__(self, key, parents, sha1, chunks, chunks_are_lines=None):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.size = sum(map(len, chunks))
        self.storage_kind = 'chunked'
        self.key = key
        self.parents = parents
        self._chunks = chunks
        self._chunks_are_lines = chunks_are_lines

    def get_bytes_as(self, storage_kind):
        if storage_kind == 'chunked':
            return self._chunks
        elif storage_kind == 'fulltext':
            return b''.join(self._chunks)
        elif storage_kind == 'lines':
            if self._chunks_are_lines:
                return self._chunks
            return list(osutils.chunks_to_lines(self._chunks))
        raise errors.UnavailableRepresentation(self.key, storage_kind,
                                               self.storage_kind)

    def iter_bytes_as(self, storage_kind):
        if storage_kind == 'chunked':
            return iter(self._chunks)
        elif storage_kind == 'lines':
            if self._chunks_are_lines:
                return iter(self._chunks)
            return iter(osutils.chunks_to_lines(self._chunks))
        raise errors.UnavailableRepresentation(self.key, storage_kind,
                                               self.storage_kind)
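
# Illustrative sketch (key and parent tuples are made up): a chunked
# factory can re-serve its chunks as any of the static representations.
#
#   factory = ChunkedContentFactory(
#       (b'file-id', b'rev-1'), ((b'file-id', b'rev-0'),), None,
#       [b'first line\n', b'second line\n'], chunks_are_lines=True)
#   factory.get_bytes_as('fulltext')  # -> b'first line\nsecond line\n'
#   factory.get_bytes_as('lines')     # -> the original list of lines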


class FulltextContentFactory(ContentFactory):
    """Static data content factory.

    This takes a fulltext when created and just returns that during
    get_bytes_as('fulltext').

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'fulltext'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, text):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.size = len(text)
        self.storage_kind = 'fulltext'
        self.key = key
        self.parents = parents
        if not isinstance(text, bytes):
            raise TypeError(text)
        self._text = text

    def get_bytes_as(self, storage_kind):
        if storage_kind == self.storage_kind:
            return self._text
        elif storage_kind == 'chunked':
            return [self._text]
        elif storage_kind == 'lines':
            return osutils.split_lines(self._text)
        raise errors.UnavailableRepresentation(self.key, storage_kind,
                                               self.storage_kind)

    def iter_bytes_as(self, storage_kind):
        if storage_kind == 'chunked':
            return iter([self._text])
        elif storage_kind == 'lines':
            return iter(osutils.split_lines(self._text))
        raise errors.UnavailableRepresentation(self.key, storage_kind,
                                               self.storage_kind)


class FileContentFactory(ContentFactory):
    """File-based content factory."""

    def __init__(self, key, parents, fileobj, sha1=None, size=None):
        self.key = key
        self.parents = parents
        self.file = fileobj
        self.storage_kind = 'file'
        self.sha1 = sha1
        self.size = size

    def get_bytes_as(self, storage_kind):
        self.file.seek(0)
        if storage_kind == 'fulltext':
            return self.file.read()
        elif storage_kind == 'chunked':
            return list(osutils.file_iterator(self.file))
        elif storage_kind == 'lines':
            return list(self.file.readlines())
        raise errors.UnavailableRepresentation(self.key, storage_kind,
                                               self.storage_kind)

    def iter_bytes_as(self, storage_kind):
        self.file.seek(0)
        if storage_kind == 'chunked':
            return osutils.file_iterator(self.file)
        elif storage_kind == 'lines':
            return self.file.readlines()
        raise errors.UnavailableRepresentation(self.key, storage_kind,
                                               self.storage_kind)


class AbsentContentFactory(ContentFactory):
    """A placeholder content factory for unavailable texts.

    :ivar sha1: None.
    :ivar storage_kind: 'absent'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: None.
    """

    def __init__(self, key):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = 'absent'
        self.key = key
        self.parents = None

    def get_bytes_as(self, storage_kind):
        raise ValueError('A request was made for key: %s, but that'
                         ' content is not available, and the calling'
                         ' code does not handle if it is missing.'
                         % (self.key,))

    def iter_bytes_as(self, storage_kind):
        raise ValueError('A request was made for key: %s, but that'
                         ' content is not available, and the calling'
                         ' code does not handle if it is missing.'
                         % (self.key,))


class AdapterFactory(ContentFactory):
    """A content factory to adapt between key prefixes."""

    def __init__(self, key, parents, adapted):
        """Create an adapter factory instance."""
        self.key = key
        self.parents = parents
        self._adapted = adapted

    def __getattr__(self, attr):
        """Return a member from the adapted object."""
        if attr in ('key', 'parents'):
            return self.__dict__[attr]
        else:
            return getattr(self._adapted, attr)


def filter_absent(record_stream):
    """Adapt a record stream to remove absent records."""
    for record in record_stream:
        if record.storage_kind != 'absent':
            yield record
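
# Illustrative sketch ('vf' and 'keys' are assumed to exist): drain a
# record stream while silently skipping ghosts.
#
#   for record in filter_absent(
#           vf.get_record_stream(keys, 'unordered', True)):
#       text = record.get_bytes_as('fulltext')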


class _MPDiffGenerator(object):
    """Pull out the functionality for generating mp_diffs."""

    def __init__(self, vf, keys):
        self.vf = vf
        # This is the order the keys were requested in
        self.ordered_keys = tuple(keys)
        # keys + their parents, what we need to compute the diffs
        self.needed_keys = ()
        # Map from key: mp_diff
        self.diffs = {}
        # Map from key: parents_needed (may have ghosts)
        self.parent_map = {}
        # Parents that aren't present
        self.ghost_parents = ()
        # Map from parent_key => number of children for this text
        self.refcounts = {}
        # Content chunks that are cached while we still need them
        self.chunks = {}

    def _find_needed_keys(self):
        """Find the set of keys we need to request.

        This includes all the original keys passed in, and the non-ghost
        parents of those keys.

        :return: (needed_keys, refcounts)
            needed_keys is the set of all texts we need to extract
            refcounts is a dict of {key: num_children} letting us know when we
                no longer need to cache a given parent text
        """
        # All the keys and their parents
        needed_keys = set(self.ordered_keys)
        parent_map = self.vf.get_parent_map(needed_keys)
        self.parent_map = parent_map
        # TODO: Should we be using a different construct here? I think this
        #       uses difference_update internally, and we expect the result to
        #       be tiny
        missing_keys = needed_keys.difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
        # Parents that might be missing. They are allowed to be ghosts, but we
        # should check for them
        refcounts = {}
        setdefault = refcounts.setdefault
        just_parents = set()
        for child_key, parent_keys in viewitems(parent_map):
            if parent_keys is None:
                # parent_keys may be None if a given VersionedFile claims to
                # not support graph operations.
                continue
            just_parents.update(parent_keys)
            needed_keys.update(parent_keys)
            for p in parent_keys:
                refcounts[p] = setdefault(p, 0) + 1
        just_parents.difference_update(parent_map)
        # Remove any parents that are actually ghosts from the needed set
        self.present_parents = set(self.vf.get_parent_map(just_parents))
        self.ghost_parents = just_parents.difference(self.present_parents)
        needed_keys.difference_update(self.ghost_parents)
        self.needed_keys = needed_keys
        self.refcounts = refcounts
        return needed_keys, refcounts

    def _compute_diff(self, key, parent_lines, lines):
        """Compute a single mp_diff, and store it in self._diffs"""
        if len(parent_lines) > 0:
            # XXX: _extract_blocks is not usefully defined anywhere...
            #      It was meant to extract the left-parent diff without
            #      having to recompute it for Knit content (pack-0.92,
            #      etc). That seems to have regressed somewhere
            left_parent_blocks = self.vf._extract_blocks(key,
                                                         parent_lines[0], lines)
        else:
            left_parent_blocks = None
        diff = multiparent.MultiParent.from_lines(lines,
                                                  parent_lines, left_parent_blocks)
        self.diffs[key] = diff

    def _process_one_record(self, key, this_chunks):
        if key in self.parent_map:
            # This record should be ready to diff, since we requested
            # content in 'topological' order
            parent_keys = self.parent_map.pop(key)
            # If a VersionedFile claims 'no-graph' support, then it may return
            # None for any parent request, so we replace it with an empty tuple
            if parent_keys is None:
                parent_keys = ()
            parent_lines = []
            for p in parent_keys:
                # Alternatively we could check p not in self.needed_keys, but
                # ghost_parents should be tiny versus huge
                if p in self.ghost_parents:
                    continue
                refcount = self.refcounts[p]
                if refcount == 1: # Last child reference
                    self.refcounts.pop(p)
                    parent_chunks = self.chunks.pop(p)
                else:
                    self.refcounts[p] = refcount - 1
                    parent_chunks = self.chunks[p]
                p_lines = osutils.chunks_to_lines(parent_chunks)
                # TODO: Should we cache the line form? We did the
                #       computation to get it, but storing it this way will
                #       be less memory efficient...
                parent_lines.append(p_lines)
            lines = osutils.chunks_to_lines(this_chunks)
            # Since we needed the lines, we'll go ahead and cache them this way
            this_chunks = lines
            self._compute_diff(key, parent_lines, lines)
            del lines
        # Is this content required for any more children?
        if key in self.refcounts:
            self.chunks[key] = this_chunks

    def _extract_diffs(self):
        needed_keys, refcounts = self._find_needed_keys()
        for record in self.vf.get_record_stream(needed_keys,
                                                'topological', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self.vf)
            self._process_one_record(record.key,
                                     record.get_bytes_as('chunked'))

    def compute_diffs(self):
        self._extract_diffs()
        dpop = self.diffs.pop
        return [dpop(k) for k in self.ordered_keys]
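
# Illustrative sketch ('vf' and the key are assumptions): the generator
# requests texts in topological order so every parent text is already in
# self.chunks when a child needs it, and refcounts evict parents as soon
# as their last child has been diffed.
#
#   diffs = _MPDiffGenerator(vf, [(b'file-id', b'rev-2')]).compute_diffs()
#   # diffs[0] is a multiparent.MultiParent for (b'file-id', b'rev-2')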


class VersionedFile(object):
    """Versioned text file storage.

    A versioned file manages versions of line-based text files,
    keeping track of the originating version for each line.
    """

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if not isinstance(line, bytes):
                raise errors.BzrBadParameterUnicode("lines")

    def _check_lines_are_lines(self, lines):
        """Check that the lines really are full lines without inline EOL."""
        for line in lines:
            if b'\n' in line[:-1]:
                raise errors.BzrBadParameterContainsNewline("lines")

    def get_format_signature(self):
        """Get a text description of the data encoding in this file.

        :since: 0.90
        """
        raise NotImplementedError(self.get_format_signature)

    def make_mpdiffs(self, version_ids):
        """Create multiparent diffs for specified versions."""
        # XXX: Can't use _MPDiffGenerator just yet. This is because version_ids
        #      is a list of strings, not keys. And while self.get_record_stream
        #      is supported, it takes *keys*, while self.get_parent_map() takes
        #      strings ... *sigh*
        knit_versions = set()
        knit_versions.update(version_ids)
        parent_map = self.get_parent_map(version_ids)
        for version_id in version_ids:
            try:
                knit_versions.update(parent_map[version_id])
            except KeyError:
                raise errors.RevisionNotPresent(version_id, self)
        # We need to filter out ghosts, because we can't diff against them.
        knit_versions = set(self.get_parent_map(knit_versions))
        lines = dict(zip(knit_versions,
                         self._get_lf_split_line_list(knit_versions)))
        diffs = []
        for version_id in version_ids:
            target = lines[version_id]
            try:
                parents = [lines[p] for p in parent_map[version_id] if p in
                           knit_versions]
            except KeyError:
                # I don't know how this could ever trigger.
                # parent_map[version_id] was already triggered in the previous
                # for loop, and lines[p] has the 'if p in knit_versions' check,
                # so we again won't have a KeyError.
                raise errors.RevisionNotPresent(version_id, self)
            if len(parents) > 0:
                left_parent_blocks = self._extract_blocks(version_id,
                                                          parents[0], target)
            else:
                left_parent_blocks = None
            diffs.append(multiparent.MultiParent.from_lines(target, parents,
                                                            left_parent_blocks))
        return diffs

    def _extract_blocks(self, version_id, source, target):
        return None

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """
        # Does this need to call self._check_write_ok()? (IanC 20070919)
        vf_parents = {}
        mpvf = multiparent.MultiMemoryVersionedFile()
        versions = []
        for version, parent_ids, expected_sha1, mpdiff in records:
            versions.append(version)
            mpvf.add_diff(mpdiff, version, parent_ids)
        needed_parents = set()
        for version, parent_ids, expected_sha1, mpdiff in records:
            needed_parents.update(p for p in parent_ids
                                  if not mpvf.has_version(p))
        present_parents = set(self.get_parent_map(needed_parents))
        for parent_id, lines in zip(present_parents,
                                    self._get_lf_split_line_list(present_parents)):
            mpvf.add_version(lines, parent_id, [])
        for (version, parent_ids, expected_sha1, mpdiff), lines in zip(
                records, mpvf.get_line_list(versions)):
            if len(parent_ids) == 1:
                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                    mpvf.get_diff(parent_ids[0]).num_lines()))
            else:
                left_matching_blocks = None
            try:
                _, _, version_text = self.add_lines_with_ghosts(version,
                    parent_ids, lines, vf_parents,
                    left_matching_blocks=left_matching_blocks)
            except NotImplementedError:
                # The vf can't handle ghosts, so add lines normally, which will
                # (reasonably) fail if there are ghosts in the data.
                _, _, version_text = self.add_lines(version,
                    parent_ids, lines, vf_parents,
                    left_matching_blocks=left_matching_blocks)
            vf_parents[version] = version_text
        sha1s = self.get_sha1s(versions)
        for version, parent_ids, expected_sha1, mpdiff in records:
            if expected_sha1 != sha1s[version]:
                raise errors.VersionedFileInvalidChecksum(version)

    def _check_write_ok(self):
        """Is the versioned file marked as 'finished' ? Raise if it is."""
        if self.finished:
            raise errors.OutSideTransaction()
        if self._access_mode != 'w':
            raise errors.ReadOnlyObjectDirtiedError(self)

    def clear_cache(self):
        """Remove any data cached in the versioned file object."""

    def clone_text(self, new_version_id, old_version_id, parents):
        """Add an identical text to old_version_id as new_version_id.

        Must raise RevisionNotPresent if the old version or any of the
        parents are not present in file history.

        Must raise RevisionAlreadyPresent if the new version is
        already present in file history."""
        self._check_write_ok()
        return self._clone_text(new_version_id, old_version_id, parents)

    def _clone_text(self, new_version_id, old_version_id, parents):
        """Helper function to do the _clone_text work."""
        raise NotImplementedError(self.clone_text)

    def create_empty(self, name, transport, mode=None):
        """Create a new versioned file of this exact type.

        :param name: the file name
        :param transport: the transport
        :param mode: optional file mode.
        """
        raise NotImplementedError(self.create_empty)

    def fix_parents(self, version, new_parents):
        """Fix the parents list for version.

        This is done by appending a new version to the index
        with identical data except for the parents list.
        The parents list must be a superset of the current
        list.
        """
        self._check_write_ok()
        return self._fix_parents(version, new_parents)

    def _fix_parents(self, version, new_parents):
        """Helper for fix_parents."""
        raise NotImplementedError(self.fix_parents)

    def get_delta(self, version):
        """Get a delta for constructing version from some other version.

        :return: (delta_parent, sha1, noeol, delta)
            Where delta_parent is a version id or None to indicate no parent.
        """
        raise NotImplementedError(self.get_delta)

    def get_deltas(self, versions):
        """Get multiple deltas at once for constructing versions.

        :return: dict(version_id:(delta_parent, sha1, noeol, delta))
            Where delta_parent is a version id or None to indicate no parent, and
            version_id is the version_id created by that delta.
        """
        result = {}
        for version in versions:
            result[version] = self.get_delta(version)
        return result

    def get_sha1(self, version_id):
        """Get the stored sha1 sum for the given revision.

        :param version_id: The name of the version to lookup
        """
        raise NotImplementedError(self.get_sha1)

    def get_suffixes(self):
        """Return the file suffixes associated with this versioned file."""
        raise NotImplementedError(self.get_suffixes)

    def get_text(self, version_id):
        """Return version contents as a text string.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        return b''.join(self.get_lines(version_id))

    get_string = get_text

    def get_texts(self, version_ids):
        """Return the texts of listed versions as a list of strings.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        return [b''.join(self.get_lines(v)) for v in version_ids]

    def get_lines(self, version_id):
        """Return version contents as a sequence of lines.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        raise NotImplementedError(self.get_lines)

    def plan_merge(self, ver_a, ver_b, base=None):
        """Return pseudo-annotation indicating how the two versions merge.

        This is computed between versions a and b and their common
        base.

        Legend:
        unchanged   Alive in both a and b (possibly created in both)
        new-a       Created in a
        new-b       Created in b
        ghost-a     Killed in a, unborn in b
        ghost-b     Killed in b, unborn in a
        irrelevant  Not in either revision
        """
        raise NotImplementedError(VersionedFile.plan_merge)

    def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
                    b_marker=TextMerge.B_MARKER):
        return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
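
    # Illustrative sketch of the plan format consumed above: a plan is an
    # iterable of (state, line) pairs using the legend from plan_merge(),
    # e.g. [('unchanged', b'a\n'), ('new-a', b'b\n'), ('killed-b', b'c\n')];
    # weave_merge() folds such a plan into merged lines, inserting the
    # a_marker/b_marker conflict markers where the sides disagree.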


class RecordingVersionedFilesDecorator(object):
    """A minimal versioned files that records calls made on it.

    Only enough methods have been added to support tests using it to date.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning a new list to it.
    """

    def __init__(self, backing_vf):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        """
        self._backing_vf = backing_vf
        self.calls = []

    def add_lines(self, key, parents, lines, parent_texts=None,
                  left_matching_blocks=None, nostore_sha=None, random_id=False,
                  check_content=True):
        self.calls.append(("add_lines", key, parents, lines, parent_texts,
                           left_matching_blocks, nostore_sha, random_id, check_content))
        return self._backing_vf.add_lines(key, parents, lines, parent_texts,
                                          left_matching_blocks, nostore_sha, random_id, check_content)

    def add_content(self, factory, parent_texts=None,
                    left_matching_blocks=None, nostore_sha=None, random_id=False,
                    check_content=True):
        self.calls.append(("add_content", factory, parent_texts,
                           left_matching_blocks, nostore_sha, random_id, check_content))
        return self._backing_vf.add_content(
            factory, parent_texts, left_matching_blocks, nostore_sha,
            random_id, check_content)

    def check(self):
        self._backing_vf.check()

    def get_parent_map(self, keys):
        self.calls.append(("get_parent_map", copy(keys)))
        return self._backing_vf.get_parent_map(keys)

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", list(keys), sort_order,
                           include_delta_closure))
        return self._backing_vf.get_record_stream(keys, sort_order,
                                                  include_delta_closure)

    def get_sha1s(self, keys):
        self.calls.append(("get_sha1s", copy(keys)))
        return self._backing_vf.get_sha1s(keys)

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        self.calls.append(("iter_lines_added_or_present_in_keys", copy(keys)))
        return self._backing_vf.iter_lines_added_or_present_in_keys(keys, pb=pb)

    def keys(self):
        self.calls.append(("keys",))
        return self._backing_vf.keys()


class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
    """A VF that records calls, and returns keys in specific order.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning a new list to it.
    """

    def __init__(self, backing_vf, key_priority):
        """Create an OrderingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        :param key_priority: A dictionary defining what order keys should be
            returned from an 'unordered' get_record_stream request.
            Keys with lower priority are returned first, keys not present in
            the map get an implicit priority of 0, and are returned in
            lexicographical order.
        """
        RecordingVersionedFilesDecorator.__init__(self, backing_vf)
        self._key_priority = key_priority

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", list(keys), sort_order,
                           include_delta_closure))
        if sort_order == 'unordered':
            def sort_key(key):
                return (self._key_priority.get(key, 0), key)
            # Use a defined order by asking for the keys one-by-one from the
            # backing_vf
            for key in sorted(keys, key=sort_key):
                for record in self._backing_vf.get_record_stream([key],
                                                                 'unordered', include_delta_closure):
                    yield record
        else:
            for record in self._backing_vf.get_record_stream(keys, sort_order,
                                                             include_delta_closure):
                yield record


class KeyMapper(object):
    """KeyMappers map between keys and underlying partitioned storage."""

    def map(self, key):
        """Map key to an underlying storage identifier.

        :param key: A key tuple e.g. (b'file-id', b'revision-id').
        :return: An underlying storage identifier, specific to the partitioning
            mechanism.
        """
        raise NotImplementedError(self.map)

    def unmap(self, partition_id):
        """Map a partitioned storage id back to a key prefix.

        :param partition_id: The underlying partition id.
        :return: As much of a key (or prefix) as is derivable from the partition
            id.
        """
        raise NotImplementedError(self.unmap)


class ConstantMapper(KeyMapper):
    """A key mapper that maps to a constant result."""

    def __init__(self, result):
        """Create a ConstantMapper which will return result for all maps."""
        self._result = result

    def map(self, key):
        """See KeyMapper.map()."""
        return self._result


class URLEscapeMapper(KeyMapper):
    """Base class for use with transport backed storage.

    This provides a map and unmap wrapper that respectively url escape and
    unescape their outputs and inputs.
    """

    def map(self, key):
        """See KeyMapper.map()."""
        return urlutils.quote(self._map(key))

    def unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return self._unmap(urlutils.unquote(partition_id))


class PrefixMapper(URLEscapeMapper):
    """A key mapper that extracts the first component of a key.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        return key[0].decode('utf-8')

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (partition_id.encode('utf-8'),)


class HashPrefixMapper(URLEscapeMapper):
    """A key mapper that combines the first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        prefix = self._escape(key[0])
        return "%02x/%s" % (adler32(prefix) & 0xff, prefix.decode('utf-8'))

    def _escape(self, prefix):
        """No escaping needed here."""
        return prefix

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (self._unescape(osutils.basename(partition_id)).encode('utf-8'),)

    def _unescape(self, basename):
        """No unescaping needed for HashPrefixMapper."""
        return basename


class HashEscapedPrefixMapper(HashPrefixMapper):
    """Combines the escaped first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    _safe = bytearray(b"abcdefghijklmnopqrstuvwxyz0123456789-_@,.")

    def _escape(self, prefix):
        """Turn a key element into a filesystem safe string.

        This is similar to a plain urlutils.quote, except
        it uses specific safe characters, so that it doesn't
        have to translate a lot of valid file ids.
        """
        # @ does not get escaped. This is because it is a valid
        # filesystem character we use all the time, and it looks
        # a lot better than seeing %40 all the time.
        r = [(c in self._safe) and chr(c) or ('%%%02x' % c)
             for c in bytearray(prefix)]
        return ''.join(r).encode('ascii')

    def _unescape(self, basename):
        """Escaped names are easily unescaped by urlutils."""
        return urlutils.unquote(basename)
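
# Illustrative sketch (the key is made up): each mapper turns a key prefix
# into a storage path and back.
#
#   PrefixMapper().map((b'file-id', b'rev-1'))      # -> 'file-id'
#   PrefixMapper().unmap('file-id')                 # -> (b'file-id',)
#   HashPrefixMapper().map((b'file-id', b'rev-1'))  # -> e.g. '9f/file-id'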


def make_versioned_files_factory(versioned_file_factory, mapper):
    """Create a ThunkedVersionedFiles factory.

    This will create a callable which when called creates a
    ThunkedVersionedFiles on a transport, using mapper to access individual
    versioned files, and versioned_file_factory to create each individual file.
    """
    def factory(transport):
        return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
                                     lambda: True)
    return factory


class VersionedFiles(object):
    """Storage for many versioned files.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.

    The keyspace is expressed via simple tuples. Any instance of VersionedFiles
    may have a different length key-size, but that size will be constant for
    all texts added to or retrieved from it. For instance, breezy uses
    instances with a key-size of 2 for storing user files in a repository, with
    the first element the fileid, and the second the version of that file.

    The use of tuples allows a single code base to support several different
    uses with only the mapping logic changing from instance to instance.

    :ivar _immediate_fallback_vfs: For subclasses that support stacking,
        this is a list of other VersionedFiles immediately underneath this
        one. They may in turn each have further fallbacks.
    """

    def add_lines(self, key, parents, lines, parent_texts=None,
                  left_matching_blocks=None, nostore_sha=None, random_id=False,
                  check_content=True):
        """Add a text to the store.

        :param key: The key tuple of the text to add. If the last element is
            None, a CHK string will be generated during the addition.
        :param parents: The parents key tuples of the text to add.
        :param lines: A list of lines. Each line must be a bytestring. And all
            of them except the last must be terminated with \n and contain no
            other \n's. The last line may either contain no \n's or a single
            terminating \n. If the lines list does not meet this constraint the
            add routine may error or may succeed - but you will be unable to
            read the data back accurately. (Checking the lines have been split
            correctly is expensive and extremely unlikely to catch bugs so it
            is not done at runtime unless check_content is True.)
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations.  VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent.  The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future add_lines calls in the parent_texts dictionary.
        """
        raise NotImplementedError(self.add_lines)
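
    # Illustrative sketch (keys and texts are made up): add one text, then
    # reuse the opaque parent_texts result to help delta the next one.
    #
    #   parent_texts = {}
    #   sha1, size, opaque = vf.add_lines(
    #       (b'file-id', b'rev-1'), [], [b'hello\n'])
    #   parent_texts[(b'file-id', b'rev-1')] = opaque
    #   vf.add_lines((b'file-id', b'rev-2'), [(b'file-id', b'rev-1')],
    #                [b'hello\n', b'world\n'], parent_texts=parent_texts)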

    def add_content(self, factory, parent_texts=None,
                    left_matching_blocks=None, nostore_sha=None, random_id=False,
                    check_content=True):
        """Add a text to the store from a chunk iterable.

        :param factory: A ContentFactory supplying the key, parents and
            content. If the last element of the key is None, a CHK string
            will be generated during the addition.
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations.  VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent.  The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future add_lines calls in the parent_texts dictionary.
        """
        raise NotImplementedError(self.add_content)

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """
        vf_parents = {}
        mpvf = multiparent.MultiMemoryVersionedFile()
        versions = []
        for version, parent_ids, expected_sha1, mpdiff in records:
            versions.append(version)
            mpvf.add_diff(mpdiff, version, parent_ids)
        needed_parents = set()
        for version, parent_ids, expected_sha1, mpdiff in records:
            needed_parents.update(p for p in parent_ids
                                  if not mpvf.has_version(p))
        # It seems likely that adding all the present parents as fulltexts can
        # easily exhaust memory.
        for record in self.get_record_stream(needed_parents, 'unordered',
                                             True):
            if record.storage_kind == 'absent':
                continue
            mpvf.add_version(record.get_bytes_as('lines'), record.key, [])
        for (key, parent_keys, expected_sha1, mpdiff), lines in zip(
                records, mpvf.get_line_list(versions)):
            if len(parent_keys) == 1:
                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                    mpvf.get_diff(parent_keys[0]).num_lines()))
            else:
                left_matching_blocks = None
            version_sha1, _, version_text = self.add_lines(key,
                parent_keys, lines, vf_parents,
                left_matching_blocks=left_matching_blocks)
            if version_sha1 != expected_sha1:
                raise errors.VersionedFileInvalidChecksum(version)
            vf_parents[key] = version_text

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        raise NotImplementedError(self.annotate)

    def check(self, progress_bar=None):
        """Check this object for integrity.

        :param progress_bar: A progress bar to output as the check progresses.
        :param keys: Specific keys within the VersionedFiles to check. When
            this parameter is not None, check() becomes a generator as per
            get_record_stream. The difference to get_record_stream is that
            more or deeper checks will be performed.
        :return: None, or if keys was supplied a generator as per
            get_record_stream.
        """
        raise NotImplementedError(self.check)

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def clear_cache(self):
        """Clear whatever caches this VersionedFile holds.

        This is generally called after an operation has been performed, when we
        don't expect to be using this versioned file again soon.
        """

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not bytes:
                raise errors.BzrBadParameterUnicode("lines")

    def _check_lines_are_lines(self, lines):
        """Check that the lines really are full lines without inline EOL."""
        for line in lines:
            if b'\n' in line[:-1]:
                raise errors.BzrBadParameterContainsNewline("lines")

    def get_known_graph_ancestry(self, keys):
        """Get a KnownGraph instance with the ancestry of keys."""
        # most basic implementation is a loop around get_parent_map
        pending = set(keys)
        parent_map = {}
        while pending:
            this_parent_map = self.get_parent_map(pending)
            parent_map.update(this_parent_map)
            pending = set(itertools.chain.from_iterable(
                viewvalues(this_parent_map)))
            pending.difference_update(parent_map)
        kg = _mod_graph.KnownGraph(parent_map)
        return kg

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        raise NotImplementedError(self.get_parent_map)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """Get a stream of records for keys.

        :param keys: The keys to include.
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the opaque data).
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        raise NotImplementedError(self.get_record_stream)

    def get_sha1s(self, keys):
        """Get the sha1's of the texts for the given keys.

        :param keys: The names of the keys to lookup
        :return: a dict from key to sha1 digest. Keys of texts which are not
            present in the store are not present in the returned
            dictionary.
        """
        raise NotImplementedError(self.get_sha1s)

    __contains__ = index._has_key_from_parent_map

    def get_missing_compression_parent_keys(self):
        """Return an iterable of keys of missing compression parents.

        Check this after calling insert_record_stream to find out if there are
        any missing compression parents.  If there are, the records that
        depend on them are not able to be inserted safely. The precise
        behaviour depends on the concrete VersionedFiles class in use.

        Classes that do not support this will raise NotImplementedError.
        """
        raise NotImplementedError(self.get_missing_compression_parent_keys)

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        raise NotImplementedError(self.iter_lines_added_or_present_in_keys)

    def keys(self):
        """Return an iterable of the keys for all the contained texts."""
        raise NotImplementedError(self.keys)

    def make_mpdiffs(self, keys):
        """Create multiparent diffs for specified keys."""
        generator = _MPDiffGenerator(self, keys)
        return generator.compute_diffs()

    def get_annotator(self):
        return annotate.Annotator(self)

    missing_keys = index._missing_keys_from_parent_map

    def _extract_blocks(self, version_id, source, target):
        return None

    def _transitive_fallbacks(self):
        """Return the whole stack of fallback versionedfiles.

        This VersionedFiles may have a list of fallbacks, but it doesn't
        necessarily know about the whole stack going down, and it can't know
        at open time because they may change after the objects are opened.
        """
        all_fallbacks = []
        for a_vfs in self._immediate_fallback_vfs:
            all_fallbacks.append(a_vfs)
            all_fallbacks.extend(a_vfs._transitive_fallbacks())
        return all_fallbacks


class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.
    """

    def __init__(self, transport, file_factory, mapper, is_locked):
        """Create a ThunkedVersionedFiles."""
        self._transport = transport
        self._file_factory = file_factory
        self._mapper = mapper
        self._is_locked = is_locked

    def add_content(self, factory, parent_texts=None,
                    left_matching_blocks=None, nostore_sha=None, random_id=False):
        """See VersionedFiles.add_content()."""
        lines = factory.get_bytes_as('lines')
        return self.add_lines(
            factory.key, factory.parents, lines,
            parent_texts=parent_texts,
            left_matching_blocks=left_matching_blocks,
            nostore_sha=nostore_sha,
            random_id=random_id,
            check_content=True)

    def add_lines(self, key, parents, lines, parent_texts=None,
                  left_matching_blocks=None, nostore_sha=None, random_id=False,
                  check_content=True):
        """See VersionedFiles.add_lines()."""
        path = self._mapper.map(key)
        version_id = key[-1]
        parents = [parent[-1] for parent in parents]
        vf = self._get_vf(path)
        try:
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                                                parent_texts=parent_texts,
                                                left_matching_blocks=left_matching_blocks,
                                                nostore_sha=nostore_sha, random_id=random_id,
                                                check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                                    parent_texts=parent_texts,
                                    left_matching_blocks=left_matching_blocks,
                                    nostore_sha=nostore_sha, random_id=random_id,
                                    check_content=check_content)
        except errors.NoSuchFile:
            # parent directory may be missing, try again.
            self._transport.mkdir(osutils.dirname(path))
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                                                parent_texts=parent_texts,
                                                left_matching_blocks=left_matching_blocks,
                                                nostore_sha=nostore_sha, random_id=random_id,
                                                check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                                    parent_texts=parent_texts,
                                    left_matching_blocks=left_matching_blocks,
                                    nostore_sha=nostore_sha, random_id=random_id,
                                    check_content=check_content)

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        prefix = key[:-1]
        path = self._mapper.map(prefix)
        vf = self._get_vf(path)
        origins = vf.annotate(key[-1])
        result = []
        for origin, line in origins:
            result.append((prefix + (origin,), line))
        return result

    def check(self, progress_bar=None, keys=None):
        """See VersionedFiles.check()."""
        # XXX: This is over-enthusiastic but as we only thunk for Weaves today
        # this is tolerable. Ideally we'd pass keys down to check() and
        # have the older VersionedFile interface updated too.
        for prefix, vf in self._iter_all_components():
            vf.check()
        if keys is not None:
            return self.get_record_stream(keys, 'unordered', True)

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        prefixes = self._partition_keys(keys)
        result = {}
        for prefix, suffixes in viewitems(prefixes):
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            parent_map = vf.get_parent_map(suffixes)
            for key, parents in viewitems(parent_map):
                result[prefix + (key,)] = tuple(
                    prefix + (parent,) for parent in parents)
        return result

    def _get_vf(self, path):
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)
        return self._file_factory(path, self._transport, create=True,
                                  get_scope=lambda: None)

    def _partition_keys(self, keys):
        """Turn keys into a dict of prefix:suffix_list."""
        result = {}
        for key in keys:
            prefix_keys = result.setdefault(key[:-1], [])
            prefix_keys.append(key[-1])
        return result

    def _iter_all_prefixes(self):
        # Identify all key prefixes.
        # XXX: A bit hacky, needs polish.
        if isinstance(self._mapper, ConstantMapper):
            paths = [self._mapper.map(())]
            prefixes = [()]
        else:
            relpaths = set()
            for quoted_relpath in self._transport.iter_files_recursive():
                path, ext = os.path.splitext(quoted_relpath)
                relpaths.add(path)
            paths = list(relpaths)
            prefixes = [self._mapper.unmap(path) for path in paths]
        return zip(paths, prefixes)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream()."""
        # Ordering will be taken care of by each partitioned store; group keys
        # by prefix.
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            suffixes = [(suffix,) for suffix in suffixes]
            for record in vf.get_record_stream(suffixes, ordering,
                                               include_delta_closure):
                if record.parents is not None:
                    record.parents = tuple(
                        prefix + parent for parent in record.parents)
                record.key = prefix + record.key
                yield record

    def _iter_keys_vf(self, keys):
        prefixes = self._partition_keys(keys)
        for prefix, suffixes in viewitems(prefixes):
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            yield prefix, suffixes, vf

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s()."""
        sha1s = {}
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            vf_sha1s = vf.get_sha1s(suffixes)
            for suffix, sha1 in viewitems(vf_sha1s):
                sha1s[prefix + (suffix,)] = sha1
        return sha1s

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        for record in stream:
            prefix = record.key[:-1]
            key = record.key[-1:]
            if record.parents is not None:
                parents = [parent[-1:] for parent in record.parents]
            else:
                parents = None
            thunk_record = AdapterFactory(key, parents, record)
            path = self._mapper.map(prefix)
            # Note that this parses the file many times; we can do better but
            # as this only impacts weaves in terms of performance, it is
            # acceptable.
            vf = self._get_vf(path)
            vf.insert_record_stream([thunk_record])

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
                yield line, prefix + (version,)

    def _iter_all_components(self):
        for path, prefix in self._iter_all_prefixes():
            yield prefix, self._get_vf(path)

    def keys(self):
        """See VersionedFiles.keys()."""
        result = set()
        for prefix, vf in self._iter_all_components():
            for suffix in vf.versions():
                result.add(prefix + (suffix,))
        return result


class VersionedFilesWithFallbacks(VersionedFiles):

    def without_fallbacks(self):
        """Return a clone of this object without any fallbacks configured."""
        raise NotImplementedError(self.without_fallbacks)

    def add_fallback_versioned_files(self, a_versioned_files):
        """Add a source of texts for texts not present in this knit.

        :param a_versioned_files: A VersionedFiles object.
        """
        raise NotImplementedError(self.add_fallback_versioned_files)

    def get_known_graph_ancestry(self, keys):
        """Get a KnownGraph instance with the ancestry of keys."""
        parent_map, missing_keys = self._index.find_ancestry(keys)
        for fallback in self._transitive_fallbacks():
            if not missing_keys:
                break
            (f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
                missing_keys)
            parent_map.update(f_parent_map)
            missing_keys = f_missing_keys
        kg = _mod_graph.KnownGraph(parent_map)
        return kg


class _PlanMergeVersionedFile(VersionedFiles):
    """A VersionedFile for uncommitted and committed texts.

    It is intended to allow merges to be planned with working tree texts.
    It implements only the small part of the VersionedFiles interface used by
    PlanMerge. It falls back to multiple versionedfiles for data not stored in
    _PlanMergeVersionedFile itself.

    :ivar fallback_versionedfiles: a list of VersionedFiles objects that can be
        queried for missing texts.
    """

    def __init__(self, file_id):
        """Create a _PlanMergeVersionedFile.

        :param file_id: Used with _PlanMerge code which is not yet fully
            tuple-keyspace aware.
        """
        self._file_id = file_id
        # fallback locations
        self.fallback_versionedfiles = []
        # Parents for locally held keys.
        self._parents = {}
        # line data for locally held keys.
        self._lines = {}
        # key lookup providers
        self._providers = [_mod_graph.DictParentsProvider(self._parents)]

    def plan_merge(self, ver_a, ver_b, base=None):
        """See VersionedFile.plan_merge"""
        from ..merge import _PlanMerge
        if base is None:
            return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
        old_plan = list(_PlanMerge(ver_a, base, self,
                                   (self._file_id,)).plan_merge())
        new_plan = list(_PlanMerge(ver_a, ver_b, self,
                                   (self._file_id,)).plan_merge())
        return _PlanMerge._subtract_plans(old_plan, new_plan)

    def plan_lca_merge(self, ver_a, ver_b, base=None):
        from ..merge import _PlanLCAMerge
        graph = _mod_graph.Graph(self)
        new_plan = _PlanLCAMerge(
            ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
        if base is None:
            return new_plan
        old_plan = _PlanLCAMerge(
            ver_a, base, self, (self._file_id,), graph).plan_merge()
        return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))

    def add_content(self, factory):
        return self.add_lines(
            factory.key, factory.parents, factory.get_bytes_as('lines'))

    def add_lines(self, key, parents, lines):
        """See VersionedFiles.add_lines

        Lines are added locally, not to fallback versionedfiles. Also, ghosts
        are permitted. Only reserved ids are permitted.
        """
        if not isinstance(key, tuple):
            raise TypeError(key)
        if not revision.is_reserved_id(key[-1]):
            raise ValueError('Only reserved ids may be used')
        if parents is None:
            raise ValueError('Parents may not be None')
        if lines is None:
            raise ValueError('Lines may not be None')
        self._parents[key] = tuple(parents)
        self._lines[key] = lines
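
    # Illustrative sketch: only reserved revision ids (those ending in b':',
    # e.g. b'current:') may be added, which is how merge code layers an
    # uncommitted working-tree text over committed history. The keys below
    # are made up:
    #
    #   vf = _PlanMergeVersionedFile(b'file-id')
    #   vf.add_lines((b'file-id', b'current:'), [(b'file-id', b'rev-1')],
    #                [b'working tree text\n'])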

    def get_record_stream(self, keys, ordering, include_delta_closure):
        pending = set(keys)
        for key in keys:
            if key in self._lines:
                lines = self._lines[key]
                parents = self._parents[key]
                pending.remove(key)
                yield ChunkedContentFactory(
                    key, parents, None, lines,
                    chunks_are_lines=True)
        for versionedfile in self.fallback_versionedfiles:
            for record in versionedfile.get_record_stream(
                    pending, 'unordered', True):
                if record.storage_kind == 'absent':
                    continue
                pending.remove(record.key)
                yield record
            if not pending:
                return
        # report absent entries
        for key in pending:
            yield AbsentContentFactory(key)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map"""
        # We create a new provider because a fallback may have been added.
        # If we make fallbacks private we can update a stack list and avoid
        # object creation thrashing.
        keys = set(keys)
        result = {}
        if revision.NULL_REVISION in keys:
            keys.remove(revision.NULL_REVISION)
            result[revision.NULL_REVISION] = ()
        self._providers = self._providers[:1] + self.fallback_versionedfiles
        result.update(
            _mod_graph.StackedParentsProvider(
                self._providers).get_parent_map(keys))
        for key, parents in viewitems(result):
            if parents == ():
                result[key] = (revision.NULL_REVISION,)
        return result


class PlanWeaveMerge(TextMerge):
    """Weave merge that takes a plan as its input.

    This exists so that VersionedFile.plan_merge is implementable.
    Most callers will want to use WeaveMerge instead.
    """

    def __init__(self, plan, a_marker=TextMerge.A_MARKER,
                 b_marker=TextMerge.B_MARKER):
        TextMerge.__init__(self, a_marker, b_marker)
        self.plan = list(plan)

    def _merge_struct(self):
        lines_a = []
        lines_b = []
        ch_a = ch_b = False

        def outstanding_struct():
            if not lines_a and not lines_b:
                return
            elif ch_a and not ch_b:
                # one-sided change:
                yield (lines_a,)
            elif ch_b and not ch_a:
                # one-sided change:
                yield (lines_b,)
            elif lines_a == lines_b:
                yield (lines_a,)
            else:
                yield (lines_a, lines_b)

        for state, line in self.plan:
            if state == 'unchanged':
                # resync and flush queued conflicts changes if any
                for struct in outstanding_struct():
                    yield struct
                lines_a = []
                lines_b = []
                ch_a = ch_b = False

            if state == 'unchanged':
                if line:
                    yield ([line],)
            elif state == 'killed-a':
                ch_a = True
                lines_b.append(line)
            elif state == 'killed-b':
                ch_b = True
                lines_a.append(line)
            elif state == 'new-a':
                ch_a = True
                lines_a.append(line)
            elif state == 'new-b':
                ch_b = True
                lines_b.append(line)
            elif state == 'conflicted-a':
                ch_b = ch_a = True
                lines_a.append(line)
            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            elif state == 'killed-both':
                # This counts as a change, even though there is no associated
                # line
                ch_b = ch_a = True
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                                 'killed-base'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct

    def base_from_plan(self):
        """Construct a BASE file from the plan text."""
        base_lines = []
        for state, line in self.plan:
            if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
                # If unchanged, then this line is straight from base. If a or b
                # or both killed the line, then it *used* to be in base.
                base_lines.append(line)
            else:
                if state not in ('killed-base', 'irrelevant',
                                 'ghost-a', 'ghost-b',
                                 'new-a', 'new-b',
                                 'conflicted-a', 'conflicted-b'):
                    # killed-base, irrelevant means it doesn't apply
                    # ghost-a/ghost-b are harder to say for sure, but they
                    # aren't in the 'inc_c' which means they aren't in the
                    # shared base of a & b. So we don't include them. And
                    # obviously if the line is newly inserted, it isn't in base
                    # If 'conflicted-a' or b, then it is new vs one base, but
                    # old versus another base. However, if we make it present
                    # in the base, it will be deleted from the target, and it
                    # seems better to get a line doubled in the merge result,
                    # rather than have it deleted entirely.
                    # Example, each node is the 'text' at that point:
                    #           MN
                    #          /   \
                    #        MaN   MbN
                    #         |  X  |
                    #        MabN MbaN
                    #          \   /
                    #           ???
                    # There was a criss-cross conflict merge. Both sides
                    # include the other, but put themselves first.
                    # Weave marks this as a 'clean' merge, picking OTHER over
                    # THIS. (Though the details depend on order inserted into
                    # weave, etc.)
                    # LCA generates a plan:
                    # [('unchanged', M),
                    #  ('conflicted-b', b),
                    #  ('unchanged', a),
                    #  ('conflicted-a', b),
                    #  ('unchanged', N)]
                    # If you mark 'conflicted-*' as part of BASE, then a 3-way
                    # merge tool will cleanly generate "MaN" (as BASE vs THIS
                    # removes one 'b', and BASE vs OTHER removes the other)
                    # If you include neither, 3-way creates a clean "MbabN" as
                    # THIS adds one 'b', and OTHER does too.
                    # It seems that having the line 2 times is better than
                    # having it omitted. (Easier to manually delete than notice
                    # it needs to be added.)
                    raise AssertionError('Unknown state: %s' % (state,))
        return base_lines


class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
                 a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)


class VirtualVersionedFiles(VersionedFiles):
    """Dummy implementation for VersionedFiles that uses other functions for
    obtaining fulltexts and parent maps.

    This is always at the bottom of the stack and uses string keys
    (rather than tuples) internally.
    """

    def __init__(self, get_parent_map, get_lines):
        """Create a VirtualVersionedFiles.

        :param get_parent_map: Same signature as Repository.get_parent_map.
        :param get_lines: Should return lines for specified key or None if
            not available.
        """
        super(VirtualVersionedFiles, self).__init__()
        self._get_parent_map = get_parent_map
        self._get_lines = get_lines
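
    # Illustrative sketch ('repo' and 'lines_for_key' are assumptions):
    # adapt an object exposing a get_parent_map and a line lookup into the
    # VersionedFiles API.
    #
    #   vvf = VirtualVersionedFiles(repo.get_parent_map, lines_for_key)
    #   vvf.get_parent_map([(b'rev-1',)])  # -> {(b'rev-1',): ((b'rev-0',),)}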

    def check(self, progressbar=None):
        """See VersionedFiles.check.

        :note: Always returns True for VirtualVersionedFiles.
        """
        return True

    def add_mpdiffs(self, records):
        """See VersionedFiles.mpdiffs.

        :note: Not implemented for VirtualVersionedFiles.
        """
        raise NotImplementedError(self.add_mpdiffs)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map."""
        parent_view = viewitems(self._get_parent_map(k for (k,) in keys))
        return dict(((k,), tuple((p,) for p in v)) for k, v in parent_view)

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s."""
        ret = {}
        for (k,) in keys:
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                ret[(k,)] = osutils.sha_strings(lines)
        return ret
def get_record_stream(self, keys, ordering, include_delta_closure):
1856
"""See VersionedFiles.get_record_stream."""
1857
for (k,) in list(keys):
1858
lines = self._get_lines(k)
1859
if lines is not None:
1860
if not isinstance(lines, list):
1861
raise AssertionError
1862
yield ChunkedContentFactory(
1863
(k,), None, sha1=osutils.sha_strings(lines),
1864
chunks=lines, chunks_are_lines=True)
1866
yield AbsentContentFactory((k,))
1868
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1869
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
1870
for i, (key,) in enumerate(keys):
1872
pb.update("Finding changed lines", i, len(keys))
1873
for l in self._get_lines(key):
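
# A minimal usage sketch (hypothetical backing data): the two callables stand
# in for a real parent-map lookup and a real text store:
#
#   texts = {b'v1': [b'hello\n']}
#   parents = {b'v1': ()}
#   vvf = VirtualVersionedFiles(
#       lambda keys: {k: parents[k] for k in keys if k in parents},
#       texts.get)
#   vvf.get_parent_map([(b'v1',)])  # -> {(b'v1',): ()}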


class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    was already present.
    """

    def __init__(self, store):
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
                  left_matching_blocks=None, nostore_sha=None, random_id=False,
                  check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.
        """
        if nostore_sha:
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key[-1] is None:
            sha1 = osutils.sha_strings(lines)
            key = (b"sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
                                     parent_texts=parent_texts,
                                     left_matching_blocks=left_matching_blocks,
                                     nostore_sha=nostore_sha,
                                     random_id=random_id,
                                     check_content=check_content)

    def __getattr__(self, name):
        return getattr(self._store, name)
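
# A minimal usage sketch (assuming `store` is any VersionedFiles
# implementation): everything is forwarded to the wrapped store, except that
# a repeated add_lines for an already-present key becomes a no-op:
#
#   dedup = NoDupeAddLinesDecorator(store)
#   dedup.add_lines((b'key-1',), [], [b'line one\n'])
#   dedup.add_lines((b'key-1',), [], [b'line one\n'])  # skipped, not re-added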


def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off a record kind from the front of network_bytes.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    line_end = network_bytes.find(b'\n')
    storage_kind = network_bytes[:line_end].decode('ascii')
    return storage_kind, line_end + 1
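
# For example, given a (hypothetical) serialised record starting with its
# kind line:
#
#   kind, offset = network_bytes_to_kind_and_offset(b'fulltext\n...')
#   # kind == 'fulltext', offset == 9; the kind-specific body starts there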


class NetworkRecordStream(object):
    """A record_stream which reconstitutes a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator should have been obtained from a record_stream's
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        self._kind_factory = {
            'fulltext': fulltext_network_to_record,
            'groupcompress-block': groupcompress.network_block_to_records,
            'knit-ft-gz': knit.knit_network_to_record,
            'knit-delta-gz': knit.knit_network_to_record,
            'knit-annotated-ft-gz': knit.knit_network_to_record,
            'knit-annotated-delta-gz': knit.knit_network_to_record,
            'knit-delta-closure': knit.knit_delta_closure_to_records,
            }

    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
        for bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
            for record in self._kind_factory[storage_kind](
                    storage_kind, bytes, line_end):
                yield record
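
# A minimal receiving-side sketch (assuming `serialised` is an iterable of
# byte strings, each produced by record.get_bytes_as(record.storage_kind) on
# the sending side):
#
#   for record in NetworkRecordStream(serialised).read():
#       process(record)  # hypothetical consumer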


def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record."""
    meta_len, = struct.unpack('!L', bytes[line_end:line_end + 4])
    record_meta = bytes[line_end + 4:line_end + 4 + meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    if parents == b'nil':
        parents = None
    fulltext = bytes[line_end + 4 + meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]


def _length_prefix(bytes):
    return struct.pack('!L', len(bytes))


def record_to_fulltext_bytes(record):
    if record.parents is None:
        parents = b'nil'
    else:
        parents = record.parents
    record_meta = bencode.bencode((record.key, parents))
    record_content = record.get_bytes_as('fulltext')
    return b"fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)
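
# The resulting wire format is the kind line, a 4-byte big-endian length
# prefix, the bencoded (key, parents) metadata, then the raw fulltext:
#
#   b"fulltext\n" + struct.pack('!L', len(meta)) + meta + fulltext
#
# fulltext_network_to_record() above parses exactly this layout back into a
# FulltextContentFactory.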


def sort_groupcompress(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the key prefix.

    :return: A sorted-list of keys
    """
    # gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
    per_prefix_map = {}
    for item in viewitems(parent_map):
        key = item[0]
        if isinstance(key, bytes) or len(key) == 1:
            prefix = b''
        else:
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]

    present_keys = []
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys
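
# For example (hypothetical two-element keys sharing the b'f1' prefix):
#
#   parent_map = {(b'f1', b'r2'): ((b'f1', b'r1'),),
#                 (b'f1', b'r1'): ()}
#   sort_groupcompress(parent_map)
#   # -> [(b'f1', b'r2'), (b'f1', b'r1')]  (children before their parents)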


class _KeyRefs(object):

    def __init__(self, track_new_keys=False):
        # dict mapping 'key' to 'set of keys referring to that key'
        self.refs = {}
        if track_new_keys:
            # set remembering all new keys
            self.new_keys = set()
        else:
            self.new_keys = None

    def clear(self):
        if self.refs:
            self.refs.clear()
        if self.new_keys:
            self.new_keys.clear()

    def add_references(self, key, refs):
        # Record the new references
        for referenced in refs:
            try:
                needed_by = self.refs[referenced]
            except KeyError:
                needed_by = self.refs[referenced] = set()
            needed_by.add(key)
        # Discard references satisfied by the new key
        self.add_key(key)

    def get_new_keys(self):
        return self.new_keys

    def get_unsatisfied_refs(self):
        return self.refs.keys()

    def _satisfy_refs_for_key(self, key):
        try:
            del self.refs[key]
        except KeyError:
            # No keys depended on this key. That's ok.
            pass

    def add_key(self, key):
        # satisfy refs for key, and remember that we've seen this key.
        self._satisfy_refs_for_key(key)
        if self.new_keys is not None:
            self.new_keys.add(key)

    def satisfy_refs_for_keys(self, keys):
        for key in keys:
            self._satisfy_refs_for_key(key)

    def get_referrers(self):
        return set(itertools.chain.from_iterable(viewvalues(self.refs)))
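
# A minimal sketch of the reference-tracking behaviour (hypothetical keys):
#
#   refs = _KeyRefs()
#   refs.add_references((b'rev-2',), [(b'rev-1',)])  # rev-2 refers to rev-1
#   list(refs.get_unsatisfied_refs())                # -> [(b'rev-1',)]
#   refs.add_key((b'rev-1',))                        # rev-1 arrives
#   list(refs.get_unsatisfied_refs())                # -> []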


class InterVersionedFile(InterObject):
    """This class represents operations taking place between two VersionedFiles.

    Its instances have methods like join, and contain
    references to the source and target versionedfiles these operations can be
    carried out on.

    Often we will provide convenience methods on 'versionedfile' which carry out
    operations with another versionedfile - they will always forward to
    InterVersionedFile.get(other).method_name(parameters).
    """

    _optimisers = []
    """The available optimised InterVersionedFile types."""

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """Integrate versions from self.source into self.target.

        If version_ids is None all versions from source should be
        incorporated into this versioned file.

        Must raise RevisionNotPresent if any of the specified versions
        are not present in the other file's history, unless ignore_missing
        is supplied, in which case they are silently skipped.
        """
        # - if the target is empty, just add all the versions from
        #   source to target, otherwise:
        # - make a temporary versioned file of type target
        # - insert the source content into it one at a time
        if not self.target.versions():
            target = self.target
        else:
            # Make a new target-format versioned file.
            temp_source = self.target.create_empty("temp", MemoryTransport())
            target = temp_source
        graph = self.source.get_graph()
        order = topo_sort(graph.items())
        pb = ui.ui_factory.nested_progress_bar()
        parent_texts = {}
        try:
            # TODO for incremental cross-format work:
            # make a versioned file with the following content:
            # all revisions we have been asked to join
            # all their ancestors that are *not* in target already.
            # the immediate parents of the above two sets, with
            # empty parent lists - these versions are in target already
            # and the incorrect version data will be ignored.
            # TODO: for all ancestors that are present in target already,
            # check them for consistent data, this requires moving sha1 from
            # TODO: remove parent texts when they are not relevant any more for
            # memory pressure reduction. RBC 20060313
            # pb.update('Converting versioned data', 0, len(order))
            # deltas = self.source.get_deltas(order)
            for index, version in enumerate(order):
                pb.update('Converting versioned data', index, len(order))
                parent_text = target.add_lines(version,
                                               self.source.get_parents(version),
                                               self.source.get_lines(version),
                                               parent_texts=parent_texts)
                parent_texts[version] = parent_text
                #delta_parent, sha1, noeol, delta = deltas[version]
                #target.add_delta(version,
                #                 self.source.get_parents(version),
                #                 delta_parent,
                #                 sha1,
                #                 noeol,
                #                 delta)
                #target.get_lines(version)

            # this should hit the native code path for target
            if target is not self.target:
                return self.target.join(temp_source,
                                        pb,
                                        msg,
                                        version_ids,
                                        ignore_missing)
        finally:
            pb.finished()
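
# A minimal usage sketch (assuming `source_vf` and `target_vf` are old-style
# VersionedFile objects): the caller obtains the right optimiser via the
# InterObject machinery and joins all versions across:
#
#   InterVersionedFile.get(source_vf, target_vf).join()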


class InterVersionedFileTestProviderAdapter(object):
    """A tool to generate a suite testing multiple inter versioned-file classes.

    This is done by copying the test once for each interversionedfile provider
    and injecting the transport_server, transport_readonly_server,
    versionedfile_factory and versionedfile_factory_to classes into each copy.
    Each copy is also given a new id() to make it easy to identify.
    """

    def __init__(self, transport_server, transport_readonly_server, formats):
        self._transport_server = transport_server
        self._transport_readonly_server = transport_readonly_server
        self._formats = formats

    def adapt(self, test):
        result = TestSuite()
        for (interversionedfile_class,
             versionedfile_factory,
             versionedfile_factory_to) in self._formats:
            new_test = deepcopy(test)
            new_test.transport_server = self._transport_server
            new_test.transport_readonly_server = self._transport_readonly_server
            new_test.interversionedfile_class = interversionedfile_class
            new_test.versionedfile_factory = versionedfile_factory
            new_test.versionedfile_factory_to = versionedfile_factory_to
            def make_new_test_id():
                new_id = "%s(%s)" % (new_test.id(),
                                     interversionedfile_class.__name__)
                return lambda: new_id
            new_test.id = make_new_test_id()
            result.addTest(new_test)
        return result
    @staticmethod
    def default_test_list():
        """Generate the default list of interversionedfile permutations to test."""
        from bzrlib.weave import WeaveFile
        from bzrlib.knit import KnitVersionedFile
        result = []
        # test the fallback InterVersionedFile from weave to annotated knits
        result.append((InterVersionedFile,
                       WeaveFile,
                       KnitVersionedFile))
        for optimiser in InterVersionedFile._optimisers:
            result.append((optimiser,
                           optimiser._matching_file_factory,
                           optimiser._matching_file_factory))
        # if there are specific combinations we want to use, we can add them