# Copyright (C) 2005, 2006 Canonical Ltd
# Johan Rydberg <jrydberg@gnu.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Versioned text file storage api."""
22
from cStringIO import StringIO
24
from zlib import adler32
26
from bzrlib.lazy_import import lazy_import
27
lazy_import(globals(), """
37
from bzrlib.graph import Graph
38
from bzrlib.transport.memory import MemoryTransport
40
from bzrlib.inter import InterObject
41
from bzrlib.registry import Registry
42
from bzrlib.symbol_versioning import *
43
from bzrlib.textmerge import TextMerge
46
adapter_registry = Registry()
47
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
48
'DeltaPlainToFullText')
49
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
51
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
52
'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
53
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
54
'bzrlib.knit', 'DeltaAnnotatedToFullText')
55
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
56
'bzrlib.knit', 'FTAnnotatedToUnannotated')
57
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
58
'bzrlib.knit', 'FTAnnotatedToFullText')
61
class ContentFactory(object):
62
"""Abstract interface for insertion and retrieval from a VersionedFile.
64
:ivar sha1: None, or the sha1 of the content fulltext.
65
:ivar storage_kind: The native storage kind of this factory. One of
66
'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
67
'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
68
'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
69
:ivar key: The key of this content. Each key is a tuple with a single
71
:ivar parents: A tuple of parent keys for self.key. If the object has
72
no parent information, None (as opposed to () for an empty list of
77
"""Create a ContentFactory."""
79
self.storage_kind = None
84
class AbsentContentFactory(object):
85
"""A placeholder content factory for unavailable texts.
88
:ivar storage_kind: 'absent'.
89
:ivar key: The key of this content. Each key is a tuple with a single
94
def __init__(self, key):
95
"""Create a ContentFactory."""
97
self.storage_kind = 'absent'
102
def filter_absent(record_stream):
103
"""Adapt a record stream to remove absent records."""
104
for record in record_stream:
105
if record.storage_kind != 'absent':
109
class VersionedFile(object):
110
"""Versioned text file storage.
112
A versioned file manages versions of line-based text files,
113
keeping track of the originating version for each line.
115
To clients the "lines" of the file are represented as a list of
116
strings. These strings will typically have terminal newline
117
characters, but this is not required. In particular files commonly
118
do not have a newline at the end of the file.
120
Texts are identified by a version-id string.
124
def check_not_reserved_id(version_id):
125
revision.check_not_reserved_id(version_id)
127
def copy_to(self, name, transport):
128
"""Copy this versioned file to name on transport."""
129
raise NotImplementedError(self.copy_to)
131
def get_record_stream(self, versions, ordering, include_delta_closure):
132
"""Get a stream of records for versions.
134
:param versions: The versions to include. Each version is a tuple
136
:param ordering: Either 'unordered' or 'topological'. A topologically
137
sorted stream has compression parents strictly before their
139
:param include_delta_closure: If True then the closure across any
140
compression parents will be included (in the data content of the
141
stream, not in the emitted records). This guarantees that
142
'fulltext' can be used successfully on every record.
143
:return: An iterator of ContentFactory objects, each of which is only
144
valid until the iterator is advanced.
146
raise NotImplementedError(self.get_record_stream)
148
def has_version(self, version_id):
149
"""Returns whether version is present."""
150
raise NotImplementedError(self.has_version)
152
def insert_record_stream(self, stream):
153
"""Insert a record stream into this versioned file.
155
:param stream: A stream of records to insert.
157
:seealso VersionedFile.get_record_stream:
159
raise NotImplementedError
161
def add_lines(self, version_id, parents, lines, parent_texts=None,
162
left_matching_blocks=None, nostore_sha=None, random_id=False,
164
"""Add a single text on top of the versioned file.
166
Must raise RevisionAlreadyPresent if the new version is
167
already present in file history.
169
Must raise RevisionNotPresent if any of the given parents are
170
not present in file history.
172
:param lines: A list of lines. Each line must be a bytestring. And all
173
of them except the last must be terminated with \n and contain no
174
other \n's. The last line may either contain no \n's or a single
175
terminated \n. If the lines list does meet this constraint the add
176
routine may error or may succeed - but you will be unable to read
177
the data back accurately. (Checking the lines have been split
178
correctly is expensive and extremely unlikely to catch bugs so it
179
is not done at runtime unless check_content is True.)
180
:param parent_texts: An optional dictionary containing the opaque
181
representations of some or all of the parents of version_id to
182
allow delta optimisations. VERY IMPORTANT: the texts must be those
183
returned by add_lines or data corruption can be caused.
184
:param left_matching_blocks: a hint about which areas are common
185
between the text and its left-hand-parent. The format is
186
the SequenceMatcher.get_matching_blocks format.
187
:param nostore_sha: Raise ExistingContent and do not add the lines to
188
the versioned file if the digest of the lines matches this.
189
:param random_id: If True a random id has been selected rather than
190
an id determined by some deterministic process such as a converter
191
from a foreign VCS. When True the backend may choose not to check
192
for uniqueness of the resulting key within the versioned file, so
193
this should only be done when the result is expected to be unique
195
:param check_content: If True, the lines supplied are verified to be
196
bytestrings that are correctly formed lines.
197
:return: The text sha1, the number of bytes in the text, and an opaque
198
representation of the inserted version which can be provided
199
back to future add_lines calls in the parent_texts dictionary.
201
self._check_write_ok()
202
return self._add_lines(version_id, parents, lines, parent_texts,
203
left_matching_blocks, nostore_sha, random_id, check_content)
205
def _add_lines(self, version_id, parents, lines, parent_texts,
206
left_matching_blocks, nostore_sha, random_id, check_content):
207
"""Helper to do the class specific add_lines."""
208
raise NotImplementedError(self.add_lines)
210
def add_lines_with_ghosts(self, version_id, parents, lines,
211
parent_texts=None, nostore_sha=None, random_id=False,
212
check_content=True, left_matching_blocks=None):
213
"""Add lines to the versioned file, allowing ghosts to be present.
215
This takes the same parameters as add_lines and returns the same.
217
self._check_write_ok()
218
return self._add_lines_with_ghosts(version_id, parents, lines,
219
parent_texts, nostore_sha, random_id, check_content, left_matching_blocks)
221
def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
222
nostore_sha, random_id, check_content, left_matching_blocks):
223
"""Helper to do class specific add_lines_with_ghosts."""
224
raise NotImplementedError(self.add_lines_with_ghosts)
226
def check(self, progress_bar=None):
227
"""Check the versioned file for integrity."""
228
raise NotImplementedError(self.check)
230
def _check_lines_not_unicode(self, lines):
231
"""Check that lines being added to a versioned file are not unicode."""
233
if line.__class__ is not str:
234
raise errors.BzrBadParameterUnicode("lines")
236
def _check_lines_are_lines(self, lines):
237
"""Check that the lines really are full lines without inline EOL."""
239
if '\n' in line[:-1]:
240
raise errors.BzrBadParameterContainsNewline("lines")
242
def get_format_signature(self):
243
"""Get a text description of the data encoding in this file.
247
raise NotImplementedError(self.get_format_signature)
249
def make_mpdiffs(self, version_ids):
250
"""Create multiparent diffs for specified versions."""
251
knit_versions = set()
252
knit_versions.update(version_ids)
253
parent_map = self.get_parent_map(version_ids)
254
for version_id in version_ids:
256
knit_versions.update(parent_map[version_id])
258
raise RevisionNotPresent(version_id, self)
259
# We need to filter out ghosts, because we can't diff against them.
260
knit_versions = set(self.get_parent_map(knit_versions).keys())
261
lines = dict(zip(knit_versions,
262
self._get_lf_split_line_list(knit_versions)))
264
for version_id in version_ids:
265
target = lines[version_id]
267
parents = [lines[p] for p in parent_map[version_id] if p in
270
raise RevisionNotPresent(version_id, self)
272
left_parent_blocks = self._extract_blocks(version_id,
275
left_parent_blocks = None
276
diffs.append(multiparent.MultiParent.from_lines(target, parents,
280
def _extract_blocks(self, version_id, source, target):
283
def add_mpdiffs(self, records):
284
"""Add mpdiffs to this VersionedFile.
286
Records should be iterables of version, parents, expected_sha1,
287
mpdiff. mpdiff should be a MultiParent instance.
289
# Does this need to call self._check_write_ok()? (IanC 20070919)
291
mpvf = multiparent.MultiMemoryVersionedFile()
293
for version, parent_ids, expected_sha1, mpdiff in records:
294
versions.append(version)
295
mpvf.add_diff(mpdiff, version, parent_ids)
296
needed_parents = set()
297
for version, parent_ids, expected_sha1, mpdiff in records:
298
needed_parents.update(p for p in parent_ids
299
if not mpvf.has_version(p))
300
present_parents = set(self.get_parent_map(needed_parents).keys())
301
for parent_id, lines in zip(present_parents,
302
self._get_lf_split_line_list(present_parents)):
303
mpvf.add_version(lines, parent_id, [])
304
for (version, parent_ids, expected_sha1, mpdiff), lines in\
305
zip(records, mpvf.get_line_list(versions)):
306
if len(parent_ids) == 1:
307
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
308
mpvf.get_diff(parent_ids[0]).num_lines()))
310
left_matching_blocks = None
312
_, _, version_text = self.add_lines_with_ghosts(version,
313
parent_ids, lines, vf_parents,
314
left_matching_blocks=left_matching_blocks)
315
except NotImplementedError:
316
# The vf can't handle ghosts, so add lines normally, which will
317
# (reasonably) fail if there are ghosts in the data.
318
_, _, version_text = self.add_lines(version,
319
parent_ids, lines, vf_parents,
320
left_matching_blocks=left_matching_blocks)
321
vf_parents[version] = version_text
322
for (version, parent_ids, expected_sha1, mpdiff), sha1 in\
323
zip(records, self.get_sha1s(versions)):
324
if expected_sha1 != sha1:
325
raise errors.VersionedFileInvalidChecksum(version)
327
def get_sha1s(self, version_ids):
328
"""Get the stored sha1 sums for the given revisions.
330
:param version_ids: The names of the versions to lookup
331
:return: a list of sha1s in order according to the version_ids
333
raise NotImplementedError(self.get_sha1s)
335
def get_text(self, version_id):
336
"""Return version contents as a text string.
338
Raises RevisionNotPresent if version is not present in
341
return ''.join(self.get_lines(version_id))
342
get_string = get_text
344
def get_texts(self, version_ids):
345
"""Return the texts of listed versions as a list of strings.
347
Raises RevisionNotPresent if version is not present in
350
return [''.join(self.get_lines(v)) for v in version_ids]
352
def get_lines(self, version_id):
353
"""Return version contents as a sequence of lines.
355
Raises RevisionNotPresent if version is not present in
358
raise NotImplementedError(self.get_lines)
360
def _get_lf_split_line_list(self, version_ids):
361
return [StringIO(t).readlines() for t in self.get_texts(version_ids)]
363
def get_ancestry(self, version_ids, topo_sorted=True):
364
"""Return a list of all ancestors of given version(s). This
365
will not include the null revision.
367
This list will not be topologically sorted if topo_sorted=False is
370
Must raise RevisionNotPresent if any of the given versions are
371
not present in file history."""
372
if isinstance(version_ids, basestring):
373
version_ids = [version_ids]
374
raise NotImplementedError(self.get_ancestry)
376
def get_ancestry_with_ghosts(self, version_ids):
377
"""Return a list of all ancestors of given version(s). This
378
will not include the null revision.
380
Must raise RevisionNotPresent if any of the given versions are
381
not present in file history.
383
Ghosts that are known about will be included in ancestry list,
384
but are not explicitly marked.
386
raise NotImplementedError(self.get_ancestry_with_ghosts)
388
def get_parent_map(self, version_ids):
389
"""Get a map of the parents of version_ids.
391
:param version_ids: The version ids to look up parents for.
392
:return: A mapping from version id to parents.
394
raise NotImplementedError(self.get_parent_map)
396
def get_parents_with_ghosts(self, version_id):
397
"""Return version names for parents of version_id.
399
Will raise RevisionNotPresent if version_id is not present
402
Ghosts that are known about will be included in the parent list,
403
but are not explicitly marked.
406
return list(self.get_parent_map([version_id])[version_id])
408
raise errors.RevisionNotPresent(version_id, self)
410
def annotate(self, version_id):
411
"""Return a list of (version-id, line) tuples for version_id.
413
:raise RevisionNotPresent: If the given version is
414
not present in file history.
416
raise NotImplementedError(self.annotate)
418
@deprecated_method(one_five)
419
def join(self, other, pb=None, msg=None, version_ids=None,
420
ignore_missing=False):
421
"""Integrate versions from other into this versioned file.
423
If version_ids is None all versions from other should be
424
incorporated into this versioned file.
426
Must raise RevisionNotPresent if any of the specified versions
427
are not present in the other file's history unless ignore_missing
428
is supplied in which case they are silently skipped.
430
self._check_write_ok()
431
return InterVersionedFile.get(other, self).join(
437
def iter_lines_added_or_present_in_versions(self, version_ids=None,
439
"""Iterate over the lines in the versioned file from version_ids.
441
This may return lines from other versions. Each item the returned
442
iterator yields is a tuple of a line and a text version that that line
443
is present in (not introduced in).
445
Ordering of results is in whatever order is most suitable for the
446
underlying storage format.
448
If a progress bar is supplied, it may be used to indicate progress.
449
The caller is responsible for cleaning up progress bars (because this
452
NOTES: Lines are normalised: they will all have \n terminators.
453
Lines are returned in arbitrary order.
455
:return: An iterator over (line, version_id).
457
raise NotImplementedError(self.iter_lines_added_or_present_in_versions)
459
def plan_merge(self, ver_a, ver_b):
460
"""Return pseudo-annotation indicating how the two versions merge.
462
This is computed between versions a and b and their common
465
Weave lines present in none of them are skipped entirely.
468
killed-base Dead in base revision
469
killed-both Killed in each revision
472
unchanged Alive in both a and b (possibly created in both)
475
ghost-a Killed in a, unborn in b
476
ghost-b Killed in b, unborn in a
477
irrelevant Not in either revision
479
raise NotImplementedError(VersionedFile.plan_merge)
481
def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
482
b_marker=TextMerge.B_MARKER):
483
return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
486
class RecordingVersionedFileDecorator(object):
487
"""A minimal versioned file that records calls made on it.
489
Only enough methods have been added to support tests using it to date.
491
:ivar calls: A list of the calls made; can be reset at any time by
495
def __init__(self, backing_vf):
496
"""Create a RecordingVersionedFileDecorator decorating backing_vf.
498
:param backing_vf: The versioned file to answer all methods.
500
self._backing_vf = backing_vf
503
def get_lines(self, version_ids):
504
self.calls.append(("get_lines", version_ids))
505
return self._backing_vf.get_lines(version_ids)
508
class _PlanMergeVersionedFile(object):
509
"""A VersionedFile for uncommitted and committed texts.
511
It is intended to allow merges to be planned with working tree texts.
512
It implements only the small part of the VersionedFile interface used by
513
PlanMerge. It falls back to multiple versionedfiles for data not stored in
514
_PlanMergeVersionedFile itself.
517
def __init__(self, file_id, fallback_versionedfiles=None):
520
:param file_id: Used when raising exceptions.
521
:param fallback_versionedfiles: If supplied, the set of fallbacks to
522
use. Otherwise, _PlanMergeVersionedFile.fallback_versionedfiles
523
can be appended to later.
525
self._file_id = file_id
526
if fallback_versionedfiles is None:
527
self.fallback_versionedfiles = []
529
self.fallback_versionedfiles = fallback_versionedfiles
533
def plan_merge(self, ver_a, ver_b, base=None):
534
"""See VersionedFile.plan_merge"""
535
from bzrlib.merge import _PlanMerge
537
return _PlanMerge(ver_a, ver_b, self).plan_merge()
538
old_plan = list(_PlanMerge(ver_a, base, self).plan_merge())
539
new_plan = list(_PlanMerge(ver_a, ver_b, self).plan_merge())
540
return _PlanMerge._subtract_plans(old_plan, new_plan)
542
def plan_lca_merge(self, ver_a, ver_b, base=None):
543
from bzrlib.merge import _PlanLCAMerge
544
graph = self._get_graph()
545
new_plan = _PlanLCAMerge(ver_a, ver_b, self, graph).plan_merge()
548
old_plan = _PlanLCAMerge(ver_a, base, self, graph).plan_merge()
549
return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))
551
def add_lines(self, version_id, parents, lines):
552
"""See VersionedFile.add_lines
554
Lines are added locally, not fallback versionedfiles. Also, ghosts are
555
permitted. Only reserved ids are permitted.
557
if not revision.is_reserved_id(version_id):
558
raise ValueError('Only reserved ids may be used')
560
raise ValueError('Parents may not be None')
562
raise ValueError('Lines may not be None')
563
self._parents[version_id] = tuple(parents)
564
self._lines[version_id] = lines
566
def get_lines(self, version_id):
567
"""See VersionedFile.get_ancestry"""
568
lines = self._lines.get(version_id)
569
if lines is not None:
571
for versionedfile in self.fallback_versionedfiles:
573
return versionedfile.get_lines(version_id)
574
except errors.RevisionNotPresent:
577
raise errors.RevisionNotPresent(version_id, self._file_id)
579
def get_ancestry(self, version_id, topo_sorted=False):
580
"""See VersionedFile.get_ancestry.
582
Note that this implementation assumes that if a VersionedFile can
583
answer get_ancestry at all, it can give an authoritative answer. In
584
fact, ghosts can invalidate this assumption. But it's good enough
585
99% of the time, and far cheaper/simpler.
587
Also note that the results of this version are never topologically
588
sorted, and are a set.
591
raise ValueError('This implementation does not provide sorting')
592
parents = self._parents.get(version_id)
594
for vf in self.fallback_versionedfiles:
596
return vf.get_ancestry(version_id, topo_sorted=False)
597
except errors.RevisionNotPresent:
600
raise errors.RevisionNotPresent(version_id, self._file_id)
601
ancestry = set([version_id])
602
for parent in parents:
603
ancestry.update(self.get_ancestry(parent, topo_sorted=False))
606
def get_parent_map(self, version_ids):
607
"""See VersionedFile.get_parent_map"""
609
pending = set(version_ids)
610
for key in version_ids:
612
result[key] = self._parents[key]
615
pending = pending - set(result.keys())
616
for versionedfile in self.fallback_versionedfiles:
617
parents = versionedfile.get_parent_map(pending)
618
result.update(parents)
619
pending = pending - set(parents.keys())
624
def _get_graph(self):
625
from bzrlib.graph import (
628
_StackedParentsProvider,
630
from bzrlib.repofmt.knitrepo import _KnitParentsProvider
631
parent_providers = [DictParentsProvider(self._parents)]
632
for vf in self.fallback_versionedfiles:
633
parent_providers.append(_KnitParentsProvider(vf))
634
return Graph(_StackedParentsProvider(parent_providers))
637
class PlanWeaveMerge(TextMerge):
638
"""Weave merge that takes a plan as its input.
640
This exists so that VersionedFile.plan_merge is implementable.
641
Most callers will want to use WeaveMerge instead.
644
def __init__(self, plan, a_marker=TextMerge.A_MARKER,
645
b_marker=TextMerge.B_MARKER):
646
TextMerge.__init__(self, a_marker, b_marker)
649
def _merge_struct(self):
654
def outstanding_struct():
655
if not lines_a and not lines_b:
657
elif ch_a and not ch_b:
660
elif ch_b and not ch_a:
662
elif lines_a == lines_b:
665
yield (lines_a, lines_b)
667
# We previously considered either 'unchanged' or 'killed-both' lines
668
# to be possible places to resynchronize. However, assuming agreement
669
# on killed-both lines may be too aggressive. -- mbp 20060324
670
for state, line in self.plan:
671
if state == 'unchanged':
672
# resync and flush queued conflicts changes if any
673
for struct in outstanding_struct():
679
if state == 'unchanged':
682
elif state == 'killed-a':
685
elif state == 'killed-b':
688
elif state == 'new-a':
691
elif state == 'new-b':
694
elif state == 'conflicted-a':
697
elif state == 'conflicted-b':
701
if state not in ('irrelevant', 'ghost-a', 'ghost-b',
702
'killed-base', 'killed-both'):
703
raise AssertionError(state)
704
for struct in outstanding_struct():
708
class WeaveMerge(PlanWeaveMerge):
709
"""Weave merge that takes a VersionedFile and two versions as its input."""
711
def __init__(self, versionedfile, ver_a, ver_b,
712
a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
713
plan = versionedfile.plan_merge(ver_a, ver_b)
714
PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
717
class InterVersionedFile(InterObject):
718
"""This class represents operations taking place between two VersionedFiles.
720
Its instances have methods like join, and contain
721
references to the source and target versionedfiles these operations can be
724
Often we will provide convenience methods on 'versionedfile' which carry out
725
operations with another versionedfile - they will always forward to
726
InterVersionedFile.get(other).method_name(parameters).
730
"""The available optimised InterVersionedFile types."""
732
def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
733
"""Integrate versions from self.source into self.target.
735
If version_ids is None all versions from source should be
736
incorporated into this versioned file.
738
Must raise RevisionNotPresent if any of the specified versions
739
are not present in the other file's history unless ignore_missing is
740
supplied in which case they are silently skipped.
743
version_ids = self._get_source_version_ids(version_ids, ignore_missing)
744
graph = Graph(self.source)
745
search = graph._make_breadth_first_searcher(version_ids)
746
transitive_ids = set()
747
map(transitive_ids.update, list(search))
748
parent_map = self.source.get_parent_map(transitive_ids)
749
order = tsort.topo_sort(parent_map.items())
750
pb = ui.ui_factory.nested_progress_bar()
753
# TODO for incremental cross-format work:
754
# make a versioned file with the following content:
755
# all revisions we have been asked to join
756
# all their ancestors that are *not* in target already.
757
# the immediate parents of the above two sets, with
758
# empty parent lists - these versions are in target already
759
# and the incorrect version data will be ignored.
760
# TODO: for all ancestors that are present in target already,
761
# check them for consistent data, this requires moving sha1 from
763
# TODO: remove parent texts when they are not relevant any more for
764
# memory pressure reduction. RBC 20060313
765
# pb.update('Converting versioned data', 0, len(order))
767
for index, version in enumerate(order):
768
pb.update('Converting versioned data', index, total)
769
if version in target:
771
_, _, parent_text = target.add_lines(version,
773
self.source.get_lines(version),
774
parent_texts=parent_texts)
775
parent_texts[version] = parent_text
780
def _get_source_version_ids(self, version_ids, ignore_missing):
781
"""Determine the version ids to be used from self.source.
783
:param version_ids: The caller-supplied version ids to check. (None
784
for all). If None is in version_ids, it is stripped.
785
:param ignore_missing: if True, remove missing ids from the version
786
list. If False, raise RevisionNotPresent on
787
a missing version id.
788
:return: A set of version ids.
790
if version_ids is None:
791
# None cannot be in source.versions
792
return set(self.source.versions())
795
return set(self.source.versions()).intersection(set(version_ids))
797
new_version_ids = set()
798
for version in version_ids:
801
if not self.source.has_version(version):
802
raise errors.RevisionNotPresent(version, str(self.source))
804
new_version_ids.add(version)
805
return new_version_ids
808
class KeyMapper(object):
809
"""KeyMappers map between keys and underlying paritioned storage."""
812
"""Map key to an underlying storage identifier.
814
:param key: A key tuple e.g. ('file-id', 'revision-id').
815
:return: An underlying storage identifier, specific to the partitioning
819
def unmap(self, partition_id):
820
"""Map a partitioned storage id back to a key prefix.
822
:param partition_id: The underlying partition id.
823
:return: As much of a key (or prefix) as is derivable from the parition
828
class ConstantMapper(KeyMapper):
829
"""A key mapper that maps to a constant result."""
831
def __init__(self, result):
832
"""Create a ConstantMapper which will return result for all maps."""
833
self._result = result
836
"""See KeyMapper.map()."""
840
class PrefixMapper(KeyMapper):
841
"""A key mapper that extracts the first component of a key."""
844
"""See KeyMapper.map()."""
847
def unmap(self, partition_id):
848
"""See KeyMapper.unmap()."""
849
return (partition_id,)
852
class HashPrefixMapper(KeyMapper):
853
"""A key mapper that combines the first component of a key with a hash."""
856
"""See KeyMapper.map()."""
857
prefix = self._escape(key[0])
858
return "%02x/%s" % (adler32(prefix) & 0xff, prefix)
860
def _escape(self, prefix):
861
"""No escaping needed here."""
864
def unmap(self, partition_id):
865
"""See KeyMapper.unmap()."""
866
return (self._unescape(osutils.basename(partition_id)),)
868
def _unescape(self, basename):
869
"""No unescaping needed for HashPrefixMapper."""
873
class HashEscapedPrefixMapper(HashPrefixMapper):
874
"""Combines the escaped first component of a key with a hash."""
876
_safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."
878
def _escape(self, prefix):
879
"""Turn a key element into a filesystem safe string.
881
This is similar to a plain urllib.quote, except
882
it uses specific safe characters, so that it doesn't
883
have to translate a lot of valid file ids.
885
# @ does not get escaped. This is because it is a valid
886
# filesystem character we use all the time, and it looks
887
# a lot better than seeing %40 all the time.
888
r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
892
def _unescape(self, basename):
893
"""Escaped names are unescaped by urlutils."""
894
return urllib.unquote(basename)