# Copyright (C) 2005, 2006 Canonical Ltd
#
# Authors:
#   Johan Rydberg <jrydberg@gnu.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Versioned text file storage api."""
22
from cStringIO import StringIO
24
from zlib import adler32
26
from bzrlib.lazy_import import lazy_import
27
lazy_import(globals(), """
37
from bzrlib.graph import Graph
38
from bzrlib.transport.memory import MemoryTransport
40
from bzrlib.inter import InterObject
41
from bzrlib.registry import Registry
42
from bzrlib.symbol_versioning import *
43
from bzrlib.textmerge import TextMerge


adapter_registry = Registry()
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
    'DeltaPlainToFullText')
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
    'FTPlainToFullText')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
    'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
    'bzrlib.knit', 'DeltaAnnotatedToFullText')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
    'bzrlib.knit', 'FTAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
    'bzrlib.knit', 'FTAnnotatedToFullText')
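

def _example_adapter_lookup():
    # Illustrative sketch only (not part of the bzrlib API): adapters are
    # registered against (source storage kind, target storage kind) pairs
    # and resolved lazily, so bzrlib.knit is only imported when a conversion
    # between storage kinds is actually requested.
    adapter_factory = adapter_registry.get(('knit-delta-gz', 'fulltext'))
    return adapter_factory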


class ContentFactory(object):
    """Abstract interface for insertion and retrieval from a VersionedFile.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. One of
        'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
        'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
        'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = None
        self.key = None
        self.parents = None


class AbsentContentFactory(object):
    """A placeholder content factory for unavailable texts.

    :ivar sha1: None.
    :ivar storage_kind: 'absent'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: None.
    """

    def __init__(self, key):
        """Create an AbsentContentFactory."""
        self.sha1 = None
        self.storage_kind = 'absent'
        self.key = key
        self.parents = None


def filter_absent(record_stream):
    """Adapt a record stream to remove absent records."""
    for record in record_stream:
        if record.storage_kind != 'absent':
            yield record
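

def _example_filter_absent():
    # Illustrative sketch only (not part of the bzrlib API): absent
    # placeholders are dropped while real records pass through unchanged.
    records = [AbsentContentFactory(('v1',))]
    return list(filter_absent(records))  # -> []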


class VersionedFile(object):
    """Versioned text file storage.

    A versioned file manages versions of line-based text files,
    keeping track of the originating version for each line.

    To clients the "lines" of the file are represented as a list of
    strings. These strings will typically have terminal newline
    characters, but this is not required.  In particular files commonly
    do not have a newline at the end of the file.

    Texts are identified by a version-id string.
    """

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def copy_to(self, name, transport):
        """Copy this versioned file to name on transport."""
        raise NotImplementedError(self.copy_to)

    def get_record_stream(self, versions, ordering, include_delta_closure):
        """Get a stream of records for versions.

        :param versions: The versions to include. Each version is a tuple
            (version,).
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the opaque data).
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        raise NotImplementedError(self.get_record_stream)
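
    # A usage sketch (illustrative only; 'vf' and 'remember' are hypothetical
    # stand-ins for a concrete implementation and a caller-side store): copy
    # out the documented attributes before advancing, since each factory is
    # only valid until the next iteration:
    #
    #     for record in vf.get_record_stream(versions, 'topological', False):
    #         if record.storage_kind == 'absent':
    #             continue
    #         remember(record.key, record.parents)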

    def has_version(self, version_id):
        """Returns whether version is present."""
        raise NotImplementedError(self.has_version)

    def insert_record_stream(self, stream):
        """Insert a record stream into this versioned file.

        :param stream: A stream of records to insert.
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError

    def add_lines(self, version_id, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """Add a single text on top of the versioned file.

        Must raise RevisionAlreadyPresent if the new version is
        already present in file history.

        Must raise RevisionNotPresent if any of the given parents are
        not present in file history.

        :param lines: A list of lines. Each line must be a bytestring. And all
            of them except the last must be terminated with \n and contain no
            other \n's. The last line may either contain no \n's or a single
            terminating \n. If the lines list does not meet this constraint the
            add routine may error or may succeed - but you will be unable to
            read the data back accurately. (Checking the lines have been split
            correctly is expensive and extremely unlikely to catch bugs so it
            is not done at runtime unless check_content is True.)
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations.  VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent.  The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future add_lines calls in the parent_texts dictionary.
        """
        self._check_write_ok()
        return self._add_lines(version_id, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content)
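
    # A usage sketch (illustrative only; 'vf' is assumed to be a concrete
    # implementation): thread the opaque third return value back in through
    # parent_texts so the backend can compute deltas cheaply:
    #
    #     sha1, size, parent_text = vf.add_lines('rev-1', [], ['hello\n'])
    #     vf.add_lines('rev-2', ['rev-1'], ['hello\n', 'world\n'],
    #                  parent_texts={'rev-1': parent_text})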

    def _add_lines(self, version_id, parents, lines, parent_texts,
        left_matching_blocks, nostore_sha, random_id, check_content):
        """Helper to do the class specific add_lines."""
        raise NotImplementedError(self.add_lines)

    def add_lines_with_ghosts(self, version_id, parents, lines,
        parent_texts=None, nostore_sha=None, random_id=False,
        check_content=True, left_matching_blocks=None):
        """Add lines to the versioned file, allowing ghosts to be present.

        This takes the same parameters as add_lines and returns the same.
        """
        self._check_write_ok()
        return self._add_lines_with_ghosts(version_id, parents, lines,
            parent_texts, nostore_sha, random_id, check_content,
            left_matching_blocks)

    def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
        nostore_sha, random_id, check_content, left_matching_blocks):
        """Helper to do class specific add_lines_with_ghosts."""
        raise NotImplementedError(self.add_lines_with_ghosts)

    def check(self, progress_bar=None):
        """Check the versioned file for integrity."""
        raise NotImplementedError(self.check)

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

    def _check_lines_are_lines(self, lines):
        """Check that the lines really are full lines without inline EOL."""
        for line in lines:
            if '\n' in line[:-1]:
                raise errors.BzrBadParameterContainsNewline("lines")

    def get_format_signature(self):
        """Get a text description of the data encoding in this file.

        :since: 0.90
        """
        raise NotImplementedError(self.get_format_signature)

    def make_mpdiffs(self, version_ids):
        """Create multiparent diffs for specified versions."""
        knit_versions = set()
        knit_versions.update(version_ids)
        parent_map = self.get_parent_map(version_ids)
        for version_id in version_ids:
            try:
                knit_versions.update(parent_map[version_id])
            except KeyError:
                raise errors.RevisionNotPresent(version_id, self)
        # We need to filter out ghosts, because we can't diff against them.
        knit_versions = set(self.get_parent_map(knit_versions).keys())
        lines = dict(zip(knit_versions,
            self._get_lf_split_line_list(knit_versions)))
        diffs = []
        for version_id in version_ids:
            target = lines[version_id]
            try:
                parents = [lines[p] for p in parent_map[version_id] if p in
                    knit_versions]
            except KeyError:
                raise errors.RevisionNotPresent(version_id, self)
            if len(parents) > 0:
                left_parent_blocks = self._extract_blocks(version_id,
                    parents[0], target)
            else:
                left_parent_blocks = None
            diffs.append(multiparent.MultiParent.from_lines(target, parents,
                left_parent_blocks))
        return diffs

    def _extract_blocks(self, version_id, source, target):
        return None

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """
        # Does this need to call self._check_write_ok()? (IanC 20070919)
        vf_parents = {}
        mpvf = multiparent.MultiMemoryVersionedFile()
        versions = []
        for version, parent_ids, expected_sha1, mpdiff in records:
            versions.append(version)
            mpvf.add_diff(mpdiff, version, parent_ids)
        needed_parents = set()
        for version, parent_ids, expected_sha1, mpdiff in records:
            needed_parents.update(p for p in parent_ids
                                  if not mpvf.has_version(p))
        present_parents = set(self.get_parent_map(needed_parents).keys())
        for parent_id, lines in zip(present_parents,
                self._get_lf_split_line_list(present_parents)):
            mpvf.add_version(lines, parent_id, [])
        for (version, parent_ids, expected_sha1, mpdiff), lines in\
            zip(records, mpvf.get_line_list(versions)):
            if len(parent_ids) == 1:
                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                    mpvf.get_diff(parent_ids[0]).num_lines()))
            else:
                left_matching_blocks = None
            try:
                _, _, version_text = self.add_lines_with_ghosts(version,
                    parent_ids, lines, vf_parents,
                    left_matching_blocks=left_matching_blocks)
            except NotImplementedError:
                # The vf can't handle ghosts, so add lines normally, which will
                # (reasonably) fail if there are ghosts in the data.
                _, _, version_text = self.add_lines(version,
                    parent_ids, lines, vf_parents,
                    left_matching_blocks=left_matching_blocks)
            vf_parents[version] = version_text
        for (version, parent_ids, expected_sha1, mpdiff), sha1 in\
             zip(records, self.get_sha1s(versions)):
            if expected_sha1 != sha1:
                raise errors.VersionedFileInvalidChecksum(version)
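
    # A transfer sketch (illustrative only; 'source_vf' and 'target_vf' are
    # assumed concrete implementations holding version_ids): make_mpdiffs and
    # add_mpdiffs round-trip, with the sha1s verifying each reconstruction:
    #
    #     diffs = source_vf.make_mpdiffs(version_ids)
    #     parent_map = source_vf.get_parent_map(version_ids)
    #     sha1s = source_vf.get_sha1s(version_ids)
    #     target_vf.add_mpdiffs(
    #         [(v, parent_map[v], sha1, diff)
    #          for v, sha1, diff in zip(version_ids, sha1s, diffs)])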

    def get_sha1s(self, version_ids):
        """Get the stored sha1 sums for the given revisions.

        :param version_ids: The names of the versions to lookup
        :return: a list of sha1s in order according to the version_ids
        """
        raise NotImplementedError(self.get_sha1s)

    def get_text(self, version_id):
        """Return version contents as a text string.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        return ''.join(self.get_lines(version_id))
    get_string = get_text

    def get_texts(self, version_ids):
        """Return the texts of listed versions as a list of strings.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        return [''.join(self.get_lines(v)) for v in version_ids]

    def get_lines(self, version_id):
        """Return version contents as a sequence of lines.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        raise NotImplementedError(self.get_lines)

    def _get_lf_split_line_list(self, version_ids):
        return [StringIO(t).readlines() for t in self.get_texts(version_ids)]

    def get_ancestry(self, version_ids, topo_sorted=True):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        This list will not be topologically sorted if topo_sorted=False is
        specified.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history."""
        if isinstance(version_ids, basestring):
            version_ids = [version_ids]
        raise NotImplementedError(self.get_ancestry)

    def get_ancestry_with_ghosts(self, version_ids):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history.

        Ghosts that are known about will be included in ancestry list,
        but are not explicitly marked.
        """
        raise NotImplementedError(self.get_ancestry_with_ghosts)

    def get_parent_map(self, version_ids):
        """Get a map of the parents of version_ids.

        :param version_ids: The version ids to look up parents for.
        :return: A mapping from version id to parents.
        """
        raise NotImplementedError(self.get_parent_map)

    def get_parents_with_ghosts(self, version_id):
        """Return version names for parents of version_id.

        Will raise RevisionNotPresent if version_id is not present
        in the history.

        Ghosts that are known about will be included in the parent list,
        but are not explicitly marked.
        """
        try:
            return list(self.get_parent_map([version_id])[version_id])
        except KeyError:
            raise errors.RevisionNotPresent(version_id, self)

    def annotate(self, version_id):
        """Return a list of (version-id, line) tuples for version_id.

        :raise RevisionNotPresent: If the given version is
            not present in file history.
        """
        raise NotImplementedError(self.annotate)

    @deprecated_method(one_five)
    def join(self, other, pb=None, msg=None, version_ids=None,
             ignore_missing=False):
        """Integrate versions from other into this versioned file.

        If version_ids is None all versions from other should be
        incorporated into this versioned file.

        Must raise RevisionNotPresent if any of the specified versions
        are not present in the other file's history unless ignore_missing
        is supplied in which case they are silently skipped.
        """
        self._check_write_ok()
        return InterVersionedFile.get(other, self).join(
            pb,
            msg,
            version_ids,
            ignore_missing)

    def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                                pb=None):
        """Iterate over the lines in the versioned file from version_ids.

        This may return lines from other versions. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES: Lines are normalised: they will all have \n terminators.
               Lines are returned in arbitrary order.

        :return: An iterator over (line, version_id).
        """
        raise NotImplementedError(self.iter_lines_added_or_present_in_versions)

    def plan_merge(self, ver_a, ver_b):
        """Return pseudo-annotation indicating how the two versions merge.

        This is computed between versions a and b and their common
        base.

        Weave lines present in none of them are skipped entirely.

        Legend:
        killed-base Dead in base revision
        killed-both Killed in each revision
        killed-a    Killed in a
        killed-b    Killed in b
        unchanged   Alive in both a and b (possibly created in both)
        new-a       Created in a
        new-b       Created in b
        ghost-a     Killed in a, unborn in b
        ghost-b     Killed in b, unborn in a
        irrelevant  Not in either revision
        """
        raise NotImplementedError(VersionedFile.plan_merge)

    def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
                    b_marker=TextMerge.B_MARKER):
        return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]


class RecordingVersionedFileDecorator(object):
    """A minimal versioned file that records calls made on it.

    Only enough methods have been added to support tests using it to date.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to it.
    """

    def __init__(self, backing_vf):
        """Create a RecordingVersionedFileDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        """
        self._backing_vf = backing_vf
        self.calls = []

    def get_lines(self, version_ids):
        self.calls.append(("get_lines", version_ids))
        return self._backing_vf.get_lines(version_ids)
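

def _example_recording_decorator(backing_vf):
    # Illustrative sketch only (not part of the bzrlib API): wrap a real
    # versioned file, exercise it, then assert on the recorded calls. The
    # 'backing_vf' argument is an assumed concrete VersionedFile.
    recorder = RecordingVersionedFileDecorator(backing_vf)
    lines = recorder.get_lines('rev-1')
    assert recorder.calls == [('get_lines', 'rev-1')]
    return lines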


class _PlanMergeVersionedFile(object):
    """A VersionedFile for uncommitted and committed texts.

    It is intended to allow merges to be planned with working tree texts.
    It implements only the small part of the VersionedFile interface used by
    PlanMerge. It falls back to multiple versionedfiles for data not stored in
    _PlanMergeVersionedFile itself.
    """

    def __init__(self, file_id, fallback_versionedfiles=None):
        """Create a _PlanMergeVersionedFile.

        :param file_id: Used when raising exceptions.
        :param fallback_versionedfiles: If supplied, the set of fallbacks to
            use. Otherwise, _PlanMergeVersionedFile.fallback_versionedfiles
            can be appended to later.
        """
        self._file_id = file_id
        if fallback_versionedfiles is None:
            self.fallback_versionedfiles = []
        else:
            self.fallback_versionedfiles = fallback_versionedfiles
        self._parents = {}
        self._lines = {}

    def plan_merge(self, ver_a, ver_b, base=None):
        """See VersionedFile.plan_merge"""
        from bzrlib.merge import _PlanMerge
        if base is None:
            return _PlanMerge(ver_a, ver_b, self).plan_merge()
        old_plan = list(_PlanMerge(ver_a, base, self).plan_merge())
        new_plan = list(_PlanMerge(ver_a, ver_b, self).plan_merge())
        return _PlanMerge._subtract_plans(old_plan, new_plan)

    def plan_lca_merge(self, ver_a, ver_b, base=None):
        from bzrlib.merge import _PlanLCAMerge
        graph = self._get_graph()
        new_plan = _PlanLCAMerge(ver_a, ver_b, self, graph).plan_merge()
        if base is None:
            return new_plan
        old_plan = _PlanLCAMerge(ver_a, base, self, graph).plan_merge()
        return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))

    def add_lines(self, version_id, parents, lines):
        """See VersionedFile.add_lines

        Lines are added locally, not to fallback versionedfiles. Also, ghosts
        are permitted. Only reserved ids are permitted.
        """
        if not revision.is_reserved_id(version_id):
            raise ValueError('Only reserved ids may be used')
        if parents is None:
            raise ValueError('Parents may not be None')
        if lines is None:
            raise ValueError('Lines may not be None')
        self._parents[version_id] = tuple(parents)
        self._lines[version_id] = lines

    def get_lines(self, version_id):
        """See VersionedFile.get_lines"""
        lines = self._lines.get(version_id)
        if lines is not None:
            return lines
        for versionedfile in self.fallback_versionedfiles:
            try:
                return versionedfile.get_lines(version_id)
            except errors.RevisionNotPresent:
                continue
        else:
            raise errors.RevisionNotPresent(version_id, self._file_id)

    def get_ancestry(self, version_id, topo_sorted=False):
        """See VersionedFile.get_ancestry.

        Note that this implementation assumes that if a VersionedFile can
        answer get_ancestry at all, it can give an authoritative answer. In
        fact, ghosts can invalidate this assumption. But it's good enough
        99% of the time, and far cheaper/simpler.

        Also note that the results of this version are never topologically
        sorted, and are a set.
        """
        if topo_sorted:
            raise ValueError('This implementation does not provide sorting')
        parents = self._parents.get(version_id)
        if parents is None:
            for vf in self.fallback_versionedfiles:
                try:
                    return vf.get_ancestry(version_id, topo_sorted=False)
                except errors.RevisionNotPresent:
                    continue
            else:
                raise errors.RevisionNotPresent(version_id, self._file_id)
        ancestry = set([version_id])
        for parent in parents:
            ancestry.update(self.get_ancestry(parent, topo_sorted=False))
        return ancestry

    def get_parent_map(self, version_ids):
        """See VersionedFile.get_parent_map"""
        result = {}
        pending = set(version_ids)
        for key in version_ids:
            try:
                result[key] = self._parents[key]
            except KeyError:
                pass
        pending = pending - set(result.keys())
        for versionedfile in self.fallback_versionedfiles:
            parents = versionedfile.get_parent_map(pending)
            result.update(parents)
            pending = pending - set(parents.keys())
            if not pending:
                return result
        return result

    def _get_graph(self):
        from bzrlib.graph import (
            DictParentsProvider,
            Graph,
            _StackedParentsProvider,
            )
        from bzrlib.repofmt.knitrepo import _KnitParentsProvider
        parent_providers = [DictParentsProvider(self._parents)]
        for vf in self.fallback_versionedfiles:
            parent_providers.append(_KnitParentsProvider(vf))
        return Graph(_StackedParentsProvider(parent_providers))
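

def _example_plan_merge_vf():
    # Illustrative sketch only (not part of the bzrlib API):
    # _PlanMergeVersionedFile accepts only reserved version ids (bzrlib
    # reserves ids ending in ':'), which lets uncommitted working tree
    # texts take part in merge planning alongside committed texts.
    vf = _PlanMergeVersionedFile('a-file-id')
    vf.add_lines('this:', [], ['working tree text\n'])
    return vf.get_lines('this:')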


class PlanWeaveMerge(TextMerge):
    """Weave merge that takes a plan as its input.

    This exists so that VersionedFile.plan_merge is implementable.
    Most callers will want to use WeaveMerge instead.
    """

    def __init__(self, plan, a_marker=TextMerge.A_MARKER,
                 b_marker=TextMerge.B_MARKER):
        TextMerge.__init__(self, a_marker, b_marker)
        self.plan = plan

    def _merge_struct(self):
        lines_a = []
        lines_b = []
        ch_a = ch_b = False

        def outstanding_struct():
            if not lines_a and not lines_b:
                return
            elif ch_a and not ch_b:
                # one-sided change:
                yield (lines_a,)
            elif ch_b and not ch_a:
                yield (lines_b,)
            elif lines_a == lines_b:
                yield (lines_a,)
            else:
                yield (lines_a, lines_b)

        # We previously considered either 'unchanged' or 'killed-both' lines
        # to be possible places to resynchronize. However, assuming agreement
        # on killed-both lines may be too aggressive. -- mbp 20060324
        for state, line in self.plan:
            if state == 'unchanged':
                # resync and flush queued conflicts changes if any
                for struct in outstanding_struct():
                    yield struct
                lines_a = []
                lines_b = []
                ch_a = ch_b = False

            if state == 'unchanged':
                if line:
                    yield ([line],)
            elif state == 'killed-a':
                ch_a = True
                lines_b.append(line)
            elif state == 'killed-b':
                ch_b = True
                lines_a.append(line)
            elif state == 'new-a':
                ch_a = True
                lines_a.append(line)
            elif state == 'new-b':
                ch_b = True
                lines_b.append(line)
            elif state == 'conflicted-a':
                ch_b = ch_a = True
                lines_a.append(line)
            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            else:
                assert state in ('irrelevant', 'ghost-a', 'ghost-b',
                                 'killed-base', 'killed-both'), state
        for struct in outstanding_struct():
            yield struct


class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
                 a_marker=PlanWeaveMerge.A_MARKER,
                 b_marker=PlanWeaveMerge.B_MARKER):
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
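

def _example_plan_weave_merge():
    # Illustrative sketch only (not part of the bzrlib API): a hand-built
    # plan using the states documented in VersionedFile.plan_merge. Adjacent
    # new-a/new-b runs come out as a conflict region wrapped in the default
    # markers.
    plan = [('unchanged', 'common\n'),
            ('new-a', 'only in a\n'),
            ('new-b', 'only in b\n')]
    merged_lines = PlanWeaveMerge(plan).merge_lines()[0]
    return ''.join(merged_lines)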


class InterVersionedFile(InterObject):
    """This class represents operations taking place between two VersionedFiles.

    Its instances have methods like join, and contain
    references to the source and target versionedfiles these operations can be
    performed on.

    Often we will provide convenience methods on 'versionedfile' which carry out
    operations with another versionedfile - they will always forward to
    InterVersionedFile.get(other).method_name(parameters).
    """

    _optimisers = []
    """The available optimised InterVersionedFile types."""

    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
        """Integrate versions from self.source into self.target.

        If version_ids is None all versions from source should be
        incorporated into this versioned file.

        Must raise RevisionNotPresent if any of the specified versions
        are not present in the other file's history unless ignore_missing is
        supplied in which case they are silently skipped.
        """
        target = self.target
        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
        graph = Graph(self.source)
        search = graph._make_breadth_first_searcher(version_ids)
        transitive_ids = set()
        map(transitive_ids.update, list(search))
        parent_map = self.source.get_parent_map(transitive_ids)
        order = tsort.topo_sort(parent_map.items())
        pb = ui.ui_factory.nested_progress_bar()
        parent_texts = {}
        try:
            # TODO for incremental cross-format work:
            # make a versioned file with the following content:
            # all revisions we have been asked to join
            # all their ancestors that are *not* in target already.
            # the immediate parents of the above two sets, with
            # empty parent lists - these versions are in target already
            # and the incorrect version data will be ignored.
            # TODO: for all ancestors that are present in target already,
            # check them for consistent data, this requires moving sha1 from
            # TODO: remove parent texts when they are not relevant any more for
            # memory pressure reduction. RBC 20060313
            # pb.update('Converting versioned data', 0, len(order))
            total = len(order)
            for index, version in enumerate(order):
                pb.update('Converting versioned data', index, total)
                if version in target:
                    continue
                _, _, parent_text = target.add_lines(version,
                    parent_map[version],
                    self.source.get_lines(version),
                    parent_texts=parent_texts)
                parent_texts[version] = parent_text
            return total
        finally:
            pb.finished()

    def _get_source_version_ids(self, version_ids, ignore_missing):
        """Determine the version ids to be used from self.source.

        :param version_ids: The caller-supplied version ids to check. (None
            for all). If None is in version_ids, it is stripped.
        :param ignore_missing: if True, remove missing ids from the version
            list. If False, raise RevisionNotPresent on
            a missing version id.
        :return: A set of version ids.
        """
        if version_ids is None:
            # None cannot be in source.versions
            return set(self.source.versions())
        else:
            if ignore_missing:
                return set(self.source.versions()).intersection(
                    set(version_ids))
            else:
                new_version_ids = set()
                for version in version_ids:
                    if version is None:
                        continue
                    if not self.source.has_version(version):
                        raise errors.RevisionNotPresent(
                            version, str(self.source))
                    new_version_ids.add(version)
                return new_version_ids
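

def _example_inter_join(source_vf, target_vf):
    # Illustrative sketch only (not part of the bzrlib API): InterObject.get
    # picks the best registered optimiser for this (source, target) pair;
    # both arguments are assumed concrete VersionedFile implementations.
    return InterVersionedFile.get(source_vf, target_vf).join()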


class KeyMapper(object):
    """KeyMappers map between keys and underlying partitioned storage."""

    def map(self, key):
        """Map key to an underlying storage identifier.

        :param key: A key tuple e.g. ('file-id', 'revision-id').
        :return: An underlying storage identifier, specific to the partitioning
            mechanism.
        """
        raise NotImplementedError(self.map)

    def unmap(self, partition_id):
        """Map a partitioned storage id back to a key prefix.

        :param partition_id: The underlying partition id.
        :return: As much of a key (or prefix) as is derivable from the
            partition id.
        """
        raise NotImplementedError(self.unmap)


class ConstantMapper(KeyMapper):
    """A key mapper that maps to a constant result."""

    def __init__(self, result):
        """Create a ConstantMapper which will return result for all maps."""
        self._result = result

    def map(self, key):
        """See KeyMapper.map()."""
        return self._result


class PrefixMapper(KeyMapper):
    """A key mapper that extracts the first component of a key."""

    def map(self, key):
        """See KeyMapper.map()."""
        return key[0]

    def unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (partition_id,)


class HashPrefixMapper(KeyMapper):
    """A key mapper that combines the first component of a key with a hash."""

    def map(self, key):
        """See KeyMapper.map()."""
        prefix = self._escape(key[0])
        return "%02x/%s" % (adler32(prefix) & 0xff, prefix)

    def _escape(self, prefix):
        """No escaping needed here."""
        return prefix

    def unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (self._unescape(osutils.basename(partition_id)),)

    def _unescape(self, basename):
        """No unescaping needed for HashPrefixMapper."""
        return basename


class HashEscapedPrefixMapper(HashPrefixMapper):
    """Combines the escaped first component of a key with a hash."""

    _safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."

    def _escape(self, prefix):
        """Turn a key element into a filesystem safe string.

        This is similar to a plain urllib.quote, except
        it uses specific safe characters, so that it doesn't
        have to translate a lot of valid file ids.
        """
        # @ does not get escaped. This is because it is a valid
        # filesystem character we use all the time, and it looks
        # a lot better than seeing %40 all the time.
        r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
             for c in prefix]
        return ''.join(r)

    def _unescape(self, basename):
        """Escaped names are unescaped by urllib.unquote."""
        return urllib.unquote(basename)
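

def _example_key_mapping():
    # Illustrative sketch only (not part of the bzrlib API): how the mappers
    # partition keys into storage names, and how unmap inverts the prefix
    # mappers.
    assert ConstantMapper('inventory').map(('any', 'key')) == 'inventory'
    assert PrefixMapper().map(('file-id', 'rev-id')) == 'file-id'
    assert PrefixMapper().unmap('file-id') == ('file-id',)
    # HashPrefixMapper spreads prefixes over 256 hash buckets, producing
    # 'xx/file-id' where xx is adler32('file-id') & 0xff in hex.
    bucketed = HashPrefixMapper().map(('file-id', 'rev-id'))
    assert bucketed.endswith('/file-id')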