1
# Copyright (C) 2006-2011 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Versioned text file storage api."""
19
from __future__ import absolute_import

# NOTE(review): this import block was reconstructed from a garbled extraction;
# verify the lazy-imported module list against upstream breezy before relying
# on it.
from copy import copy
import os
import struct
from zlib import adler32

from ..lazy_import import lazy_import
lazy_import(globals(), """
from breezy import (
    annotate,
    bencode,
    errors,
    graph as _mod_graph,
    osutils,
    multiparent,
    tsort,
    revision,
    urlutils,
    )
from breezy.bzr import (
    groupcompress,
    index,
    knit,
    )
""")
from ..registry import Registry
from ..sixish import (
    BytesIO,
    viewitems,
    zip,
    )
from ..textmerge import TextMerge
adapter_registry = Registry()
57
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'breezy.bzr.knit',
58
'DeltaPlainToFullText')
59
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'breezy.bzr.knit',
61
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
62
'breezy.bzr.knit', 'DeltaAnnotatedToUnannotated')
63
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
64
'breezy.bzr.knit', 'DeltaAnnotatedToFullText')
65
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
66
'breezy.bzr.knit', 'FTAnnotatedToUnannotated')
67
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
68
'breezy.bzr.knit', 'FTAnnotatedToFullText')
69
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
70
# 'breezy.bzr.knit', 'FTAnnotatedToChunked')
73
class ContentFactory(object):
74
"""Abstract interface for insertion and retrieval from a VersionedFile.
76
:ivar sha1: None, or the sha1 of the content fulltext.
77
:ivar storage_kind: The native storage kind of this factory. One of
78
'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
79
'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
80
'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
81
:ivar key: The key of this content. Each key is a tuple with a single
83
:ivar parents: A tuple of parent keys for self.key. If the object has
84
no parent information, None (as opposed to () for an empty list of
89
"""Create a ContentFactory."""
91
self.storage_kind = None
96
class ChunkedContentFactory(ContentFactory):
97
"""Static data content factory.
99
This takes a 'chunked' list of strings. The only requirement on 'chunked' is
100
that ''.join(lines) becomes a valid fulltext. A tuple of a single string
101
satisfies this, as does a list of lines.
103
:ivar sha1: None, or the sha1 of the content fulltext.
104
:ivar storage_kind: The native storage kind of this factory. Always
106
:ivar key: The key of this content. Each key is a tuple with a single
108
:ivar parents: A tuple of parent keys for self.key. If the object has
109
no parent information, None (as opposed to () for an empty list of
113
def __init__(self, key, parents, sha1, chunks):
114
"""Create a ContentFactory."""
116
self.storage_kind = 'chunked'
118
self.parents = parents
119
self._chunks = chunks
121
def get_bytes_as(self, storage_kind):
122
if storage_kind == 'chunked':
124
elif storage_kind == 'fulltext':
125
return b''.join(self._chunks)
126
raise errors.UnavailableRepresentation(self.key, storage_kind,
130
class FulltextContentFactory(ContentFactory):
131
"""Static data content factory.
133
This takes a fulltext when created and just returns that during
134
get_bytes_as('fulltext').
136
:ivar sha1: None, or the sha1 of the content fulltext.
137
:ivar storage_kind: The native storage kind of this factory. Always
139
:ivar key: The key of this content. Each key is a tuple with a single
141
:ivar parents: A tuple of parent keys for self.key. If the object has
142
no parent information, None (as opposed to () for an empty list of
146
def __init__(self, key, parents, sha1, text):
147
"""Create a ContentFactory."""
149
self.storage_kind = 'fulltext'
151
self.parents = parents
152
if not isinstance(text, bytes):
153
raise TypeError(text)
156
def get_bytes_as(self, storage_kind):
157
if storage_kind == self.storage_kind:
159
elif storage_kind == 'chunked':
161
raise errors.UnavailableRepresentation(self.key, storage_kind,
165
class AbsentContentFactory(ContentFactory):
166
"""A placeholder content factory for unavailable texts.
169
:ivar storage_kind: 'absent'.
170
:ivar key: The key of this content. Each key is a tuple with a single
175
def __init__(self, key):
176
"""Create a ContentFactory."""
178
self.storage_kind = 'absent'
182
def get_bytes_as(self, storage_kind):
183
raise ValueError('A request was made for key: %s, but that'
184
' content is not available, and the calling'
185
' code does not handle if it is missing.'
189
class AdapterFactory(ContentFactory):
190
"""A content factory to adapt between key prefix's."""
192
def __init__(self, key, parents, adapted):
193
"""Create an adapter factory instance."""
195
self.parents = parents
196
self._adapted = adapted
198
def __getattr__(self, attr):
199
"""Return a member from the adapted object."""
200
if attr in ('key', 'parents'):
201
return self.__dict__[attr]
203
return getattr(self._adapted, attr)
206
def filter_absent(record_stream):
207
"""Adapt a record stream to remove absent records."""
208
for record in record_stream:
209
if record.storage_kind != 'absent':
213
class _MPDiffGenerator(object):
214
"""Pull out the functionality for generating mp_diffs."""
216
def __init__(self, vf, keys):
218
# This is the order the keys were requested in
219
self.ordered_keys = tuple(keys)
220
# keys + their parents, what we need to compute the diffs
221
self.needed_keys = ()
222
# Map from key: mp_diff
224
# Map from key: parents_needed (may have ghosts)
226
# Parents that aren't present
227
self.ghost_parents = ()
228
# Map from parent_key => number of children for this text
230
# Content chunks that are cached while we still need them
233
def _find_needed_keys(self):
234
"""Find the set of keys we need to request.
236
This includes all the original keys passed in, and the non-ghost
237
parents of those keys.
239
:return: (needed_keys, refcounts)
240
needed_keys is the set of all texts we need to extract
241
refcounts is a dict of {key: num_children} letting us know when we
242
no longer need to cache a given parent text
244
# All the keys and their parents
245
needed_keys = set(self.ordered_keys)
246
parent_map = self.vf.get_parent_map(needed_keys)
247
self.parent_map = parent_map
248
# TODO: Should we be using a different construct here? I think this
249
# uses difference_update internally, and we expect the result to
251
missing_keys = needed_keys.difference(parent_map)
253
raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
254
# Parents that might be missing. They are allowed to be ghosts, but we
255
# should check for them
257
setdefault = refcounts.setdefault
259
for child_key, parent_keys in viewitems(parent_map):
261
# parent_keys may be None if a given VersionedFile claims to
262
# not support graph operations.
264
just_parents.update(parent_keys)
265
needed_keys.update(parent_keys)
266
for p in parent_keys:
267
refcounts[p] = setdefault(p, 0) + 1
268
just_parents.difference_update(parent_map)
269
# Remove any parents that are actually ghosts from the needed set
270
self.present_parents = set(self.vf.get_parent_map(just_parents))
271
self.ghost_parents = just_parents.difference(self.present_parents)
272
needed_keys.difference_update(self.ghost_parents)
273
self.needed_keys = needed_keys
274
self.refcounts = refcounts
275
return needed_keys, refcounts
277
def _compute_diff(self, key, parent_lines, lines):
278
"""Compute a single mp_diff, and store it in self._diffs"""
279
if len(parent_lines) > 0:
280
# XXX: _extract_blocks is not usefully defined anywhere...
281
# It was meant to extract the left-parent diff without
282
# having to recompute it for Knit content (pack-0.92,
283
# etc). That seems to have regressed somewhere
284
left_parent_blocks = self.vf._extract_blocks(key,
285
parent_lines[0], lines)
287
left_parent_blocks = None
288
diff = multiparent.MultiParent.from_lines(lines,
289
parent_lines, left_parent_blocks)
290
self.diffs[key] = diff
292
def _process_one_record(self, key, this_chunks):
294
if key in self.parent_map:
295
# This record should be ready to diff, since we requested
296
# content in 'topological' order
297
parent_keys = self.parent_map.pop(key)
298
# If a VersionedFile claims 'no-graph' support, then it may return
299
# None for any parent request, so we replace it with an empty tuple
300
if parent_keys is None:
303
for p in parent_keys:
304
# Alternatively we could check p not in self.needed_keys, but
305
# ghost_parents should be tiny versus huge
306
if p in self.ghost_parents:
308
refcount = self.refcounts[p]
309
if refcount == 1: # Last child reference
310
self.refcounts.pop(p)
311
parent_chunks = self.chunks.pop(p)
313
self.refcounts[p] = refcount - 1
314
parent_chunks = self.chunks[p]
315
p_lines = osutils.chunks_to_lines(parent_chunks)
316
# TODO: Should we cache the line form? We did the
317
# computation to get it, but storing it this way will
318
# be less memory efficient...
319
parent_lines.append(p_lines)
321
lines = osutils.chunks_to_lines(this_chunks)
322
# Since we needed the lines, we'll go ahead and cache them this way
324
self._compute_diff(key, parent_lines, lines)
326
# Is this content required for any more children?
327
if key in self.refcounts:
328
self.chunks[key] = this_chunks
330
def _extract_diffs(self):
331
needed_keys, refcounts = self._find_needed_keys()
332
for record in self.vf.get_record_stream(needed_keys,
333
'topological', True):
334
if record.storage_kind == 'absent':
335
raise errors.RevisionNotPresent(record.key, self.vf)
336
self._process_one_record(record.key,
337
record.get_bytes_as('chunked'))
339
def compute_diffs(self):
340
self._extract_diffs()
341
dpop = self.diffs.pop
342
return [dpop(k) for k in self.ordered_keys]
345
class VersionedFile(object):
346
"""Versioned text file storage.
348
A versioned file manages versions of line-based text files,
349
keeping track of the originating version for each line.
351
To clients the "lines" of the file are represented as a list of
352
strings. These strings will typically have terminal newline
353
characters, but this is not required. In particular files commonly
354
do not have a newline at the end of the file.
356
Texts are identified by a version-id string.
360
def check_not_reserved_id(version_id):
361
revision.check_not_reserved_id(version_id)
363
def copy_to(self, name, transport):
364
"""Copy this versioned file to name on transport."""
365
raise NotImplementedError(self.copy_to)
367
def get_record_stream(self, versions, ordering, include_delta_closure):
368
"""Get a stream of records for versions.
370
:param versions: The versions to include. Each version is a tuple
372
:param ordering: Either 'unordered' or 'topological'. A topologically
373
sorted stream has compression parents strictly before their
375
:param include_delta_closure: If True then the closure across any
376
compression parents will be included (in the data content of the
377
stream, not in the emitted records). This guarantees that
378
'fulltext' can be used successfully on every record.
379
:return: An iterator of ContentFactory objects, each of which is only
380
valid until the iterator is advanced.
382
raise NotImplementedError(self.get_record_stream)
384
def has_version(self, version_id):
385
"""Returns whether version is present."""
386
raise NotImplementedError(self.has_version)
388
def insert_record_stream(self, stream):
389
"""Insert a record stream into this versioned file.
391
:param stream: A stream of records to insert.
393
:seealso VersionedFile.get_record_stream:
395
raise NotImplementedError
397
def add_lines(self, version_id, parents, lines, parent_texts=None,
398
left_matching_blocks=None, nostore_sha=None, random_id=False,
400
"""Add a single text on top of the versioned file.
402
Must raise RevisionAlreadyPresent if the new version is
403
already present in file history.
405
Must raise RevisionNotPresent if any of the given parents are
406
not present in file history.
408
:param lines: A list of lines. Each line must be a bytestring. And all
409
of them except the last must be terminated with \n and contain no
410
other \n's. The last line may either contain no \n's or a single
411
terminated \n. If the lines list does meet this constraint the add
412
routine may error or may succeed - but you will be unable to read
413
the data back accurately. (Checking the lines have been split
414
correctly is expensive and extremely unlikely to catch bugs so it
415
is not done at runtime unless check_content is True.)
416
:param parent_texts: An optional dictionary containing the opaque
417
representations of some or all of the parents of version_id to
418
allow delta optimisations. VERY IMPORTANT: the texts must be those
419
returned by add_lines or data corruption can be caused.
420
:param left_matching_blocks: a hint about which areas are common
421
between the text and its left-hand-parent. The format is
422
the SequenceMatcher.get_matching_blocks format.
423
:param nostore_sha: Raise ExistingContent and do not add the lines to
424
the versioned file if the digest of the lines matches this.
425
:param random_id: If True a random id has been selected rather than
426
an id determined by some deterministic process such as a converter
427
from a foreign VCS. When True the backend may choose not to check
428
for uniqueness of the resulting key within the versioned file, so
429
this should only be done when the result is expected to be unique
431
:param check_content: If True, the lines supplied are verified to be
432
bytestrings that are correctly formed lines.
433
:return: The text sha1, the number of bytes in the text, and an opaque
434
representation of the inserted version which can be provided
435
back to future add_lines calls in the parent_texts dictionary.
437
self._check_write_ok()
438
return self._add_lines(version_id, parents, lines, parent_texts,
439
left_matching_blocks, nostore_sha, random_id, check_content)
441
def _add_lines(self, version_id, parents, lines, parent_texts,
442
left_matching_blocks, nostore_sha, random_id, check_content):
443
"""Helper to do the class specific add_lines."""
444
raise NotImplementedError(self.add_lines)
446
def add_lines_with_ghosts(self, version_id, parents, lines,
447
parent_texts=None, nostore_sha=None, random_id=False,
448
check_content=True, left_matching_blocks=None):
449
"""Add lines to the versioned file, allowing ghosts to be present.
451
This takes the same parameters as add_lines and returns the same.
453
self._check_write_ok()
454
return self._add_lines_with_ghosts(version_id, parents, lines,
455
parent_texts, nostore_sha, random_id, check_content, left_matching_blocks)
457
def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
458
nostore_sha, random_id, check_content, left_matching_blocks):
459
"""Helper to do class specific add_lines_with_ghosts."""
460
raise NotImplementedError(self.add_lines_with_ghosts)
462
def check(self, progress_bar=None):
463
"""Check the versioned file for integrity."""
464
raise NotImplementedError(self.check)
466
def _check_lines_not_unicode(self, lines):
467
"""Check that lines being added to a versioned file are not unicode."""
469
if not isinstance(line, bytes):
470
raise errors.BzrBadParameterUnicode("lines")
472
def _check_lines_are_lines(self, lines):
473
"""Check that the lines really are full lines without inline EOL."""
475
if b'\n' in line[:-1]:
476
raise errors.BzrBadParameterContainsNewline("lines")
478
def get_format_signature(self):
479
"""Get a text description of the data encoding in this file.
483
raise NotImplementedError(self.get_format_signature)
485
def make_mpdiffs(self, version_ids):
486
"""Create multiparent diffs for specified versions."""
487
# XXX: Can't use _MPDiffGenerator just yet. This is because version_ids
488
# is a list of strings, not keys. And while self.get_record_stream
489
# is supported, it takes *keys*, while self.get_parent_map() takes
491
knit_versions = set()
492
knit_versions.update(version_ids)
493
parent_map = self.get_parent_map(version_ids)
494
for version_id in version_ids:
496
knit_versions.update(parent_map[version_id])
498
raise errors.RevisionNotPresent(version_id, self)
499
# We need to filter out ghosts, because we can't diff against them.
500
knit_versions = set(self.get_parent_map(knit_versions))
501
lines = dict(zip(knit_versions,
502
self._get_lf_split_line_list(knit_versions)))
504
for version_id in version_ids:
505
target = lines[version_id]
507
parents = [lines[p] for p in parent_map[version_id] if p in
510
# I don't know how this could ever trigger.
511
# parent_map[version_id] was already triggered in the previous
512
# for loop, and lines[p] has the 'if p in knit_versions' check,
513
# so we again won't have a KeyError.
514
raise errors.RevisionNotPresent(version_id, self)
516
left_parent_blocks = self._extract_blocks(version_id,
519
left_parent_blocks = None
520
diffs.append(multiparent.MultiParent.from_lines(target, parents,
524
def _extract_blocks(self, version_id, source, target):
527
def add_mpdiffs(self, records):
528
"""Add mpdiffs to this VersionedFile.
530
Records should be iterables of version, parents, expected_sha1,
531
mpdiff. mpdiff should be a MultiParent instance.
533
# Does this need to call self._check_write_ok()? (IanC 20070919)
535
mpvf = multiparent.MultiMemoryVersionedFile()
537
for version, parent_ids, expected_sha1, mpdiff in records:
538
versions.append(version)
539
mpvf.add_diff(mpdiff, version, parent_ids)
540
needed_parents = set()
541
for version, parent_ids, expected_sha1, mpdiff in records:
542
needed_parents.update(p for p in parent_ids
543
if not mpvf.has_version(p))
544
present_parents = set(self.get_parent_map(needed_parents))
545
for parent_id, lines in zip(present_parents,
546
self._get_lf_split_line_list(present_parents)):
547
mpvf.add_version(lines, parent_id, [])
548
for (version, parent_ids, expected_sha1, mpdiff), lines in zip(
549
records, mpvf.get_line_list(versions)):
550
if len(parent_ids) == 1:
551
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
552
mpvf.get_diff(parent_ids[0]).num_lines()))
554
left_matching_blocks = None
556
_, _, version_text = self.add_lines_with_ghosts(version,
557
parent_ids, lines, vf_parents,
558
left_matching_blocks=left_matching_blocks)
559
except NotImplementedError:
560
# The vf can't handle ghosts, so add lines normally, which will
561
# (reasonably) fail if there are ghosts in the data.
562
_, _, version_text = self.add_lines(version,
563
parent_ids, lines, vf_parents,
564
left_matching_blocks=left_matching_blocks)
565
vf_parents[version] = version_text
566
sha1s = self.get_sha1s(versions)
567
for version, parent_ids, expected_sha1, mpdiff in records:
568
if expected_sha1 != sha1s[version]:
569
raise errors.VersionedFileInvalidChecksum(version)
571
def get_text(self, version_id):
572
"""Return version contents as a text string.
574
Raises RevisionNotPresent if version is not present in
577
return b''.join(self.get_lines(version_id))
578
get_string = get_text
580
def get_texts(self, version_ids):
581
"""Return the texts of listed versions as a list of strings.
583
Raises RevisionNotPresent if version is not present in
586
return [b''.join(self.get_lines(v)) for v in version_ids]
588
def get_lines(self, version_id):
589
"""Return version contents as a sequence of lines.
591
Raises RevisionNotPresent if version is not present in
594
raise NotImplementedError(self.get_lines)
596
def _get_lf_split_line_list(self, version_ids):
597
return [BytesIO(t).readlines() for t in self.get_texts(version_ids)]
599
def get_ancestry(self, version_ids, topo_sorted=True):
600
"""Return a list of all ancestors of given version(s). This
601
will not include the null revision.
603
This list will not be topologically sorted if topo_sorted=False is
606
Must raise RevisionNotPresent if any of the given versions are
607
not present in file history."""
608
raise NotImplementedError(self.get_ancestry)
610
def get_ancestry_with_ghosts(self, version_ids):
611
"""Return a list of all ancestors of given version(s). This
612
will not include the null revision.
614
Must raise RevisionNotPresent if any of the given versions are
615
not present in file history.
617
Ghosts that are known about will be included in ancestry list,
618
but are not explicitly marked.
620
raise NotImplementedError(self.get_ancestry_with_ghosts)
622
def get_parent_map(self, version_ids):
623
"""Get a map of the parents of version_ids.
625
:param version_ids: The version ids to look up parents for.
626
:return: A mapping from version id to parents.
628
raise NotImplementedError(self.get_parent_map)
630
def get_parents_with_ghosts(self, version_id):
631
"""Return version names for parents of version_id.
633
Will raise RevisionNotPresent if version_id is not present
636
Ghosts that are known about will be included in the parent list,
637
but are not explicitly marked.
640
return list(self.get_parent_map([version_id])[version_id])
642
raise errors.RevisionNotPresent(version_id, self)
644
def annotate(self, version_id):
645
"""Return a list of (version-id, line) tuples for version_id.
647
:raise RevisionNotPresent: If the given version is
648
not present in file history.
650
raise NotImplementedError(self.annotate)
652
def iter_lines_added_or_present_in_versions(self, version_ids=None,
654
"""Iterate over the lines in the versioned file from version_ids.
656
This may return lines from other versions. Each item the returned
657
iterator yields is a tuple of a line and a text version that that line
658
is present in (not introduced in).
660
Ordering of results is in whatever order is most suitable for the
661
underlying storage format.
663
If a progress bar is supplied, it may be used to indicate progress.
664
The caller is responsible for cleaning up progress bars (because this
667
NOTES: Lines are normalised: they will all have \n terminators.
668
Lines are returned in arbitrary order.
670
:return: An iterator over (line, version_id).
672
raise NotImplementedError(self.iter_lines_added_or_present_in_versions)
674
def plan_merge(self, ver_a, ver_b, base=None):
675
"""Return pseudo-annotation indicating how the two versions merge.
677
This is computed between versions a and b and their common
680
Weave lines present in none of them are skipped entirely.
683
killed-base Dead in base revision
684
killed-both Killed in each revision
687
unchanged Alive in both a and b (possibly created in both)
690
ghost-a Killed in a, unborn in b
691
ghost-b Killed in b, unborn in a
692
irrelevant Not in either revision
694
raise NotImplementedError(VersionedFile.plan_merge)
696
def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
697
b_marker=TextMerge.B_MARKER):
698
return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
701
class RecordingVersionedFilesDecorator(object):
702
"""A minimal versioned files that records calls made on it.
704
Only enough methods have been added to support tests using it to date.
706
:ivar calls: A list of the calls made; can be reset at any time by
710
def __init__(self, backing_vf):
711
"""Create a RecordingVersionedFilesDecorator decorating backing_vf.
713
:param backing_vf: The versioned file to answer all methods.
715
self._backing_vf = backing_vf
718
def add_lines(self, key, parents, lines, parent_texts=None,
719
left_matching_blocks=None, nostore_sha=None, random_id=False,
721
self.calls.append(("add_lines", key, parents, lines, parent_texts,
722
left_matching_blocks, nostore_sha, random_id, check_content))
723
return self._backing_vf.add_lines(key, parents, lines, parent_texts,
724
left_matching_blocks, nostore_sha, random_id, check_content)
727
self._backing_vf.check()
729
def get_parent_map(self, keys):
730
self.calls.append(("get_parent_map", copy(keys)))
731
return self._backing_vf.get_parent_map(keys)
733
def get_record_stream(self, keys, sort_order, include_delta_closure):
734
self.calls.append(("get_record_stream", list(keys), sort_order,
735
include_delta_closure))
736
return self._backing_vf.get_record_stream(keys, sort_order,
737
include_delta_closure)
739
def get_sha1s(self, keys):
740
self.calls.append(("get_sha1s", copy(keys)))
741
return self._backing_vf.get_sha1s(keys)
743
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
744
self.calls.append(("iter_lines_added_or_present_in_keys", copy(keys)))
745
return self._backing_vf.iter_lines_added_or_present_in_keys(keys, pb=pb)
748
self.calls.append(("keys",))
749
return self._backing_vf.keys()
752
class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
753
"""A VF that records calls, and returns keys in specific order.
755
:ivar calls: A list of the calls made; can be reset at any time by
759
def __init__(self, backing_vf, key_priority):
760
"""Create a RecordingVersionedFilesDecorator decorating backing_vf.
762
:param backing_vf: The versioned file to answer all methods.
763
:param key_priority: A dictionary defining what order keys should be
764
returned from an 'unordered' get_record_stream request.
765
Keys with lower priority are returned first, keys not present in
766
the map get an implicit priority of 0, and are returned in
767
lexicographical order.
769
RecordingVersionedFilesDecorator.__init__(self, backing_vf)
770
self._key_priority = key_priority
772
def get_record_stream(self, keys, sort_order, include_delta_closure):
773
self.calls.append(("get_record_stream", list(keys), sort_order,
774
include_delta_closure))
775
if sort_order == 'unordered':
777
return (self._key_priority.get(key, 0), key)
778
# Use a defined order by asking for the keys one-by-one from the
780
for key in sorted(keys, key=sort_key):
781
for record in self._backing_vf.get_record_stream([key],
782
'unordered', include_delta_closure):
785
for record in self._backing_vf.get_record_stream(keys, sort_order,
786
include_delta_closure):
790
class KeyMapper(object):
791
"""KeyMappers map between keys and underlying partitioned storage."""
794
"""Map key to an underlying storage identifier.
796
:param key: A key tuple e.g. (b'file-id', b'revision-id').
797
:return: An underlying storage identifier, specific to the partitioning
800
raise NotImplementedError(self.map)
802
def unmap(self, partition_id):
803
"""Map a partitioned storage id back to a key prefix.
805
:param partition_id: The underlying partition id.
806
:return: As much of a key (or prefix) as is derivable from the partition
809
raise NotImplementedError(self.unmap)
812
class ConstantMapper(KeyMapper):
813
"""A key mapper that maps to a constant result."""
815
def __init__(self, result):
816
"""Create a ConstantMapper which will return result for all maps."""
817
self._result = result
820
"""See KeyMapper.map()."""
824
class URLEscapeMapper(KeyMapper):
825
"""Base class for use with transport backed storage.
827
This provides a map and unmap wrapper that respectively url escape and
828
unescape their outputs and inputs.
832
"""See KeyMapper.map()."""
833
return urlutils.quote(self._map(key))
835
def unmap(self, partition_id):
836
"""See KeyMapper.unmap()."""
837
return self._unmap(urlutils.unquote(partition_id))
840
class PrefixMapper(URLEscapeMapper):
841
"""A key mapper that extracts the first component of a key.
843
This mapper is for use with a transport based backend.
847
"""See KeyMapper.map()."""
848
return key[0].decode('utf-8')
850
def _unmap(self, partition_id):
851
"""See KeyMapper.unmap()."""
852
return (partition_id.encode('utf-8'),)
855
class HashPrefixMapper(URLEscapeMapper):
856
"""A key mapper that combines the first component of a key with a hash.
858
This mapper is for use with a transport based backend.
862
"""See KeyMapper.map()."""
863
prefix = self._escape(key[0])
864
return "%02x/%s" % (adler32(prefix) & 0xff, prefix.decode('utf-8'))
866
def _escape(self, prefix):
867
"""No escaping needed here."""
870
def _unmap(self, partition_id):
871
"""See KeyMapper.unmap()."""
872
return (self._unescape(osutils.basename(partition_id)).encode('utf-8'),)
874
def _unescape(self, basename):
875
"""No unescaping needed for HashPrefixMapper."""
879
class HashEscapedPrefixMapper(HashPrefixMapper):
880
"""Combines the escaped first component of a key with a hash.
882
This mapper is for use with a transport based backend.
885
_safe = bytearray(b"abcdefghijklmnopqrstuvwxyz0123456789-_@,.")
887
def _escape(self, prefix):
888
"""Turn a key element into a filesystem safe string.
890
This is similar to a plain urlutils.quote, except
891
it uses specific safe characters, so that it doesn't
892
have to translate a lot of valid file ids.
894
# @ does not get escaped. This is because it is a valid
895
# filesystem character we use all the time, and it looks
896
# a lot better than seeing %40 all the time.
897
r = [(c in self._safe) and chr(c) or ('%%%02x' % c)
898
for c in bytearray(prefix)]
899
return ''.join(r).encode('ascii')
901
def _unescape(self, basename):
902
"""Escaped names are easily unescaped by urlutils."""
903
return urlutils.unquote(basename)
906
def make_versioned_files_factory(versioned_file_factory, mapper):
907
"""Create a ThunkedVersionedFiles factory.
909
This will create a callable which when called creates a
910
ThunkedVersionedFiles on a transport, using mapper to access individual
911
versioned files, and versioned_file_factory to create each individual file.
913
def factory(transport):
914
return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
919
class VersionedFiles(object):
920
"""Storage for many versioned files.
922
This object allows a single keyspace for accessing the history graph and
923
contents of named bytestrings.
925
Currently no implementation allows the graph of different key prefixes to
926
intersect, but the API does allow such implementations in the future.
928
The keyspace is expressed via simple tuples. Any instance of VersionedFiles
929
may have a different length key-size, but that size will be constant for
930
all texts added to or retrieved from it. For instance, breezy uses
931
instances with a key-size of 2 for storing user files in a repository, with
932
the first element the fileid, and the second the version of that file.
934
The use of tuples allows a single code base to support several different
935
uses with only the mapping logic changing from instance to instance.
937
:ivar _immediate_fallback_vfs: For subclasses that support stacking,
938
this is a list of other VersionedFiles immediately underneath this
939
one. They may in turn each have further fallbacks.
942
def add_lines(self, key, parents, lines, parent_texts=None,
943
left_matching_blocks=None, nostore_sha=None, random_id=False,
945
"""Add a text to the store.
947
:param key: The key tuple of the text to add. If the last element is
948
None, a CHK string will be generated during the addition.
949
:param parents: The parents key tuples of the text to add.
950
:param lines: A list of lines. Each line must be a bytestring. And all
951
of them except the last must be terminated with \n and contain no
952
other \n's. The last line may either contain no \n's or a single
953
terminating \n. If the lines list does meet this constraint the add
954
routine may error or may succeed - but you will be unable to read
955
the data back accurately. (Checking the lines have been split
956
correctly is expensive and extremely unlikely to catch bugs so it
957
is not done at runtime unless check_content is True.)
958
:param parent_texts: An optional dictionary containing the opaque
959
representations of some or all of the parents of version_id to
960
allow delta optimisations. VERY IMPORTANT: the texts must be those
961
returned by add_lines or data corruption can be caused.
962
:param left_matching_blocks: a hint about which areas are common
963
between the text and its left-hand-parent. The format is
964
the SequenceMatcher.get_matching_blocks format.
965
:param nostore_sha: Raise ExistingContent and do not add the lines to
966
the versioned file if the digest of the lines matches this.
967
:param random_id: If True a random id has been selected rather than
968
an id determined by some deterministic process such as a converter
969
from a foreign VCS. When True the backend may choose not to check
970
for uniqueness of the resulting key within the versioned file, so
971
this should only be done when the result is expected to be unique
973
:param check_content: If True, the lines supplied are verified to be
974
bytestrings that are correctly formed lines.
975
:return: The text sha1, the number of bytes in the text, and an opaque
976
representation of the inserted version which can be provided
977
back to future add_lines calls in the parent_texts dictionary.
979
raise NotImplementedError(self.add_lines)
981
def add_chunks(self, key, parents, chunk_iter, parent_texts=None,
982
left_matching_blocks=None, nostore_sha=None, random_id=False,
984
"""Add a text to the store from a chunk iterable.
986
:param key: The key tuple of the text to add. If the last element is
987
None, a CHK string will be generated during the addition.
988
:param parents: The parents key tuples of the text to add.
989
:param chunk_iter: An iterable over bytestrings.
990
:param parent_texts: An optional dictionary containing the opaque
991
representations of some or all of the parents of version_id to
992
allow delta optimisations. VERY IMPORTANT: the texts must be those
993
returned by add_lines or data corruption can be caused.
994
:param left_matching_blocks: a hint about which areas are common
995
between the text and its left-hand-parent. The format is
996
the SequenceMatcher.get_matching_blocks format.
997
:param nostore_sha: Raise ExistingContent and do not add the lines to
998
the versioned file if the digest of the lines matches this.
999
:param random_id: If True a random id has been selected rather than
1000
an id determined by some deterministic process such as a converter
1001
from a foreign VCS. When True the backend may choose not to check
1002
for uniqueness of the resulting key within the versioned file, so
1003
this should only be done when the result is expected to be unique
1005
:param check_content: If True, the lines supplied are verified to be
1006
bytestrings that are correctly formed lines.
1007
:return: The text sha1, the number of bytes in the text, and an opaque
1008
representation of the inserted version which can be provided
1009
back to future add_lines calls in the parent_texts dictionary.
1011
raise NotImplementedError(self.add_chunks)
1013
def add_mpdiffs(self, records):
1014
"""Add mpdiffs to this VersionedFile.
1016
Records should be iterables of version, parents, expected_sha1,
1017
mpdiff. mpdiff should be a MultiParent instance.
1020
mpvf = multiparent.MultiMemoryVersionedFile()
1022
for version, parent_ids, expected_sha1, mpdiff in records:
1023
versions.append(version)
1024
mpvf.add_diff(mpdiff, version, parent_ids)
1025
needed_parents = set()
1026
for version, parent_ids, expected_sha1, mpdiff in records:
1027
needed_parents.update(p for p in parent_ids
1028
if not mpvf.has_version(p))
1029
# It seems likely that adding all the present parents as fulltexts can
1030
# easily exhaust memory.
1031
chunks_to_lines = osutils.chunks_to_lines
1032
for record in self.get_record_stream(needed_parents, 'unordered',
1034
if record.storage_kind == 'absent':
1036
mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
1038
for (key, parent_keys, expected_sha1, mpdiff), lines in zip(
1039
records, mpvf.get_line_list(versions)):
1040
if len(parent_keys) == 1:
1041
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
1042
mpvf.get_diff(parent_keys[0]).num_lines()))
1044
left_matching_blocks = None
1045
version_sha1, _, version_text = self.add_lines(key,
1046
parent_keys, lines, vf_parents,
1047
left_matching_blocks=left_matching_blocks)
1048
if version_sha1 != expected_sha1:
1049
raise errors.VersionedFileInvalidChecksum(version)
1050
vf_parents[key] = version_text
1052
def annotate(self, key):
1053
"""Return a list of (version-key, line) tuples for the text of key.
1055
:raise RevisionNotPresent: If the key is not present.
1057
raise NotImplementedError(self.annotate)
1059
def check(self, progress_bar=None):
1060
"""Check this object for integrity.
1062
:param progress_bar: A progress bar to output as the check progresses.
1063
:param keys: Specific keys within the VersionedFiles to check. When
1064
this parameter is not None, check() becomes a generator as per
1065
get_record_stream. The difference to get_record_stream is that
1066
more or deeper checks will be performed.
1067
:return: None, or if keys was supplied a generator as per
1070
raise NotImplementedError(self.check)
1073
def check_not_reserved_id(version_id):
1074
revision.check_not_reserved_id(version_id)
1076
def clear_cache(self):
1077
"""Clear whatever caches this VersionedFile holds.
1079
This is generally called after an operation has been performed, when we
1080
don't expect to be using this versioned file again soon.
1083
def _check_lines_not_unicode(self, lines):
1084
"""Check that lines being added to a versioned file are not unicode."""
1086
if line.__class__ is not bytes:
1087
raise errors.BzrBadParameterUnicode("lines")
1089
def _check_lines_are_lines(self, lines):
1090
"""Check that the lines really are full lines without inline EOL."""
1092
if b'\n' in line[:-1]:
1093
raise errors.BzrBadParameterContainsNewline("lines")
1095
def get_known_graph_ancestry(self, keys):
1096
"""Get a KnownGraph instance with the ancestry of keys."""
1097
# most basic implementation is a loop around get_parent_map
1101
this_parent_map = self.get_parent_map(pending)
1102
parent_map.update(this_parent_map)
1103
pending = set(itertools.chain.from_iterable(
1104
viewvalues(this_parent_map)))
1105
pending.difference_update(parent_map)
1106
kg = _mod_graph.KnownGraph(parent_map)
1109
def get_parent_map(self, keys):
1110
"""Get a map of the parents of keys.
1112
:param keys: The keys to look up parents for.
1113
:return: A mapping from keys to parents. Absent keys are absent from
1116
raise NotImplementedError(self.get_parent_map)
1118
def get_record_stream(self, keys, ordering, include_delta_closure):
1119
"""Get a stream of records for keys.
1121
:param keys: The keys to include.
1122
:param ordering: Either 'unordered' or 'topological'. A topologically
1123
sorted stream has compression parents strictly before their
1125
:param include_delta_closure: If True then the closure across any
1126
compression parents will be included (in the opaque data).
1127
:return: An iterator of ContentFactory objects, each of which is only
1128
valid until the iterator is advanced.
1130
raise NotImplementedError(self.get_record_stream)
1132
def get_sha1s(self, keys):
1133
"""Get the sha1's of the texts for the given keys.
1135
:param keys: The names of the keys to lookup
1136
:return: a dict from key to sha1 digest. Keys of texts which are not
1137
present in the store are not present in the returned
1140
raise NotImplementedError(self.get_sha1s)
1142
__contains__ = index._has_key_from_parent_map
1144
def get_missing_compression_parent_keys(self):
1145
"""Return an iterable of keys of missing compression parents.
1147
Check this after calling insert_record_stream to find out if there are
1148
any missing compression parents. If there are, the records that
1149
depend on them are not able to be inserted safely. The precise
1150
behaviour depends on the concrete VersionedFiles class in use.
1152
Classes that do not support this will raise NotImplementedError.
1154
raise NotImplementedError(self.get_missing_compression_parent_keys)
1156
def insert_record_stream(self, stream):
1157
"""Insert a record stream into this container.
1159
:param stream: A stream of records to insert.
1161
:seealso VersionedFile.get_record_stream:
1163
raise NotImplementedError
1165
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1166
"""Iterate over the lines in the versioned files from keys.
1168
This may return lines from other keys. Each item the returned
1169
iterator yields is a tuple of a line and a text version that that line
1170
is present in (not introduced in).
1172
Ordering of results is in whatever order is most suitable for the
1173
underlying storage format.
1175
If a progress bar is supplied, it may be used to indicate progress.
1176
The caller is responsible for cleaning up progress bars (because this
1180
* Lines are normalised by the underlying store: they will all have \n
1182
* Lines are returned in arbitrary order.
1184
:return: An iterator over (line, key).
1186
raise NotImplementedError(self.iter_lines_added_or_present_in_keys)
1189
"""Return a iterable of the keys for all the contained texts."""
1190
raise NotImplementedError(self.keys)
1192
def make_mpdiffs(self, keys):
1193
"""Create multiparent diffs for specified keys."""
1194
generator = _MPDiffGenerator(self, keys)
1195
return generator.compute_diffs()
1197
def get_annotator(self):
1198
return annotate.Annotator(self)
1200
missing_keys = index._missing_keys_from_parent_map
1202
def _extract_blocks(self, version_id, source, target):
1205
def _transitive_fallbacks(self):
1206
"""Return the whole stack of fallback versionedfiles.
1208
This VersionedFiles may have a list of fallbacks, but it doesn't
1209
necessarily know about the whole stack going down, and it can't know
1210
at open time because they may change after the objects are opened.
1213
for a_vfs in self._immediate_fallback_vfs:
1214
all_fallbacks.append(a_vfs)
1215
all_fallbacks.extend(a_vfs._transitive_fallbacks())
1216
return all_fallbacks
1219
class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.
    """

    def __init__(self, transport, file_factory, mapper, is_locked):
        """Create a ThunkedVersionedFiles."""
        self._transport = transport
        self._file_factory = file_factory
        self._mapper = mapper
        self._is_locked = is_locked

    def add_chunks(self, key, parents, chunk_iter, parent_texts=None,
                   left_matching_blocks=None, nostore_sha=None,
                   random_id=False):
        # For now, just fallback to add_lines.
        lines = osutils.chunks_to_lines(list(chunk_iter))
        return self.add_lines(
            key, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id)

    def add_lines(self, key, parents, lines, parent_texts=None,
                  left_matching_blocks=None, nostore_sha=None, random_id=False,
                  check_content=True):
        """See VersionedFiles.add_lines()."""
        path = self._mapper.map(key)
        version_id = key[-1]
        # The thunked store keys on the prefix; only the last element is the
        # per-file version id.
        parents = [parent[-1] for parent in parents]
        vf = self._get_vf(path)
        try:
            try:
                return vf.add_lines_with_ghosts(
                    version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(
                    version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
        except errors.NoSuchFile:
            # parent directory may be missing, try again.
            self._transport.mkdir(osutils.dirname(path))
            try:
                return vf.add_lines_with_ghosts(
                    version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(
                    version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        prefix = key[:-1]
        path = self._mapper.map(prefix)
        vf = self._get_vf(path)
        origins = vf.annotate(key[-1])
        result = []
        for origin, line in origins:
            result.append((prefix + (origin,), line))
        return result

    def check(self, progress_bar=None, keys=None):
        """See VersionedFiles.check()."""
        # XXX: This is over-enthusiastic but as we only thunk for Weaves today
        # this is tolerable. Ideally we'd pass keys down to check() and
        # have the older VersionedFile interface updated too.
        for prefix, vf in self._iter_all_components():
            vf.check()
        if keys is not None:
            return self.get_record_stream(keys, 'unordered', True)

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        prefixes = self._partition_keys(keys)
        result = {}
        for prefix, suffixes in viewitems(prefixes):
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            parent_map = vf.get_parent_map(suffixes)
            for key, parents in viewitems(parent_map):
                result[prefix + (key,)] = tuple(
                    prefix + (parent,) for parent in parents)
        return result

    def _get_vf(self, path):
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)
        return self._file_factory(path, self._transport, create=True,
                                  get_scope=lambda: None)

    def _partition_keys(self, keys):
        """Turn keys into a dict of prefix:suffix_list."""
        result = {}
        for key in keys:
            prefix_keys = result.setdefault(key[:-1], [])
            prefix_keys.append(key[-1])
        return result

    def _iter_all_prefixes(self):
        # Identify all key prefixes.
        # XXX: A bit hacky, needs polish.
        if isinstance(self._mapper, ConstantMapper):
            paths = [self._mapper.map(())]
            prefixes = [()]
        else:
            relpaths = set()
            for quoted_relpath in self._transport.iter_files_recursive():
                path, ext = os.path.splitext(quoted_relpath)
                relpaths.add(path)
            paths = list(relpaths)
            prefixes = [self._mapper.unmap(path) for path in paths]
        return zip(paths, prefixes)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream()."""
        # Ordering will be taken care of by each partitioned store; group keys
        # by partition.
        keys = sorted(keys)
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            suffixes = [(suffix,) for suffix in suffixes]
            for record in vf.get_record_stream(suffixes, ordering,
                                               include_delta_closure):
                # Re-prefix the per-file record keys back into the shared
                # keyspace before yielding them.
                if record.parents is not None:
                    record.parents = tuple(
                        prefix + parent for parent in record.parents)
                record.key = prefix + record.key
                yield record

    def _iter_keys_vf(self, keys):
        prefixes = self._partition_keys(keys)
        for prefix, suffixes in viewitems(prefixes):
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            yield prefix, suffixes, vf

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s()."""
        sha1s = {}
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            vf_sha1s = vf.get_sha1s(suffixes)
            for suffix, sha1 in viewitems(vf_sha1s):
                sha1s[prefix + (suffix,)] = sha1
        return sha1s

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        for record in stream:
            prefix = record.key[:-1]
            key = record.key[-1:]
            if record.parents is not None:
                parents = [parent[-1:] for parent in record.parents]
            else:
                parents = None
            thunk_record = AdapterFactory(key, parents, record)
            path = self._mapper.map(prefix)
            # Note that this parses the file many times; we can do better but
            # as this only impacts weaves in terms of performance, it is
            # tolerable.
            vf = self._get_vf(path)
            vf.insert_record_stream([thunk_record])

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
                yield line, prefix + (version,)

    def _iter_all_components(self):
        for path, prefix in self._iter_all_prefixes():
            yield prefix, self._get_vf(path)

    def keys(self):
        """See VersionedFiles.keys()."""
        result = set()
        for prefix, vf in self._iter_all_components():
            for suffix in vf.versions():
                result.add(prefix + (suffix,))
        return result
class VersionedFilesWithFallbacks(VersionedFiles):
    """VersionedFiles that can stack on a list of fallback stores."""

    def without_fallbacks(self):
        """Return a clone of this object without any fallbacks configured."""
        raise NotImplementedError(self.without_fallbacks)

    def add_fallback_versioned_files(self, a_versioned_files):
        """Add a source of texts for texts not present in this knit.

        :param a_versioned_files: A VersionedFiles object.
        """
        raise NotImplementedError(self.add_fallback_versioned_files)

    def get_known_graph_ancestry(self, keys):
        """Get a KnownGraph instance with the ancestry of keys."""
        parent_map, missing_keys = self._index.find_ancestry(keys)
        for fallback in self._transitive_fallbacks():
            # Stop querying fallbacks as soon as everything is resolved.
            if not missing_keys:
                break
            (f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
                missing_keys)
            parent_map.update(f_parent_map)
            missing_keys = f_missing_keys
        kg = _mod_graph.KnownGraph(parent_map)
        return kg
class _PlanMergeVersionedFile(VersionedFiles):
    """A VersionedFile for uncommitted and committed texts.

    It is intended to allow merges to be planned with working tree texts.
    It implements only the small part of the VersionedFiles interface used by
    PlanMerge. It falls back to multiple versionedfiles for data not stored in
    _PlanMergeVersionedFile itself.

    :ivar: fallback_versionedfiles a list of VersionedFiles objects that can be
        queried for missing texts.
    """

    def __init__(self, file_id):
        """Create a _PlanMergeVersionedFile.

        :param file_id: Used with _PlanMerge code which is not yet fully
            tuple-keyspace aware.
        """
        self._file_id = file_id
        # fallback locations
        self.fallback_versionedfiles = []
        # Parents for locally held keys.
        self._parents = {}
        # line data for locally held keys.
        self._lines = {}
        # key lookup providers
        self._providers = [_mod_graph.DictParentsProvider(self._parents)]

    def plan_merge(self, ver_a, ver_b, base=None):
        """See VersionedFile.plan_merge"""
        from ..merge import _PlanMerge
        if base is None:
            return _PlanMerge(ver_a, ver_b, self,
                              (self._file_id,)).plan_merge()
        # With an explicit base, subtract the base-vs-a plan from the b-vs-a
        # plan so only changes relative to base remain.
        old_plan = list(_PlanMerge(ver_a, base, self,
                                   (self._file_id,)).plan_merge())
        new_plan = list(_PlanMerge(ver_a, ver_b, self,
                                   (self._file_id,)).plan_merge())
        return _PlanMerge._subtract_plans(old_plan, new_plan)

    def plan_lca_merge(self, ver_a, ver_b, base=None):
        from ..merge import _PlanLCAMerge
        graph = _mod_graph.Graph(self)
        new_plan = _PlanLCAMerge(
            ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
        if base is None:
            return new_plan
        old_plan = _PlanLCAMerge(
            ver_a, base, self, (self._file_id,), graph).plan_merge()
        return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))

    def add_lines(self, key, parents, lines):
        """See VersionedFiles.add_lines

        Lines are added locally, not to fallback versionedfiles. Also, ghosts
        are permitted. Only reserved ids are permitted.
        """
        if not isinstance(key, tuple):
            raise TypeError(key)
        if not revision.is_reserved_id(key[-1]):
            raise ValueError('Only reserved ids may be used')
        if parents is None:
            raise ValueError('Parents may not be None')
        if lines is None:
            raise ValueError('Lines may not be None')
        self._parents[key] = tuple(parents)
        self._lines[key] = lines

    def get_record_stream(self, keys, ordering, include_delta_closure):
        pending = set(keys)
        # Serve what we hold locally first.
        for key in keys:
            if key in self._lines:
                lines = self._lines[key]
                parents = self._parents[key]
                pending.remove(key)
                yield ChunkedContentFactory(key, parents, None, lines)
        # Then try each fallback for whatever is still outstanding.
        for versionedfile in self.fallback_versionedfiles:
            for record in versionedfile.get_record_stream(
                    pending, 'unordered', True):
                if record.storage_kind == 'absent':
                    continue
                else:
                    pending.remove(record.key)
                    yield record
            if not pending:
                return
        # report absent entries
        for key in pending:
            yield AbsentContentFactory(key)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map"""
        # We create a new provider because a fallback may have been added.
        # If we make fallbacks private we can update a stack list and avoid
        # object creation thrashing.
        keys = set(keys)
        result = {}
        if revision.NULL_REVISION in keys:
            keys.remove(revision.NULL_REVISION)
            result[revision.NULL_REVISION] = ()
        self._providers = self._providers[:1] + self.fallback_versionedfiles
        result.update(
            _mod_graph.StackedParentsProvider(
                self._providers).get_parent_map(keys))
        # Normalise empty-parent entries to point at the null revision.
        for key, parents in viewitems(result):
            if parents == ():
                result[key] = (revision.NULL_REVISION,)
        return result
class PlanWeaveMerge(TextMerge):
    """Weave merge that takes a plan as its input.

    This exists so that VersionedFile.plan_merge is implementable.
    Most callers will want to use WeaveMerge instead.
    """

    def __init__(self, plan, a_marker=TextMerge.A_MARKER,
                 b_marker=TextMerge.B_MARKER):
        TextMerge.__init__(self, a_marker, b_marker)
        self.plan = list(plan)

    def _merge_struct(self):
        lines_a = []
        lines_b = []
        ch_a = ch_b = False

        def outstanding_struct():
            if not lines_a and not lines_b:
                return
            elif ch_a and not ch_b:
                # one-sided change: take the A lines as-is
                yield (lines_a,)
            elif ch_b and not ch_a:
                yield (lines_b,)
            elif lines_a == lines_b:
                yield (lines_a,)
            else:
                # genuine conflict: emit both sides
                yield (lines_a, lines_b)

        # We previously considered either 'unchanged' or 'killed-both' lines
        # to be possible places to resynchronize. However, assuming agreement
        # on killed-both lines may be too aggressive. -- mbp 20060324
        for state, line in self.plan:
            if state == 'unchanged':
                # resync and flush queued conflicts changes if any
                for struct in outstanding_struct():
                    yield struct
                lines_a = []
                lines_b = []
                ch_a = ch_b = False

            if state == 'unchanged':
                if line:
                    yield ([line],)
            elif state == 'killed-a':
                ch_a = True
                lines_b.append(line)
            elif state == 'killed-b':
                ch_b = True
                lines_a.append(line)
            elif state == 'new-a':
                ch_a = True
                lines_a.append(line)
            elif state == 'new-b':
                ch_b = True
                lines_b.append(line)
            elif state == 'conflicted-a':
                ch_b = ch_a = True
                lines_a.append(line)
            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            elif state == 'killed-both':
                # This counts as a change, even though there is no associated
                # line
                ch_b = ch_a = True
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                                 'killed-base'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct

    def base_from_plan(self):
        """Construct a BASE file from the plan text."""
        base_lines = []
        for state, line in self.plan:
            if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
                # If unchanged, then this line is straight from base. If a or b
                # or both killed the line, then it *used* to be in base.
                base_lines.append(line)
            else:
                if state not in ('killed-base', 'irrelevant',
                                 'ghost-a', 'ghost-b',
                                 'new-a', 'new-b',
                                 'conflicted-a', 'conflicted-b'):
                    # killed-base, irrelevant means it doesn't apply
                    # ghost-a/ghost-b are harder to say for sure, but they
                    # aren't in the 'inc_c' which means they aren't in the
                    # shared base of a & b. So we don't include them. And
                    # obviously if the line is newly inserted, it isn't in base
                    # If 'conflicted-a' or b, then it is new vs one base, but
                    # old versus another base. However, if we make it present
                    # in the base, it will be deleted from the target, and it
                    # seems better to get a line doubled in the merge result,
                    # rather than have it deleted entirely.
                    # Example, each node is the 'text' at that point:
                    #           MN
                    #          /   \
                    #        MaN   MbN
                    #         |  X  |
                    #        MabN MbaN
                    #          \   /
                    #           ???
                    # There was a criss-cross conflict merge. Both sides
                    # include the other, but put themselves first.
                    # Weave marks this as a 'clean' merge, picking OTHER over
                    # THIS. (Though the details depend on order inserted into
                    # weave, etc.)
                    # LCA generates a plan:
                    # [('unchanged', M),
                    #  ('conflicted-b', b),
                    #  ('unchanged', a),
                    #  ('conflicted-a', b),
                    #  ('unchanged', N)]
                    # If you mark 'conflicted-*' as part of BASE, then a 3-way
                    # merge tool will cleanly generate "MaN" (as BASE vs THIS
                    # removes one 'b', and BASE vs OTHER removes the other)
                    # If you include neither, 3-way creates a clean "MbabN" as
                    # THIS adds one 'b', and OTHER does too.
                    # It seems that having the line 2 times is better than
                    # having it omitted. (Easier to manually delete than notice
                    # it needs to be added.)
                    raise AssertionError('Unknown state: %s' % (state,))
        return base_lines
class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
                 a_marker=PlanWeaveMerge.A_MARKER,
                 b_marker=PlanWeaveMerge.B_MARKER):
        """Plan the merge of ver_a and ver_b from versionedfile.

        :param versionedfile: The store holding both versions; its
            plan_merge() provides the plan consumed by PlanWeaveMerge.
        """
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
class VirtualVersionedFiles(VersionedFiles):
    """Dummy implementation for VersionedFiles that uses other functions for
    obtaining fulltexts and parent maps.

    This is always on the bottom of the stack and uses string keys
    (rather than tuples) internally.
    """

    def __init__(self, get_parent_map, get_lines):
        """Create a VirtualVersionedFiles.

        :param get_parent_map: Same signature as Repository.get_parent_map.
        :param get_lines: Should return lines for specified key or None if
            not available.
        """
        super(VirtualVersionedFiles, self).__init__()
        self._get_parent_map = get_parent_map
        self._get_lines = get_lines

    def check(self, progressbar=None):
        """See VersionedFiles.check.

        :note: Always returns True for VirtualVersionedFiles.
        """
        return True

    def add_mpdiffs(self, records):
        """See VersionedFiles.mpdiffs.

        :note: Not implemented for VirtualVersionedFiles.
        """
        raise NotImplementedError(self.add_mpdiffs)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map."""
        # Unwrap the 1-tuple keys for the callback, then re-wrap the result.
        parent_view = viewitems(self._get_parent_map(k for (k,) in keys))
        return dict(((k,), tuple((p,) for p in v)) for k, v in parent_view)

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s."""
        ret = {}
        for (k,) in keys:
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                ret[(k,)] = osutils.sha_strings(lines)
        return ret

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream."""
        for (k,) in list(keys):
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                yield ChunkedContentFactory(
                    (k,), None, sha1=osutils.sha_strings(lines),
                    chunks=lines)
            else:
                yield AbsentContentFactory((k,))

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        for i, (key,) in enumerate(keys):
            if pb is not None:
                pb.update("Finding changed lines", i, len(keys))
            for l in self._get_lines(key):
                yield (l, key)
class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    being added already exists.
    """

    def __init__(self, store):
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
                  left_matching_blocks=None, nostore_sha=None, random_id=False,
                  check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.
        """
        if nostore_sha:
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key[-1] is None:
            # CHK-style key: derive it from the content's sha1.
            sha1 = osutils.sha_strings(lines)
            key = (b"sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
                                     parent_texts=parent_texts,
                                     left_matching_blocks=left_matching_blocks,
                                     nostore_sha=nostore_sha,
                                     random_id=random_id,
                                     check_content=check_content)

    def __getattr__(self, name):
        # Everything else is delegated unchanged to the wrapped store.
        return getattr(self._store, name)
def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off a record kind from the front of network_bytes.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    # The storage kind is the first newline-terminated ascii token.
    line_end = network_bytes.find(b'\n')
    storage_kind = network_bytes[:line_end].decode('ascii')
    return storage_kind, line_end + 1
class NetworkRecordStream(object):
    """A record_stream which reconstitures a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator should have been obtained from a record_streams'
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        # Dispatch table from wire storage_kind to a deserialiser that yields
        # ContentFactory objects.
        self._kind_factory = {
            'fulltext': fulltext_network_to_record,
            'groupcompress-block': groupcompress.network_block_to_records,
            'knit-ft-gz': knit.knit_network_to_record,
            'knit-delta-gz': knit.knit_network_to_record,
            'knit-annotated-ft-gz': knit.knit_network_to_record,
            'knit-annotated-delta-gz': knit.knit_network_to_record,
            'knit-delta-closure': knit.knit_delta_closure_to_records,
            }

    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
        for bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
            for record in self._kind_factory[storage_kind](
                    storage_kind, bytes, line_end):
                yield record
def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record."""
    # Wire layout after the kind line: 4-byte big-endian metadata length,
    # bencoded (key, parents) metadata, then the raw fulltext.
    meta_len, = struct.unpack('!L', bytes[line_end:line_end + 4])
    record_meta = bytes[line_end + 4:line_end + 4 + meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    if parents == b'nil':
        # b'nil' is the wire encoding of "parents unknown".
        parents = None
    fulltext = bytes[line_end + 4 + meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]
def _length_prefix(bytes):
1887
return struct.pack('!L', len(bytes))
1890
def record_to_fulltext_bytes(record):
    """Serialise a fulltext record for the network.

    Layout: b"fulltext\n", 4-byte length prefix, bencoded (key, parents)
    metadata, then the fulltext content.
    """
    if record.parents is None:
        # b'nil' is the wire encoding of "parents unknown".
        parents = b'nil'
    else:
        parents = record.parents
    record_meta = bencode.bencode((record.key, parents))
    record_content = record.get_bytes_as('fulltext')
    return b"fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)
def sort_groupcompress(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the key prefix.

    :return: A sorted-list of keys
    """
    # gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
    per_prefix_map = {}
    for item in viewitems(parent_map):
        key = item[0]
        if isinstance(key, bytes) or len(key) == 1:
            # Unprefixed keyspace: everything shares one bucket.
            prefix = b''
        else:
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]

    present_keys = []
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys
class _KeyRefs(object):
1931
def __init__(self, track_new_keys=False):
1932
# dict mapping 'key' to 'set of keys referring to that key'
1935
# set remembering all new keys
1936
self.new_keys = set()
1938
self.new_keys = None
1944
self.new_keys.clear()
1946
def add_references(self, key, refs):
1947
# Record the new references
1948
for referenced in refs:
1950
needed_by = self.refs[referenced]
1952
needed_by = self.refs[referenced] = set()
1954
# Discard references satisfied by the new key
1957
def get_new_keys(self):
1958
return self.new_keys
1960
def get_unsatisfied_refs(self):
1961
return self.refs.keys()
1963
def _satisfy_refs_for_key(self, key):
1967
# No keys depended on this key. That's ok.
1970
def add_key(self, key):
1971
# satisfy refs for key, and remember that we've seen this key.
1972
self._satisfy_refs_for_key(key)
1973
if self.new_keys is not None:
1974
self.new_keys.add(key)
1976
def satisfy_refs_for_keys(self, keys):
1978
self._satisfy_refs_for_key(key)
1980
def get_referrers(self):
1981
return set(itertools.chain.from_iterable(viewvalues(self.refs)))