1
# Copyright (C) 2006-2011 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Versioned text file storage api."""
19
from __future__ import absolute_import
25
from zlib import adler32
27
from ..lazy_import import lazy_import
28
lazy_import(globals(), """
40
from breezy.bzr import (
46
from ..registry import Registry
47
from ..sixish import (
53
from ..textmerge import TextMerge
56
# Registry of storage-kind adapters. Keys are (source_kind, target_kind)
# tuples; values are lazily-loaded adapter classes from breezy.bzr.knit.
# NOTE(review): the 'FTPlainToFullText' target for ('knit-ft-gz', 'fulltext')
# was dropped in transit and is restored from the naming symmetry with
# DeltaPlainToFullText — confirm against breezy.bzr.knit.
adapter_registry = Registry()
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'breezy.bzr.knit',
    'DeltaPlainToFullText')
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'breezy.bzr.knit',
    'FTPlainToFullText')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
    'breezy.bzr.knit', 'DeltaAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
    'breezy.bzr.knit', 'DeltaAnnotatedToFullText')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
    'breezy.bzr.knit', 'FTAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
    'breezy.bzr.knit', 'FTAnnotatedToFullText')
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
#     'breezy.bzr.knit', 'FTAnnotatedToChunked')
73
class ContentFactory(object):
74
"""Abstract interface for insertion and retrieval from a VersionedFile.
76
:ivar sha1: None, or the sha1 of the content fulltext.
77
:ivar storage_kind: The native storage kind of this factory. One of
78
'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
79
'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
80
'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
81
:ivar key: The key of this content. Each key is a tuple with a single
83
:ivar parents: A tuple of parent keys for self.key. If the object has
84
no parent information, None (as opposed to () for an empty list of
89
"""Create a ContentFactory."""
91
self.storage_kind = None
96
class ChunkedContentFactory(ContentFactory):
97
"""Static data content factory.
99
This takes a 'chunked' list of strings. The only requirement on 'chunked' is
100
that ''.join(lines) becomes a valid fulltext. A tuple of a single string
101
satisfies this, as does a list of lines.
103
:ivar sha1: None, or the sha1 of the content fulltext.
104
:ivar storage_kind: The native storage kind of this factory. Always
106
:ivar key: The key of this content. Each key is a tuple with a single
108
:ivar parents: A tuple of parent keys for self.key. If the object has
109
no parent information, None (as opposed to () for an empty list of
113
def __init__(self, key, parents, sha1, chunks):
114
"""Create a ContentFactory."""
116
self.storage_kind = 'chunked'
118
self.parents = parents
119
self._chunks = chunks
121
def get_bytes_as(self, storage_kind):
122
if storage_kind == 'chunked':
124
elif storage_kind == 'fulltext':
125
return b''.join(self._chunks)
126
raise errors.UnavailableRepresentation(self.key, storage_kind,
130
class FulltextContentFactory(ContentFactory):
131
"""Static data content factory.
133
This takes a fulltext when created and just returns that during
134
get_bytes_as('fulltext').
136
:ivar sha1: None, or the sha1 of the content fulltext.
137
:ivar storage_kind: The native storage kind of this factory. Always
139
:ivar key: The key of this content. Each key is a tuple with a single
141
:ivar parents: A tuple of parent keys for self.key. If the object has
142
no parent information, None (as opposed to () for an empty list of
146
def __init__(self, key, parents, sha1, text):
147
"""Create a ContentFactory."""
149
self.storage_kind = 'fulltext'
151
self.parents = parents
152
if not isinstance(text, bytes):
153
raise TypeError(text)
156
def get_bytes_as(self, storage_kind):
157
if storage_kind == self.storage_kind:
159
elif storage_kind == 'chunked':
161
raise errors.UnavailableRepresentation(self.key, storage_kind,
165
class AbsentContentFactory(ContentFactory):
166
"""A placeholder content factory for unavailable texts.
169
:ivar storage_kind: 'absent'.
170
:ivar key: The key of this content. Each key is a tuple with a single
175
def __init__(self, key):
176
"""Create a ContentFactory."""
178
self.storage_kind = 'absent'
182
def get_bytes_as(self, storage_kind):
183
raise ValueError('A request was made for key: %s, but that'
184
' content is not available, and the calling'
185
' code does not handle if it is missing.'
189
class AdapterFactory(ContentFactory):
    """A content factory to adapt between key prefix's."""

    def __init__(self, key, parents, adapted):
        """Create an adapter factory instance.

        :param key: The key this factory reports (may differ from the
            adapted factory's key).
        :param parents: The parent keys reported for key.
        :param adapted: The underlying ContentFactory every other attribute
            is delegated to.
        """
        # self.key is required: __getattr__ below answers 'key' from
        # __dict__, so it must be assigned here (line evidently dropped
        # from the transmitted copy).
        self.key = key
        self.parents = parents
        self._adapted = adapted

    def __getattr__(self, attr):
        """Return a member from the adapted object."""
        # key and parents are overridden locally; everything else is
        # delegated to the wrapped factory.
        if attr in ('key', 'parents'):
            return self.__dict__[attr]
        return getattr(self._adapted, attr)
206
def filter_absent(record_stream):
    """Adapt a record stream to remove absent records.

    :param record_stream: An iterable of ContentFactory-like records.
    :return: An iterator yielding only the records whose storage_kind is
        not 'absent'.  (The `yield` line was dropped from the transmitted
        copy; without it the generator filtered nothing.)
    """
    for record in record_stream:
        if record.storage_kind != 'absent':
            yield record
213
class _MPDiffGenerator(object):
214
"""Pull out the functionality for generating mp_diffs."""
216
def __init__(self, vf, keys):
218
# This is the order the keys were requested in
219
self.ordered_keys = tuple(keys)
220
# keys + their parents, what we need to compute the diffs
221
self.needed_keys = ()
222
# Map from key: mp_diff
224
# Map from key: parents_needed (may have ghosts)
226
# Parents that aren't present
227
self.ghost_parents = ()
228
# Map from parent_key => number of children for this text
230
# Content chunks that are cached while we still need them
233
def _find_needed_keys(self):
234
"""Find the set of keys we need to request.
236
This includes all the original keys passed in, and the non-ghost
237
parents of those keys.
239
:return: (needed_keys, refcounts)
240
needed_keys is the set of all texts we need to extract
241
refcounts is a dict of {key: num_children} letting us know when we
242
no longer need to cache a given parent text
244
# All the keys and their parents
245
needed_keys = set(self.ordered_keys)
246
parent_map = self.vf.get_parent_map(needed_keys)
247
self.parent_map = parent_map
248
# TODO: Should we be using a different construct here? I think this
249
# uses difference_update internally, and we expect the result to
251
missing_keys = needed_keys.difference(parent_map)
253
raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
254
# Parents that might be missing. They are allowed to be ghosts, but we
255
# should check for them
257
setdefault = refcounts.setdefault
259
for child_key, parent_keys in viewitems(parent_map):
261
# parent_keys may be None if a given VersionedFile claims to
262
# not support graph operations.
264
just_parents.update(parent_keys)
265
needed_keys.update(parent_keys)
266
for p in parent_keys:
267
refcounts[p] = setdefault(p, 0) + 1
268
just_parents.difference_update(parent_map)
269
# Remove any parents that are actually ghosts from the needed set
270
self.present_parents = set(self.vf.get_parent_map(just_parents))
271
self.ghost_parents = just_parents.difference(self.present_parents)
272
needed_keys.difference_update(self.ghost_parents)
273
self.needed_keys = needed_keys
274
self.refcounts = refcounts
275
return needed_keys, refcounts
277
def _compute_diff(self, key, parent_lines, lines):
278
"""Compute a single mp_diff, and store it in self._diffs"""
279
if len(parent_lines) > 0:
280
# XXX: _extract_blocks is not usefully defined anywhere...
281
# It was meant to extract the left-parent diff without
282
# having to recompute it for Knit content (pack-0.92,
283
# etc). That seems to have regressed somewhere
284
left_parent_blocks = self.vf._extract_blocks(key,
285
parent_lines[0], lines)
287
left_parent_blocks = None
288
diff = multiparent.MultiParent.from_lines(lines,
289
parent_lines, left_parent_blocks)
290
self.diffs[key] = diff
292
def _process_one_record(self, key, this_chunks):
294
if key in self.parent_map:
295
# This record should be ready to diff, since we requested
296
# content in 'topological' order
297
parent_keys = self.parent_map.pop(key)
298
# If a VersionedFile claims 'no-graph' support, then it may return
299
# None for any parent request, so we replace it with an empty tuple
300
if parent_keys is None:
303
for p in parent_keys:
304
# Alternatively we could check p not in self.needed_keys, but
305
# ghost_parents should be tiny versus huge
306
if p in self.ghost_parents:
308
refcount = self.refcounts[p]
309
if refcount == 1: # Last child reference
310
self.refcounts.pop(p)
311
parent_chunks = self.chunks.pop(p)
313
self.refcounts[p] = refcount - 1
314
parent_chunks = self.chunks[p]
315
p_lines = osutils.chunks_to_lines(parent_chunks)
316
# TODO: Should we cache the line form? We did the
317
# computation to get it, but storing it this way will
318
# be less memory efficient...
319
parent_lines.append(p_lines)
321
lines = osutils.chunks_to_lines(this_chunks)
322
# Since we needed the lines, we'll go ahead and cache them this way
324
self._compute_diff(key, parent_lines, lines)
326
# Is this content required for any more children?
327
if key in self.refcounts:
328
self.chunks[key] = this_chunks
330
def _extract_diffs(self):
331
needed_keys, refcounts = self._find_needed_keys()
332
for record in self.vf.get_record_stream(needed_keys,
333
'topological', True):
334
if record.storage_kind == 'absent':
335
raise errors.RevisionNotPresent(record.key, self.vf)
336
self._process_one_record(record.key,
337
record.get_bytes_as('chunked'))
339
def compute_diffs(self):
340
self._extract_diffs()
341
dpop = self.diffs.pop
342
return [dpop(k) for k in self.ordered_keys]
345
class VersionedFile(object):
346
"""Versioned text file storage.
348
A versioned file manages versions of line-based text files,
349
keeping track of the originating version for each line.
351
To clients the "lines" of the file are represented as a list of
352
strings. These strings will typically have terminal newline
353
characters, but this is not required. In particular files commonly
354
do not have a newline at the end of the file.
356
Texts are identified by a version-id string.
360
def check_not_reserved_id(version_id):
361
revision.check_not_reserved_id(version_id)
363
def copy_to(self, name, transport):
364
"""Copy this versioned file to name on transport."""
365
raise NotImplementedError(self.copy_to)
367
def get_record_stream(self, versions, ordering, include_delta_closure):
368
"""Get a stream of records for versions.
370
:param versions: The versions to include. Each version is a tuple
372
:param ordering: Either 'unordered' or 'topological'. A topologically
373
sorted stream has compression parents strictly before their
375
:param include_delta_closure: If True then the closure across any
376
compression parents will be included (in the data content of the
377
stream, not in the emitted records). This guarantees that
378
'fulltext' can be used successfully on every record.
379
:return: An iterator of ContentFactory objects, each of which is only
380
valid until the iterator is advanced.
382
raise NotImplementedError(self.get_record_stream)
384
def has_version(self, version_id):
385
"""Returns whether version is present."""
386
raise NotImplementedError(self.has_version)
388
def insert_record_stream(self, stream):
389
"""Insert a record stream into this versioned file.
391
:param stream: A stream of records to insert.
393
:seealso VersionedFile.get_record_stream:
395
raise NotImplementedError
397
def add_lines(self, version_id, parents, lines, parent_texts=None,
398
left_matching_blocks=None, nostore_sha=None, random_id=False,
400
"""Add a single text on top of the versioned file.
402
Must raise RevisionAlreadyPresent if the new version is
403
already present in file history.
405
Must raise RevisionNotPresent if any of the given parents are
406
not present in file history.
408
:param lines: A list of lines. Each line must be a bytestring. And all
409
of them except the last must be terminated with \n and contain no
410
other \n's. The last line may either contain no \n's or a single
411
terminated \n. If the lines list does meet this constraint the add
412
routine may error or may succeed - but you will be unable to read
413
the data back accurately. (Checking the lines have been split
414
correctly is expensive and extremely unlikely to catch bugs so it
415
is not done at runtime unless check_content is True.)
416
:param parent_texts: An optional dictionary containing the opaque
417
representations of some or all of the parents of version_id to
418
allow delta optimisations. VERY IMPORTANT: the texts must be those
419
returned by add_lines or data corruption can be caused.
420
:param left_matching_blocks: a hint about which areas are common
421
between the text and its left-hand-parent. The format is
422
the SequenceMatcher.get_matching_blocks format.
423
:param nostore_sha: Raise ExistingContent and do not add the lines to
424
the versioned file if the digest of the lines matches this.
425
:param random_id: If True a random id has been selected rather than
426
an id determined by some deterministic process such as a converter
427
from a foreign VCS. When True the backend may choose not to check
428
for uniqueness of the resulting key within the versioned file, so
429
this should only be done when the result is expected to be unique
431
:param check_content: If True, the lines supplied are verified to be
432
bytestrings that are correctly formed lines.
433
:return: The text sha1, the number of bytes in the text, and an opaque
434
representation of the inserted version which can be provided
435
back to future add_lines calls in the parent_texts dictionary.
437
self._check_write_ok()
438
return self._add_lines(version_id, parents, lines, parent_texts,
439
left_matching_blocks, nostore_sha, random_id, check_content)
441
def _add_lines(self, version_id, parents, lines, parent_texts,
442
left_matching_blocks, nostore_sha, random_id, check_content):
443
"""Helper to do the class specific add_lines."""
444
raise NotImplementedError(self.add_lines)
446
def add_lines_with_ghosts(self, version_id, parents, lines,
447
parent_texts=None, nostore_sha=None, random_id=False,
448
check_content=True, left_matching_blocks=None):
449
"""Add lines to the versioned file, allowing ghosts to be present.
451
This takes the same parameters as add_lines and returns the same.
453
self._check_write_ok()
454
return self._add_lines_with_ghosts(version_id, parents, lines,
455
parent_texts, nostore_sha, random_id, check_content, left_matching_blocks)
457
def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
458
nostore_sha, random_id, check_content, left_matching_blocks):
459
"""Helper to do class specific add_lines_with_ghosts."""
460
raise NotImplementedError(self.add_lines_with_ghosts)
462
def check(self, progress_bar=None):
463
"""Check the versioned file for integrity."""
464
raise NotImplementedError(self.check)
466
def _check_lines_not_unicode(self, lines):
467
"""Check that lines being added to a versioned file are not unicode."""
469
if not isinstance(line, bytes):
470
raise errors.BzrBadParameterUnicode("lines")
472
def _check_lines_are_lines(self, lines):
473
"""Check that the lines really are full lines without inline EOL."""
475
if b'\n' in line[:-1]:
476
raise errors.BzrBadParameterContainsNewline("lines")
478
def get_format_signature(self):
479
"""Get a text description of the data encoding in this file.
483
raise NotImplementedError(self.get_format_signature)
485
def make_mpdiffs(self, version_ids):
486
"""Create multiparent diffs for specified versions."""
487
# XXX: Can't use _MPDiffGenerator just yet. This is because version_ids
488
# is a list of strings, not keys. And while self.get_record_stream
489
# is supported, it takes *keys*, while self.get_parent_map() takes
491
knit_versions = set()
492
knit_versions.update(version_ids)
493
parent_map = self.get_parent_map(version_ids)
494
for version_id in version_ids:
496
knit_versions.update(parent_map[version_id])
498
raise errors.RevisionNotPresent(version_id, self)
499
# We need to filter out ghosts, because we can't diff against them.
500
knit_versions = set(self.get_parent_map(knit_versions))
501
lines = dict(zip(knit_versions,
502
self._get_lf_split_line_list(knit_versions)))
504
for version_id in version_ids:
505
target = lines[version_id]
507
parents = [lines[p] for p in parent_map[version_id] if p in
510
# I don't know how this could ever trigger.
511
# parent_map[version_id] was already triggered in the previous
512
# for loop, and lines[p] has the 'if p in knit_versions' check,
513
# so we again won't have a KeyError.
514
raise errors.RevisionNotPresent(version_id, self)
516
left_parent_blocks = self._extract_blocks(version_id,
519
left_parent_blocks = None
520
diffs.append(multiparent.MultiParent.from_lines(target, parents,
524
def _extract_blocks(self, version_id, source, target):
527
def add_mpdiffs(self, records):
528
"""Add mpdiffs to this VersionedFile.
530
Records should be iterables of version, parents, expected_sha1,
531
mpdiff. mpdiff should be a MultiParent instance.
533
# Does this need to call self._check_write_ok()? (IanC 20070919)
535
mpvf = multiparent.MultiMemoryVersionedFile()
537
for version, parent_ids, expected_sha1, mpdiff in records:
538
versions.append(version)
539
mpvf.add_diff(mpdiff, version, parent_ids)
540
needed_parents = set()
541
for version, parent_ids, expected_sha1, mpdiff in records:
542
needed_parents.update(p for p in parent_ids
543
if not mpvf.has_version(p))
544
present_parents = set(self.get_parent_map(needed_parents))
545
for parent_id, lines in zip(present_parents,
546
self._get_lf_split_line_list(present_parents)):
547
mpvf.add_version(lines, parent_id, [])
548
for (version, parent_ids, expected_sha1, mpdiff), lines in zip(
549
records, mpvf.get_line_list(versions)):
550
if len(parent_ids) == 1:
551
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
552
mpvf.get_diff(parent_ids[0]).num_lines()))
554
left_matching_blocks = None
556
_, _, version_text = self.add_lines_with_ghosts(version,
557
parent_ids, lines, vf_parents,
558
left_matching_blocks=left_matching_blocks)
559
except NotImplementedError:
560
# The vf can't handle ghosts, so add lines normally, which will
561
# (reasonably) fail if there are ghosts in the data.
562
_, _, version_text = self.add_lines(version,
563
parent_ids, lines, vf_parents,
564
left_matching_blocks=left_matching_blocks)
565
vf_parents[version] = version_text
566
sha1s = self.get_sha1s(versions)
567
for version, parent_ids, expected_sha1, mpdiff in records:
568
if expected_sha1 != sha1s[version]:
569
raise errors.VersionedFileInvalidChecksum(version)
571
def get_text(self, version_id):
572
"""Return version contents as a text string.
574
Raises RevisionNotPresent if version is not present in
577
return b''.join(self.get_lines(version_id))
578
get_string = get_text
580
def get_texts(self, version_ids):
581
"""Return the texts of listed versions as a list of strings.
583
Raises RevisionNotPresent if version is not present in
586
return [b''.join(self.get_lines(v)) for v in version_ids]
588
def get_lines(self, version_id):
589
"""Return version contents as a sequence of lines.
591
Raises RevisionNotPresent if version is not present in
594
raise NotImplementedError(self.get_lines)
596
def _get_lf_split_line_list(self, version_ids):
597
return [BytesIO(t).readlines() for t in self.get_texts(version_ids)]
599
def get_ancestry(self, version_ids, topo_sorted=True):
600
"""Return a list of all ancestors of given version(s). This
601
will not include the null revision.
603
This list will not be topologically sorted if topo_sorted=False is
606
Must raise RevisionNotPresent if any of the given versions are
607
not present in file history."""
608
raise NotImplementedError(self.get_ancestry)
610
def get_ancestry_with_ghosts(self, version_ids):
611
"""Return a list of all ancestors of given version(s). This
612
will not include the null revision.
614
Must raise RevisionNotPresent if any of the given versions are
615
not present in file history.
617
Ghosts that are known about will be included in ancestry list,
618
but are not explicitly marked.
620
raise NotImplementedError(self.get_ancestry_with_ghosts)
622
def get_parent_map(self, version_ids):
623
"""Get a map of the parents of version_ids.
625
:param version_ids: The version ids to look up parents for.
626
:return: A mapping from version id to parents.
628
raise NotImplementedError(self.get_parent_map)
630
def get_parents_with_ghosts(self, version_id):
631
"""Return version names for parents of version_id.
633
Will raise RevisionNotPresent if version_id is not present
636
Ghosts that are known about will be included in the parent list,
637
but are not explicitly marked.
640
return list(self.get_parent_map([version_id])[version_id])
642
raise errors.RevisionNotPresent(version_id, self)
644
def annotate(self, version_id):
645
"""Return a list of (version-id, line) tuples for version_id.
647
:raise RevisionNotPresent: If the given version is
648
not present in file history.
650
raise NotImplementedError(self.annotate)
652
def iter_lines_added_or_present_in_versions(self, version_ids=None,
654
"""Iterate over the lines in the versioned file from version_ids.
656
This may return lines from other versions. Each item the returned
657
iterator yields is a tuple of a line and a text version that that line
658
is present in (not introduced in).
660
Ordering of results is in whatever order is most suitable for the
661
underlying storage format.
663
If a progress bar is supplied, it may be used to indicate progress.
664
The caller is responsible for cleaning up progress bars (because this
667
NOTES: Lines are normalised: they will all have \n terminators.
668
Lines are returned in arbitrary order.
670
:return: An iterator over (line, version_id).
672
raise NotImplementedError(self.iter_lines_added_or_present_in_versions)
674
def plan_merge(self, ver_a, ver_b, base=None):
675
"""Return pseudo-annotation indicating how the two versions merge.
677
This is computed between versions a and b and their common
680
Weave lines present in none of them are skipped entirely.
683
killed-base Dead in base revision
684
killed-both Killed in each revision
687
unchanged Alive in both a and b (possibly created in both)
690
ghost-a Killed in a, unborn in b
691
ghost-b Killed in b, unborn in a
692
irrelevant Not in either revision
694
raise NotImplementedError(VersionedFile.plan_merge)
696
def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
697
b_marker=TextMerge.B_MARKER):
698
return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
701
class RecordingVersionedFilesDecorator(object):
    """A minimal versioned files that records calls made on it.

    Only enough methods have been added to support tests using it to date.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to it.
    """

    def __init__(self, backing_vf):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        """
        self._backing_vf = backing_vf
        # ``self.calls = []`` was dropped in transit; restored — every
        # recording method below appends to it, and the class docstring
        # documents it.
        self.calls = []

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        self.calls.append(("add_lines", key, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content))
        return self._backing_vf.add_lines(key, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content)

    def check(self):
        # Not recorded in self.calls; purely delegated.
        self._backing_vf.check()

    def get_parent_map(self, keys):
        self.calls.append(("get_parent_map", copy(keys)))
        return self._backing_vf.get_parent_map(keys)

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", list(keys), sort_order,
            include_delta_closure))
        return self._backing_vf.get_record_stream(keys, sort_order,
            include_delta_closure)

    def get_sha1s(self, keys):
        self.calls.append(("get_sha1s", copy(keys)))
        return self._backing_vf.get_sha1s(keys)

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        self.calls.append(("iter_lines_added_or_present_in_keys", copy(keys)))
        return self._backing_vf.iter_lines_added_or_present_in_keys(keys, pb=pb)

    def keys(self):
        self.calls.append(("keys",))
        return self._backing_vf.keys()
752
class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
    """A VF that records calls, and returns keys in specific order.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to it.
    """

    def __init__(self, backing_vf, key_priority):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        :param key_priority: A dictionary defining what order keys should be
            returned from an 'unordered' get_record_stream request.
            Keys with lower priority are returned first, keys not present in
            the map get an implicit priority of 0, and are returned in
            lexicographical order.
        """
        RecordingVersionedFilesDecorator.__init__(self, backing_vf)
        self._key_priority = key_priority

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", list(keys), sort_order,
            include_delta_closure))
        # NOTE(review): the sort_key header, both ``yield record`` lines and
        # the ``else:`` were dropped in transit; restored from the visible
        # priority tuple and ``sorted(keys, key=sort_key)`` call.
        if sort_order == 'unordered':
            def sort_key(key):
                return (self._key_priority.get(key, 0), key)
            # Impose a defined order by asking for the keys one-by-one from
            # the backing vf.
            for key in sorted(keys, key=sort_key):
                for record in self._backing_vf.get_record_stream([key],
                        'unordered', include_delta_closure):
                    yield record
        else:
            for record in self._backing_vf.get_record_stream(keys, sort_order,
                    include_delta_closure):
                yield record
790
class KeyMapper(object):
    """KeyMappers map between keys and underlying partitioned storage."""

    def map(self, key):
        """Map key to an underlying storage identifier.

        (The ``def map(self, key):`` line was dropped in transit; restored
        from the docstring, the ``raise NotImplementedError(self.map)`` body
        and the parallel unmap method.)

        :param key: A key tuple e.g. (b'file-id', b'revision-id').
        :return: An underlying storage identifier, specific to the partitioning
            scheme.
        """
        raise NotImplementedError(self.map)

    def unmap(self, partition_id):
        """Map a partitioned storage id back to a key prefix.

        :param partition_id: The underlying partition id.
        :return: As much of a key (or prefix) as is derivable from the
            partition id.
        """
        raise NotImplementedError(self.unmap)
812
class ConstantMapper(KeyMapper):
    """A key mapper that maps to a constant result."""

    def __init__(self, result):
        """Create a ConstantMapper which will return result for all maps."""
        self._result = result

    def map(self, key):
        """See KeyMapper.map()."""
        # The def line and return were dropped in transit; restored from the
        # __init__ docstring ("return result for all maps").
        return self._result
824
class URLEscapeMapper(KeyMapper):
    """Base class for use with transport backed storage.

    This provides a map and unmap wrapper that respectively url escape and
    unescape their outputs and inputs.
    """

    def map(self, key):
        """See KeyMapper.map()."""
        # ``def map(self, key):`` was dropped in transit; restored from the
        # visible docstring/body and the parallel unmap method.
        return urlutils.quote(self._map(key))

    def unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return self._unmap(urlutils.unquote(partition_id))
840
class PrefixMapper(URLEscapeMapper):
    """A key mapper that extracts the first component of a key.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        # ``def _map(self, key):`` was dropped in transit; restored — the
        # URLEscapeMapper base calls self._map, and _unmap is its inverse.
        return key[0].decode('utf-8')

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (partition_id.encode('utf-8'),)
855
class HashPrefixMapper(URLEscapeMapper):
    """A key mapper that combines the first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        # ``def _map(self, key):`` was dropped in transit; restored — the
        # URLEscapeMapper base calls self._map.
        prefix = self._escape(key[0])
        # One of 256 hash buckets, then the (possibly escaped) prefix.
        return "%02x/%s" % (adler32(prefix) & 0xff, prefix.decode('utf-8'))

    def _escape(self, prefix):
        """No escaping needed here."""
        # The ``return prefix`` line was dropped in transit; restored from
        # the docstring ("no escaping needed" == identity).
        return prefix

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (self._unescape(osutils.basename(partition_id)).encode('utf-8'),)

    def _unescape(self, basename):
        """No unescaping needed for HashPrefixMapper."""
        # Identity, per the docstring; the return line was dropped in transit.
        return basename
879
class HashEscapedPrefixMapper(HashPrefixMapper):
    """Combines the escaped first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    _safe = bytearray(b"abcdefghijklmnopqrstuvwxyz0123456789-_@,.")

    def _escape(self, prefix):
        """Turn a key element into a filesystem safe string.

        This is similar to a plain urlutils.quote, except
        it uses specific safe characters, so that it doesn't
        have to translate a lot of valid file ids.
        """
        # @ does not get escaped. This is because it is a valid
        # filesystem character we use all the time, and it looks
        # a lot better than seeing %40 all the time.
        # Conditional expression replaces the old ``and/or`` ternary idiom;
        # equivalent here because chr(c) is always a non-empty (truthy)
        # string, and clearer about intent.
        r = [chr(c) if c in self._safe else '%%%02x' % c
             for c in bytearray(prefix)]
        return ''.join(r).encode('ascii')

    def _unescape(self, basename):
        """Escaped names are easily unescaped by urlutils."""
        return urlutils.unquote(basename)
906
def make_versioned_files_factory(versioned_file_factory, mapper):
907
"""Create a ThunkedVersionedFiles factory.
909
This will create a callable which when called creates a
910
ThunkedVersionedFiles on a transport, using mapper to access individual
911
versioned files, and versioned_file_factory to create each individual file.
913
def factory(transport):
914
return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
919
class VersionedFiles(object):
920
"""Storage for many versioned files.
922
This object allows a single keyspace for accessing the history graph and
923
contents of named bytestrings.
925
Currently no implementation allows the graph of different key prefixes to
926
intersect, but the API does allow such implementations in the future.
928
The keyspace is expressed via simple tuples. Any instance of VersionedFiles
929
may have a different length key-size, but that size will be constant for
930
all texts added to or retrieved from it. For instance, breezy uses
931
instances with a key-size of 2 for storing user files in a repository, with
932
the first element the fileid, and the second the version of that file.
934
The use of tuples allows a single code base to support several different
935
uses with only the mapping logic changing from instance to instance.
937
:ivar _immediate_fallback_vfs: For subclasses that support stacking,
938
this is a list of other VersionedFiles immediately underneath this
939
one. They may in turn each have further fallbacks.
942
def add_lines(self, key, parents, lines, parent_texts=None,
943
left_matching_blocks=None, nostore_sha=None, random_id=False,
945
"""Add a text to the store.
947
:param key: The key tuple of the text to add. If the last element is
948
None, a CHK string will be generated during the addition.
949
:param parents: The parents key tuples of the text to add.
950
:param lines: A list of lines. Each line must be a bytestring. And all
951
of them except the last must be terminated with \n and contain no
952
other \n's. The last line may either contain no \n's or a single
953
terminating \n. If the lines list does meet this constraint the add
954
routine may error or may succeed - but you will be unable to read
955
the data back accurately. (Checking the lines have been split
956
correctly is expensive and extremely unlikely to catch bugs so it
957
is not done at runtime unless check_content is True.)
958
:param parent_texts: An optional dictionary containing the opaque
959
representations of some or all of the parents of version_id to
960
allow delta optimisations. VERY IMPORTANT: the texts must be those
961
returned by add_lines or data corruption can be caused.
962
:param left_matching_blocks: a hint about which areas are common
963
between the text and its left-hand-parent. The format is
964
the SequenceMatcher.get_matching_blocks format.
965
:param nostore_sha: Raise ExistingContent and do not add the lines to
966
the versioned file if the digest of the lines matches this.
967
:param random_id: If True a random id has been selected rather than
968
an id determined by some deterministic process such as a converter
969
from a foreign VCS. When True the backend may choose not to check
970
for uniqueness of the resulting key within the versioned file, so
971
this should only be done when the result is expected to be unique
973
:param check_content: If True, the lines supplied are verified to be
974
bytestrings that are correctly formed lines.
975
:return: The text sha1, the number of bytes in the text, and an opaque
976
representation of the inserted version which can be provided
977
back to future add_lines calls in the parent_texts dictionary.
979
raise NotImplementedError(self.add_lines)
981
def add_mpdiffs(self, records):
982
"""Add mpdiffs to this VersionedFile.
984
Records should be iterables of version, parents, expected_sha1,
985
mpdiff. mpdiff should be a MultiParent instance.
988
mpvf = multiparent.MultiMemoryVersionedFile()
990
for version, parent_ids, expected_sha1, mpdiff in records:
991
versions.append(version)
992
mpvf.add_diff(mpdiff, version, parent_ids)
993
needed_parents = set()
994
for version, parent_ids, expected_sha1, mpdiff in records:
995
needed_parents.update(p for p in parent_ids
996
if not mpvf.has_version(p))
997
# It seems likely that adding all the present parents as fulltexts can
998
# easily exhaust memory.
999
chunks_to_lines = osutils.chunks_to_lines
1000
for record in self.get_record_stream(needed_parents, 'unordered',
1002
if record.storage_kind == 'absent':
1004
mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
1006
for (key, parent_keys, expected_sha1, mpdiff), lines in zip(
1007
records, mpvf.get_line_list(versions)):
1008
if len(parent_keys) == 1:
1009
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
1010
mpvf.get_diff(parent_keys[0]).num_lines()))
1012
left_matching_blocks = None
1013
version_sha1, _, version_text = self.add_lines(key,
1014
parent_keys, lines, vf_parents,
1015
left_matching_blocks=left_matching_blocks)
1016
if version_sha1 != expected_sha1:
1017
raise errors.VersionedFileInvalidChecksum(version)
1018
vf_parents[key] = version_text
1020
def annotate(self, key):
1021
"""Return a list of (version-key, line) tuples for the text of key.
1023
:raise RevisionNotPresent: If the key is not present.
1025
raise NotImplementedError(self.annotate)
1027
def check(self, progress_bar=None):
1028
"""Check this object for integrity.
1030
:param progress_bar: A progress bar to output as the check progresses.
1031
:param keys: Specific keys within the VersionedFiles to check. When
1032
this parameter is not None, check() becomes a generator as per
1033
get_record_stream. The difference to get_record_stream is that
1034
more or deeper checks will be performed.
1035
:return: None, or if keys was supplied a generator as per
1038
raise NotImplementedError(self.check)
1041
def check_not_reserved_id(version_id):
1042
revision.check_not_reserved_id(version_id)
1044
def clear_cache(self):
1045
"""Clear whatever caches this VersionedFile holds.
1047
This is generally called after an operation has been performed, when we
1048
don't expect to be using this versioned file again soon.
1051
def _check_lines_not_unicode(self, lines):
1052
"""Check that lines being added to a versioned file are not unicode."""
1054
if line.__class__ is not bytes:
1055
raise errors.BzrBadParameterUnicode("lines")
1057
def _check_lines_are_lines(self, lines):
1058
"""Check that the lines really are full lines without inline EOL."""
1060
if b'\n' in line[:-1]:
1061
raise errors.BzrBadParameterContainsNewline("lines")
1063
def get_known_graph_ancestry(self, keys):
1064
"""Get a KnownGraph instance with the ancestry of keys."""
1065
# most basic implementation is a loop around get_parent_map
1069
this_parent_map = self.get_parent_map(pending)
1070
parent_map.update(this_parent_map)
1071
pending = set(itertools.chain.from_iterable(
1072
viewvalues(this_parent_map)))
1073
pending.difference_update(parent_map)
1074
kg = _mod_graph.KnownGraph(parent_map)
1077
def get_parent_map(self, keys):
1078
"""Get a map of the parents of keys.
1080
:param keys: The keys to look up parents for.
1081
:return: A mapping from keys to parents. Absent keys are absent from
1084
raise NotImplementedError(self.get_parent_map)
1086
def get_record_stream(self, keys, ordering, include_delta_closure):
1087
"""Get a stream of records for keys.
1089
:param keys: The keys to include.
1090
:param ordering: Either 'unordered' or 'topological'. A topologically
1091
sorted stream has compression parents strictly before their
1093
:param include_delta_closure: If True then the closure across any
1094
compression parents will be included (in the opaque data).
1095
:return: An iterator of ContentFactory objects, each of which is only
1096
valid until the iterator is advanced.
1098
raise NotImplementedError(self.get_record_stream)
1100
def get_sha1s(self, keys):
1101
"""Get the sha1's of the texts for the given keys.
1103
:param keys: The names of the keys to lookup
1104
:return: a dict from key to sha1 digest. Keys of texts which are not
1105
present in the store are not present in the returned
1108
raise NotImplementedError(self.get_sha1s)
1110
__contains__ = index._has_key_from_parent_map
1112
def get_missing_compression_parent_keys(self):
1113
"""Return an iterable of keys of missing compression parents.
1115
Check this after calling insert_record_stream to find out if there are
1116
any missing compression parents. If there are, the records that
1117
depend on them are not able to be inserted safely. The precise
1118
behaviour depends on the concrete VersionedFiles class in use.
1120
Classes that do not support this will raise NotImplementedError.
1122
raise NotImplementedError(self.get_missing_compression_parent_keys)
1124
def insert_record_stream(self, stream):
1125
"""Insert a record stream into this container.
1127
:param stream: A stream of records to insert.
1129
:seealso VersionedFile.get_record_stream:
1131
raise NotImplementedError
1133
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1134
"""Iterate over the lines in the versioned files from keys.
1136
This may return lines from other keys. Each item the returned
1137
iterator yields is a tuple of a line and a text version that that line
1138
is present in (not introduced in).
1140
Ordering of results is in whatever order is most suitable for the
1141
underlying storage format.
1143
If a progress bar is supplied, it may be used to indicate progress.
1144
The caller is responsible for cleaning up progress bars (because this
1148
* Lines are normalised by the underlying store: they will all have \n
1150
* Lines are returned in arbitrary order.
1152
:return: An iterator over (line, key).
1154
raise NotImplementedError(self.iter_lines_added_or_present_in_keys)
1157
"""Return a iterable of the keys for all the contained texts."""
1158
raise NotImplementedError(self.keys)
1160
def make_mpdiffs(self, keys):
1161
"""Create multiparent diffs for specified keys."""
1162
generator = _MPDiffGenerator(self, keys)
1163
return generator.compute_diffs()
1165
def get_annotator(self):
1166
return annotate.Annotator(self)
1168
missing_keys = index._missing_keys_from_parent_map
1170
def _extract_blocks(self, version_id, source, target):
1173
def _transitive_fallbacks(self):
1174
"""Return the whole stack of fallback versionedfiles.
1176
This VersionedFiles may have a list of fallbacks, but it doesn't
1177
necessarily know about the whole stack going down, and it can't know
1178
at open time because they may change after the objects are opened.
1181
for a_vfs in self._immediate_fallback_vfs:
1182
all_fallbacks.append(a_vfs)
1183
all_fallbacks.extend(a_vfs._transitive_fallbacks())
1184
return all_fallbacks
1187
class ThunkedVersionedFiles(VersionedFiles):
1188
"""Storage for many versioned files thunked onto a 'VersionedFile' class.
1190
This object allows a single keyspace for accessing the history graph and
1191
contents of named bytestrings.
1193
Currently no implementation allows the graph of different key prefixes to
1194
intersect, but the API does allow such implementations in the future.
1197
def __init__(self, transport, file_factory, mapper, is_locked):
1198
"""Create a ThunkedVersionedFiles."""
1199
self._transport = transport
1200
self._file_factory = file_factory
1201
self._mapper = mapper
1202
self._is_locked = is_locked
1204
def add_lines(self, key, parents, lines, parent_texts=None,
1205
left_matching_blocks=None, nostore_sha=None, random_id=False,
1206
check_content=True):
1207
"""See VersionedFiles.add_lines()."""
1208
path = self._mapper.map(key)
1209
version_id = key[-1]
1210
parents = [parent[-1] for parent in parents]
1211
vf = self._get_vf(path)
1214
return vf.add_lines_with_ghosts(version_id, parents, lines,
1215
parent_texts=parent_texts,
1216
left_matching_blocks=left_matching_blocks,
1217
nostore_sha=nostore_sha, random_id=random_id,
1218
check_content=check_content)
1219
except NotImplementedError:
1220
return vf.add_lines(version_id, parents, lines,
1221
parent_texts=parent_texts,
1222
left_matching_blocks=left_matching_blocks,
1223
nostore_sha=nostore_sha, random_id=random_id,
1224
check_content=check_content)
1225
except errors.NoSuchFile:
1226
# parent directory may be missing, try again.
1227
self._transport.mkdir(osutils.dirname(path))
1229
return vf.add_lines_with_ghosts(version_id, parents, lines,
1230
parent_texts=parent_texts,
1231
left_matching_blocks=left_matching_blocks,
1232
nostore_sha=nostore_sha, random_id=random_id,
1233
check_content=check_content)
1234
except NotImplementedError:
1235
return vf.add_lines(version_id, parents, lines,
1236
parent_texts=parent_texts,
1237
left_matching_blocks=left_matching_blocks,
1238
nostore_sha=nostore_sha, random_id=random_id,
1239
check_content=check_content)
1241
def annotate(self, key):
1242
"""Return a list of (version-key, line) tuples for the text of key.
1244
:raise RevisionNotPresent: If the key is not present.
1247
path = self._mapper.map(prefix)
1248
vf = self._get_vf(path)
1249
origins = vf.annotate(key[-1])
1251
for origin, line in origins:
1252
result.append((prefix + (origin,), line))
1255
def check(self, progress_bar=None, keys=None):
1256
"""See VersionedFiles.check()."""
1257
# XXX: This is over-enthusiastic but as we only thunk for Weaves today
1258
# this is tolerable. Ideally we'd pass keys down to check() and
1259
# have the older VersiondFile interface updated too.
1260
for prefix, vf in self._iter_all_components():
1262
if keys is not None:
1263
return self.get_record_stream(keys, 'unordered', True)
1265
def get_parent_map(self, keys):
1266
"""Get a map of the parents of keys.
1268
:param keys: The keys to look up parents for.
1269
:return: A mapping from keys to parents. Absent keys are absent from
1272
prefixes = self._partition_keys(keys)
1274
for prefix, suffixes in viewitems(prefixes):
1275
path = self._mapper.map(prefix)
1276
vf = self._get_vf(path)
1277
parent_map = vf.get_parent_map(suffixes)
1278
for key, parents in viewitems(parent_map):
1279
result[prefix + (key,)] = tuple(
1280
prefix + (parent,) for parent in parents)
1283
def _get_vf(self, path):
1284
if not self._is_locked():
1285
raise errors.ObjectNotLocked(self)
1286
return self._file_factory(path, self._transport, create=True,
1287
get_scope=lambda: None)
1289
def _partition_keys(self, keys):
1290
"""Turn keys into a dict of prefix:suffix_list."""
1293
prefix_keys = result.setdefault(key[:-1], [])
1294
prefix_keys.append(key[-1])
1297
def _iter_all_prefixes(self):
1298
# Identify all key prefixes.
1299
# XXX: A bit hacky, needs polish.
1300
if isinstance(self._mapper, ConstantMapper):
1301
paths = [self._mapper.map(())]
1305
for quoted_relpath in self._transport.iter_files_recursive():
1306
path, ext = os.path.splitext(quoted_relpath)
1308
paths = list(relpaths)
1309
prefixes = [self._mapper.unmap(path) for path in paths]
1310
return zip(paths, prefixes)
1312
def get_record_stream(self, keys, ordering, include_delta_closure):
1313
"""See VersionedFiles.get_record_stream()."""
1314
# Ordering will be taken care of by each partitioned store; group keys
1317
for prefix, suffixes, vf in self._iter_keys_vf(keys):
1318
suffixes = [(suffix,) for suffix in suffixes]
1319
for record in vf.get_record_stream(suffixes, ordering,
1320
include_delta_closure):
1321
if record.parents is not None:
1322
record.parents = tuple(
1323
prefix + parent for parent in record.parents)
1324
record.key = prefix + record.key
1327
def _iter_keys_vf(self, keys):
1328
prefixes = self._partition_keys(keys)
1330
for prefix, suffixes in viewitems(prefixes):
1331
path = self._mapper.map(prefix)
1332
vf = self._get_vf(path)
1333
yield prefix, suffixes, vf
1335
def get_sha1s(self, keys):
1336
"""See VersionedFiles.get_sha1s()."""
1338
for prefix, suffixes, vf in self._iter_keys_vf(keys):
1339
vf_sha1s = vf.get_sha1s(suffixes)
1340
for suffix, sha1 in viewitems(vf_sha1s):
1341
sha1s[prefix + (suffix,)] = sha1
1344
def insert_record_stream(self, stream):
1345
"""Insert a record stream into this container.
1347
:param stream: A stream of records to insert.
1349
:seealso VersionedFile.get_record_stream:
1351
for record in stream:
1352
prefix = record.key[:-1]
1353
key = record.key[-1:]
1354
if record.parents is not None:
1355
parents = [parent[-1:] for parent in record.parents]
1358
thunk_record = AdapterFactory(key, parents, record)
1359
path = self._mapper.map(prefix)
1360
# Note that this parses the file many times; we can do better but
1361
# as this only impacts weaves in terms of performance, it is
1363
vf = self._get_vf(path)
1364
vf.insert_record_stream([thunk_record])
1366
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1367
"""Iterate over the lines in the versioned files from keys.
1369
This may return lines from other keys. Each item the returned
1370
iterator yields is a tuple of a line and a text version that that line
1371
is present in (not introduced in).
1373
Ordering of results is in whatever order is most suitable for the
1374
underlying storage format.
1376
If a progress bar is supplied, it may be used to indicate progress.
1377
The caller is responsible for cleaning up progress bars (because this
1381
* Lines are normalised by the underlying store: they will all have \n
1383
* Lines are returned in arbitrary order.
1385
:return: An iterator over (line, key).
1387
for prefix, suffixes, vf in self._iter_keys_vf(keys):
1388
for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
1389
yield line, prefix + (version,)
1391
def _iter_all_components(self):
1392
for path, prefix in self._iter_all_prefixes():
1393
yield prefix, self._get_vf(path)
1396
"""See VersionedFiles.keys()."""
1398
for prefix, vf in self._iter_all_components():
1399
for suffix in vf.versions():
1400
result.add(prefix + (suffix,))
1404
class VersionedFilesWithFallbacks(VersionedFiles):
1406
def without_fallbacks(self):
1407
"""Return a clone of this object without any fallbacks configured."""
1408
raise NotImplementedError(self.without_fallbacks)
1410
def add_fallback_versioned_files(self, a_versioned_files):
1411
"""Add a source of texts for texts not present in this knit.
1413
:param a_versioned_files: A VersionedFiles object.
1415
raise NotImplementedError(self.add_fallback_versioned_files)
1417
def get_known_graph_ancestry(self, keys):
1418
"""Get a KnownGraph instance with the ancestry of keys."""
1419
parent_map, missing_keys = self._index.find_ancestry(keys)
1420
for fallback in self._transitive_fallbacks():
1421
if not missing_keys:
1423
(f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
1425
parent_map.update(f_parent_map)
1426
missing_keys = f_missing_keys
1427
kg = _mod_graph.KnownGraph(parent_map)
1431
class _PlanMergeVersionedFile(VersionedFiles):
1432
"""A VersionedFile for uncommitted and committed texts.
1434
It is intended to allow merges to be planned with working tree texts.
1435
It implements only the small part of the VersionedFiles interface used by
1436
PlanMerge. It falls back to multiple versionedfiles for data not stored in
1437
_PlanMergeVersionedFile itself.
1439
:ivar: fallback_versionedfiles a list of VersionedFiles objects that can be
1440
queried for missing texts.
1443
def __init__(self, file_id):
1444
"""Create a _PlanMergeVersionedFile.
1446
:param file_id: Used with _PlanMerge code which is not yet fully
1447
tuple-keyspace aware.
1449
self._file_id = file_id
1450
# fallback locations
1451
self.fallback_versionedfiles = []
1452
# Parents for locally held keys.
1454
# line data for locally held keys.
1456
# key lookup providers
1457
self._providers = [_mod_graph.DictParentsProvider(self._parents)]
1459
def plan_merge(self, ver_a, ver_b, base=None):
1460
"""See VersionedFile.plan_merge"""
1461
from ..merge import _PlanMerge
1463
return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
1464
old_plan = list(_PlanMerge(ver_a, base, self,
1465
(self._file_id,)).plan_merge())
1466
new_plan = list(_PlanMerge(ver_a, ver_b, self,
1467
(self._file_id,)).plan_merge())
1468
return _PlanMerge._subtract_plans(old_plan, new_plan)
1470
def plan_lca_merge(self, ver_a, ver_b, base=None):
1471
from ..merge import _PlanLCAMerge
1472
graph = _mod_graph.Graph(self)
1473
new_plan = _PlanLCAMerge(
1474
ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
1477
old_plan = _PlanLCAMerge(
1478
ver_a, base, self, (self._file_id,), graph).plan_merge()
1479
return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))
1481
def add_lines(self, key, parents, lines):
1482
"""See VersionedFiles.add_lines
1484
Lines are added locally, not to fallback versionedfiles. Also, ghosts
1485
are permitted. Only reserved ids are permitted.
1487
if not isinstance(key, tuple):
1488
raise TypeError(key)
1489
if not revision.is_reserved_id(key[-1]):
1490
raise ValueError('Only reserved ids may be used')
1492
raise ValueError('Parents may not be None')
1494
raise ValueError('Lines may not be None')
1495
self._parents[key] = tuple(parents)
1496
self._lines[key] = lines
1498
def get_record_stream(self, keys, ordering, include_delta_closure):
1501
if key in self._lines:
1502
lines = self._lines[key]
1503
parents = self._parents[key]
1505
yield ChunkedContentFactory(key, parents, None, lines)
1506
for versionedfile in self.fallback_versionedfiles:
1507
for record in versionedfile.get_record_stream(
1508
pending, 'unordered', True):
1509
if record.storage_kind == 'absent':
1512
pending.remove(record.key)
1516
# report absent entries
1518
yield AbsentContentFactory(key)
1520
def get_parent_map(self, keys):
1521
"""See VersionedFiles.get_parent_map"""
1522
# We create a new provider because a fallback may have been added.
1523
# If we make fallbacks private we can update a stack list and avoid
1524
# object creation thrashing.
1527
if revision.NULL_REVISION in keys:
1528
keys.remove(revision.NULL_REVISION)
1529
result[revision.NULL_REVISION] = ()
1530
self._providers = self._providers[:1] + self.fallback_versionedfiles
1532
_mod_graph.StackedParentsProvider(
1533
self._providers).get_parent_map(keys))
1534
for key, parents in viewitems(result):
1536
result[key] = (revision.NULL_REVISION,)
1540
class PlanWeaveMerge(TextMerge):
1541
"""Weave merge that takes a plan as its input.
1543
This exists so that VersionedFile.plan_merge is implementable.
1544
Most callers will want to use WeaveMerge instead.
1547
def __init__(self, plan, a_marker=TextMerge.A_MARKER,
1548
b_marker=TextMerge.B_MARKER):
1549
TextMerge.__init__(self, a_marker, b_marker)
1550
self.plan = list(plan)
1552
def _merge_struct(self):
1557
def outstanding_struct():
1558
if not lines_a and not lines_b:
1560
elif ch_a and not ch_b:
1563
elif ch_b and not ch_a:
1565
elif lines_a == lines_b:
1568
yield (lines_a, lines_b)
1570
# We previously considered either 'unchanged' or 'killed-both' lines
1571
# to be possible places to resynchronize. However, assuming agreement
1572
# on killed-both lines may be too aggressive. -- mbp 20060324
1573
for state, line in self.plan:
1574
if state == 'unchanged':
1575
# resync and flush queued conflicts changes if any
1576
for struct in outstanding_struct():
1582
if state == 'unchanged':
1585
elif state == 'killed-a':
1587
lines_b.append(line)
1588
elif state == 'killed-b':
1590
lines_a.append(line)
1591
elif state == 'new-a':
1593
lines_a.append(line)
1594
elif state == 'new-b':
1596
lines_b.append(line)
1597
elif state == 'conflicted-a':
1599
lines_a.append(line)
1600
elif state == 'conflicted-b':
1602
lines_b.append(line)
1603
elif state == 'killed-both':
1604
# This counts as a change, even though there is no associated
1608
if state not in ('irrelevant', 'ghost-a', 'ghost-b',
1610
raise AssertionError(state)
1611
for struct in outstanding_struct():
1614
def base_from_plan(self):
1615
"""Construct a BASE file from the plan text."""
1617
for state, line in self.plan:
1618
if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
1619
# If unchanged, then this line is straight from base. If a or b
1620
# or both killed the line, then it *used* to be in base.
1621
base_lines.append(line)
1623
if state not in ('killed-base', 'irrelevant',
1624
'ghost-a', 'ghost-b',
1626
'conflicted-a', 'conflicted-b'):
1627
# killed-base, irrelevant means it doesn't apply
1628
# ghost-a/ghost-b are harder to say for sure, but they
1629
# aren't in the 'inc_c' which means they aren't in the
1630
# shared base of a & b. So we don't include them. And
1631
# obviously if the line is newly inserted, it isn't in base
1633
# If 'conflicted-a' or b, then it is new vs one base, but
1634
# old versus another base. However, if we make it present
1635
# in the base, it will be deleted from the target, and it
1636
# seems better to get a line doubled in the merge result,
1637
# rather than have it deleted entirely.
1638
# Example, each node is the 'text' at that point:
1646
# There was a criss-cross conflict merge. Both sides
1647
# include the other, but put themselves first.
1648
# Weave marks this as a 'clean' merge, picking OTHER over
1649
# THIS. (Though the details depend on order inserted into
1651
# LCA generates a plan:
1652
# [('unchanged', M),
1653
# ('conflicted-b', b),
1655
# ('conflicted-a', b),
1657
# If you mark 'conflicted-*' as part of BASE, then a 3-way
1658
# merge tool will cleanly generate "MaN" (as BASE vs THIS
1659
# removes one 'b', and BASE vs OTHER removes the other)
1660
# If you include neither, 3-way creates a clean "MbabN" as
1661
# THIS adds one 'b', and OTHER does too.
1662
# It seems that having the line 2 times is better than
1663
# having it omitted. (Easier to manually delete than notice
1664
# it needs to be added.)
1665
raise AssertionError('Unknown state: %s' % (state,))
1669
class WeaveMerge(PlanWeaveMerge):
1670
"""Weave merge that takes a VersionedFile and two versions as its input."""
1672
def __init__(self, versionedfile, ver_a, ver_b,
1673
a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
1674
plan = versionedfile.plan_merge(ver_a, ver_b)
1675
PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
1678
class VirtualVersionedFiles(VersionedFiles):
1679
"""Dummy implementation for VersionedFiles that uses other functions for
1680
obtaining fulltexts and parent maps.
1682
This is always on the bottom of the stack and uses string keys
1683
(rather than tuples) internally.
1686
def __init__(self, get_parent_map, get_lines):
1687
"""Create a VirtualVersionedFiles.
1689
:param get_parent_map: Same signature as Repository.get_parent_map.
1690
:param get_lines: Should return lines for specified key or None if
1693
super(VirtualVersionedFiles, self).__init__()
1694
self._get_parent_map = get_parent_map
1695
self._get_lines = get_lines
1697
def check(self, progressbar=None):
1698
"""See VersionedFiles.check.
1700
:note: Always returns True for VirtualVersionedFiles.
1704
def add_mpdiffs(self, records):
1705
"""See VersionedFiles.mpdiffs.
1707
:note: Not implemented for VirtualVersionedFiles.
1709
raise NotImplementedError(self.add_mpdiffs)
1711
def get_parent_map(self, keys):
1712
"""See VersionedFiles.get_parent_map."""
1713
parent_view = viewitems(self._get_parent_map(k for (k,) in keys))
1714
return dict(((k,), tuple((p,) for p in v)) for k, v in parent_view)
1716
def get_sha1s(self, keys):
1717
"""See VersionedFiles.get_sha1s."""
1720
lines = self._get_lines(k)
1721
if lines is not None:
1722
if not isinstance(lines, list):
1723
raise AssertionError
1724
ret[(k,)] = osutils.sha_strings(lines)
1727
def get_record_stream(self, keys, ordering, include_delta_closure):
1728
"""See VersionedFiles.get_record_stream."""
1729
for (k,) in list(keys):
1730
lines = self._get_lines(k)
1731
if lines is not None:
1732
if not isinstance(lines, list):
1733
raise AssertionError
1734
yield ChunkedContentFactory((k,), None,
1735
sha1=osutils.sha_strings(lines),
1738
yield AbsentContentFactory((k,))
1740
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
1741
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
1742
for i, (key,) in enumerate(keys):
1744
pb.update("Finding changed lines", i, len(keys))
1745
for l in self._get_lines(key):
1749
class NoDupeAddLinesDecorator(object):
1750
"""Decorator for a VersionedFiles that skips doing an add_lines if the key
1754
def __init__(self, store):
1757
def add_lines(self, key, parents, lines, parent_texts=None,
1758
left_matching_blocks=None, nostore_sha=None, random_id=False,
1759
check_content=True):
1760
"""See VersionedFiles.add_lines.
1762
This implementation may return None as the third element of the return
1763
value when the original store wouldn't.
1766
raise NotImplementedError(
1767
"NoDupeAddLinesDecorator.add_lines does not implement the "
1768
"nostore_sha behaviour.")
1770
sha1 = osutils.sha_strings(lines)
1771
key = (b"sha1:" + sha1,)
1774
if key in self._store.get_parent_map([key]):
1775
# This key has already been inserted, so don't do it again.
1777
sha1 = osutils.sha_strings(lines)
1778
return sha1, sum(map(len, lines)), None
1779
return self._store.add_lines(key, parents, lines,
1780
parent_texts=parent_texts,
1781
left_matching_blocks=left_matching_blocks,
1782
nostore_sha=nostore_sha, random_id=random_id,
1783
check_content=check_content)
1785
def __getattr__(self, name):
1786
return getattr(self._store, name)
1789
def network_bytes_to_kind_and_offset(network_bytes):
1790
"""Strip of a record kind from the front of network_bytes.
1792
:param network_bytes: The bytes of a record.
1793
:return: A tuple (storage_kind, offset_of_remaining_bytes)
1795
line_end = network_bytes.find(b'\n')
1796
storage_kind = network_bytes[:line_end].decode('ascii')
1797
return storage_kind, line_end + 1
1800
class NetworkRecordStream(object):
1801
"""A record_stream which reconstitures a serialised stream."""
1803
def __init__(self, bytes_iterator):
1804
"""Create a NetworkRecordStream.
1806
:param bytes_iterator: An iterator of bytes. Each item in this
1807
iterator should have been obtained from a record_streams'
1808
record.get_bytes_as(record.storage_kind) call.
1810
self._bytes_iterator = bytes_iterator
1811
self._kind_factory = {
1812
'fulltext': fulltext_network_to_record,
1813
'groupcompress-block': groupcompress.network_block_to_records,
1814
'knit-ft-gz': knit.knit_network_to_record,
1815
'knit-delta-gz': knit.knit_network_to_record,
1816
'knit-annotated-ft-gz': knit.knit_network_to_record,
1817
'knit-annotated-delta-gz': knit.knit_network_to_record,
1818
'knit-delta-closure': knit.knit_delta_closure_to_records,
1824
:return: An iterator as per VersionedFiles.get_record_stream().
1826
for bytes in self._bytes_iterator:
1827
storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
1828
for record in self._kind_factory[storage_kind](
1829
storage_kind, bytes, line_end):
1833
def fulltext_network_to_record(kind, bytes, line_end):
1834
"""Convert a network fulltext record to record."""
1835
meta_len, = struct.unpack('!L', bytes[line_end:line_end + 4])
1836
record_meta = bytes[line_end + 4:line_end + 4 + meta_len]
1837
key, parents = bencode.bdecode_as_tuple(record_meta)
1838
if parents == b'nil':
1840
fulltext = bytes[line_end + 4 + meta_len:]
1841
return [FulltextContentFactory(key, parents, None, fulltext)]
1844
def _length_prefix(bytes):
1845
return struct.pack('!L', len(bytes))
1848
def record_to_fulltext_bytes(record):
1849
if record.parents is None:
1852
parents = record.parents
1853
record_meta = bencode.bencode((record.key, parents))
1854
record_content = record.get_bytes_as('fulltext')
1855
return b"fulltext\n%s%s%s" % (
1856
_length_prefix(record_meta), record_meta, record_content)
1859
def sort_groupcompress(parent_map):
1860
"""Sort and group the keys in parent_map into groupcompress order.
1862
groupcompress is defined (currently) as reverse-topological order, grouped
1865
:return: A sorted-list of keys
1867
# gc-optimal ordering is approximately reverse topological,
1868
# properly grouped by file-id.
1870
for item in viewitems(parent_map):
1872
if isinstance(key, bytes) or len(key) == 1:
1877
per_prefix_map[prefix].append(item)
1879
per_prefix_map[prefix] = [item]
1882
for prefix in sorted(per_prefix_map):
1883
present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
1887
class _KeyRefs(object):
1889
def __init__(self, track_new_keys=False):
1890
# dict mapping 'key' to 'set of keys referring to that key'
1893
# set remembering all new keys
1894
self.new_keys = set()
1896
self.new_keys = None
1902
self.new_keys.clear()
1904
def add_references(self, key, refs):
1905
# Record the new references
1906
for referenced in refs:
1908
needed_by = self.refs[referenced]
1910
needed_by = self.refs[referenced] = set()
1912
# Discard references satisfied by the new key
1915
def get_new_keys(self):
1916
return self.new_keys
1918
def get_unsatisfied_refs(self):
1919
return self.refs.keys()
1921
def _satisfy_refs_for_key(self, key):
1925
# No keys depended on this key. That's ok.
1928
def add_key(self, key):
1929
# satisfy refs for key, and remember that we've seen this key.
1930
self._satisfy_refs_for_key(key)
1931
if self.new_keys is not None:
1932
self.new_keys.add(key)
1934
def satisfy_refs_for_keys(self, keys):
1936
self._satisfy_refs_for_key(key)
1938
def get_referrers(self):
1939
return set(itertools.chain.from_iterable(viewvalues(self.refs)))