# Copyright (C) 2006-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

from __future__ import absolute_import

"""Versioned text file storage api."""

from copy import copy
from cStringIO import StringIO
import os
import urllib
from zlib import adler32

from bzrlib.lazy_import import lazy_import
# NOTE: the original lazy import list was not preserved; this reconstruction
# covers the names actually used in this module.
lazy_import(globals(), """
from bzrlib import (
    annotate,
    errors,
    graph as _mod_graph,
    index,
    multiparent,
    osutils,
    revision,
    )
""")
from bzrlib.registry import Registry
from bzrlib.textmerge import TextMerge
adapter_registry = Registry()
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
    'DeltaPlainToFullText')
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
    'FTPlainToFullText')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
    'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
    'bzrlib.knit', 'DeltaAnnotatedToFullText')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
    'bzrlib.knit', 'FTAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
    'bzrlib.knit', 'FTAnnotatedToFullText')
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
#     'bzrlib.knit', 'FTAnnotatedToChunked')
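
# Example (illustrative sketch, not part of the original module): adapters are
# keyed by (source storage_kind, target storage_kind). Registry.get() resolves
# the lazy registration to the adapter class. `vf` is a hypothetical
# VersionedFiles instance giving the adapter its context, and get_bytes() is
# assumed here to be the adapter method that performs the conversion:
#
#   adapter_cls = adapter_registry.get(('knit-delta-gz', 'fulltext'))
#   adapter = adapter_cls(vf)
#   fulltext = adapter.get_bytes(record)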


class ContentFactory(object):
    """Abstract interface for insertion and retrieval from a VersionedFile.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. One of
        'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
        'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
        'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = None
        self.key = None
        self.parents = None


class ChunkedContentFactory(ContentFactory):
    """Static data content factory.

    This takes a 'chunked' list of strings. The only requirement on 'chunked' is
    that ''.join(lines) becomes a valid fulltext. A tuple of a single string
    satisfies this, as does a list of lines.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'chunked'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, chunks):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.storage_kind = 'chunked'
        self.key = key
        self.parents = parents
        self._chunks = chunks

    def get_bytes_as(self, storage_kind):
        if storage_kind == 'chunked':
            return self._chunks
        elif storage_kind == 'fulltext':
            return ''.join(self._chunks)
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)
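
    # Example (illustrative sketch, not from the original source):
    #
    #   chunks = ['first line\n', 'second line\n']
    #   factory = ChunkedContentFactory(('file-id', 'rev-1'), (), None, chunks)
    #   factory.get_bytes_as('fulltext')  # => 'first line\nsecond line\n'
    #   factory.get_bytes_as('chunked')   # => ['first line\n', 'second line\n']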


class FulltextContentFactory(ContentFactory):
    """Static data content factory.

    This takes a fulltext when created and just returns that during
    get_bytes_as('fulltext').

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'fulltext'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, text):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.storage_kind = 'fulltext'
        self.key = key
        self.parents = parents
        self._text = text

    def get_bytes_as(self, storage_kind):
        if storage_kind == self.storage_kind:
            return self._text
        elif storage_kind == 'chunked':
            return [self._text]
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)


class AbsentContentFactory(ContentFactory):
    """A placeholder content factory for unavailable texts.

    :ivar sha1: None.
    :ivar storage_kind: 'absent'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: None.
    """

    def __init__(self, key):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = 'absent'
        self.key = key
        self.parents = None

    def get_bytes_as(self, storage_kind):
        raise ValueError('A request was made for key: %s, but that'
                         ' content is not available, and the calling'
                         ' code does not handle if it is missing.'
                         % (self.key,))


class AdapterFactory(ContentFactory):
    """A content factory to adapt between key prefixes."""

    def __init__(self, key, parents, adapted):
        """Create an adapter factory instance."""
        self.key = key
        self.parents = parents
        self._adapted = adapted

    def __getattr__(self, attr):
        """Return a member from the adapted object."""
        if attr in ('key', 'parents'):
            return self.__dict__[attr]
        else:
            return getattr(self._adapted, attr)


def filter_absent(record_stream):
    """Adapt a record stream to remove absent records."""
    for record in record_stream:
        if record.storage_kind != 'absent':
            yield record
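
# Example (illustrative sketch; `vf` and `keys` are hypothetical stand-ins for
# a VersionedFiles instance and a list of key tuples):
#
#   for record in filter_absent(vf.get_record_stream(keys, 'unordered', True)):
#       text = record.get_bytes_as('fulltext')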


class _MPDiffGenerator(object):
    """Pull out the functionality for generating mp_diffs."""

    def __init__(self, vf, keys):
        self.vf = vf
        # This is the order the keys were requested in
        self.ordered_keys = tuple(keys)
        # keys + their parents, what we need to compute the diffs
        self.needed_keys = ()
        # Map from key: mp_diff
        self.diffs = {}
        # Map from key: parents_needed (may have ghosts)
        self.parent_map = {}
        # Parents that aren't present
        self.ghost_parents = ()
        # Map from parent_key => number of children for this text
        self.refcounts = {}
        # Content chunks that are cached while we still need them
        self.chunks = {}

    def _find_needed_keys(self):
        """Find the set of keys we need to request.

        This includes all the original keys passed in, and the non-ghost
        parents of those keys.

        :return: (needed_keys, refcounts)
            needed_keys is the set of all texts we need to extract
            refcounts is a dict of {key: num_children} letting us know when we
                no longer need to cache a given parent text
        """
        # All the keys and their parents
        needed_keys = set(self.ordered_keys)
        parent_map = self.vf.get_parent_map(needed_keys)
        self.parent_map = parent_map
        # TODO: Should we be using a different construct here? I think this
        #       uses difference_update internally, and we expect the result to
        #       be tiny
        missing_keys = needed_keys.difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
        # Parents that might be missing. They are allowed to be ghosts, but we
        # should check for them
        refcounts = {}
        setdefault = refcounts.setdefault
        just_parents = set()
        for child_key, parent_keys in parent_map.iteritems():
            if parent_keys is None:
                # parent_keys may be None if a given VersionedFile claims to
                # not support graph operations.
                continue
            just_parents.update(parent_keys)
            needed_keys.update(parent_keys)
            for p in parent_keys:
                refcounts[p] = setdefault(p, 0) + 1
        just_parents.difference_update(parent_map)
        # Remove any parents that are actually ghosts from the needed set
        self.present_parents = set(self.vf.get_parent_map(just_parents))
        self.ghost_parents = just_parents.difference(self.present_parents)
        needed_keys.difference_update(self.ghost_parents)
        self.needed_keys = needed_keys
        self.refcounts = refcounts
        return needed_keys, refcounts

    def _compute_diff(self, key, parent_lines, lines):
        """Compute a single mp_diff, and store it in self._diffs"""
        if len(parent_lines) > 0:
            # XXX: _extract_blocks is not usefully defined anywhere...
            #      It was meant to extract the left-parent diff without
            #      having to recompute it for Knit content (pack-0.92,
            #      etc). That seems to have regressed somewhere
            left_parent_blocks = self.vf._extract_blocks(key,
                parent_lines[0], lines)
        else:
            left_parent_blocks = None
        diff = multiparent.MultiParent.from_lines(lines,
            parent_lines, left_parent_blocks)
        self.diffs[key] = diff

    def _process_one_record(self, key, this_chunks):
        parent_keys = None
        if key in self.parent_map:
            # This record should be ready to diff, since we requested
            # content in 'topological' order
            parent_keys = self.parent_map.pop(key)
            # If a VersionedFile claims 'no-graph' support, then it may return
            # None for any parent request, so we replace it with an empty tuple
            if parent_keys is None:
                parent_keys = ()
            parent_lines = []
            for p in parent_keys:
                # Alternatively we could check p not in self.needed_keys, but
                # ghost_parents should be tiny versus huge
                if p in self.ghost_parents:
                    continue
                refcount = self.refcounts[p]
                if refcount == 1: # Last child reference
                    self.refcounts.pop(p)
                    parent_chunks = self.chunks.pop(p)
                else:
                    self.refcounts[p] = refcount - 1
                    parent_chunks = self.chunks[p]
                p_lines = osutils.chunks_to_lines(parent_chunks)
                # TODO: Should we cache the line form? We did the
                #       computation to get it, but storing it this way will
                #       be less memory efficient...
                parent_lines.append(p_lines)

            lines = osutils.chunks_to_lines(this_chunks)
            # Since we needed the lines, we'll go ahead and cache them this way
            this_chunks = lines
            self._compute_diff(key, parent_lines, lines)
            del lines
        # Is this content required for any more children?
        if key in self.refcounts:
            self.chunks[key] = this_chunks

    def _extract_diffs(self):
        needed_keys, refcounts = self._find_needed_keys()
        for record in self.vf.get_record_stream(needed_keys,
                                                'topological', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self.vf)
            self._process_one_record(record.key,
                                     record.get_bytes_as('chunked'))

    def compute_diffs(self):
        self._extract_diffs()
        dpop = self.diffs.pop
        return [dpop(k) for k in self.ordered_keys]
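
    # Example (illustrative sketch): this generator backs
    # VersionedFiles.make_mpdiffs(); `vf` and `keys` are hypothetical:
    #
    #   diffs = _MPDiffGenerator(vf, keys).compute_diffs()
    #   # diffs[i] is the multiparent.MultiParent diff for keys[i]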


class VersionedFile(object):
    """Versioned text file storage.

    A versioned file manages versions of line-based text files,
    keeping track of the originating version for each line.

    To clients the "lines" of the file are represented as a list of
    strings. These strings will typically have terminal newline
    characters, but this is not required.  In particular files commonly
    do not have a newline at the end of the file.

    Texts are identified by a version-id string.
    """

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def copy_to(self, name, transport):
        """Copy this versioned file to name on transport."""
        raise NotImplementedError(self.copy_to)

    def get_record_stream(self, versions, ordering, include_delta_closure):
        """Get a stream of records for versions.

        :param versions: The versions to include. Each version is a tuple
            (version,).
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the data content of the
            stream, not in the emitted records). This guarantees that
            'fulltext' can be used successfully on every record.
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        raise NotImplementedError(self.get_record_stream)

    def has_version(self, version_id):
        """Returns whether version is present."""
        raise NotImplementedError(self.has_version)

    def insert_record_stream(self, stream):
        """Insert a record stream into this versioned file.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError

    def add_lines(self, version_id, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """Add a single text on top of the versioned file.

        Must raise RevisionAlreadyPresent if the new version is
        already present in file history.

        Must raise RevisionNotPresent if any of the given parents are
        not present in file history.

        :param lines: A list of lines. Each line must be a bytestring. And all
            of them except the last must be terminated with \n and contain no
            other \n's. The last line may either contain no \n's or a single
            terminating \n. If the lines list does not meet this constraint the
            add routine may error or may succeed - but you will be unable to
            read the data back accurately. (Checking the lines have been split
            correctly is expensive and extremely unlikely to catch bugs so it
            is not done at runtime unless check_content is True.)
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations. VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent. The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future add_lines calls in the parent_texts dictionary.
        """
        self._check_write_ok()
        return self._add_lines(version_id, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content)
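
    # Example (illustrative sketch): chaining add_lines calls through
    # parent_texts so the backend can reuse the parent content for deltas.
    # `vf` is a hypothetical concrete VersionedFile:
    #
    #   parent_texts = {}
    #   sha1, nbytes, opaque = vf.add_lines('rev-1', [], ['hello\n'])
    #   parent_texts['rev-1'] = opaque
    #   vf.add_lines('rev-2', ['rev-1'], ['hello\n', 'world\n'],
    #                parent_texts=parent_texts)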

    def _add_lines(self, version_id, parents, lines, parent_texts,
        left_matching_blocks, nostore_sha, random_id, check_content):
        """Helper to do the class specific add_lines."""
        raise NotImplementedError(self.add_lines)

    def add_lines_with_ghosts(self, version_id, parents, lines,
        parent_texts=None, nostore_sha=None, random_id=False,
        check_content=True, left_matching_blocks=None):
        """Add lines to the versioned file, allowing ghosts to be present.

        This takes the same parameters as add_lines and returns the same.
        """
        self._check_write_ok()
        return self._add_lines_with_ghosts(version_id, parents, lines,
            parent_texts, nostore_sha, random_id, check_content, left_matching_blocks)

    def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
        nostore_sha, random_id, check_content, left_matching_blocks):
        """Helper to do class specific add_lines_with_ghosts."""
        raise NotImplementedError(self.add_lines_with_ghosts)

    def check(self, progress_bar=None):
        """Check the versioned file for integrity."""
        raise NotImplementedError(self.check)

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

    def _check_lines_are_lines(self, lines):
        """Check that the lines really are full lines without inline EOL."""
        for line in lines:
            if '\n' in line[:-1]:
                raise errors.BzrBadParameterContainsNewline("lines")

    def get_format_signature(self):
        """Get a text description of the data encoding in this file.

        :since: 0.90
        """
        raise NotImplementedError(self.get_format_signature)

    def make_mpdiffs(self, version_ids):
        """Create multiparent diffs for specified versions."""
        # XXX: Can't use _MPDiffGenerator just yet. This is because version_ids
        #      is a list of strings, not keys. And while self.get_record_stream
        #      is supported, it takes *keys*, while self.get_parent_map() takes
        #      strings... *sigh*
        knit_versions = set()
        knit_versions.update(version_ids)
        parent_map = self.get_parent_map(version_ids)
        for version_id in version_ids:
            try:
                knit_versions.update(parent_map[version_id])
            except KeyError:
                raise errors.RevisionNotPresent(version_id, self)
        # We need to filter out ghosts, because we can't diff against them.
        knit_versions = set(self.get_parent_map(knit_versions).keys())
        lines = dict(zip(knit_versions,
            self._get_lf_split_line_list(knit_versions)))
        diffs = []
        for version_id in version_ids:
            target = lines[version_id]
            try:
                parents = [lines[p] for p in parent_map[version_id] if p in
                    knit_versions]
            except KeyError:
                # I don't know how this could ever trigger.
                # parent_map[version_id] was already triggered in the previous
                # for loop, and lines[p] has the 'if p in knit_versions' check,
                # so we again won't have a KeyError.
                raise errors.RevisionNotPresent(version_id, self)
            if len(parents) > 0:
                left_parent_blocks = self._extract_blocks(version_id,
                    parents[0], target)
            else:
                left_parent_blocks = None
            diffs.append(multiparent.MultiParent.from_lines(target, parents,
                left_parent_blocks))
        return diffs

    def _extract_blocks(self, version_id, source, target):
        return None

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """
        # Does this need to call self._check_write_ok()? (IanC 20070919)
        vf_parents = {}
        mpvf = multiparent.MultiMemoryVersionedFile()
        versions = []
        for version, parent_ids, expected_sha1, mpdiff in records:
            versions.append(version)
            mpvf.add_diff(mpdiff, version, parent_ids)
        needed_parents = set()
        for version, parent_ids, expected_sha1, mpdiff in records:
            needed_parents.update(p for p in parent_ids
                                  if not mpvf.has_version(p))
        present_parents = set(self.get_parent_map(needed_parents).keys())
        for parent_id, lines in zip(present_parents,
                                    self._get_lf_split_line_list(present_parents)):
            mpvf.add_version(lines, parent_id, [])
        for (version, parent_ids, expected_sha1, mpdiff), lines in\
            zip(records, mpvf.get_line_list(versions)):
            if len(parent_ids) == 1:
                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                    mpvf.get_diff(parent_ids[0]).num_lines()))
            else:
                left_matching_blocks = None
            try:
                _, _, version_text = self.add_lines_with_ghosts(version,
                    parent_ids, lines, vf_parents,
                    left_matching_blocks=left_matching_blocks)
            except NotImplementedError:
                # The vf can't handle ghosts, so add lines normally, which will
                # (reasonably) fail if there are ghosts in the data.
                _, _, version_text = self.add_lines(version,
                    parent_ids, lines, vf_parents,
                    left_matching_blocks=left_matching_blocks)
            vf_parents[version] = version_text
        sha1s = self.get_sha1s(versions)
        for version, parent_ids, expected_sha1, mpdiff in records:
            if expected_sha1 != sha1s[version]:
                raise errors.VersionedFileInvalidChecksum(version)

    def get_text(self, version_id):
        """Return version contents as a text string.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        return ''.join(self.get_lines(version_id))
    get_string = get_text

    def get_texts(self, version_ids):
        """Return the texts of listed versions as a list of strings.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        return [''.join(self.get_lines(v)) for v in version_ids]

    def get_lines(self, version_id):
        """Return version contents as a sequence of lines.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        raise NotImplementedError(self.get_lines)

    def _get_lf_split_line_list(self, version_ids):
        return [StringIO(t).readlines() for t in self.get_texts(version_ids)]

    def get_ancestry(self, version_ids, topo_sorted=True):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        This list will not be topologically sorted if topo_sorted=False is
        passed.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history."""
        if isinstance(version_ids, basestring):
            version_ids = [version_ids]
        raise NotImplementedError(self.get_ancestry)

    def get_ancestry_with_ghosts(self, version_ids):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history.

        Ghosts that are known about will be included in ancestry list,
        but are not explicitly marked.
        """
        raise NotImplementedError(self.get_ancestry_with_ghosts)

    def get_parent_map(self, version_ids):
        """Get a map of the parents of version_ids.

        :param version_ids: The version ids to look up parents for.
        :return: A mapping from version id to parents.
        """
        raise NotImplementedError(self.get_parent_map)

    def get_parents_with_ghosts(self, version_id):
        """Return version names for parents of version_id.

        Will raise RevisionNotPresent if version_id is not present
        in the history.

        Ghosts that are known about will be included in the parent list,
        but are not explicitly marked.
        """
        try:
            return list(self.get_parent_map([version_id])[version_id])
        except KeyError:
            raise errors.RevisionNotPresent(version_id, self)

    def annotate(self, version_id):
        """Return a list of (version-id, line) tuples for version_id.

        :raise RevisionNotPresent: If the given version is
            not present in file history.
        """
        raise NotImplementedError(self.annotate)

    def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                                pb=None):
        """Iterate over the lines in the versioned file from version_ids.

        This may return lines from other versions. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES: Lines are normalised: they will all have \n terminators.
               Lines are returned in arbitrary order.

        :return: An iterator over (line, version_id).
        """
        raise NotImplementedError(self.iter_lines_added_or_present_in_versions)

    def plan_merge(self, ver_a, ver_b):
        """Return pseudo-annotation indicating how the two versions merge.

        This is computed between versions a and b and their common
        base.

        Weave lines present in none of them are skipped entirely.

        Legend:
        killed-base Dead in base revision
        killed-both Killed in each revision
        killed-a    Killed in a
        killed-b    Killed in b
        unchanged   Alive in both a and b (possibly created in both)
        new-a       Created in a
        new-b       Created in b
        ghost-a     Killed in a, unborn in b
        ghost-b     Killed in b, unborn in a
        irrelevant  Not in either revision
        """
        raise NotImplementedError(VersionedFile.plan_merge)

    def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
                    b_marker=TextMerge.B_MARKER):
        return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
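
    # Example (illustrative sketch; `vf` is a hypothetical VersionedFile
    # subclass that implements plan_merge):
    #
    #   plan = vf.plan_merge('rev-a', 'rev-b')
    #   merged_lines = vf.weave_merge(plan)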


class RecordingVersionedFilesDecorator(object):
    """A minimal versioned files that records calls made on it.

    Only enough methods have been added to support tests using it to date.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to its value.
    """

    def __init__(self, backing_vf):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        """
        self._backing_vf = backing_vf
        self.calls = []

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        self.calls.append(("add_lines", key, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content))
        return self._backing_vf.add_lines(key, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content)

    def check(self):
        self._backing_vf.check()

    def get_parent_map(self, keys):
        self.calls.append(("get_parent_map", copy(keys)))
        return self._backing_vf.get_parent_map(keys)

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", list(keys), sort_order,
            include_delta_closure))
        return self._backing_vf.get_record_stream(keys, sort_order,
            include_delta_closure)

    def get_sha1s(self, keys):
        self.calls.append(("get_sha1s", copy(keys)))
        return self._backing_vf.get_sha1s(keys)

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        self.calls.append(("iter_lines_added_or_present_in_keys", copy(keys)))
        return self._backing_vf.iter_lines_added_or_present_in_keys(keys, pb=pb)

    def keys(self):
        self.calls.append(("keys",))
        return self._backing_vf.keys()


class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
    """A VF that records calls, and returns keys in specific order.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to its value.
    """

    def __init__(self, backing_vf, key_priority):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        :param key_priority: A dictionary defining what order keys should be
            returned from an 'unordered' get_record_stream request.
            Keys with lower priority are returned first, keys not present in
            the map get an implicit priority of 0, and are returned in
            lexicographical order.
        """
        RecordingVersionedFilesDecorator.__init__(self, backing_vf)
        self._key_priority = key_priority

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", list(keys), sort_order,
            include_delta_closure))
        if sort_order == 'unordered':
            def sort_key(key):
                return (self._key_priority.get(key, 0), key)
            # Use a defined order by asking for the keys one-by-one from the
            # backing_vf
            for key in sorted(keys, key=sort_key):
                for record in self._backing_vf.get_record_stream([key],
                                'unordered', include_delta_closure):
                    yield record
        else:
            for record in self._backing_vf.get_record_stream(keys, sort_order,
                            include_delta_closure):
                yield record


class KeyMapper(object):
    """KeyMappers map between keys and underlying partitioned storage."""

    def map(self, key):
        """Map key to an underlying storage identifier.

        :param key: A key tuple e.g. ('file-id', 'revision-id').
        :return: An underlying storage identifier, specific to the partitioning
            mechanism.
        """
        raise NotImplementedError(self.map)

    def unmap(self, partition_id):
        """Map a partitioned storage id back to a key prefix.

        :param partition_id: The underlying partition id.
        :return: As much of a key (or prefix) as is derivable from the partition
            id.
        """
        raise NotImplementedError(self.unmap)


class ConstantMapper(KeyMapper):
    """A key mapper that maps to a constant result."""

    def __init__(self, result):
        """Create a ConstantMapper which will return result for all maps."""
        self._result = result

    def map(self, key):
        """See KeyMapper.map()."""
        return self._result


class URLEscapeMapper(KeyMapper):
    """Base class for use with transport backed storage.

    This provides a map and unmap wrapper that respectively url escape and
    unescape their outputs and inputs.
    """

    def map(self, key):
        """See KeyMapper.map()."""
        return urllib.quote(self._map(key))

    def unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return self._unmap(urllib.unquote(partition_id))


class PrefixMapper(URLEscapeMapper):
    """A key mapper that extracts the first component of a key.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        return key[0]

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (partition_id,)


class HashPrefixMapper(URLEscapeMapper):
    """A key mapper that combines the first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        prefix = self._escape(key[0])
        return "%02x/%s" % (adler32(prefix) & 0xff, prefix)

    def _escape(self, prefix):
        """No escaping needed here."""
        return prefix

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (self._unescape(osutils.basename(partition_id)),)

    def _unescape(self, basename):
        """No unescaping needed for HashPrefixMapper."""
        return basename


class HashEscapedPrefixMapper(HashPrefixMapper):
    """Combines the escaped first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    _safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."

    def _escape(self, prefix):
        """Turn a key element into a filesystem safe string.

        This is similar to a plain urllib.quote, except
        it uses specific safe characters, so that it doesn't
        have to translate a lot of valid file ids.
        """
        # @ does not get escaped. This is because it is a valid
        # filesystem character we use all the time, and it looks
        # a lot better than seeing %40 all the time.
        r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
             for c in prefix]
        return ''.join(r)

    def _unescape(self, basename):
        """Escaped names are easily unescaped by urlutils."""
        return urllib.unquote(basename)


def make_versioned_files_factory(versioned_file_factory, mapper):
    """Create a ThunkedVersionedFiles factory.

    This will create a callable which when called creates a
    ThunkedVersionedFiles on a transport, using mapper to access individual
    versioned files, and versioned_file_factory to create each individual file.
    """
    def factory(transport):
        return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
            lambda: True)
    return factory


class VersionedFiles(object):
    """Storage for many versioned files.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.

    The keyspace is expressed via simple tuples. Any instance of VersionedFiles
    may have a different length key-size, but that size will be constant for
    all texts added to or retrieved from it. For instance, bzrlib uses
    instances with a key-size of 2 for storing user files in a repository, with
    the first element the fileid, and the second the version of that file.

    The use of tuples allows a single code base to support several different
    uses with only the mapping logic changing from instance to instance.

    :ivar _immediate_fallback_vfs: For subclasses that support stacking,
        this is a list of other VersionedFiles immediately underneath this
        one. They may in turn each have further fallbacks.
    """

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """Add a text to the store.

        :param key: The key tuple of the text to add. If the last element is
            None, a CHK string will be generated during the addition.
        :param parents: The parents key tuples of the text to add.
        :param lines: A list of lines. Each line must be a bytestring. And all
            of them except the last must be terminated with \n and contain no
            other \n's. The last line may either contain no \n's or a single
            terminating \n. If the lines list does not meet this constraint the
            add routine may error or may succeed - but you will be unable to
            read the data back accurately. (Checking the lines have been split
            correctly is expensive and extremely unlikely to catch bugs so it
            is not done at runtime unless check_content is True.)
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations. VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent. The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future add_lines calls in the parent_texts dictionary.
        """
        raise NotImplementedError(self.add_lines)

    def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
        """Add a text to the store.

        This is a private function for use by VersionedFileCommitBuilder.

        :param key: The key tuple of the text to add. If the last element is
            None, a CHK string will be generated during the addition.
        :param parents: The parents key tuples of the text to add.
        :param text: A string containing the text to be committed.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future _add_text calls in the parent_texts dictionary.
        """
        # The default implementation just thunks over to .add_lines(),
        # inefficient, but it works.
        return self.add_lines(key, parents, osutils.split_lines(text),
                              nostore_sha=nostore_sha,
                              random_id=random_id,
                              check_content=True)

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """
        vf_parents = {}
        mpvf = multiparent.MultiMemoryVersionedFile()
        versions = []
        for version, parent_ids, expected_sha1, mpdiff in records:
            versions.append(version)
            mpvf.add_diff(mpdiff, version, parent_ids)
        needed_parents = set()
        for version, parent_ids, expected_sha1, mpdiff in records:
            needed_parents.update(p for p in parent_ids
                                  if not mpvf.has_version(p))
        # It seems likely that adding all the present parents as fulltexts can
        # easily exhaust memory.
        chunks_to_lines = osutils.chunks_to_lines
        for record in self.get_record_stream(needed_parents, 'unordered',
            True):
            if record.storage_kind == 'absent':
                continue
            mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
                record.key, [])
        for (key, parent_keys, expected_sha1, mpdiff), lines in\
            zip(records, mpvf.get_line_list(versions)):
            if len(parent_keys) == 1:
                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                    mpvf.get_diff(parent_keys[0]).num_lines()))
            else:
                left_matching_blocks = None
            version_sha1, _, version_text = self.add_lines(key,
                parent_keys, lines, vf_parents,
                left_matching_blocks=left_matching_blocks)
            if version_sha1 != expected_sha1:
                raise errors.VersionedFileInvalidChecksum(key)
            vf_parents[key] = version_text

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        raise NotImplementedError(self.annotate)

    def check(self, progress_bar=None):
        """Check this object for integrity.

        :param progress_bar: A progress bar to output as the check progresses.
        :param keys: Specific keys within the VersionedFiles to check. When
            this parameter is not None, check() becomes a generator as per
            get_record_stream. The difference to get_record_stream is that
            more or deeper checks will be performed.
        :return: None, or if keys was supplied a generator as per
            get_record_stream.
        """
        raise NotImplementedError(self.check)

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def clear_cache(self):
        """Clear whatever caches this VersionedFile holds.

        This is generally called after an operation has been performed, when we
        don't expect to be using this versioned file again soon.
        """

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

    def _check_lines_are_lines(self, lines):
        """Check that the lines really are full lines without inline EOL."""
        for line in lines:
            if '\n' in line[:-1]:
                raise errors.BzrBadParameterContainsNewline("lines")

    def get_known_graph_ancestry(self, keys):
        """Get a KnownGraph instance with the ancestry of keys."""
        # most basic implementation is a loop around get_parent_map
        pending = set(keys)
        parent_map = {}
        while pending:
            this_parent_map = self.get_parent_map(pending)
            parent_map.update(this_parent_map)
            pending = set()
            map(pending.update, this_parent_map.itervalues())
            pending = pending.difference(parent_map)
        kg = _mod_graph.KnownGraph(parent_map)
        return kg

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        raise NotImplementedError(self.get_parent_map)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """Get a stream of records for keys.

        :param keys: The keys to include.
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the opaque data).
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        raise NotImplementedError(self.get_record_stream)

    def get_sha1s(self, keys):
        """Get the sha1's of the texts for the given keys.

        :param keys: The names of the keys to lookup
        :return: a dict from key to sha1 digest. Keys of texts which are not
            present in the store are not present in the returned
            dictionary.
        """
        raise NotImplementedError(self.get_sha1s)

    has_key = index._has_key_from_parent_map

    def get_missing_compression_parent_keys(self):
        """Return an iterable of keys of missing compression parents.

        Check this after calling insert_record_stream to find out if there are
        any missing compression parents. If there are, the records that
        depend on them are not able to be inserted safely. The precise
        behaviour depends on the concrete VersionedFiles class in use.

        Classes that do not support this will raise NotImplementedError.
        """
        raise NotImplementedError(self.get_missing_compression_parent_keys)

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        raise NotImplementedError(self.iter_lines_added_or_present_in_keys)

    def keys(self):
        """Return an iterable of the keys for all the contained texts."""
        raise NotImplementedError(self.keys)

    def make_mpdiffs(self, keys):
        """Create multiparent diffs for specified keys."""
        generator = _MPDiffGenerator(self, keys)
        return generator.compute_diffs()
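
    # Example (illustrative sketch): transferring texts between two stores as
    # multiparent diffs. `source_vf`, `target_vf` and `keys` are hypothetical:
    #
    #   diffs = source_vf.make_mpdiffs(keys)
    #   parent_map = source_vf.get_parent_map(keys)
    #   sha1s = source_vf.get_sha1s(keys)
    #   records = [(key, parent_map[key], sha1s[key], diff)
    #              for key, diff in zip(keys, diffs)]
    #   target_vf.add_mpdiffs(records)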

    def get_annotator(self):
        return annotate.Annotator(self)

    missing_keys = index._missing_keys_from_parent_map

    def _extract_blocks(self, version_id, source, target):
        return None

    def _transitive_fallbacks(self):
        """Return the whole stack of fallback versionedfiles.

        This VersionedFiles may have a list of fallbacks, but it doesn't
        necessarily know about the whole stack going down, and it can't know
        at open time because they may change after the objects are opened.
        """
        all_fallbacks = []
        for a_vfs in self._immediate_fallback_vfs:
            all_fallbacks.append(a_vfs)
            all_fallbacks.extend(a_vfs._transitive_fallbacks())
        return all_fallbacks


class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.
    """

    def __init__(self, transport, file_factory, mapper, is_locked):
        """Create a ThunkedVersionedFiles."""
        self._transport = transport
        self._file_factory = file_factory
        self._mapper = mapper
        self._is_locked = is_locked

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """See VersionedFiles.add_lines()."""
        path = self._mapper.map(key)
        version_id = key[-1]
        parents = [parent[-1] for parent in parents]
        vf = self._get_vf(path)
        try:
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
        except errors.NoSuchFile:
            # parent directory may be missing, try again.
            self._transport.mkdir(osutils.dirname(path))
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        prefix = key[:-1]
        path = self._mapper.map(prefix)
        vf = self._get_vf(path)
        origins = vf.annotate(key[-1])
        result = []
        for origin, line in origins:
            result.append((prefix + (origin,), line))
        return result

    def check(self, progress_bar=None, keys=None):
        """See VersionedFiles.check()."""
        # XXX: This is over-enthusiastic but as we only thunk for Weaves today
        # this is tolerable. Ideally we'd pass keys down to check() and
        # have the older VersionedFile interface updated too.
        for prefix, vf in self._iter_all_components():
            vf.check()
        if keys is not None:
            return self.get_record_stream(keys, 'unordered', True)

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        prefixes = self._partition_keys(keys)
        result = {}
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            parent_map = vf.get_parent_map(suffixes)
            for key, parents in parent_map.items():
                result[prefix + (key,)] = tuple(
                    prefix + (parent,) for parent in parents)
        return result

    def _get_vf(self, path):
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)
        return self._file_factory(path, self._transport, create=True,
            get_scope=lambda: None)

    def _partition_keys(self, keys):
        """Turn keys into a dict of prefix:suffix_list."""
        result = {}
        for key in keys:
            prefix_keys = result.setdefault(key[:-1], [])
            prefix_keys.append(key[-1])
        return result

    def _get_all_prefixes(self):
        # Identify all key prefixes.
        # XXX: A bit hacky, needs polish.
        if type(self._mapper) == ConstantMapper:
            paths = [self._mapper.map(())]
            prefixes = [()]
        else:
            relpaths = set()
            for quoted_relpath in self._transport.iter_files_recursive():
                path, ext = os.path.splitext(quoted_relpath)
                relpaths.add(path)
            paths = list(relpaths)
            prefixes = [self._mapper.unmap(path) for path in paths]
        return zip(paths, prefixes)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream()."""
        # Ordering will be taken care of by each partitioned store; group keys
        # by partition.
        keys = sorted(keys)
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            suffixes = [(suffix,) for suffix in suffixes]
            for record in vf.get_record_stream(suffixes, ordering,
                include_delta_closure):
                if record.parents is not None:
                    record.parents = tuple(
                        prefix + parent for parent in record.parents)
                record.key = prefix + record.key
                yield record

    def _iter_keys_vf(self, keys):
        prefixes = self._partition_keys(keys)
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            yield prefix, suffixes, vf

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s()."""
        sha1s = {}
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            vf_sha1s = vf.get_sha1s(suffixes)
            for suffix, sha1 in vf_sha1s.iteritems():
                sha1s[prefix + (suffix,)] = sha1
        return sha1s

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        for record in stream:
            prefix = record.key[:-1]
            key = record.key[-1:]
            if record.parents is not None:
                parents = [parent[-1:] for parent in record.parents]
            else:
                parents = None
            thunk_record = AdapterFactory(key, parents, record)
            path = self._mapper.map(prefix)
            # Note that this parses the file many times; we can do better but
            # as this only impacts weaves in terms of performance, it is
            # tolerable.
            vf = self._get_vf(path)
            vf.insert_record_stream([thunk_record])

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
                yield line, prefix + (version,)

    def _iter_all_components(self):
        for path, prefix in self._get_all_prefixes():
            yield prefix, self._get_vf(path)

    def keys(self):
        """See VersionedFiles.keys()."""
        result = set()
        for prefix, vf in self._iter_all_components():
            for suffix in vf.versions():
                result.add(prefix + (suffix,))
        return result


class VersionedFilesWithFallbacks(VersionedFiles):

    def without_fallbacks(self):
        """Return a clone of this object without any fallbacks configured."""
        raise NotImplementedError(self.without_fallbacks)

    def add_fallback_versioned_files(self, a_versioned_files):
        """Add a source of texts for texts not present in this knit.

        :param a_versioned_files: A VersionedFiles object.
        """
        raise NotImplementedError(self.add_fallback_versioned_files)

    def get_known_graph_ancestry(self, keys):
        """Get a KnownGraph instance with the ancestry of keys."""
        parent_map, missing_keys = self._index.find_ancestry(keys)
        for fallback in self._transitive_fallbacks():
            if not missing_keys:
                break
            (f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
                missing_keys)
            parent_map.update(f_parent_map)
            missing_keys = f_missing_keys
        kg = _mod_graph.KnownGraph(parent_map)
        return kg


class _PlanMergeVersionedFile(VersionedFiles):
    """A VersionedFile for uncommitted and committed texts.

    It is intended to allow merges to be planned with working tree texts.
    It implements only the small part of the VersionedFiles interface used by
    PlanMerge. It falls back to multiple versionedfiles for data not stored in
    _PlanMergeVersionedFile itself.

    :ivar: fallback_versionedfiles a list of VersionedFiles objects that can be
        queried for missing texts.
    """

    def __init__(self, file_id):
        """Create a _PlanMergeVersionedFile.

        :param file_id: Used with _PlanMerge code which is not yet fully
            tuple-keyspace aware.
        """
        self._file_id = file_id
        # fallback locations
        self.fallback_versionedfiles = []
        # Parents for locally held keys.
        self._parents = {}
        # line data for locally held keys.
        self._lines = {}
        # key lookup providers
        self._providers = [_mod_graph.DictParentsProvider(self._parents)]

    def plan_merge(self, ver_a, ver_b, base=None):
        """See VersionedFile.plan_merge"""
        from bzrlib.merge import _PlanMerge
        if base is None:
            return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
        old_plan = list(_PlanMerge(ver_a, base, self, (self._file_id,)).plan_merge())
        new_plan = list(_PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge())
        return _PlanMerge._subtract_plans(old_plan, new_plan)

    def plan_lca_merge(self, ver_a, ver_b, base=None):
        from bzrlib.merge import _PlanLCAMerge
        graph = _mod_graph.Graph(self)
        new_plan = _PlanLCAMerge(ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
        if base is None:
            return new_plan
        old_plan = _PlanLCAMerge(ver_a, base, self, (self._file_id,), graph).plan_merge()
        return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))

    def add_lines(self, key, parents, lines):
        """See VersionedFiles.add_lines

        Lines are added locally, not to fallback versionedfiles. Also, ghosts
        are permitted. Only reserved ids are permitted.
        """
        if type(key) is not tuple:
            raise TypeError(key)
        if not revision.is_reserved_id(key[-1]):
            raise ValueError('Only reserved ids may be used')
        if parents is None:
            raise ValueError('Parents may not be None')
        if lines is None:
            raise ValueError('Lines may not be None')
        self._parents[key] = tuple(parents)
        self._lines[key] = lines
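
    # Example (illustrative sketch; `repository_texts`, `this_lines` and
    # `other_lines` are hypothetical). Reserved version ids end with ':':
    #
    #   vf = _PlanMergeVersionedFile('file-id')
    #   vf.fallback_versionedfiles.append(repository_texts)
    #   vf.add_lines(('file-id', 'this:'), [('file-id', 'rev-a')], this_lines)
    #   vf.add_lines(('file-id', 'other:'), [('file-id', 'rev-b')], other_lines)
    #   plan = vf.plan_merge('this:', 'other:')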

    def get_record_stream(self, keys, ordering, include_delta_closure):
        pending = set(keys)
        for key in keys:
            if key in self._lines:
                lines = self._lines[key]
                parents = self._parents[key]
                pending.remove(key)
                yield ChunkedContentFactory(key, parents, None, lines)
        for versionedfile in self.fallback_versionedfiles:
            for record in versionedfile.get_record_stream(
                pending, 'unordered', True):
                if record.storage_kind == 'absent':
                    continue
                else:
                    pending.remove(record.key)
                    yield record
            if not pending:
                return
        # report absent entries
        for key in pending:
            yield AbsentContentFactory(key)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map"""
        # We create a new provider because a fallback may have been added.
        # If we make fallbacks private we can update a stack list and avoid
        # object creation thrashing.
        keys = set(keys)
        result = {}
        if revision.NULL_REVISION in keys:
            keys.remove(revision.NULL_REVISION)
            result[revision.NULL_REVISION] = ()
        self._providers = self._providers[:1] + self.fallback_versionedfiles
        result.update(
            _mod_graph.StackedParentsProvider(
                self._providers).get_parent_map(keys))
        for key, parents in result.iteritems():
            if parents == ():
                result[key] = (revision.NULL_REVISION,)
        return result


class PlanWeaveMerge(TextMerge):
    """Weave merge that takes a plan as its input.

    This exists so that VersionedFile.plan_merge is implementable.
    Most callers will want to use WeaveMerge instead.
    """

    def __init__(self, plan, a_marker=TextMerge.A_MARKER,
                 b_marker=TextMerge.B_MARKER):
        TextMerge.__init__(self, a_marker, b_marker)
        self.plan = list(plan)

    def _merge_struct(self):
        lines_a = []
        lines_b = []
        ch_a = ch_b = False

        def outstanding_struct():
            if not lines_a and not lines_b:
                return
            elif ch_a and not ch_b:
                yield (lines_a,)
            elif ch_b and not ch_a:
                yield (lines_b,)
            elif lines_a == lines_b:
                yield (lines_a,)
            else:
                yield (lines_a, lines_b)

        # We previously considered either 'unchanged' or 'killed-both' lines
        # to be possible places to resynchronize. However, assuming agreement
        # on killed-both lines may be too aggressive. -- mbp 20060324
        for state, line in self.plan:
            if state == 'unchanged':
                # resync and flush queued conflicts changes if any
                for struct in outstanding_struct():
                    yield struct
                lines_a = []
                lines_b = []
                ch_a = ch_b = False

            if state == 'unchanged':
                if line:
                    yield ([line],)
            elif state == 'killed-a':
                ch_a = True
                lines_b.append(line)
            elif state == 'killed-b':
                ch_b = True
                lines_a.append(line)
            elif state == 'new-a':
                ch_a = True
                lines_a.append(line)
            elif state == 'new-b':
                ch_b = True
                lines_b.append(line)
            elif state == 'conflicted-a':
                ch_b = ch_a = True
                lines_a.append(line)
            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            elif state == 'killed-both':
                # This counts as a change, even though there is no associated
                # line
                ch_b = ch_a = True
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                        'killed-base'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct

    def base_from_plan(self):
        """Construct a BASE file from the plan text."""
        base_lines = []
        for state, line in self.plan:
            if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
                # If unchanged, then this line is straight from base. If a or b
                # or both killed the line, then it *used* to be in base.
                base_lines.append(line)
            else:
                if state not in ('killed-base', 'irrelevant',
                                 'ghost-a', 'ghost-b',
                                 'new-a', 'new-b',
                                 'conflicted-a', 'conflicted-b'):
                    # killed-base, irrelevant means it doesn't apply
                    # ghost-a/ghost-b are harder to say for sure, but they
                    # aren't in the 'inc_c' which means they aren't in the
                    # shared base of a & b. So we don't include them. And
                    # obviously if the line is newly inserted, it isn't in base

                    # If 'conflicted-a' or b, then it is new vs one base, but
                    # old versus another base. However, if we make it present
                    # in the base, it will be deleted from the target, and it
                    # seems better to get a line doubled in the merge result,
                    # rather than have it deleted entirely.
                    # Example, each node is the 'text' at that point:
                    #     MN
                    #    /   \
                    #   MaN   MbN
                    #    |  X  |
                    #   MabN MbaN
                    #    \   /
                    #     ???
                    # There was a criss-cross conflict merge. Both sides
                    # include the other, but put themselves first.
                    # Weave marks this as a 'clean' merge, picking OTHER over
                    # THIS. (Though the details depend on order inserted into
                    # weave, etc.)
                    # LCA generates a plan:
                    # [('unchanged', M),
                    #  ('conflicted-b', b),
                    #  ('unchanged', a),
                    #  ('conflicted-a', b),
                    #  ('unchanged', N)]
                    # If you mark 'conflicted-*' as part of BASE, then a 3-way
                    # merge tool will cleanly generate "MaN" (as BASE vs THIS
                    # removes one 'b', and BASE vs OTHER removes the other)
                    # If you include neither, 3-way creates a clean "MbabN" as
                    # THIS adds one 'b', and OTHER does too.
                    # It seems that having the line 2 times is better than
                    # having it omitted. (Easier to manually delete than notice
                    # it needs to be added.)
                    raise AssertionError('Unknown state: %s' % (state,))
        return base_lines
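
    # Example (illustrative sketch): feeding a hand-built plan through
    # PlanWeaveMerge. Regions where the two sides disagree come back as
    # (lines_a, lines_b) structures and are rendered with the a/b markers
    # by TextMerge.merge_lines():
    #
    #   plan = [('unchanged', 'common\n'),
    #           ('new-a', 'only in a\n'),
    #           ('new-b', 'only in b\n')]
    #   merged_lines = PlanWeaveMerge(plan).merge_lines()[0]
    #   base_lines = PlanWeaveMerge(plan).base_from_plan()  # => ['common\n']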


class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
1697
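# Illustrative usage (not in the original source), assuming 'vf' is a
# weave-backed VersionedFile holding versions 'v1' and 'v2':
#
#   merger = WeaveMerge(vf, 'v1', 'v2')
#   # 'merger' now holds the plan produced by vf.plan_merge('v1', 'v2') and
#   # inherits the PlanWeaveMerge/TextMerge machinery for rendering the
#   # merged text with conflict markers.

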
class VirtualVersionedFiles(VersionedFiles):
    """Dummy implementation for VersionedFiles that uses other functions for
    obtaining fulltexts and parent maps.

    This is always on the bottom of the stack and uses string keys
    (rather than tuples) internally.
    """

    def __init__(self, get_parent_map, get_lines):
        """Create a VirtualVersionedFiles.

        :param get_parent_map: Same signature as Repository.get_parent_map.
        :param get_lines: Should return lines for specified key or None if
            not available.
        """
        super(VirtualVersionedFiles, self).__init__()
        self._get_parent_map = get_parent_map
        self._get_lines = get_lines

    def check(self, progressbar=None):
        """See VersionedFiles.check.

        :note: Always returns True for VirtualVersionedFiles.
        """
        return True

    def add_mpdiffs(self, records):
        """See VersionedFiles.add_mpdiffs.

        :note: Not implemented for VirtualVersionedFiles.
        """
        raise NotImplementedError(self.add_mpdiffs)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map."""
        return dict([((k,), tuple([(p,) for p in v]))
            for k, v in self._get_parent_map(
                [k for (k,) in keys]).iteritems()])

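    # Note (illustrative, not in the original source): the backing
    # get_parent_map callable works on plain string keys, so this method
    # wraps both keys and parents back into the single-element tuples the
    # VersionedFiles API expects. E.g. a backing map answering
    # {'rev-1': ('rev-0',)} is returned as {('rev-1',): (('rev-0',),)}.
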
    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s."""
        ret = {}
        for (k,) in keys:
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                ret[(k,)] = osutils.sha_strings(lines)
        return ret

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream."""
        for (k,) in list(keys):
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                yield ChunkedContentFactory((k,), None,
                    sha1=osutils.sha_strings(lines),
                    chunks=lines)
            else:
                yield AbsentContentFactory((k,))

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        for i, (key,) in enumerate(keys):
            if pb is not None:
                pb.update("Finding changed lines", i, len(keys))
            for l in self._get_lines(key):
                yield (l, key)

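# Minimal sketch (not in the original source): a VirtualVersionedFiles backed
# by in-memory dicts, using the plain string keys described above:
#
#   texts = {'rev-1': ['hello\n'], 'rev-2': ['hello\n', 'world\n']}
#   parents = {'rev-1': (), 'rev-2': ('rev-1',)}
#   vvf = VirtualVersionedFiles(
#       lambda keys: dict((k, parents[k]) for k in keys if k in parents),
#       lambda key: texts.get(key))
#   records = list(vvf.get_record_stream([('rev-2',)], 'unordered', False))
#   records[0].get_bytes_as('fulltext')   # -> 'hello\nworld\n'

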
class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    would already exist.
    """

    def __init__(self, store):
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
            left_matching_blocks=None, nostore_sha=None, random_id=False,
            check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.
        """
        if nostore_sha:
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key[-1] is None:
            # Auto-generate a content-addressed key from the text's sha1.
            sha1 = osutils.sha_strings(lines)
            key = ("sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
            parent_texts=parent_texts,
            left_matching_blocks=left_matching_blocks,
            nostore_sha=nostore_sha, random_id=random_id,
            check_content=check_content)

    def __getattr__(self, name):
        return getattr(self._store, name)

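# Illustrative usage (not in the original source): wrapping a store so that
# re-adding an existing key becomes a cheap no-op; any other attribute access
# falls through to the wrapped store via __getattr__:
#
#   store = NoDupeAddLinesDecorator(backing_store)  # backing_store assumed
#   store.add_lines(('key1',), (), ['some text\n'])
#   store.add_lines(('key1',), (), ['some text\n'])  # skipped: already there

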
def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off a record kind from the front of network_bytes.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    line_end = network_bytes.find('\n')
    storage_kind = network_bytes[:line_end]
    return storage_kind, line_end + 1

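# For example (derived from the function above): a serialised record starts
# with its storage kind and a newline, so
#
#   network_bytes_to_kind_and_offset('fulltext\nabc')
#
# returns ('fulltext', 9), and the record body is the slice from offset 9.

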
class NetworkRecordStream(object):
    """A record_stream which reconstitutes a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator should have been obtained from a record_stream's
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        self._kind_factory = {
            'fulltext': fulltext_network_to_record,
            'groupcompress-block': groupcompress.network_block_to_records,
            'knit-ft-gz': knit.knit_network_to_record,
            'knit-delta-gz': knit.knit_network_to_record,
            'knit-annotated-ft-gz': knit.knit_network_to_record,
            'knit-annotated-delta-gz': knit.knit_network_to_record,
            'knit-delta-closure': knit.knit_delta_closure_to_records,
            }

    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
        for bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
            for record in self._kind_factory[storage_kind](
                    storage_kind, bytes, line_end):
                yield record

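# Illustrative round-trip (not in the original source), assuming
# 'source_records' is a hypothetical list of fulltext records:
#
#   wire = [record_to_fulltext_bytes(r) for r in source_records]
#   for record in NetworkRecordStream(iter(wire)).read():
#       text = record.get_bytes_as('fulltext')

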
def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record."""
    meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
    record_meta = bytes[line_end+4:line_end+4+meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    if parents == 'nil':
        parents = None
    fulltext = bytes[line_end+4+meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]


def _length_prefix(bytes):
    return struct.pack('!L', len(bytes))


def record_to_fulltext_bytes(record):
    if record.parents is None:
        parents = 'nil'
    else:
        parents = record.parents
    record_meta = bencode.bencode((record.key, parents))
    record_content = record.get_bytes_as('fulltext')
    return "fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)

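# The fulltext wire format implied by the two functions above is:
#
#   "fulltext\n" + 4-byte big-endian metadata length + bencoded (key, parents)
#   metadata + raw fulltext bytes
#
# so fulltext_network_to_record() inverts record_to_fulltext_bytes() exactly
# (illustrative, for any fulltext record 'r'):
#
#   bytes = record_to_fulltext_bytes(r)
#   kind, offset = network_bytes_to_kind_and_offset(bytes)
#   [r2] = fulltext_network_to_record(kind, bytes, offset)
#   # r2.key == r.key and r2.parents == r.parents

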
def sort_groupcompress(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the key prefix.

    :return: A sorted list of keys
    """
    # gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
    per_prefix_map = {}
    for item in parent_map.iteritems():
        key = item[0]
        if isinstance(key, str) or len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]

    present_keys = []
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys

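# For example (illustrative): keys sharing a prefix (usually a file-id) stay
# grouped, and each group comes out newest-first:
#
#   parent_map = {('f1', 'r1'): (),
#                 ('f1', 'r2'): (('f1', 'r1'),),
#                 ('f2', 'r1'): ()}
#   sort_groupcompress(parent_map)
#   # -> [('f1', 'r2'), ('f1', 'r1'), ('f2', 'r1')]

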
class _KeyRefs(object):

    def __init__(self, track_new_keys=False):
        # dict mapping 'key' to 'set of keys referring to that key'
        self.refs = {}
        if track_new_keys:
            # set remembering all new keys
            self.new_keys = set()
        else:
            self.new_keys = None

    def clear(self):
        if self.refs:
            self.refs.clear()
        if self.new_keys:
            self.new_keys.clear()

    def add_references(self, key, refs):
        # Record the new references
        for referenced in refs:
            try:
                needed_by = self.refs[referenced]
            except KeyError:
                needed_by = self.refs[referenced] = set()
            needed_by.add(key)
        # Discard references satisfied by the new key
        self.add_key(key)

    def get_new_keys(self):
        return self.new_keys

    def get_unsatisfied_refs(self):
        return self.refs.iterkeys()

    def _satisfy_refs_for_key(self, key):
        try:
            del self.refs[key]
        except KeyError:
            # No keys depended on this key.  That's ok.
            pass

    def add_key(self, key):
        # satisfy refs for key, and remember that we've seen this key.
        self._satisfy_refs_for_key(key)
        if self.new_keys is not None:
            self.new_keys.add(key)

    def satisfy_refs_for_keys(self, keys):
        for key in keys:
            self._satisfy_refs_for_key(key)

    def get_referrers(self):
        result = set()
        for referrers in self.refs.itervalues():
            result.update(referrers)
        return result
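
# Illustrative usage (not in the original source): _KeyRefs tracks which
# referenced keys have not yet been added, e.g. to verify that every parent
# named during an insert eventually appears:
#
#   refs = _KeyRefs()
#   refs.add_references(('B',), [('A',)])  # B refers to A
#   list(refs.get_unsatisfied_refs())      # -> [('A',)]
#   refs.add_key(('A',))                   # adding A satisfies the reference
#   list(refs.get_unsatisfied_refs())      # -> []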