1
# Copyright (C) 2005, 2006, 2007, 2008 Canonical Ltd
4
# Johan Rydberg <jrydberg@gnu.org>
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
11
# This program is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
"""Versioned text file storage api."""
23
from cStringIO import StringIO
26
from zlib import adler32
28
from bzrlib.lazy_import import lazy_import
29
lazy_import(globals(), """
43
from bzrlib.graph import DictParentsProvider, Graph, _StackedParentsProvider
44
from bzrlib.transport.memory import MemoryTransport
46
from bzrlib.inter import InterObject
47
from bzrlib.registry import Registry
48
from bzrlib.symbol_versioning import *
49
from bzrlib.textmerge import TextMerge
50
from bzrlib.util import bencode
53
adapter_registry = Registry()
54
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
55
'DeltaPlainToFullText')
56
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
58
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
59
'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
60
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
61
'bzrlib.knit', 'DeltaAnnotatedToFullText')
62
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
63
'bzrlib.knit', 'FTAnnotatedToUnannotated')
64
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
65
'bzrlib.knit', 'FTAnnotatedToFullText')
66
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
67
# 'bzrlib.knit', 'FTAnnotatedToChunked')
70
class ContentFactory(object):
71
"""Abstract interface for insertion and retrieval from a VersionedFile.
73
:ivar sha1: None, or the sha1 of the content fulltext.
74
:ivar storage_kind: The native storage kind of this factory. One of
75
'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
76
'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
77
'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
78
:ivar key: The key of this content. Each key is a tuple with a single
80
:ivar parents: A tuple of parent keys for self.key. If the object has
81
no parent information, None (as opposed to () for an empty list of
86
"""Create a ContentFactory."""
88
self.storage_kind = None
93
class ChunkedContentFactory(ContentFactory):
94
"""Static data content factory.
96
This takes a 'chunked' list of strings. The only requirement on 'chunked' is
97
that ''.join(lines) becomes a valid fulltext. A tuple of a single string
98
satisfies this, as does a list of lines.
100
:ivar sha1: None, or the sha1 of the content fulltext.
101
:ivar storage_kind: The native storage kind of this factory. Always
103
:ivar key: The key of this content. Each key is a tuple with a single
105
:ivar parents: A tuple of parent keys for self.key. If the object has
106
no parent information, None (as opposed to () for an empty list of
110
def __init__(self, key, parents, sha1, chunks):
111
"""Create a ContentFactory."""
113
self.storage_kind = 'chunked'
115
self.parents = parents
116
self._chunks = chunks
118
def get_bytes_as(self, storage_kind):
119
if storage_kind == 'chunked':
121
elif storage_kind == 'fulltext':
122
return ''.join(self._chunks)
123
raise errors.UnavailableRepresentation(self.key, storage_kind,
127
class FulltextContentFactory(ContentFactory):
128
"""Static data content factory.
130
This takes a fulltext when created and just returns that during
131
get_bytes_as('fulltext').
133
:ivar sha1: None, or the sha1 of the content fulltext.
134
:ivar storage_kind: The native storage kind of this factory. Always
136
:ivar key: The key of this content. Each key is a tuple with a single
138
:ivar parents: A tuple of parent keys for self.key. If the object has
139
no parent information, None (as opposed to () for an empty list of
143
def __init__(self, key, parents, sha1, text):
144
"""Create a ContentFactory."""
146
self.storage_kind = 'fulltext'
148
self.parents = parents
151
def get_bytes_as(self, storage_kind):
152
if storage_kind == self.storage_kind:
154
elif storage_kind == 'chunked':
156
raise errors.UnavailableRepresentation(self.key, storage_kind,
160
class AbsentContentFactory(ContentFactory):
161
"""A placeholder content factory for unavailable texts.
164
:ivar storage_kind: 'absent'.
165
:ivar key: The key of this content. Each key is a tuple with a single
170
def __init__(self, key):
171
"""Create a ContentFactory."""
173
self.storage_kind = 'absent'
178
class AdapterFactory(ContentFactory):
179
"""A content factory to adapt between key prefix's."""
181
def __init__(self, key, parents, adapted):
182
"""Create an adapter factory instance."""
184
self.parents = parents
185
self._adapted = adapted
187
def __getattr__(self, attr):
188
"""Return a member from the adapted object."""
189
if attr in ('key', 'parents'):
190
return self.__dict__[attr]
192
return getattr(self._adapted, attr)
195
def filter_absent(record_stream):
196
"""Adapt a record stream to remove absent records."""
197
for record in record_stream:
198
if record.storage_kind != 'absent':
202
class VersionedFile(object):
203
"""Versioned text file storage.
205
A versioned file manages versions of line-based text files,
206
keeping track of the originating version for each line.
208
To clients the "lines" of the file are represented as a list of
209
strings. These strings will typically have terminal newline
210
characters, but this is not required. In particular files commonly
211
do not have a newline at the end of the file.
213
Texts are identified by a version-id string.
217
def check_not_reserved_id(version_id):
    """Validate version_id via revision.check_not_reserved_id.

    Nothing is returned; whatever the delegate raises propagates unchanged.
    """
    validate = revision.check_not_reserved_id
    validate(version_id)
220
def copy_to(self, name, transport):
    """Copy this versioned file to name on transport.

    This base implementation always raises NotImplementedError, passing
    the bound method as the exception argument.
    """
    unimplemented = self.copy_to
    raise NotImplementedError(unimplemented)
224
def get_record_stream(self, versions, ordering, include_delta_closure):
    """Return a stream of records for the requested versions.

    :param versions: The versions to include. Each version is a tuple.
    :param ordering: Either 'unordered' or 'topological'. A topologically
        sorted stream has compression parents strictly before their
        children.
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the data content of the
        stream, not in the emitted records). This guarantees that
        'fulltext' can be used successfully on every record.
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.

    This base implementation always raises NotImplementedError.
    """
    unimplemented = self.get_record_stream
    raise NotImplementedError(unimplemented)
241
def has_version(self, version_id):
    """Report whether version_id is present.

    This base implementation always raises NotImplementedError.
    """
    unimplemented = self.has_version
    raise NotImplementedError(unimplemented)
245
def insert_record_stream(self, stream):
    """Insert a record stream into this versioned file.

    :param stream: A stream of records to insert.
    :return: None
    :seealso VersionedFile.get_record_stream:

    This base implementation always raises NotImplementedError. The bound
    method is passed as the exception argument, like the other abstract
    methods of this class, so the failure names the missing operation.
    """
    raise NotImplementedError(self.insert_record_stream)
254
def add_lines(self, version_id, parents, lines, parent_texts=None,
255
left_matching_blocks=None, nostore_sha=None, random_id=False,
257
"""Add a single text on top of the versioned file.
259
Must raise RevisionAlreadyPresent if the new version is
260
already present in file history.
262
Must raise RevisionNotPresent if any of the given parents are
263
not present in file history.
265
:param lines: A list of lines. Each line must be a bytestring. And all
266
of them except the last must be terminated with \n and contain no
267
other \n's. The last line may either contain no \n's or a single
268
terminated \n. If the lines list does meet this constraint the add
269
routine may error or may succeed - but you will be unable to read
270
the data back accurately. (Checking the lines have been split
271
correctly is expensive and extremely unlikely to catch bugs so it
272
is not done at runtime unless check_content is True.)
273
:param parent_texts: An optional dictionary containing the opaque
274
representations of some or all of the parents of version_id to
275
allow delta optimisations. VERY IMPORTANT: the texts must be those
276
returned by add_lines or data corruption can be caused.
277
:param left_matching_blocks: a hint about which areas are common
278
between the text and its left-hand-parent. The format is
279
the SequenceMatcher.get_matching_blocks format.
280
:param nostore_sha: Raise ExistingContent and do not add the lines to
281
the versioned file if the digest of the lines matches this.
282
:param random_id: If True a random id has been selected rather than
283
an id determined by some deterministic process such as a converter
284
from a foreign VCS. When True the backend may choose not to check
285
for uniqueness of the resulting key within the versioned file, so
286
this should only be done when the result is expected to be unique
288
:param check_content: If True, the lines supplied are verified to be
289
bytestrings that are correctly formed lines.
290
:return: The text sha1, the number of bytes in the text, and an opaque
291
representation of the inserted version which can be provided
292
back to future add_lines calls in the parent_texts dictionary.
294
self._check_write_ok()
295
return self._add_lines(version_id, parents, lines, parent_texts,
296
left_matching_blocks, nostore_sha, random_id, check_content)
298
def _add_lines(self, version_id, parents, lines, parent_texts,
    left_matching_blocks, nostore_sha, random_id, check_content):
    """Class specific part of add_lines.

    This base implementation always raises NotImplementedError; the
    exception argument is the public add_lines bound method.
    """
    public_entry_point = self.add_lines
    raise NotImplementedError(public_entry_point)
303
def add_lines_with_ghosts(self, version_id, parents, lines,
    parent_texts=None, nostore_sha=None, random_id=False,
    check_content=True, left_matching_blocks=None):
    """Add lines to the versioned file, allowing ghosts to be present.

    This takes the same parameters as add_lines and returns the same.

    self._check_write_ok() is called first, then the work is handed to
    self._add_lines_with_ghosts with every argument passed positionally.
    """
    self._check_write_ok()
    helper = self._add_lines_with_ghosts
    return helper(
        version_id,
        parents,
        lines,
        parent_texts,
        nostore_sha,
        random_id,
        check_content,
        left_matching_blocks,
    )
314
def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
    nostore_sha, random_id, check_content, left_matching_blocks):
    """Class specific part of add_lines_with_ghosts.

    This base implementation always raises NotImplementedError; the
    exception argument is the public add_lines_with_ghosts bound method.
    """
    public_entry_point = self.add_lines_with_ghosts
    raise NotImplementedError(public_entry_point)
319
def check(self, progress_bar=None):
    """Check the versioned file for integrity.

    This base implementation always raises NotImplementedError.
    """
    unimplemented = self.check
    raise NotImplementedError(unimplemented)
323
def _check_lines_not_unicode(self, lines):
324
"""Check that lines being added to a versioned file are not unicode."""
326
if line.__class__ is not str:
327
raise errors.BzrBadParameterUnicode("lines")
329
def _check_lines_are_lines(self, lines):
330
"""Check that the lines really are full lines without inline EOL."""
332
if '\n' in line[:-1]:
333
raise errors.BzrBadParameterContainsNewline("lines")
335
def get_format_signature(self):
    """Get a text description of the data encoding in this file.

    This base implementation always raises NotImplementedError.
    """
    unimplemented = self.get_format_signature
    raise NotImplementedError(unimplemented)
342
def make_mpdiffs(self, version_ids):
343
"""Create multiparent diffs for specified versions."""
344
knit_versions = set()
345
knit_versions.update(version_ids)
346
parent_map = self.get_parent_map(version_ids)
347
for version_id in version_ids:
349
knit_versions.update(parent_map[version_id])
351
raise errors.RevisionNotPresent(version_id, self)
352
# We need to filter out ghosts, because we can't diff against them.
353
knit_versions = set(self.get_parent_map(knit_versions).keys())
354
lines = dict(zip(knit_versions,
355
self._get_lf_split_line_list(knit_versions)))
357
for version_id in version_ids:
358
target = lines[version_id]
360
parents = [lines[p] for p in parent_map[version_id] if p in
363
# I don't know how this could ever trigger.
364
# parent_map[version_id] was already triggered in the previous
365
# for loop, and lines[p] has the 'if p in knit_versions' check,
366
# so we again won't have a KeyError.
367
raise errors.RevisionNotPresent(version_id, self)
369
left_parent_blocks = self._extract_blocks(version_id,
372
left_parent_blocks = None
373
diffs.append(multiparent.MultiParent.from_lines(target, parents,
377
def _extract_blocks(self, version_id, source, target):
380
def add_mpdiffs(self, records):
381
"""Add mpdiffs to this VersionedFile.
383
Records should be iterables of version, parents, expected_sha1,
384
mpdiff. mpdiff should be a MultiParent instance.
386
# Does this need to call self._check_write_ok()? (IanC 20070919)
388
mpvf = multiparent.MultiMemoryVersionedFile()
390
for version, parent_ids, expected_sha1, mpdiff in records:
391
versions.append(version)
392
mpvf.add_diff(mpdiff, version, parent_ids)
393
needed_parents = set()
394
for version, parent_ids, expected_sha1, mpdiff in records:
395
needed_parents.update(p for p in parent_ids
396
if not mpvf.has_version(p))
397
present_parents = set(self.get_parent_map(needed_parents).keys())
398
for parent_id, lines in zip(present_parents,
399
self._get_lf_split_line_list(present_parents)):
400
mpvf.add_version(lines, parent_id, [])
401
for (version, parent_ids, expected_sha1, mpdiff), lines in\
402
zip(records, mpvf.get_line_list(versions)):
403
if len(parent_ids) == 1:
404
left_matching_blocks = list(mpdiff.get_matching_blocks(0,
405
mpvf.get_diff(parent_ids[0]).num_lines()))
407
left_matching_blocks = None
409
_, _, version_text = self.add_lines_with_ghosts(version,
410
parent_ids, lines, vf_parents,
411
left_matching_blocks=left_matching_blocks)
412
except NotImplementedError:
413
# The vf can't handle ghosts, so add lines normally, which will
414
# (reasonably) fail if there are ghosts in the data.
415
_, _, version_text = self.add_lines(version,
416
parent_ids, lines, vf_parents,
417
left_matching_blocks=left_matching_blocks)
418
vf_parents[version] = version_text
419
sha1s = self.get_sha1s(versions)
420
for version, parent_ids, expected_sha1, mpdiff in records:
421
if expected_sha1 != sha1s[version]:
422
raise errors.VersionedFileInvalidChecksum(version)
424
def get_text(self, version_id):
    """Return version contents as a text string.

    The string is the concatenation of the lines returned by
    self.get_lines(version_id).

    Raises RevisionNotPresent if version is not present in
    file history.
    """
    version_lines = self.get_lines(version_id)
    return ''.join(version_lines)
431
get_string = get_text
433
def get_texts(self, version_ids):
    """Return the texts of listed versions as a list of strings.

    Each text is built by joining self.get_lines() for that version, and
    the result follows the order of version_ids.

    Raises RevisionNotPresent if version is not present in
    file history.
    """
    texts = []
    for version_id in version_ids:
        texts.append(''.join(self.get_lines(version_id)))
    return texts
441
def get_lines(self, version_id):
    """Return version contents as a sequence of lines.

    Raises RevisionNotPresent if version is not present in
    file history.

    This base implementation always raises NotImplementedError.
    """
    unimplemented = self.get_lines
    raise NotImplementedError(unimplemented)
449
def _get_lf_split_line_list(self, version_ids):
    """Return one list of lines per version in version_ids.

    Each text from self.get_texts(version_ids) is split with
    StringIO(...).readlines(), in the order get_texts returns them.
    """
    line_lists = []
    for text in self.get_texts(version_ids):
        line_lists.append(StringIO(text).readlines())
    return line_lists
452
def get_ancestry(self, version_ids, topo_sorted=True):
    """Return a list of all ancestors of given version(s).

    This will not include the null revision.

    This list will not be topologically sorted if topo_sorted=False is
    passed.

    Must raise RevisionNotPresent if any of the given versions are
    not present in file history.

    This base implementation wraps a single version id string in a list
    and then always raises NotImplementedError.
    """
    single_version = isinstance(version_ids, basestring)
    if single_version:
        version_ids = [version_ids]
    unimplemented = self.get_ancestry
    raise NotImplementedError(unimplemented)
465
def get_ancestry_with_ghosts(self, version_ids):
    """Return a list of all ancestors of given version(s).

    This will not include the null revision.

    Must raise RevisionNotPresent if any of the given versions are
    not present in file history.

    Ghosts that are known about will be included in ancestry list,
    but are not explicitly marked.

    This base implementation always raises NotImplementedError.
    """
    unimplemented = self.get_ancestry_with_ghosts
    raise NotImplementedError(unimplemented)
477
def get_parent_map(self, version_ids):
    """Get a map of the parents of version_ids.

    :param version_ids: The version ids to look up parents for.
    :return: A mapping from version id to parents.

    This base implementation always raises NotImplementedError.
    """
    unimplemented = self.get_parent_map
    raise NotImplementedError(unimplemented)
485
def get_parents_with_ghosts(self, version_id):
486
"""Return version names for parents of version_id.
488
Will raise RevisionNotPresent if version_id is not present
491
Ghosts that are known about will be included in the parent list,
492
but are not explicitly marked.
495
return list(self.get_parent_map([version_id])[version_id])
497
raise errors.RevisionNotPresent(version_id, self)
499
def annotate(self, version_id):
    """Return a list of (version-id, line) tuples for version_id.

    :raise RevisionNotPresent: If the given version is
        not present in file history.

    This base implementation always raises NotImplementedError.
    """
    unimplemented = self.annotate
    raise NotImplementedError(unimplemented)
507
def iter_lines_added_or_present_in_versions(self, version_ids=None,
509
"""Iterate over the lines in the versioned file from version_ids.
511
This may return lines from other versions. Each item the returned
512
iterator yields is a tuple of a line and a text version that that line
513
is present in (not introduced in).
515
Ordering of results is in whatever order is most suitable for the
516
underlying storage format.
518
If a progress bar is supplied, it may be used to indicate progress.
519
The caller is responsible for cleaning up progress bars (because this
522
NOTES: Lines are normalised: they will all have \n terminators.
523
Lines are returned in arbitrary order.
525
:return: An iterator over (line, version_id).
527
raise NotImplementedError(self.iter_lines_added_or_present_in_versions)
529
def plan_merge(self, ver_a, ver_b):
530
"""Return pseudo-annotation indicating how the two versions merge.
532
This is computed between versions a and b and their common
535
Weave lines present in none of them are skipped entirely.
538
killed-base Dead in base revision
539
killed-both Killed in each revision
542
unchanged Alive in both a and b (possibly created in both)
545
ghost-a Killed in a, unborn in b
546
ghost-b Killed in b, unborn in a
547
irrelevant Not in either revision
549
raise NotImplementedError(VersionedFile.plan_merge)
551
def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
    b_marker=TextMerge.B_MARKER):
    """Merge according to plan using PlanWeaveMerge.

    :param plan: The merge plan handed to PlanWeaveMerge.
    :param a_marker: Marker passed through to PlanWeaveMerge; defaults to
        TextMerge.A_MARKER.
    :param b_marker: Marker passed through to PlanWeaveMerge; defaults to
        TextMerge.B_MARKER.
    :return: The first element of PlanWeaveMerge(...).merge_lines().
    """
    merger = PlanWeaveMerge(plan, a_marker, b_marker)
    merge_result = merger.merge_lines()
    return merge_result[0]
556
class RecordingVersionedFilesDecorator(object):
557
"""A minimal versioned files that records calls made on it.
559
Only enough methods have been added to support tests using it to date.
561
:ivar calls: A list of the calls made; can be reset at any time by
565
def __init__(self, backing_vf):
566
"""Create a RecordingVersionedFilesDecorator decorating backing_vf.
568
:param backing_vf: The versioned file to answer all methods.
570
self._backing_vf = backing_vf
573
def add_lines(self, key, parents, lines, parent_texts=None,
574
left_matching_blocks=None, nostore_sha=None, random_id=False,
576
self.calls.append(("add_lines", key, parents, lines, parent_texts,
577
left_matching_blocks, nostore_sha, random_id, check_content))
578
return self._backing_vf.add_lines(key, parents, lines, parent_texts,
579
left_matching_blocks, nostore_sha, random_id, check_content)
582
self._backing_vf.check()
584
def get_parent_map(self, keys):
    """Record the call (with a copy of keys) and delegate to the backing vf."""
    call_record = ("get_parent_map", copy(keys))
    self.calls.append(call_record)
    backing = self._backing_vf
    return backing.get_parent_map(keys)
588
def get_record_stream(self, keys, sort_order, include_delta_closure):
    """Record the call (keys captured as a list) and delegate to the backing vf."""
    call_record = (
        "get_record_stream",
        list(keys),
        sort_order,
        include_delta_closure,
    )
    self.calls.append(call_record)
    backing = self._backing_vf
    return backing.get_record_stream(keys, sort_order, include_delta_closure)
594
def get_sha1s(self, keys):
    """Record the call (with a copy of keys) and delegate to the backing vf."""
    call_record = ("get_sha1s", copy(keys))
    self.calls.append(call_record)
    backing = self._backing_vf
    return backing.get_sha1s(keys)
598
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Record the call (with a copy of keys) and delegate to the backing vf.

    pb is forwarded to the backing vf as a keyword argument and is not
    recorded in self.calls.
    """
    call_record = ("iter_lines_added_or_present_in_keys", copy(keys))
    self.calls.append(call_record)
    backing = self._backing_vf
    return backing.iter_lines_added_or_present_in_keys(keys, pb=pb)
603
self.calls.append(("keys",))
604
return self._backing_vf.keys()
607
class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
608
"""A VF that records calls, and returns keys in specific order.
610
:ivar calls: A list of the calls made; can be reset at any time by
614
def __init__(self, backing_vf, key_priority):
    """Create an OrderingVersionedFilesDecorator decorating backing_vf.

    :param backing_vf: The versioned file to answer all methods.
    :param key_priority: A dictionary defining what order keys should be
        returned from an 'unordered' get_record_stream request.
        Keys with lower priority are returned first, keys not present in
        the map get an implicit priority of 0, and are returned in
        lexicographical order.
    """
    # Base-class initialisation runs first; the priority map is stored after.
    base_init = RecordingVersionedFilesDecorator.__init__
    base_init(self, backing_vf)
    self._key_priority = key_priority
627
def get_record_stream(self, keys, sort_order, include_delta_closure):
628
self.calls.append(("get_record_stream", list(keys), sort_order,
629
include_delta_closure))
630
if sort_order == 'unordered':
632
return (self._key_priority.get(key, 0), key)
633
# Use a defined order by asking for the keys one-by-one from the
635
for key in sorted(keys, key=sort_key):
636
for record in self._backing_vf.get_record_stream([key],
637
'unordered', include_delta_closure):
640
for record in self._backing_vf.get_record_stream(keys, sort_order,
641
include_delta_closure):
645
class KeyMapper(object):
646
"""KeyMappers map between keys and underlying partitioned storage."""
649
"""Map key to an underlying storage identifier.
651
:param key: A key tuple e.g. ('file-id', 'revision-id').
652
:return: An underlying storage identifier, specific to the partitioning
655
raise NotImplementedError(self.map)
657
def unmap(self, partition_id):
    """Map a partitioned storage id back to a key prefix.

    :param partition_id: The underlying partition id.
    :return: As much of a key (or prefix) as is derivable from the partition
        id.

    This base implementation always raises NotImplementedError.
    """
    unimplemented = self.unmap
    raise NotImplementedError(unimplemented)
667
class ConstantMapper(KeyMapper):
668
"""A key mapper that maps to a constant result."""
670
def __init__(self, result):
    """Create a ConstantMapper which will return result for all maps.

    :param result: The value stored on the instance as self._result.
    """
    constant = result
    self._result = constant
675
"""See KeyMapper.map()."""
679
class URLEscapeMapper(KeyMapper):
680
"""Base class for use with transport backed storage.
682
This provides a map and unmap wrapper that respectively url escape and
683
unescape their outputs and inputs.
687
"""See KeyMapper.map()."""
688
return urllib.quote(self._map(key))
690
def unmap(self, partition_id):
    """See KeyMapper.unmap().

    partition_id is url-unquoted with urllib.unquote before being passed
    to self._unmap.
    """
    unquoted_id = urllib.unquote(partition_id)
    return self._unmap(unquoted_id)
695
class PrefixMapper(URLEscapeMapper):
696
"""A key mapper that extracts the first component of a key.
698
This mapper is for use with a transport based backend.
702
"""See KeyMapper.map()."""
705
def _unmap(self, partition_id):
    """See KeyMapper.unmap().

    :return: A one-element tuple containing partition_id.
    """
    key_prefix = (partition_id,)
    return key_prefix
710
class HashPrefixMapper(URLEscapeMapper):
711
"""A key mapper that combines the first component of a key with a hash.
713
This mapper is for use with a transport based backend.
717
"""See KeyMapper.map()."""
718
prefix = self._escape(key[0])
719
return "%02x/%s" % (adler32(prefix) & 0xff, prefix)
721
def _escape(self, prefix):
722
"""No escaping needed here."""
725
def _unmap(self, partition_id):
    """See KeyMapper.unmap().

    :return: A one-element tuple holding self._unescape() of the result of
        osutils.basename(partition_id).
    """
    leaf_name = osutils.basename(partition_id)
    return (self._unescape(leaf_name),)
729
def _unescape(self, basename):
730
"""No unescaping needed for HashPrefixMapper."""
734
class HashEscapedPrefixMapper(HashPrefixMapper):
735
"""Combines the escaped first component of a key with a hash.
737
This mapper is for use with a transport based backend.
740
_safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."
742
def _escape(self, prefix):
743
"""Turn a key element into a filesystem safe string.
745
This is similar to a plain urllib.quote, except
746
it uses specific safe characters, so that it doesn't
747
have to translate a lot of valid file ids.
749
# @ does not get escaped. This is because it is a valid
750
# filesystem character we use all the time, and it looks
751
# a lot better than seeing %40 all the time.
752
r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
756
def _unescape(self, basename):
    """Reverse the percent-escaping of basename using urllib.unquote."""
    unquoted = urllib.unquote(basename)
    return unquoted
761
def make_versioned_files_factory(versioned_file_factory, mapper):
762
"""Create a ThunkedVersionedFiles factory.
764
This will create a callable which when called creates a
765
ThunkedVersionedFiles on a transport, using mapper to access individual
766
versioned files, and versioned_file_factory to create each individual file.
768
def factory(transport):
769
return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
774
class VersionedFiles(object):
775
"""Storage for many versioned files.
777
This object allows a single keyspace for accessing the history graph and
778
contents of named bytestrings.
780
Currently no implementation allows the graph of different key prefixes to
781
intersect, but the API does allow such implementations in the future.
783
The keyspace is expressed via simple tuples. Any instance of VersionedFiles
784
may have a different length key-size, but that size will be constant for
785
all texts added to or retrieved from it. For instance, bzrlib uses
786
instances with a key-size of 2 for storing user files in a repository, with
787
the first element the fileid, and the second the version of that file.
789
The use of tuples allows a single code base to support several different
790
uses with only the mapping logic changing from instance to instance.
793
def add_lines(self, key, parents, lines, parent_texts=None,
794
left_matching_blocks=None, nostore_sha=None, random_id=False,
796
"""Add a text to the store.
798
:param key: The key tuple of the text to add. If the last element is
799
None, a CHK string will be generated during the addition.
800
:param parents: The parents key tuples of the text to add.
801
:param lines: A list of lines. Each line must be a bytestring. And all
802
of them except the last must be terminated with \n and contain no
803
other \n's. The last line may either contain no \n's or a single
804
terminating \n. If the lines list does meet this constraint the add
805
routine may error or may succeed - but you will be unable to read
806
the data back accurately. (Checking the lines have been split
807
correctly is expensive and extremely unlikely to catch bugs so it
808
is not done at runtime unless check_content is True.)
809
:param parent_texts: An optional dictionary containing the opaque
810
representations of some or all of the parents of version_id to
811
allow delta optimisations. VERY IMPORTANT: the texts must be those
812
returned by add_lines or data corruption can be caused.
813
:param left_matching_blocks: a hint about which areas are common
814
between the text and its left-hand-parent. The format is
815
the SequenceMatcher.get_matching_blocks format.
816
:param nostore_sha: Raise ExistingContent and do not add the lines to
817
the versioned file if the digest of the lines matches this.
818
:param random_id: If True a random id has been selected rather than
819
an id determined by some deterministic process such as a converter
820
from a foreign VCS. When True the backend may choose not to check
821
for uniqueness of the resulting key within the versioned file, so
822
this should only be done when the result is expected to be unique
824
:param check_content: If True, the lines supplied are verified to be
825
bytestrings that are correctly formed lines.
826
:return: The text sha1, the number of bytes in the text, and an opaque
827
representation of the inserted version which can be provided
828
back to future add_lines calls in the parent_texts dictionary.
830
raise NotImplementedError(self.add_lines)
832
def add_text(self, key, parents, text, parent_texts=None,
833
nostore_sha=None, random_id=False, check_content=True):
834
return self.add_lines(key, parents, osutils.split_lines(text),
835
parent_texts=parent_texts,
836
nostore_sha=nostore_sha,
838
check_content=check_content)
840
def add_mpdiffs(self, records):
    """Add mpdiffs to this VersionedFile.

    Records should be iterables of version, parents, expected_sha1,
    mpdiff. mpdiff should be a MultiParent instance.

    records is iterated more than once, so it must be re-iterable.

    :raises errors.VersionedFileInvalidChecksum: if the sha1 returned by
        add_lines for a record differs from that record's expected_sha1.
        The exception names the key of the offending record.
    """
    # NOTE(review): several lines of this method were not visible in the
    # reviewed excerpt: the initialisation of vf_parents and versions, the
    # include_delta_closure argument, the body of the 'absent' check, the
    # trailing add_version arguments and the else: branch. They are
    # reconstructed from how the visible lines use them - confirm against
    # the full file before applying.
    vf_parents = {}
    mpvf = multiparent.MultiMemoryVersionedFile()
    versions = []
    for version, parent_ids, expected_sha1, mpdiff in records:
        versions.append(version)
        mpvf.add_diff(mpdiff, version, parent_ids)
    needed_parents = set()
    for version, parent_ids, expected_sha1, mpdiff in records:
        needed_parents.update(p for p in parent_ids
                              if not mpvf.has_version(p))
    # It seems likely that adding all the present parents as fulltexts can
    # easily exhaust memory.
    chunks_to_lines = osutils.chunks_to_lines
    for record in self.get_record_stream(needed_parents, 'unordered',
        True):
        if record.storage_kind == 'absent':
            continue
        mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
            record.key, [])
    for (key, parent_keys, expected_sha1, mpdiff), lines in\
        zip(records, mpvf.get_line_list(versions)):
        if len(parent_keys) == 1:
            left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                mpvf.get_diff(parent_keys[0]).num_lines()))
        else:
            left_matching_blocks = None
        version_sha1, _, version_text = self.add_lines(key,
            parent_keys, lines, vf_parents,
            left_matching_blocks=left_matching_blocks)
        if version_sha1 != expected_sha1:
            # Report the record being inserted. The previous code passed
            # 'version', which is left over from the loops above and so
            # always named the last record regardless of which one failed.
            raise errors.VersionedFileInvalidChecksum(key)
        vf_parents[key] = version_text
879
def annotate(self, key):
    """Return a list of (version-key, line) tuples for the text of key.

    :raise RevisionNotPresent: If the key is not present.

    This base implementation always raises NotImplementedError.
    """
    unimplemented = self.annotate
    raise NotImplementedError(unimplemented)
886
def check(self, progress_bar=None):
    """Check this object for integrity.

    This base implementation always raises NotImplementedError.
    """
    unimplemented = self.check
    raise NotImplementedError(unimplemented)
891
def check_not_reserved_id(version_id):
    """Validate version_id via revision.check_not_reserved_id.

    Nothing is returned; whatever the delegate raises propagates unchanged.
    """
    validate = revision.check_not_reserved_id
    validate(version_id)
894
def _check_lines_not_unicode(self, lines):
895
"""Check that lines being added to a versioned file are not unicode."""
897
if line.__class__ is not str:
898
raise errors.BzrBadParameterUnicode("lines")
900
def _check_lines_are_lines(self, lines):
901
"""Check that the lines really are full lines without inline EOL."""
903
if '\n' in line[:-1]:
904
raise errors.BzrBadParameterContainsNewline("lines")
906
def get_parent_map(self, keys):
    """Get a map of the parents of keys.

    :param keys: The keys to look up parents for.
    :return: A mapping from keys to parents. Absent keys are absent from
        the result.

    This base implementation always raises NotImplementedError.
    """
    unimplemented = self.get_parent_map
    raise NotImplementedError(unimplemented)
915
def get_record_stream(self, keys, ordering, include_delta_closure):
    """Get a stream of records for keys.

    :param keys: The keys to include.
    :param ordering: Either 'unordered' or 'topological'. A topologically
        sorted stream has compression parents strictly before their
        children.
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.

    This base implementation always raises NotImplementedError.
    """
    unimplemented = self.get_record_stream
    raise NotImplementedError(unimplemented)
929
def get_sha1s(self, keys):
    """Get the sha1's of the texts for the given keys.

    :param keys: The names of the keys to lookup
    :return: a dict from key to sha1 digest. Keys of texts which are not
        present in the store are not present in the returned
        dictionary.

    This base implementation always raises NotImplementedError.
    """
    unimplemented = self.get_sha1s
    raise NotImplementedError(unimplemented)
939
has_key = index._has_key_from_parent_map
941
def get_missing_compression_parent_keys(self):
    """Return an iterable of keys of missing compression parents.

    Check this after calling insert_record_stream to find out if there are
    any missing compression parents.  If there are, the records that
    depend on them are not able to be inserted safely. The precise
    behaviour depends on the concrete VersionedFiles class in use.

    Classes that do not support this will raise NotImplementedError.

    :raise NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError(self.get_missing_compression_parent_keys)
953
def insert_record_stream(self, stream):
    """Insert a record stream into this container.

    :param stream: A stream of records to insert.
    :return: None
    :seealso VersionedFile.get_record_stream:
    :raise NotImplementedError: Always, in this base implementation.
    """
    # Name the unimplemented method in the exception, as the sibling
    # abstract methods of this class do.
    raise NotImplementedError(self.insert_record_stream)
962
def iter_lines_added_or_present_in_keys(self, keys, pb=None):
    """Iterate over the lines in the versioned files from keys.

    This may return lines from other keys. Each item the returned
    iterator yields is a tuple of a line and a text version that that line
    is present in (not introduced in).

    Ordering of results is in whatever order is most suitable for the
    underlying storage format.

    If a progress bar is supplied, it may be used to indicate progress.
    The caller is responsible for cleaning up progress bars (because this
    is an iterator).

    NOTES:
     * Lines are normalised by the underlying store: they will all have
       newline terminators.
     * Lines are returned in arbitrary order.

    :param keys: The keys whose lines should be iterated.
    :param pb: Optional progress bar.
    :return: An iterator over (line, key).
    :raise NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError(self.iter_lines_added_or_present_in_keys)
def keys(self):
    """Return a iterable of the keys for all the contained texts.

    :raise NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError(self.keys)
989
def make_mpdiffs(self, keys):
    """Create multiparent diffs for specified keys.

    :param keys: An iterable of keys. It is consumed exactly once, so a
        one-shot iterator is acceptable.
    :return: A list with one multiparent.MultiParent per key, in the order
        the keys were supplied.
    :raise errors.RevisionNotPresent: If a requested key is absent from the
        mapping returned by get_parent_map.
    """
    # Materialise once: keys is needed both as an ordered sequence and as
    # a set, and may only be iterable a single time.
    keys_order = tuple(keys)
    keys = frozenset(keys_order)
    knit_keys = set(keys)
    parent_map = self.get_parent_map(keys)
    for parent_keys in parent_map.itervalues():
        if parent_keys:
            knit_keys.update(parent_keys)
    missing_keys = keys - set(parent_map)
    if missing_keys:
        raise errors.RevisionNotPresent(list(missing_keys)[0], self)
    # We need to filter out ghosts, because we can't diff against them.
    maybe_ghosts = knit_keys - keys
    ghosts = maybe_ghosts - set(self.get_parent_map(maybe_ghosts))
    knit_keys.difference_update(ghosts)
    lines = {}
    chunks_to_lines = osutils.chunks_to_lines
    for record in self.get_record_stream(knit_keys, 'topological', True):
        lines[record.key] = chunks_to_lines(record.get_bytes_as('chunked'))
    diffs = []
    for key in keys_order:
        target = lines[key]
        parents = parent_map[key] or []
        # Note that filtering knit_keys can lead to a parent difference
        # between the creation and the application of the mpdiff.
        parent_lines = [lines[p] for p in parents if p in knit_keys]
        if parent_lines:
            left_parent_blocks = self._extract_blocks(key, parent_lines[0],
                target)
        else:
            left_parent_blocks = None
        diffs.append(multiparent.MultiParent.from_lines(target,
            parent_lines, left_parent_blocks))
    return diffs
1029
# Class-level alias: missing_keys reuses the helper defined in the index
# module (presumably answered via get_parent_map -- confirm in bzrlib.index).
missing_keys = index._missing_keys_from_parent_map
1031
def _extract_blocks(self, version_id, source, target):
1035
class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.
    """

    def __init__(self, transport, file_factory, mapper, is_locked):
        """Create a ThunkedVersionedFiles.

        :param transport: Transport passed to file_factory and used to create
            a missing parent directory in add_lines.
        :param file_factory: Callable invoked as
            file_factory(path, transport, create=True, get_scope=...) to
            obtain the versioned file stored at path.
        :param mapper: Object whose map() turns a key or prefix into a path
            and whose unmap() turns a path back into a prefix.
        :param is_locked: Zero-argument callable; it must return a true value
            for any versioned file to be opened.
        """
        self._transport = transport
        self._file_factory = file_factory
        self._mapper = mapper
        self._is_locked = is_locked

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """See VersionedFiles.add_lines()."""
        path = self._mapper.map(key)
        version_id = key[-1]
        parents = [parent[-1] for parent in parents]
        vf = self._get_vf(path)

        def add_to_vf():
            # Prefer the ghost-aware entry point; fall back to plain
            # add_lines when the versioned file does not implement it.
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)

        try:
            return add_to_vf()
        except errors.NoSuchFile:
            # parent directory may be missing, try again.
            self._transport.mkdir(osutils.dirname(path))
            return add_to_vf()

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        prefix = key[:-1]
        path = self._mapper.map(prefix)
        vf = self._get_vf(path)
        origins = vf.annotate(key[-1])
        result = []
        for origin, line in origins:
            # Re-qualify the per-file version id with this file's prefix.
            result.append((prefix + (origin,), line))
        return result

    def check(self, progress_bar=None):
        """See VersionedFiles.check()."""
        for prefix, vf in self._iter_all_components():
            vf.check()

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        prefixes = self._partition_keys(keys)
        result = {}
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            parent_map = vf.get_parent_map(suffixes)
            for key, parents in parent_map.items():
                result[prefix + (key,)] = tuple(
                    prefix + (parent,) for parent in parents)
        return result

    def _get_vf(self, path):
        """Open the versioned file at path.

        :raise errors.ObjectNotLocked: If the is_locked callable returns a
            false value.
        """
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)
        return self._file_factory(path, self._transport, create=True,
            get_scope=lambda:None)

    def _partition_keys(self, keys):
        """Turn keys into a dict of prefix:suffix_list."""
        result = {}
        for key in keys:
            prefix_keys = result.setdefault(key[:-1], [])
            prefix_keys.append(key[-1])
        return result

    def _get_all_prefixes(self):
        """Return (path, prefix) pairs for every versioned file present."""
        # Identify all key prefixes.
        # XXX: A bit hacky, needs polish.
        if type(self._mapper) == ConstantMapper:
            paths = [self._mapper.map(())]
            prefixes = [()]
        else:
            relpaths = set()
            for quoted_relpath in self._transport.iter_files_recursive():
                # Files differing only by extension collapse to one path.
                path, ext = os.path.splitext(quoted_relpath)
                relpaths.add(path)
            paths = list(relpaths)
            prefixes = [self._mapper.unmap(path) for path in paths]
        return zip(paths, prefixes)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream()."""
        # Ordering will be taken care of by each partitioned store; group keys
        # by partition.
        keys = sorted(keys)
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            suffixes = [(suffix,) for suffix in suffixes]
            for record in vf.get_record_stream(suffixes, ordering,
                include_delta_closure):
                if record.parents is not None:
                    record.parents = tuple(
                        prefix + parent for parent in record.parents)
                record.key = prefix + record.key
                yield record

    def _iter_keys_vf(self, keys):
        """Yield (prefix, suffixes, versioned file) for each prefix in keys."""
        prefixes = self._partition_keys(keys)
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            yield prefix, suffixes, vf

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s()."""
        sha1s = {}
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            vf_sha1s = vf.get_sha1s(suffixes)
            for suffix, sha1 in vf_sha1s.iteritems():
                sha1s[prefix + (suffix,)] = sha1
        return sha1s

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        for record in stream:
            prefix = record.key[:-1]
            key = record.key[-1:]
            if record.parents is not None:
                parents = [parent[-1:] for parent in record.parents]
            else:
                parents = None
            thunk_record = AdapterFactory(key, parents, record)
            path = self._mapper.map(prefix)
            # Note that this parses the file many times; we can do better but
            # as this only impacts weaves in terms of performance, it is
            # tolerable.
            vf = self._get_vf(path)
            vf.insert_record_stream([thunk_record])

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is an iterator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have
           newline terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
                yield line, prefix + (version,)

    def _iter_all_components(self):
        """Yield (prefix, versioned file) for every known prefix."""
        for path, prefix in self._get_all_prefixes():
            yield prefix, self._get_vf(path)

    def keys(self):
        """See VersionedFiles.keys()."""
        result = set()
        for prefix, vf in self._iter_all_components():
            for suffix in vf.versions():
                result.add(prefix + (suffix,))
        return result
1247
class _PlanMergeVersionedFile(VersionedFiles):
    """A VersionedFile for uncommitted and committed texts.

    It is intended to allow merges to be planned with working tree texts.
    It implements only the small part of the VersionedFiles interface used by
    PlanMerge.  It falls back to multiple versionedfiles for data not stored in
    _PlanMergeVersionedFile itself.

    :ivar: fallback_versionedfiles a list of VersionedFiles objects that can be
        queried for missing texts.
    """

    def __init__(self, file_id):
        """Create a _PlanMergeVersionedFile.

        :param file_id: Used with _PlanMerge code which is not yet fully
            tuple-keyspace aware.
        """
        self._file_id = file_id
        # fallback locations
        self.fallback_versionedfiles = []
        # Parents for locally held keys.
        self._parents = {}
        # line data for locally held keys.
        self._lines = {}
        # key lookup providers
        self._providers = [DictParentsProvider(self._parents)]

    def plan_merge(self, ver_a, ver_b, base=None):
        """See VersionedFile.plan_merge"""
        from bzrlib.merge import _PlanMerge
        if base is None:
            return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
        old_plan = list(_PlanMerge(ver_a, base, self, (self._file_id,)).plan_merge())
        new_plan = list(_PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge())
        return _PlanMerge._subtract_plans(old_plan, new_plan)

    def plan_lca_merge(self, ver_a, ver_b, base=None):
        """Plan a merge of ver_a and ver_b using _PlanLCAMerge.

        When base is given, the plan of ver_a against base is subtracted
        from the plan of ver_a against ver_b.
        """
        from bzrlib.merge import _PlanLCAMerge
        graph = Graph(self)
        new_plan = _PlanLCAMerge(ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
        if base is None:
            return new_plan
        old_plan = _PlanLCAMerge(ver_a, base, self, (self._file_id,), graph).plan_merge()
        return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))

    def add_lines(self, key, parents, lines):
        """See VersionedFiles.add_lines

        Lines are added locally, not to fallback versionedfiles.  Also, ghosts
        are permitted.  Only reserved ids are permitted.

        :raise TypeError: If key is not a tuple.
        :raise ValueError: If the last element of key is not a reserved id,
            or if parents or lines is None.
        """
        if type(key) is not tuple:
            raise TypeError(key)
        if not revision.is_reserved_id(key[-1]):
            raise ValueError('Only reserved ids may be used')
        if parents is None:
            raise ValueError('Parents may not be None')
        if lines is None:
            raise ValueError('Lines may not be None')
        self._parents[key] = tuple(parents)
        self._lines[key] = lines

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """Yield records for keys: local texts first, then the fallbacks.

        Keys that neither this object nor any fallback can supply are
        reported last as AbsentContentFactory records. The ordering and
        include_delta_closure arguments are not consulted; fallbacks are
        always queried with 'unordered' and True.
        """
        pending = set(keys)
        for key in keys:
            if key in self._lines:
                lines = self._lines[key]
                parents = self._parents[key]
                pending.remove(key)
                yield ChunkedContentFactory(key, parents, None, lines)
        for versionedfile in self.fallback_versionedfiles:
            for record in versionedfile.get_record_stream(
                pending, 'unordered', True):
                if record.storage_kind == 'absent':
                    continue
                else:
                    pending.remove(record.key)
                    yield record
            if not pending:
                return
        # report absent entries
        for key in pending:
            yield AbsentContentFactory(key)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map"""
        # We create a new provider because a fallback may have been added.
        # If we make fallbacks private we can update a stack list and avoid
        # object creation thrashing.
        keys = set(keys)
        result = {}
        if revision.NULL_REVISION in keys:
            keys.remove(revision.NULL_REVISION)
            result[revision.NULL_REVISION] = ()
        self._providers = self._providers[:1] + self.fallback_versionedfiles
        result.update(
            _StackedParentsProvider(self._providers).get_parent_map(keys))
        # Only values of existing keys are replaced here, so the dict does
        # not change size while it is being iterated.
        for key, parents in result.iteritems():
            if parents == ():
                result[key] = (revision.NULL_REVISION,)
        return result
1351
class PlanWeaveMerge(TextMerge):
    """Weave merge that takes a plan as its input.

    This exists so that VersionedFile.plan_merge is implementable.
    Most callers will want to use WeaveMerge instead.
    """

    def __init__(self, plan, a_marker=TextMerge.A_MARKER,
                 b_marker=TextMerge.B_MARKER):
        """Create a PlanWeaveMerge.

        :param plan: An iterable of (state, line) pairs, consumed by
            _merge_struct.
        :param a_marker: Passed through to TextMerge.__init__.
        :param b_marker: Passed through to TextMerge.__init__.
        """
        TextMerge.__init__(self, a_marker, b_marker)
        self.plan = plan

    def _merge_struct(self):
        """Turn the plan into a stream of merge structures.

        Yields a 1-tuple ``(lines,)`` for a region with a single outcome and
        a 2-tuple ``(lines_a, lines_b)`` for a region where both sides
        changed and the results differ.

        :raise AssertionError: If the plan contains an unknown state.
        """
        lines_a = []
        lines_b = []
        ch_a = ch_b = False

        def outstanding_struct():
            # Emits at most one structure for the lines queued since the
            # last 'unchanged' line.
            if not lines_a and not lines_b:
                return
            elif ch_a and not ch_b:
                # one-sided change:
                yield (lines_a,)
            elif ch_b and not ch_a:
                yield (lines_b,)
            elif lines_a == lines_b:
                yield (lines_a,)
            else:
                yield (lines_a, lines_b)

        # We previously considered either 'unchanged' or 'killed-both' lines
        # to be possible places to resynchronize.  However, assuming agreement
        # on killed-both lines may be too aggressive. -- mbp 20060324
        for state, line in self.plan:
            if state == 'unchanged':
                # resync and flush queued conflicts changes if any
                for struct in outstanding_struct():
                    yield struct
                # Rebind rather than clear, so lists already yielded are
                # not emptied behind the consumer's back.
                lines_a = []
                lines_b = []
                ch_a = ch_b = False
                if line:
                    yield ([line],)
            elif state == 'killed-a':
                ch_a = True
                lines_b.append(line)
            elif state == 'killed-b':
                ch_b = True
                lines_a.append(line)
            elif state == 'new-a':
                ch_a = True
                lines_a.append(line)
            elif state == 'new-b':
                ch_b = True
                lines_b.append(line)
            elif state == 'conflicted-a':
                ch_b = ch_a = True
                lines_a.append(line)
            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            elif state == 'killed-both':
                # This counts as a change, even though there is no associated
                # line
                ch_b = ch_a = True
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                        'killed-base'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct
1426
class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        """Create a WeaveMerge.

        :param versionedfile: Object whose plan_merge(ver_a, ver_b) supplies
            the plan.
        :param ver_a: First version handed to plan_merge.
        :param ver_b: Second version handed to plan_merge.
        :param a_marker: Passed through to PlanWeaveMerge.__init__.
        :param b_marker: Passed through to PlanWeaveMerge.__init__.
        """
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
1435
class VirtualVersionedFiles(VersionedFiles):
    """Dummy implementation for VersionedFiles that uses other functions for
    obtaining fulltexts and parent maps.

    This is always on the bottom of the stack and uses string keys
    (rather than tuples) internally.
    """

    def __init__(self, get_parent_map, get_lines):
        """Create a VirtualVersionedFiles.

        :param get_parent_map: Same signature as Repository.get_parent_map.
        :param get_lines: Should return lines for specified key or None if
                          not available.
        """
        super(VirtualVersionedFiles, self).__init__()
        self._get_parent_map = get_parent_map
        self._get_lines = get_lines

    def check(self, progressbar=None):
        """See VersionedFiles.check.

        :note: Always returns True for VirtualVersionedFiles.
        """
        return True

    def add_mpdiffs(self, records):
        """See VersionedFiles.mpdiffs.

        :note: Not implemented for VirtualVersionedFiles.
        """
        raise NotImplementedError(self.add_mpdiffs)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map."""
        # Unwrap the 1-tuple keys for the string-keyed callback, then wrap
        # both keys and parents again on the way out.
        return dict([((k,), tuple([(p,) for p in v]))
            for k,v in self._get_parent_map([k for (k,) in keys]).iteritems()])

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s."""
        ret = {}
        for (k,) in keys:
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                ret[(k,)] = osutils.sha_strings(lines)
        return ret

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream."""
        for (k,) in list(keys):
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                yield ChunkedContentFactory((k,), None,
                        sha1=osutils.sha_strings(lines),
                        chunks=lines)
            else:
                yield AbsentContentFactory((k,))

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        for i, (key,) in enumerate(keys):
            if pb is not None:
                pb.update("Finding changed lines", i, len(keys))
            for l in self._get_lines(key):
                yield (l, key)
1506
def network_bytes_to_kind_and_offset(network_bytes):
    """Strip of a record kind from the front of network_bytes.

    The kind is everything before the first newline; the offset points at
    the first byte after that newline.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    # NOTE(review): with no newline present find() returns -1, so the kind
    # silently becomes network_bytes[:-1] and the offset 0.
    line_end = network_bytes.find('\n')
    storage_kind = network_bytes[:line_end]
    return storage_kind, line_end + 1
1517
class NetworkRecordStream(object):
    """A record_stream which reconstitures a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator should have been obtained from a record_streams'
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        # Maps a storage kind to the callable that decodes its bytes; each
        # is called as factory(storage_kind, bytes, line_end) and returns an
        # iterable of records.
        self._kind_factory = {'knit-ft-gz':knit.knit_network_to_record,
            'knit-delta-gz':knit.knit_network_to_record,
            'knit-annotated-ft-gz':knit.knit_network_to_record,
            'knit-annotated-delta-gz':knit.knit_network_to_record,
            'knit-delta-closure':knit.knit_delta_closure_to_records,
            'fulltext':fulltext_network_to_record,
            'groupcompress-block':groupcompress.network_block_to_records,
            }

    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        :raise KeyError: If an item names a storage kind with no registered
            factory.
        """
        for bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
            for record in self._kind_factory[storage_kind](
                storage_kind, bytes, line_end):
                yield record
1549
def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record.

    After the kind line the layout is: a 4-byte network-order length, that
    many bytes of bencoded (key, parents), then the fulltext itself.

    :param kind: The storage kind; not consulted here.
    :param bytes: The complete serialised record.
    :param line_end: Offset of the first byte after the kind line.
    :return: A list holding a single FulltextContentFactory.
    """
    meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
    record_meta = bytes[line_end+4:line_end+4+meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    # 'nil' is the wire spelling of "no parent information".
    if parents == 'nil':
        parents = None
    fulltext = bytes[line_end+4+meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]
1560
def _length_prefix(bytes):
1561
return struct.pack('!L', len(bytes))
1564
def record_to_fulltext_bytes(record):
    """Serialise record as a network 'fulltext' record.

    The result is the line ``fulltext``, a 4-byte length prefix, the
    bencoded (key, parents) metadata of that length, then the fulltext.

    :param record: An object providing key, parents and
        get_bytes_as('fulltext').
    :return: The serialised bytes.
    """
    if record.parents is None:
        # Absent parents travel as the string 'nil', which
        # fulltext_network_to_record turns back into None.
        parents = 'nil'
    else:
        parents = record.parents
    record_meta = bencode.bencode((record.key, parents))
    record_content = record.get_bytes_as('fulltext')
    return "fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)
1575
def sort_groupcompress(parent_map):
1576
"""Sort and group the keys in parent_map into groupcompress order.
1578
groupcompress is defined (currently) as reverse-topological order, grouped
1581
:return: A sorted-list of keys
1583
# gc-optimal ordering is approximately reverse topological,
1584
# properly grouped by file-id.
1586
for item in parent_map.iteritems():
1588
if isinstance(key, str) or len(key) == 1:
1593
per_prefix_map[prefix].append(item)
1595
per_prefix_map[prefix] = [item]
1598
for prefix in sorted(per_prefix_map):
1599
present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))