# Copyright (C) 2005, 2006, 2007, 2008 Canonical Ltd
#
# Authors:
#   Johan Rydberg <jrydberg@gnu.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Versioned text file storage api."""

from copy import copy
from cStringIO import StringIO
import os
import struct
from zlib import adler32

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
import urllib

from bzrlib import (
    errors,
    index,
    knit,
    multiparent,
    osutils,
    revision,
    tsort,
    )
from bzrlib.graph import DictParentsProvider, Graph, _StackedParentsProvider
from bzrlib.transport.memory import MemoryTransport
""")
from bzrlib.inter import InterObject
from bzrlib.registry import Registry
from bzrlib.symbol_versioning import *
from bzrlib.textmerge import TextMerge
from bzrlib.util import bencode


adapter_registry = Registry()
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
    'DeltaPlainToFullText')
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
    'FTPlainToFullText')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
    'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
    'bzrlib.knit', 'DeltaAnnotatedToFullText')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
    'bzrlib.knit', 'FTAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
    'bzrlib.knit', 'FTAnnotatedToFullText')
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
#     'bzrlib.knit', 'FTAnnotatedToChunked')
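
# Illustrative sketch (editor's note, not part of the original module):
# adapters are keyed by (source storage kind, target storage kind) pairs, and
# Registry.get() returns the lazily-registered adapter class from bzrlib.knit:
#
#   adapter_cls = adapter_registry.get(('knit-annotated-ft-gz', 'fulltext'))
#   # Constructing and using the adapter depends on the concrete class; the
#   # knit adapters convert a record's raw bytes between representations.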


class ContentFactory(object):
    """Abstract interface for insertion and retrieval from a VersionedFile.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. One of
        'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
        'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
        'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = None
        self.key = None
        self.parents = None


class ChunkedContentFactory(ContentFactory):
    """Static data content factory.

    This takes a 'chunked' list of strings. The only requirement on 'chunked' is
    that ''.join(lines) becomes a valid fulltext. A tuple of a single string
    satisfies this, as does a list of lines.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'chunked'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, chunks):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.storage_kind = 'chunked'
        self.key = key
        self.parents = parents
        self._chunks = chunks

    def get_bytes_as(self, storage_kind):
        if storage_kind == 'chunked':
            return self._chunks
        elif storage_kind == 'fulltext':
            return ''.join(self._chunks)
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)
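
# Illustrative sketch (editor's note, not part of the original module): a
# chunked factory simply stores its chunks and joins them on demand:
#
#   factory = ChunkedContentFactory(('rev-1',), (), None,
#       ['first line\n', 'second line\n'])
#   factory.get_bytes_as('chunked')   # -> ['first line\n', 'second line\n']
#   factory.get_bytes_as('fulltext')  # -> 'first line\nsecond line\n'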


class FulltextContentFactory(ContentFactory):
    """Static data content factory.

    This takes a fulltext when created and just returns that during
    get_bytes_as('fulltext').

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'fulltext'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, text):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.storage_kind = 'fulltext'
        self.key = key
        self.parents = parents
        self._text = text

    def get_bytes_as(self, storage_kind):
        if storage_kind == self.storage_kind:
            return self._text
        elif storage_kind == 'chunked':
            return [self._text]
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)


class AbsentContentFactory(ContentFactory):
    """A placeholder content factory for unavailable texts.

    :ivar sha1: None.
    :ivar storage_kind: 'absent'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: None.
    """

    def __init__(self, key):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = 'absent'
        self.key = key
        self.parents = None


class AdapterFactory(ContentFactory):
    """A content factory to adapt between key prefixes."""

    def __init__(self, key, parents, adapted):
        """Create an adapter factory instance."""
        self.key = key
        self.parents = parents
        self._adapted = adapted

    def __getattr__(self, attr):
        """Return a member from the adapted object."""
        if attr in ('key', 'parents'):
            return self.__dict__[attr]
        else:
            return getattr(self._adapted, attr)


def filter_absent(record_stream):
    """Adapt a record stream to remove absent records."""
    for record in record_stream:
        if record.storage_kind != 'absent':
            yield record
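
# Illustrative sketch (editor's note, not part of the original module):
# filter_absent wraps any record stream, dropping AbsentContentFactory
# entries. Given some VersionedFiles instance vf (hypothetical name):
#
#   stream = vf.get_record_stream(keys, 'unordered', True)
#   for record in filter_absent(stream):
#       text = record.get_bytes_as('fulltext')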


class VersionedFile(object):
    """Versioned text file storage.

    A versioned file manages versions of line-based text files,
    keeping track of the originating version for each line.

    To clients the "lines" of the file are represented as a list of
    strings. These strings will typically have terminal newline
    characters, but this is not required.  In particular files commonly
    do not have a newline at the end of the file.

    Texts are identified by a version-id string.
    """

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def copy_to(self, name, transport):
        """Copy this versioned file to name on transport."""
        raise NotImplementedError(self.copy_to)

    def get_record_stream(self, versions, ordering, include_delta_closure):
        """Get a stream of records for versions.

        :param versions: The versions to include. Each version is a tuple
            (version,).
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the data content of the
            stream, not in the emitted records). This guarantees that
            'fulltext' can be used successfully on every record.
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        raise NotImplementedError(self.get_record_stream)

    def has_version(self, version_id):
        """Returns whether version is present."""
        raise NotImplementedError(self.has_version)

    def insert_record_stream(self, stream):
        """Insert a record stream into this versioned file.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError

    def add_lines(self, version_id, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """Add a single text on top of the versioned file.

        Must raise RevisionAlreadyPresent if the new version is
        already present in file history.

        Must raise RevisionNotPresent if any of the given parents are
        not present in file history.

        :param lines: A list of lines. Each line must be a bytestring. And all
            of them except the last must be terminated with \n and contain no
            other \n's. The last line may either contain no \n's or a single
            terminating \n. If the lines list does not meet this constraint the
            add routine may error or may succeed - but you will be unable to
            read the data back accurately. (Checking the lines have been split
            correctly is expensive and extremely unlikely to catch bugs so it
            is not done at runtime unless check_content is True.)
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations.  VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent.  The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
                 representation of the inserted version which can be provided
                 back to future add_lines calls in the parent_texts dictionary.
        """
        self._check_write_ok()
        return self._add_lines(version_id, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content)

    def _add_lines(self, version_id, parents, lines, parent_texts,
        left_matching_blocks, nostore_sha, random_id, check_content):
        """Helper to do the class specific add_lines."""
        raise NotImplementedError(self.add_lines)

    def add_lines_with_ghosts(self, version_id, parents, lines,
        parent_texts=None, nostore_sha=None, random_id=False,
        check_content=True, left_matching_blocks=None):
        """Add lines to the versioned file, allowing ghosts to be present.

        This takes the same parameters as add_lines and returns the same.
        """
        self._check_write_ok()
        return self._add_lines_with_ghosts(version_id, parents, lines,
            parent_texts, nostore_sha, random_id, check_content,
            left_matching_blocks)

    def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
        nostore_sha, random_id, check_content, left_matching_blocks):
        """Helper to do class specific add_lines_with_ghosts."""
        raise NotImplementedError(self.add_lines_with_ghosts)

    def check(self, progress_bar=None):
        """Check the versioned file for integrity."""
        raise NotImplementedError(self.check)

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

    def _check_lines_are_lines(self, lines):
        """Check that the lines really are full lines without inline EOL."""
        for line in lines:
            if '\n' in line[:-1]:
                raise errors.BzrBadParameterContainsNewline("lines")

    def get_format_signature(self):
        """Get a text description of the data encoding in this file.

        :since: 0.90
        """
        raise NotImplementedError(self.get_format_signature)

    def make_mpdiffs(self, version_ids):
        """Create multiparent diffs for specified versions."""
        knit_versions = set()
        knit_versions.update(version_ids)
        parent_map = self.get_parent_map(version_ids)
        for version_id in version_ids:
            try:
                knit_versions.update(parent_map[version_id])
            except KeyError:
                raise errors.RevisionNotPresent(version_id, self)
        # We need to filter out ghosts, because we can't diff against them.
        knit_versions = set(self.get_parent_map(knit_versions).keys())
        lines = dict(zip(knit_versions,
            self._get_lf_split_line_list(knit_versions)))
        diffs = []
        for version_id in version_ids:
            target = lines[version_id]
            try:
                parents = [lines[p] for p in parent_map[version_id] if p in
                    knit_versions]
            except KeyError:
                # I don't know how this could ever trigger.
                # parent_map[version_id] was already triggered in the previous
                # for loop, and lines[p] has the 'if p in knit_versions' check,
                # so we again won't have a KeyError.
                raise errors.RevisionNotPresent(version_id, self)
            if len(parents) > 0:
                left_parent_blocks = self._extract_blocks(version_id,
                                                          parents[0], target)
            else:
                left_parent_blocks = None
            diffs.append(multiparent.MultiParent.from_lines(target, parents,
                         left_parent_blocks))
        return diffs

    def _extract_blocks(self, version_id, source, target):
        return None

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """
        # Does this need to call self._check_write_ok()? (IanC 20070919)
        vf_parents = {}
        mpvf = multiparent.MultiMemoryVersionedFile()
        versions = []
        for version, parent_ids, expected_sha1, mpdiff in records:
            versions.append(version)
            mpvf.add_diff(mpdiff, version, parent_ids)
        needed_parents = set()
        for version, parent_ids, expected_sha1, mpdiff in records:
            needed_parents.update(p for p in parent_ids
                                  if not mpvf.has_version(p))
        present_parents = set(self.get_parent_map(needed_parents).keys())
        for parent_id, lines in zip(present_parents,
                                 self._get_lf_split_line_list(present_parents)):
            mpvf.add_version(lines, parent_id, [])
        for (version, parent_ids, expected_sha1, mpdiff), lines in\
            zip(records, mpvf.get_line_list(versions)):
            if len(parent_ids) == 1:
                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                    mpvf.get_diff(parent_ids[0]).num_lines()))
            else:
                left_matching_blocks = None
            try:
                _, _, version_text = self.add_lines_with_ghosts(version,
                    parent_ids, lines, vf_parents,
                    left_matching_blocks=left_matching_blocks)
            except NotImplementedError:
                # The vf can't handle ghosts, so add lines normally, which will
                # (reasonably) fail if there are ghosts in the data.
                _, _, version_text = self.add_lines(version,
                    parent_ids, lines, vf_parents,
                    left_matching_blocks=left_matching_blocks)
            vf_parents[version] = version_text
        sha1s = self.get_sha1s(versions)
        for version, parent_ids, expected_sha1, mpdiff in records:
            if expected_sha1 != sha1s[version]:
                raise errors.VersionedFileInvalidChecksum(version)

    def get_text(self, version_id):
        """Return version contents as a text string.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        return ''.join(self.get_lines(version_id))
    get_string = get_text

    def get_texts(self, version_ids):
        """Return the texts of listed versions as a list of strings.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        return [''.join(self.get_lines(v)) for v in version_ids]

    def get_lines(self, version_id):
        """Return version contents as a sequence of lines.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        raise NotImplementedError(self.get_lines)

    def _get_lf_split_line_list(self, version_ids):
        return [StringIO(t).readlines() for t in self.get_texts(version_ids)]

    def get_ancestry(self, version_ids, topo_sorted=True):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        This list will not be topologically sorted if topo_sorted=False is
        passed.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history."""
        if isinstance(version_ids, basestring):
            version_ids = [version_ids]
        raise NotImplementedError(self.get_ancestry)

    def get_ancestry_with_ghosts(self, version_ids):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history.

        Ghosts that are known about will be included in ancestry list,
        but are not explicitly marked.
        """
        raise NotImplementedError(self.get_ancestry_with_ghosts)

    def get_parent_map(self, version_ids):
        """Get a map of the parents of version_ids.

        :param version_ids: The version ids to look up parents for.
        :return: A mapping from version id to parents.
        """
        raise NotImplementedError(self.get_parent_map)

    def get_parents_with_ghosts(self, version_id):
        """Return version names for parents of version_id.

        Will raise RevisionNotPresent if version_id is not present
        in the history.

        Ghosts that are known about will be included in the parent list,
        but are not explicitly marked.
        """
        try:
            return list(self.get_parent_map([version_id])[version_id])
        except KeyError:
            raise errors.RevisionNotPresent(version_id, self)

    def annotate(self, version_id):
        """Return a list of (version-id, line) tuples for version_id.

        :raise RevisionNotPresent: If the given version is
            not present in file history.
        """
        raise NotImplementedError(self.annotate)

    def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                                pb=None):
        """Iterate over the lines in the versioned file from version_ids.

        This may return lines from other versions. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is a generator).

        NOTES: Lines are normalised: they will all have \n terminators.
               Lines are returned in arbitrary order.

        :return: An iterator over (line, version_id).
        """
        raise NotImplementedError(self.iter_lines_added_or_present_in_versions)

    def plan_merge(self, ver_a, ver_b):
        """Return pseudo-annotation indicating how the two versions merge.

        This is computed between versions a and b and their common
        base.

        Weave lines present in none of them are skipped entirely.

        Legend:
        killed-base Dead in base revision
        killed-both Killed in each revision
        killed-a    Killed in a
        killed-b    Killed in b
        unchanged   Alive in both a and b (possibly created in both)
        new-a       Created in a
        new-b       Created in b
        ghost-a     Killed in a, unborn in b
        ghost-b     Killed in b, unborn in a
        irrelevant  Not in either revision
        """
        raise NotImplementedError(VersionedFile.plan_merge)

    def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
                    b_marker=TextMerge.B_MARKER):
        return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
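
# Illustrative sketch (editor's note, not part of the original module): a plan
# is a sequence of (state, line) tuples using the legend above, and
# PlanWeaveMerge turns it into merged output lines, exactly as weave_merge
# does with merge_lines()[0]:
#
#   plan = [('unchanged', 'a\n'), ('new-a', 'b\n'), ('new-b', 'c\n')]
#   lines = PlanWeaveMerge(plan).merge_lines()[0]
#   # 'lines' yields the merged text, with conflict markers wrapped around
#   # the conflicting new-a/new-b region.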


class RecordingVersionedFilesDecorator(object):
    """A minimal versioned files that records calls made on it.

    Only enough methods have been added to support tests using it to date.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to it.
    """

    def __init__(self, backing_vf):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        """
        self._backing_vf = backing_vf
        self.calls = []

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        self.calls.append(("add_lines", key, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content))
        return self._backing_vf.add_lines(key, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content)

    def check(self):
        self._backing_vf.check()

    def get_parent_map(self, keys):
        self.calls.append(("get_parent_map", copy(keys)))
        return self._backing_vf.get_parent_map(keys)

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", list(keys), sort_order,
            include_delta_closure))
        return self._backing_vf.get_record_stream(keys, sort_order,
            include_delta_closure)

    def get_sha1s(self, keys):
        self.calls.append(("get_sha1s", copy(keys)))
        return self._backing_vf.get_sha1s(keys)

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        self.calls.append(("iter_lines_added_or_present_in_keys", copy(keys)))
        return self._backing_vf.iter_lines_added_or_present_in_keys(keys, pb=pb)

    def keys(self):
        self.calls.append(("keys",))
        return self._backing_vf.keys()


class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
    """A VF that records calls, and returns keys in specific order.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to it.
    """

    def __init__(self, backing_vf, key_priority):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        :param key_priority: A dictionary defining what order keys should be
            returned from an 'unordered' get_record_stream request.
            Keys with lower priority are returned first, keys not present in
            the map get an implicit priority of 0, and are returned in
            lexicographical order.
        """
        RecordingVersionedFilesDecorator.__init__(self, backing_vf)
        self._key_priority = key_priority

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", list(keys), sort_order,
            include_delta_closure))
        if sort_order == 'unordered':
            def sort_key(key):
                return (self._key_priority.get(key, 0), key)
            # Use a defined order by asking for the keys one-by-one from the
            # backing_vf
            for key in sorted(keys, key=sort_key):
                for record in self._backing_vf.get_record_stream([key],
                                'unordered', include_delta_closure):
                    yield record
        else:
            for record in self._backing_vf.get_record_stream(keys, sort_order,
                           include_delta_closure):
                yield record


class KeyMapper(object):
    """KeyMappers map between keys and underlying partitioned storage."""

    def map(self, key):
        """Map key to an underlying storage identifier.

        :param key: A key tuple e.g. ('file-id', 'revision-id').
        :return: An underlying storage identifier, specific to the partitioning
            mechanism.
        """
        raise NotImplementedError(self.map)

    def unmap(self, partition_id):
        """Map a partitioned storage id back to a key prefix.

        :param partition_id: The underlying partition id.
        :return: As much of a key (or prefix) as is derivable from the partition
            id.
        """
        raise NotImplementedError(self.unmap)


class ConstantMapper(KeyMapper):
    """A key mapper that maps to a constant result."""

    def __init__(self, result):
        """Create a ConstantMapper which will return result for all maps."""
        self._result = result

    def map(self, key):
        """See KeyMapper.map()."""
        return self._result


class URLEscapeMapper(KeyMapper):
    """Base class for use with transport backed storage.

    This provides a map and unmap wrapper that respectively url escape and
    unescape their outputs and inputs.
    """

    def map(self, key):
        """See KeyMapper.map()."""
        return urllib.quote(self._map(key))

    def unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return self._unmap(urllib.unquote(partition_id))


class PrefixMapper(URLEscapeMapper):
    """A key mapper that extracts the first component of a key.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        return key[0]

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (partition_id,)


class HashPrefixMapper(URLEscapeMapper):
    """A key mapper that combines the first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        prefix = self._escape(key[0])
        return "%02x/%s" % (adler32(prefix) & 0xff, prefix)

    def _escape(self, prefix):
        """No escaping needed here."""
        return prefix

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (self._unescape(osutils.basename(partition_id)),)

    def _unescape(self, basename):
        """No unescaping needed for HashPrefixMapper."""
        return basename
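
# Illustrative sketch (editor's note, not part of the original module): the
# mappers above turn key prefixes into storage paths:
#
#   PrefixMapper().map(('file-id', 'rev-1'))      # -> 'file-id'
#   HashPrefixMapper().map(('file-id', 'rev-1'))  # -> 'xx/file-id', where
#   # 'xx' is the two hex digits of adler32('file-id') & 0xff (the 'xx' here
#   # is a placeholder, not a computed value).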


class HashEscapedPrefixMapper(HashPrefixMapper):
    """Combines the escaped first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    _safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."

    def _escape(self, prefix):
        """Turn a key element into a filesystem safe string.

        This is similar to a plain urllib.quote, except
        it uses specific safe characters, so that it doesn't
        have to translate a lot of valid file ids.
        """
        # @ does not get escaped. This is because it is a valid
        # filesystem character we use all the time, and it looks
        # a lot better than seeing %40 all the time.
        r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
             for c in prefix]
        return ''.join(r)

    def _unescape(self, basename):
        """Escaped names are easily unescaped by urlutils."""
        return urllib.unquote(basename)


def make_versioned_files_factory(versioned_file_factory, mapper):
    """Create a ThunkedVersionedFiles factory.

    This will create a callable which when called creates a
    ThunkedVersionedFiles on a transport, using mapper to access individual
    versioned files, and versioned_file_factory to create each individual file.
    """
    def factory(transport):
        return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
            lambda:True)
    return factory


class VersionedFiles(object):
    """Storage for many versioned files.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.

    The keyspace is expressed via simple tuples. Any instance of VersionedFiles
    may have a different length key-size, but that size will be constant for
    all texts added to or retrieved from it. For instance, bzrlib uses
    instances with a key-size of 2 for storing user files in a repository, with
    the first element the fileid, and the second the version of that file.

    The use of tuples allows a single code base to support several different
    uses with only the mapping logic changing from instance to instance.
    """

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """Add a text to the store.

        :param key: The key tuple of the text to add.
        :param parents: The parents key tuples of the text to add.
        :param lines: A list of lines. Each line must be a bytestring. And all
            of them except the last must be terminated with \n and contain no
            other \n's. The last line may either contain no \n's or a single
            terminating \n. If the lines list does not meet this constraint the
            add routine may error or may succeed - but you will be unable to
            read the data back accurately. (Checking the lines have been split
            correctly is expensive and extremely unlikely to catch bugs so it
            is not done at runtime unless check_content is True.)
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations.  VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent.  The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
                 representation of the inserted version which can be provided
                 back to future add_lines calls in the parent_texts dictionary.
        """
        raise NotImplementedError(self.add_lines)

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """
        vf_parents = {}
        mpvf = multiparent.MultiMemoryVersionedFile()
        versions = []
        for version, parent_ids, expected_sha1, mpdiff in records:
            versions.append(version)
            mpvf.add_diff(mpdiff, version, parent_ids)
        needed_parents = set()
        for version, parent_ids, expected_sha1, mpdiff in records:
            needed_parents.update(p for p in parent_ids
                                  if not mpvf.has_version(p))
        # It seems likely that adding all the present parents as fulltexts can
        # easily exhaust memory.
        chunks_to_lines = osutils.chunks_to_lines
        for record in self.get_record_stream(needed_parents, 'unordered',
            True):
            if record.storage_kind == 'absent':
                continue
            mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
                record.key, [])
        for (key, parent_keys, expected_sha1, mpdiff), lines in\
            zip(records, mpvf.get_line_list(versions)):
            if len(parent_keys) == 1:
                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                    mpvf.get_diff(parent_keys[0]).num_lines()))
            else:
                left_matching_blocks = None
            version_sha1, _, version_text = self.add_lines(key,
                parent_keys, lines, vf_parents,
                left_matching_blocks=left_matching_blocks)
            if version_sha1 != expected_sha1:
                raise errors.VersionedFileInvalidChecksum(key)
            vf_parents[key] = version_text

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        raise NotImplementedError(self.annotate)

    def check(self, progress_bar=None):
        """Check this object for integrity."""
        raise NotImplementedError(self.check)

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

    def _check_lines_are_lines(self, lines):
        """Check that the lines really are full lines without inline EOL."""
        for line in lines:
            if '\n' in line[:-1]:
                raise errors.BzrBadParameterContainsNewline("lines")

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        raise NotImplementedError(self.get_parent_map)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """Get a stream of records for keys.

        :param keys: The keys to include.
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the opaque data).
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        raise NotImplementedError(self.get_record_stream)

    def get_sha1s(self, keys):
        """Get the sha1's of the texts for the given keys.

        :param keys: The names of the keys to lookup
        :return: a dict from key to sha1 digest. Keys of texts which are not
            present in the store are not present in the returned
            dictionary.
        """
        raise NotImplementedError(self.get_sha1s)

    has_key = index._has_key_from_parent_map

    def get_missing_compression_parent_keys(self):
        """Return an iterable of keys of missing compression parents.

        Check this after calling insert_record_stream to find out if there are
        any missing compression parents.  If there are, the records that
        depend on them are not able to be inserted safely. The precise
        behaviour depends on the concrete VersionedFiles class in use.

        Classes that do not support this will raise NotImplementedError.
        """
        raise NotImplementedError(self.get_missing_compression_parent_keys)

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is a generator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        raise NotImplementedError(self.iter_lines_added_or_present_in_keys)

    def keys(self):
        """Return an iterable of the keys for all the contained texts."""
        raise NotImplementedError(self.keys)

    def make_mpdiffs(self, keys):
        """Create multiparent diffs for specified keys."""
        keys_order = tuple(keys)
        keys = frozenset(keys)
        knit_keys = set(keys)
        parent_map = self.get_parent_map(keys)
        for parent_keys in parent_map.itervalues():
            if parent_keys:
                knit_keys.update(parent_keys)
        missing_keys = keys - set(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self)
        # We need to filter out ghosts, because we can't diff against them.
        maybe_ghosts = knit_keys - keys
        ghosts = maybe_ghosts - set(self.get_parent_map(maybe_ghosts))
        knit_keys.difference_update(ghosts)
        lines = {}
        chunks_to_lines = osutils.chunks_to_lines
        for record in self.get_record_stream(knit_keys, 'topological', True):
            lines[record.key] = chunks_to_lines(record.get_bytes_as('chunked'))
            # line_block_dict = {}
            # for parent, blocks in record.extract_line_blocks():
            #   line_blocks[parent] = blocks
            # line_blocks[record.key] = line_block_dict
        diffs = []
        for key in keys_order:
            target = lines[key]
            parents = parent_map[key] or []
            # Note that filtering knit_keys can lead to a parent difference
            # between the creation and the application of the mpdiff.
            parent_lines = [lines[p] for p in parents if p in knit_keys]
            if len(parent_lines) > 0:
                left_parent_blocks = self._extract_blocks(key, parent_lines[0],
                    target)
            else:
                left_parent_blocks = None
            diffs.append(multiparent.MultiParent.from_lines(target,
                parent_lines, left_parent_blocks))
        return diffs

    missing_keys = index._missing_keys_from_parent_map

    def _extract_blocks(self, version_id, source, target):
        return None


class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.
    """

    def __init__(self, transport, file_factory, mapper, is_locked):
        """Create a ThunkedVersionedFiles."""
        self._transport = transport
        self._file_factory = file_factory
        self._mapper = mapper
        self._is_locked = is_locked

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """See VersionedFiles.add_lines()."""
        path = self._mapper.map(key)
        version_id = key[-1]
        parents = [parent[-1] for parent in parents]
        vf = self._get_vf(path)
        try:
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
        except errors.NoSuchFile:
            # parent directory may be missing, try again.
            self._transport.mkdir(osutils.dirname(path))
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        prefix = key[:-1]
        path = self._mapper.map(prefix)
        vf = self._get_vf(path)
        origins = vf.annotate(key[-1])
        result = []
        for origin, line in origins:
            result.append((prefix + (origin,), line))
        return result

    def check(self, progress_bar=None):
        """See VersionedFiles.check()."""
        for prefix, vf in self._iter_all_components():
            vf.check()

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        prefixes = self._partition_keys(keys)
        result = {}
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            parent_map = vf.get_parent_map(suffixes)
            for key, parents in parent_map.items():
                result[prefix + (key,)] = tuple(
                    prefix + (parent,) for parent in parents)
        return result

    def _get_vf(self, path):
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)
        return self._file_factory(path, self._transport, create=True,
            get_scope=lambda:None)

    def _partition_keys(self, keys):
        """Turn keys into a dict of prefix:suffix_list."""
        result = {}
        for key in keys:
            prefix_keys = result.setdefault(key[:-1], [])
            prefix_keys.append(key[-1])
        return result

    def _get_all_prefixes(self):
        # Identify all key prefixes.
        # XXX: A bit hacky, needs polish.
        if type(self._mapper) == ConstantMapper:
            paths = [self._mapper.map(())]
            prefixes = [()]
        else:
            relpaths = set()
            for quoted_relpath in self._transport.iter_files_recursive():
                path, ext = os.path.splitext(quoted_relpath)
                relpaths.add(path)
            paths = list(relpaths)
            prefixes = [self._mapper.unmap(path) for path in paths]
        return zip(paths, prefixes)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream()."""
        # Ordering will be taken care of by each partitioned store; group keys
        # by prefix.
        keys = sorted(keys)
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            suffixes = [(suffix,) for suffix in suffixes]
            for record in vf.get_record_stream(suffixes, ordering,
                include_delta_closure):
                if record.parents is not None:
                    record.parents = tuple(
                        prefix + parent for parent in record.parents)
                record.key = prefix + record.key
                yield record

    def _iter_keys_vf(self, keys):
        prefixes = self._partition_keys(keys)
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            yield prefix, suffixes, vf

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s()."""
        sha1s = {}
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            vf_sha1s = vf.get_sha1s(suffixes)
            for suffix, sha1 in vf_sha1s.iteritems():
                sha1s[prefix + (suffix,)] = sha1
        return sha1s

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        for record in stream:
            prefix = record.key[:-1]
            key = record.key[-1:]
            if record.parents is not None:
                parents = [parent[-1:] for parent in record.parents]
            else:
                parents = None
            thunk_record = AdapterFactory(key, parents, record)
            path = self._mapper.map(prefix)
            # Note that this parses the file many times; we can do better but
            # as this only impacts weaves in terms of performance, it is
            # tolerable.
            vf = self._get_vf(path)
            vf.insert_record_stream([thunk_record])

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is a generator).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
                yield line, prefix + (version,)

    def _iter_all_components(self):
        for path, prefix in self._get_all_prefixes():
            yield prefix, self._get_vf(path)

    def keys(self):
        """See VersionedFiles.keys()."""
        result = set()
        for prefix, vf in self._iter_all_components():
            for suffix in vf.versions():
                result.add(prefix + (suffix,))
        return result


class _PlanMergeVersionedFile(VersionedFiles):
    """A VersionedFile for uncommitted and committed texts.

    It is intended to allow merges to be planned with working tree texts.
    It implements only the small part of the VersionedFiles interface used by
    PlanMerge. It falls back to multiple versionedfiles for data not stored in
    _PlanMergeVersionedFile itself.

    :ivar fallback_versionedfiles: a list of VersionedFiles objects that can be
        queried for missing texts.
    """

    def __init__(self, file_id):
        """Create a _PlanMergeVersionedFile.

        :param file_id: Used with _PlanMerge code which is not yet fully
            tuple-keyspace aware.
        """
        self._file_id = file_id
        # fallback locations
        self.fallback_versionedfiles = []
        # Parents for locally held keys.
        self._parents = {}
        # line data for locally held keys.
        self._lines = {}
        # key lookup providers
        self._providers = [DictParentsProvider(self._parents)]

    def plan_merge(self, ver_a, ver_b, base=None):
        """See VersionedFile.plan_merge"""
        from bzrlib.merge import _PlanMerge
        if base is None:
            return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
        old_plan = list(_PlanMerge(ver_a, base, self, (self._file_id,)).plan_merge())
        new_plan = list(_PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge())
        return _PlanMerge._subtract_plans(old_plan, new_plan)

    def plan_lca_merge(self, ver_a, ver_b, base=None):
        from bzrlib.merge import _PlanLCAMerge
        graph = Graph(self)
        new_plan = _PlanLCAMerge(ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
        if base is None:
            return new_plan
        old_plan = _PlanLCAMerge(ver_a, base, self, (self._file_id,), graph).plan_merge()
        return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))

    def add_lines(self, key, parents, lines):
        """See VersionedFiles.add_lines

        Lines are added locally, not to fallback versionedfiles. Also, ghosts
        are permitted. Only reserved ids are permitted.
        """
        if type(key) is not tuple:
            raise TypeError(key)
        if not revision.is_reserved_id(key[-1]):
            raise ValueError('Only reserved ids may be used')
        if parents is None:
            raise ValueError('Parents may not be None')
        if lines is None:
            raise ValueError('Lines may not be None')
        self._parents[key] = tuple(parents)
        self._lines[key] = lines

    def get_record_stream(self, keys, ordering, include_delta_closure):
        pending = set(keys)
        for key in keys:
            if key in self._lines:
                lines = self._lines[key]
                parents = self._parents[key]
                pending.remove(key)
                yield ChunkedContentFactory(key, parents, None, lines)
        for versionedfile in self.fallback_versionedfiles:
            for record in versionedfile.get_record_stream(
                pending, 'unordered', True):
                if record.storage_kind == 'absent':
                    continue
                else:
                    pending.remove(record.key)
                    yield record
            if not pending:
                return
        # report absent entries
        for key in pending:
            yield AbsentContentFactory(key)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map"""
        # We create a new provider because a fallback may have been added.
        # If we make fallbacks private we can update a stack list and avoid
        # object creation thrashing.
        keys = set(keys)
        result = {}
        if revision.NULL_REVISION in keys:
            keys.remove(revision.NULL_REVISION)
            result[revision.NULL_REVISION] = ()
        self._providers = self._providers[:1] + self.fallback_versionedfiles
        result.update(
            _StackedParentsProvider(self._providers).get_parent_map(keys))
        for key, parents in result.iteritems():
            if parents == ():
                result[key] = (revision.NULL_REVISION,)
        return result


class PlanWeaveMerge(TextMerge):
    """Weave merge that takes a plan as its input.

    This exists so that VersionedFile.plan_merge is implementable.
    Most callers will want to use WeaveMerge instead.
    """

    def __init__(self, plan, a_marker=TextMerge.A_MARKER,
                 b_marker=TextMerge.B_MARKER):
        TextMerge.__init__(self, a_marker, b_marker)
        self.plan = plan

    def _merge_struct(self):
        lines_a = []
        lines_b = []
        ch_a = ch_b = False

        def outstanding_struct():
            if not lines_a and not lines_b:
                return
            elif ch_a and not ch_b:
                # one-sided change:
                yield (lines_a,)
            elif ch_b and not ch_a:
                # one-sided change:
                yield (lines_b,)
            elif lines_a == lines_b:
                yield (lines_a,)
            else:
                yield (lines_a, lines_b)

        # We previously considered either 'unchanged' or 'killed-both' lines
        # to be possible places to resynchronize.  However, assuming agreement
        # on killed-both lines may be too aggressive. -- mbp 20060324
        for state, line in self.plan:
            if state == 'unchanged':
                # resync and flush queued conflicts changes if any
                for struct in outstanding_struct():
                    yield struct
                lines_a = []
                lines_b = []
                ch_a = ch_b = False

            if state == 'unchanged':
                if line:
                    yield ([line],)
            elif state == 'killed-a':
                ch_a = True
                lines_b.append(line)
            elif state == 'killed-b':
                ch_b = True
                lines_a.append(line)
            elif state == 'new-a':
                ch_a = True
                lines_a.append(line)
            elif state == 'new-b':
                ch_b = True
                lines_b.append(line)
            elif state == 'conflicted-a':
                ch_b = ch_a = True
                lines_a.append(line)
            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                        'killed-base', 'killed-both'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct


class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)


class VirtualVersionedFiles(VersionedFiles):
    """Dummy implementation for VersionedFiles that uses other functions for
    obtaining fulltexts and parent maps.

    This is always on the bottom of the stack and uses string keys
    (rather than tuples) internally.
    """

    def __init__(self, get_parent_map, get_lines):
        """Create a VirtualVersionedFiles.

        :param get_parent_map: Same signature as Repository.get_parent_map.
        :param get_lines: Should return lines for specified key or None if
                          not available.
        """
        super(VirtualVersionedFiles, self).__init__()
        self._get_parent_map = get_parent_map
        self._get_lines = get_lines

    def check(self, progressbar=None):
        """See VersionedFiles.check.

        :note: Always returns True for VirtualVersionedFiles.
        """
        return True

    def add_mpdiffs(self, records):
        """See VersionedFiles.mpdiffs.

        :note: Not implemented for VirtualVersionedFiles.
        """
        raise NotImplementedError(self.add_mpdiffs)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map."""
        return dict([((k,), tuple([(p,) for p in v]))
            for k, v in self._get_parent_map([k for (k,) in keys]).iteritems()])

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s."""
        ret = {}
        for (k,) in keys:
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                ret[(k,)] = osutils.sha_strings(lines)
        return ret

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream."""
        for (k,) in list(keys):
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                yield ChunkedContentFactory((k,), None,
                        sha1=osutils.sha_strings(lines),
                        chunks=lines)
            else:
                yield AbsentContentFactory((k,))

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        for i, (key,) in enumerate(keys):
            if pb is not None:
                pb.update("Finding changed lines", i, len(keys))
            for l in self._get_lines(key):
                yield (l, key)


def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off the record kind from the front of network_bytes.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    line_end = network_bytes.find('\n')
    storage_kind = network_bytes[:line_end]
    return storage_kind, line_end + 1
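
# Illustrative sketch (editor's note, not part of the original module): the
# storage kind is everything up to the first newline, and the offset points
# just past it:
#
#   network_bytes_to_kind_and_offset('fulltext\n<rest of record>')
#   # -> ('fulltext', 9)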


class NetworkRecordStream(object):
    """A record_stream which reconstitures a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator should have been obtained from a record_streams'
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        self._kind_factory = {'knit-ft-gz':knit.knit_network_to_record,
            'knit-delta-gz':knit.knit_network_to_record,
            'knit-annotated-ft-gz':knit.knit_network_to_record,
            'knit-annotated-delta-gz':knit.knit_network_to_record,
            'knit-delta-closure':knit.knit_delta_closure_to_records,
            'fulltext':fulltext_network_to_record,
            }

    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
        for bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
            for record in self._kind_factory[storage_kind](
                storage_kind, bytes, line_end):
                yield record


def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record."""
    meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
    record_meta = bytes[line_end+4:line_end+4+meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    if parents == 'nil':
        parents = None
    fulltext = bytes[line_end+4+meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]


def _length_prefix(bytes):
    return struct.pack('!L', len(bytes))


def record_to_fulltext_bytes(record):
    if record.parents is None:
        parents = 'nil'
    else:
        parents = record.parents
    record_meta = bencode.bencode((record.key, parents))
    record_content = record.get_bytes_as('fulltext')
    return "fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)
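
# Illustrative sketch (editor's note, not part of the original module):
# serialising a record and parsing it back recovers the key, parents and
# fulltext:
#
#   record = FulltextContentFactory(('rev-1',), None, None, 'hello\n')
#   bytes = record_to_fulltext_bytes(record)
#   kind, offset = network_bytes_to_kind_and_offset(bytes)  # 'fulltext', 9
#   [parsed] = fulltext_network_to_record(kind, bytes, offset)
#   # parsed.key == ('rev-1',) and
#   # parsed.get_bytes_as('fulltext') == 'hello\n'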


def sort_groupcompress(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the key prefix.

    :return: A sorted-list of keys
    """
    # gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
    per_prefix_map = {}
    for item in parent_map.iteritems():
        key = item[0]
        if isinstance(key, str) or len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]

    present_keys = []
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys
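
# Illustrative sketch (editor's note, not part of the original module): keys
# are grouped by their first element and emitted children-first within each
# group, since topo_sort yields parents before children and the result is
# reversed:
#
#   parent_map = {('f1', 'b'): (('f1', 'a'),), ('f1', 'a'): ()}
#   sort_groupcompress(parent_map)  # -> [('f1', 'b'), ('f1', 'a')]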