# Copyright (C) 2005, 2006, 2007, 2008 Canonical Ltd
#
# Authors:
#   Johan Rydberg <jrydberg@gnu.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Versioned text file storage api."""

from copy import copy
from cStringIO import StringIO
import os
import struct
from zlib import adler32

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
import urllib

from bzrlib import (
    errors,
    index,
    knit,
    multiparent,
    osutils,
    revision,
    tsort,
    )
from bzrlib.graph import DictParentsProvider, Graph, _StackedParentsProvider
from bzrlib.transport.memory import MemoryTransport
""")
from bzrlib.inter import InterObject
from bzrlib.registry import Registry
from bzrlib.symbol_versioning import *
from bzrlib.textmerge import TextMerge
from bzrlib.util import bencode


adapter_registry = Registry()
adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
    'DeltaPlainToFullText')
adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
    'FTPlainToFullText')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
    'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
    'bzrlib.knit', 'DeltaAnnotatedToFullText')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
    'bzrlib.knit', 'FTAnnotatedToUnannotated')
adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
    'bzrlib.knit', 'FTAnnotatedToFullText')
# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
#     'bzrlib.knit', 'FTAnnotatedToChunked')
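

# An illustrative lookup sketch, not part of the original file: adapters are
# registered under (source storage_kind, target storage_kind) pairs, so
# converting records is a matter of fetching the class registered for the
# right pair.  Only the registry lookup itself is shown; construction and
# use of the knit adapter classes is left to bzrlib.knit.
def _example_adapter_lookup():
    # Returns the class registered for turning gzipped knit deltas into
    # fulltexts (DeltaPlainToFullText, loaded lazily from bzrlib.knit).
    return adapter_registry.get(('knit-delta-gz', 'fulltext'))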


class ContentFactory(object):
    """Abstract interface for insertion and retrieval from a VersionedFile.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. One of
        'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
        'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
        'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = None
        self.key = None
        self.parents = None


class ChunkedContentFactory(ContentFactory):
    """Static data content factory.

    This takes a 'chunked' list of strings. The only requirement on 'chunked' is
    that ''.join(lines) becomes a valid fulltext. A tuple of a single string
    satisfies this, as does a list of lines.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'chunked'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, chunks):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.storage_kind = 'chunked'
        self.key = key
        self.parents = parents
        self._chunks = chunks

    def get_bytes_as(self, storage_kind):
        if storage_kind == 'chunked':
            return self._chunks
        elif storage_kind == 'fulltext':
            return ''.join(self._chunks)
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)
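

# A minimal behaviour sketch, not part of the original file, following
# directly from get_bytes_as() above: 'chunked' returns the stored chunks,
# 'fulltext' joins them.
def _example_chunked_factory():
    factory = ChunkedContentFactory(('rev-1',), (), None, ['one\n', 'two\n'])
    assert factory.get_bytes_as('chunked') == ['one\n', 'two\n']
    assert factory.get_bytes_as('fulltext') == 'one\ntwo\n'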


class FulltextContentFactory(ContentFactory):
    """Static data content factory.

    This takes a fulltext when created and just returns that during
    get_bytes_as('fulltext').

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'fulltext'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, text):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.storage_kind = 'fulltext'
        self.key = key
        self.parents = parents
        self._text = text

    def get_bytes_as(self, storage_kind):
        if storage_kind == self.storage_kind:
            return self._text
        elif storage_kind == 'chunked':
            return [self._text]
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)


class AbsentContentFactory(ContentFactory):
    """A placeholder content factory for unavailable texts.

    :ivar sha1: None.
    :ivar storage_kind: 'absent'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: None.
    """

    def __init__(self, key):
        """Create a ContentFactory."""
        self.sha1 = None
        self.storage_kind = 'absent'
        self.key = key
        self.parents = None


class AdapterFactory(ContentFactory):
    """A content factory to adapt between key prefixes."""

    def __init__(self, key, parents, adapted):
        """Create an adapter factory instance."""
        self.key = key
        self.parents = parents
        self._adapted = adapted

    def __getattr__(self, attr):
        """Return a member from the adapted object."""
        if attr in ('key', 'parents'):
            return self.__dict__[attr]
        else:
            return getattr(self._adapted, attr)


def filter_absent(record_stream):
    """Adapt a record stream to remove absent records."""
    for record in record_stream:
        if record.storage_kind != 'absent':
            yield record
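

# Composition sketch, not part of the original file: filter_absent() wraps
# any record stream, e.g. the output of get_record_stream() on a store that
# may be missing some of the requested keys.
def _example_filter_absent(versioned_files, keys):
    # versioned_files is assumed to be any VersionedFiles implementation.
    stream = versioned_files.get_record_stream(keys, 'unordered', True)
    return list(filter_absent(stream))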


class VersionedFile(object):
    """Versioned text file storage.

    A versioned file manages versions of line-based text files,
    keeping track of the originating version for each line.

    To clients the "lines" of the file are represented as a list of
    strings. These strings will typically have terminal newline
    characters, but this is not required. In particular files commonly
    do not have a newline at the end of the file.

    Texts are identified by a version-id string.
    """

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def copy_to(self, name, transport):
        """Copy this versioned file to name on transport."""
        raise NotImplementedError(self.copy_to)

    def get_record_stream(self, versions, ordering, include_delta_closure):
        """Get a stream of records for versions.

        :param versions: The versions to include. Each version is a tuple
            (version,).
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the data content of the
            stream, not in the emitted records). This guarantees that
            'fulltext' can be used successfully on every record.
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        raise NotImplementedError(self.get_record_stream)

    def has_version(self, version_id):
        """Returns whether version is present."""
        raise NotImplementedError(self.has_version)

    def insert_record_stream(self, stream):
        """Insert a record stream into this versioned file.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError

    def add_lines(self, version_id, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """Add a single text on top of the versioned file.

        Must raise RevisionAlreadyPresent if the new version is
        already present in file history.

        Must raise RevisionNotPresent if any of the given parents are
        not present in file history.

        :param lines: A list of lines. Each line must be a bytestring. And all
            of them except the last must be terminated with \n and contain no
            other \n's. The last line may either contain no \n's or a single
            terminating \n. If the lines list does not meet this constraint the
            add routine may error or may succeed - but you will be unable to
            read the data back accurately. (Checking the lines have been split
            correctly is expensive and extremely unlikely to catch bugs so it
            is not done at runtime unless check_content is True.)
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations. VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent. The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future add_lines calls in the parent_texts dictionary.
        """
        self._check_write_ok()
        return self._add_lines(version_id, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content)
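
    # A usage sketch with hypothetical version ids, not part of the original
    # API: the opaque text representation returned by add_lines() seeds
    # parent_texts for the next call, enabling delta optimisations.
    def _example_add_lines(self):
        parent_texts = {}
        sha1, num_bytes, text = self.add_lines('rev-1', [], ['first\n'])
        parent_texts['rev-1'] = text
        return self.add_lines('rev-2', ['rev-1'], ['first\n', 'second\n'],
            parent_texts=parent_texts)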

    def _add_lines(self, version_id, parents, lines, parent_texts,
        left_matching_blocks, nostore_sha, random_id, check_content):
        """Helper to do the class specific add_lines."""
        raise NotImplementedError(self.add_lines)

    def add_lines_with_ghosts(self, version_id, parents, lines,
        parent_texts=None, nostore_sha=None, random_id=False,
        check_content=True, left_matching_blocks=None):
        """Add lines to the versioned file, allowing ghosts to be present.

        This takes the same parameters as add_lines and returns the same.
        """
        self._check_write_ok()
        return self._add_lines_with_ghosts(version_id, parents, lines,
            parent_texts, nostore_sha, random_id, check_content, left_matching_blocks)

    def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
        nostore_sha, random_id, check_content, left_matching_blocks):
        """Helper to do class specific add_lines_with_ghosts."""
        raise NotImplementedError(self.add_lines_with_ghosts)

    def check(self, progress_bar=None):
        """Check the versioned file for integrity."""
        raise NotImplementedError(self.check)

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

    def _check_lines_are_lines(self, lines):
        """Check that the lines really are full lines without inline EOL."""
        for line in lines:
            if '\n' in line[:-1]:
                raise errors.BzrBadParameterContainsNewline("lines")

    def get_format_signature(self):
        """Get a text description of the data encoding in this file.

        :since: 0.90
        """
        raise NotImplementedError(self.get_format_signature)

    def make_mpdiffs(self, version_ids):
        """Create multiparent diffs for specified versions."""
        knit_versions = set()
        knit_versions.update(version_ids)
        parent_map = self.get_parent_map(version_ids)
        for version_id in version_ids:
            try:
                knit_versions.update(parent_map[version_id])
            except KeyError:
                raise errors.RevisionNotPresent(version_id, self)
        # We need to filter out ghosts, because we can't diff against them.
        knit_versions = set(self.get_parent_map(knit_versions).keys())
        lines = dict(zip(knit_versions,
            self._get_lf_split_line_list(knit_versions)))
        diffs = []
        for version_id in version_ids:
            target = lines[version_id]
            try:
                parents = [lines[p] for p in parent_map[version_id] if p in
                    knit_versions]
            except KeyError:
                # I don't know how this could ever trigger.
                # parent_map[version_id] was already triggered in the previous
                # for loop, and lines[p] has the 'if p in knit_versions' check,
                # so we again won't have a KeyError.
                raise errors.RevisionNotPresent(version_id, self)
            if len(parents) > 0:
                left_parent_blocks = self._extract_blocks(version_id,
                    parents[0], target)
            else:
                left_parent_blocks = None
            diffs.append(multiparent.MultiParent.from_lines(target, parents,
                left_parent_blocks))
        return diffs

    def _extract_blocks(self, version_id, source, target):
        return None

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """
        # Does this need to call self._check_write_ok()? (IanC 20070919)
        vf_parents = {}
        mpvf = multiparent.MultiMemoryVersionedFile()
        versions = []
        for version, parent_ids, expected_sha1, mpdiff in records:
            versions.append(version)
            mpvf.add_diff(mpdiff, version, parent_ids)
        needed_parents = set()
        for version, parent_ids, expected_sha1, mpdiff in records:
            needed_parents.update(p for p in parent_ids
                                  if not mpvf.has_version(p))
        present_parents = set(self.get_parent_map(needed_parents).keys())
        for parent_id, lines in zip(present_parents,
                self._get_lf_split_line_list(present_parents)):
            mpvf.add_version(lines, parent_id, [])
        for (version, parent_ids, expected_sha1, mpdiff), lines in\
            zip(records, mpvf.get_line_list(versions)):
            if len(parent_ids) == 1:
                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                    mpvf.get_diff(parent_ids[0]).num_lines()))
            else:
                left_matching_blocks = None
            try:
                _, _, version_text = self.add_lines_with_ghosts(version,
                    parent_ids, lines, vf_parents,
                    left_matching_blocks=left_matching_blocks)
            except NotImplementedError:
                # The vf can't handle ghosts, so add lines normally, which will
                # (reasonably) fail if there are ghosts in the data.
                _, _, version_text = self.add_lines(version,
                    parent_ids, lines, vf_parents,
                    left_matching_blocks=left_matching_blocks)
            vf_parents[version] = version_text
        sha1s = self.get_sha1s(versions)
        for version, parent_ids, expected_sha1, mpdiff in records:
            if expected_sha1 != sha1s[version]:
                raise errors.VersionedFileInvalidChecksum(version)

    def get_text(self, version_id):
        """Return version contents as a text string.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        return ''.join(self.get_lines(version_id))
    get_string = get_text

    def get_texts(self, version_ids):
        """Return the texts of listed versions as a list of strings.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        return [''.join(self.get_lines(v)) for v in version_ids]

    def get_lines(self, version_id):
        """Return version contents as a sequence of lines.

        Raises RevisionNotPresent if version is not present in
        file history.
        """
        raise NotImplementedError(self.get_lines)

    def _get_lf_split_line_list(self, version_ids):
        return [StringIO(t).readlines() for t in self.get_texts(version_ids)]

    def get_ancestry(self, version_ids, topo_sorted=True):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        This list will not be topologically sorted if topo_sorted=False is
        passed.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history."""
        if isinstance(version_ids, basestring):
            version_ids = [version_ids]
        raise NotImplementedError(self.get_ancestry)

    def get_ancestry_with_ghosts(self, version_ids):
        """Return a list of all ancestors of given version(s). This
        will not include the null revision.

        Must raise RevisionNotPresent if any of the given versions are
        not present in file history.

        Ghosts that are known about will be included in ancestry list,
        but are not explicitly marked.
        """
        raise NotImplementedError(self.get_ancestry_with_ghosts)

    def get_parent_map(self, version_ids):
        """Get a map of the parents of version_ids.

        :param version_ids: The version ids to look up parents for.
        :return: A mapping from version id to parents.
        """
        raise NotImplementedError(self.get_parent_map)

    def get_parents_with_ghosts(self, version_id):
        """Return version names for parents of version_id.

        Will raise RevisionNotPresent if version_id is not present
        in the history.

        Ghosts that are known about will be included in the parent list,
        but are not explicitly marked.
        """
        try:
            return list(self.get_parent_map([version_id])[version_id])
        except KeyError:
            raise errors.RevisionNotPresent(version_id, self)

    def annotate(self, version_id):
        """Return a list of (version-id, line) tuples for version_id.

        :raise RevisionNotPresent: If the given version is
            not present in file history.
        """
        raise NotImplementedError(self.annotate)

    def iter_lines_added_or_present_in_versions(self, version_ids=None,
                                                pb=None):
        """Iterate over the lines in the versioned file from version_ids.

        This may return lines from other versions. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is a destructor).

        NOTES: Lines are normalised: they will all have \n terminators.
               Lines are returned in arbitrary order.

        :return: An iterator over (line, version_id).
        """
        raise NotImplementedError(self.iter_lines_added_or_present_in_versions)

    def plan_merge(self, ver_a, ver_b):
        """Return pseudo-annotation indicating how the two versions merge.

        This is computed between versions a and b and their common
        base.

        Weave lines present in none of them are skipped entirely.

        Legend:
        killed-base Dead in base revision
        killed-both Killed in each revision
        killed-a    Killed in a
        killed-b    Killed in b
        unchanged   Alive in both a and b (possibly created in both)
        new-a       Created in a
        new-b       Created in b
        ghost-a     Killed in a, unborn in b
        ghost-b     Killed in b, unborn in a
        irrelevant  Not in either revision
        """
        raise NotImplementedError(VersionedFile.plan_merge)

    def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
                    b_marker=TextMerge.B_MARKER):
        return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
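
    # Pipeline sketch, not part of the original API: plan_merge() yields the
    # (state, line) tuples described above, and weave_merge() folds them into
    # merged lines with conflict markers.
    def _example_plan_then_merge(self, ver_a, ver_b):
        plan = self.plan_merge(ver_a, ver_b)
        return self.weave_merge(plan)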


class RecordingVersionedFilesDecorator(object):
    """A minimal versioned files that records calls made on it.

    Only enough methods have been added to support tests using it to date.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to its value.
    """

    def __init__(self, backing_vf):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        """
        self._backing_vf = backing_vf
        self.calls = []

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        self.calls.append(("add_lines", key, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content))
        return self._backing_vf.add_lines(key, parents, lines, parent_texts,
            left_matching_blocks, nostore_sha, random_id, check_content)

    def check(self):
        self._backing_vf.check()

    def get_parent_map(self, keys):
        self.calls.append(("get_parent_map", copy(keys)))
        return self._backing_vf.get_parent_map(keys)

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", list(keys), sort_order,
            include_delta_closure))
        return self._backing_vf.get_record_stream(keys, sort_order,
            include_delta_closure)

    def get_sha1s(self, keys):
        self.calls.append(("get_sha1s", copy(keys)))
        return self._backing_vf.get_sha1s(keys)

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        self.calls.append(("iter_lines_added_or_present_in_keys", copy(keys)))
        return self._backing_vf.iter_lines_added_or_present_in_keys(keys, pb=pb)

    def keys(self):
        self.calls.append(("keys",))
        return self._backing_vf.keys()
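

# Test-usage sketch, not part of the original file: wrap a real store,
# exercise it, then assert on the recorded calls.
def _example_recording_decorator(backing_vf):
    recording_vf = RecordingVersionedFilesDecorator(backing_vf)
    recording_vf.keys()
    assert recording_vf.calls == [("keys",)]
    return recording_vf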


class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
    """A VF that records calls, and returns keys in specific order.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to its value.
    """

    def __init__(self, backing_vf, key_priority):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        :param key_priority: A dictionary defining what order keys should be
            returned from an 'unordered' get_record_stream request.
            Keys with lower priority are returned first, keys not present in
            the map get an implicit priority of 0, and are returned in
            lexicographical order.
        """
        RecordingVersionedFilesDecorator.__init__(self, backing_vf)
        self._key_priority = key_priority

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", list(keys), sort_order,
            include_delta_closure))
        if sort_order == 'unordered':
            def sort_key(key):
                return (self._key_priority.get(key, 0), key)
            # Use a defined order by asking for the keys one-by-one from the
            # backing_vf
            for key in sorted(keys, key=sort_key):
                for record in self._backing_vf.get_record_stream([key],
                        'unordered', include_delta_closure):
                    yield record
        else:
            for record in self._backing_vf.get_record_stream(keys, sort_order,
                    include_delta_closure):
                yield record


class KeyMapper(object):
    """KeyMappers map between keys and underlying partitioned storage."""

    def map(self, key):
        """Map key to an underlying storage identifier.

        :param key: A key tuple e.g. ('file-id', 'revision-id').
        :return: An underlying storage identifier, specific to the partitioning
            mechanism.
        """
        raise NotImplementedError(self.map)

    def unmap(self, partition_id):
        """Map a partitioned storage id back to a key prefix.

        :param partition_id: The underlying partition id.
        :return: As much of a key (or prefix) as is derivable from the partition
            id.
        """
        raise NotImplementedError(self.unmap)


class ConstantMapper(KeyMapper):
    """A key mapper that maps to a constant result."""

    def __init__(self, result):
        """Create a ConstantMapper which will return result for all maps."""
        self._result = result

    def map(self, key):
        """See KeyMapper.map()."""
        return self._result


class URLEscapeMapper(KeyMapper):
    """Base class for use with transport backed storage.

    This provides a map and unmap wrapper that respectively url escape and
    unescape their outputs and inputs.
    """

    def map(self, key):
        """See KeyMapper.map()."""
        return urllib.quote(self._map(key))

    def unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return self._unmap(urllib.unquote(partition_id))


class PrefixMapper(URLEscapeMapper):
    """A key mapper that extracts the first component of a key.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        return key[0]

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (partition_id,)


class HashPrefixMapper(URLEscapeMapper):
    """A key mapper that combines the first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    def _map(self, key):
        """See KeyMapper.map()."""
        prefix = self._escape(key[0])
        return "%02x/%s" % (adler32(prefix) & 0xff, prefix)

    def _escape(self, prefix):
        """No escaping needed here."""
        return prefix

    def _unmap(self, partition_id):
        """See KeyMapper.unmap()."""
        return (self._unescape(osutils.basename(partition_id)),)

    def _unescape(self, basename):
        """No unescaping needed for HashPrefixMapper."""
        return basename


class HashEscapedPrefixMapper(HashPrefixMapper):
    """Combines the escaped first component of a key with a hash.

    This mapper is for use with a transport based backend.
    """

    _safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."

    def _escape(self, prefix):
        """Turn a key element into a filesystem safe string.

        This is similar to a plain urllib.quote, except
        it uses specific safe characters, so that it doesn't
        have to translate a lot of valid file ids.
        """
        # @ does not get escaped. This is because it is a valid
        # filesystem character we use all the time, and it looks
        # a lot better than seeing %40 all the time.
        r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
             for c in prefix]
        return ''.join(r)

    def _unescape(self, basename):
        """Escaped names are easily unescaped by urlutils."""
        return urllib.unquote(basename)
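

# Behaviour sketch, not part of the original file, derived from the
# implementations above: prefix mappers only consult key[0], and the hash
# variants bucket the prefix under a byte derived from adler32.
def _example_mappers():
    assert PrefixMapper().map(('file-id', 'rev-id')) == 'file-id'
    assert PrefixMapper().unmap('file-id') == ('file-id',)
    # e.g. 'xx/file-id', where xx is the low byte of adler32('file-id')
    # rendered as two hex digits.
    assert HashPrefixMapper().map(('file-id', 'rev-id')).endswith('/file-id')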


def make_versioned_files_factory(versioned_file_factory, mapper):
    """Create a ThunkedVersionedFiles factory.

    This will create a callable which when called creates a
    ThunkedVersionedFiles on a transport, using mapper to access individual
    versioned files, and versioned_file_factory to create each individual file.
    """
    def factory(transport):
        return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
            lambda:True)
    return factory
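

# Wiring sketch, not part of the original file: the returned factory only
# needs a transport.  weave_file_factory stands in for any callable that
# builds a single VersionedFile, such as the weave or knit file factories.
def _example_make_factory(weave_file_factory, transport):
    factory = make_versioned_files_factory(weave_file_factory, PrefixMapper())
    return factory(transport)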


class VersionedFiles(object):
    """Storage for many versioned files.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.

    The keyspace is expressed via simple tuples. Any instance of VersionedFiles
    may have a different length key-size, but that size will be constant for
    all texts added to or retrieved from it. For instance, bzrlib uses
    instances with a key-size of 2 for storing user files in a repository, with
    the first element the fileid, and the second the version of that file.

    The use of tuples allows a single code base to support several different
    uses with only the mapping logic changing from instance to instance.
    """

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """Add a text to the store.

        :param key: The key tuple of the text to add. If the last element is
            None, a CHK string will be generated during the addition.
        :param parents: The parents key tuples of the text to add.
        :param lines: A list of lines. Each line must be a bytestring. And all
            of them except the last must be terminated with \n and contain no
            other \n's. The last line may either contain no \n's or a single
            terminating \n. If the lines list does not meet this constraint the
            add routine may error or may succeed - but you will be unable to
            read the data back accurately. (Checking the lines have been split
            correctly is expensive and extremely unlikely to catch bugs so it
            is not done at runtime unless check_content is True.)
        :param parent_texts: An optional dictionary containing the opaque
            representations of some or all of the parents of version_id to
            allow delta optimisations. VERY IMPORTANT: the texts must be those
            returned by add_lines or data corruption can be caused.
        :param left_matching_blocks: a hint about which areas are common
            between the text and its left-hand-parent. The format is
            the SequenceMatcher.get_matching_blocks format.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future add_lines calls in the parent_texts dictionary.
        """
        raise NotImplementedError(self.add_lines)

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """
        vf_parents = {}
        mpvf = multiparent.MultiMemoryVersionedFile()
        versions = []
        for version, parent_ids, expected_sha1, mpdiff in records:
            versions.append(version)
            mpvf.add_diff(mpdiff, version, parent_ids)
        needed_parents = set()
        for version, parent_ids, expected_sha1, mpdiff in records:
            needed_parents.update(p for p in parent_ids
                                  if not mpvf.has_version(p))
        # It seems likely that adding all the present parents as fulltexts can
        # easily exhaust memory.
        chunks_to_lines = osutils.chunks_to_lines
        for record in self.get_record_stream(needed_parents, 'unordered',
                True):
            if record.storage_kind == 'absent':
                continue
            mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
                record.key, [])
        for (key, parent_keys, expected_sha1, mpdiff), lines in\
            zip(records, mpvf.get_line_list(versions)):
            if len(parent_keys) == 1:
                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
                    mpvf.get_diff(parent_keys[0]).num_lines()))
            else:
                left_matching_blocks = None
            version_sha1, _, version_text = self.add_lines(key,
                parent_keys, lines, vf_parents,
                left_matching_blocks=left_matching_blocks)
            if version_sha1 != expected_sha1:
                raise errors.VersionedFileInvalidChecksum(key)
            vf_parents[key] = version_text

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        raise NotImplementedError(self.annotate)

    def check(self, progress_bar=None):
        """Check this object for integrity."""
        raise NotImplementedError(self.check)

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

    def _check_lines_are_lines(self, lines):
        """Check that the lines really are full lines without inline EOL."""
        for line in lines:
            if '\n' in line[:-1]:
                raise errors.BzrBadParameterContainsNewline("lines")

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        raise NotImplementedError(self.get_parent_map)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """Get a stream of records for keys.

        :param keys: The keys to include.
        :param ordering: Either 'unordered' or 'topological'. A topologically
            sorted stream has compression parents strictly before their
            children.
        :param include_delta_closure: If True then the closure across any
            compression parents will be included (in the opaque data).
        :return: An iterator of ContentFactory objects, each of which is only
            valid until the iterator is advanced.
        """
        raise NotImplementedError(self.get_record_stream)

    def get_sha1s(self, keys):
        """Get the sha1's of the texts for the given keys.

        :param keys: The names of the keys to lookup
        :return: a dict from key to sha1 digest. Keys of texts which are not
            present in the store are not present in the returned
            dictionary.
        """
        raise NotImplementedError(self.get_sha1s)

    has_key = index._has_key_from_parent_map

    def get_missing_compression_parent_keys(self):
        """Return an iterable of keys of missing compression parents.

        Check this after calling insert_record_stream to find out if there are
        any missing compression parents.  If there are, the records that
        depend on them are not able to be inserted safely. The precise
        behaviour depends on the concrete VersionedFiles class in use.

        Classes that do not support this will raise NotImplementedError.
        """
        raise NotImplementedError(self.get_missing_compression_parent_keys)

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        raise NotImplementedError

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is a destructor).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        raise NotImplementedError(self.iter_lines_added_or_present_in_keys)

    def keys(self):
        """Return an iterable of the keys for all the contained texts."""
        raise NotImplementedError(self.keys)

    def make_mpdiffs(self, keys):
        """Create multiparent diffs for specified keys."""
        keys_order = tuple(keys)
        keys = frozenset(keys)
        knit_keys = set(keys)
        parent_map = self.get_parent_map(keys)
        for parent_keys in parent_map.itervalues():
            if parent_keys:
                knit_keys.update(parent_keys)
        missing_keys = keys - set(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self)
        # We need to filter out ghosts, because we can't diff against them.
        maybe_ghosts = knit_keys - keys
        ghosts = maybe_ghosts - set(self.get_parent_map(maybe_ghosts))
        knit_keys.difference_update(ghosts)
        lines = {}
        chunks_to_lines = osutils.chunks_to_lines
        for record in self.get_record_stream(knit_keys, 'topological', True):
            lines[record.key] = chunks_to_lines(record.get_bytes_as('chunked'))
            # line_block_dict = {}
            # for parent, blocks in record.extract_line_blocks():
            #   line_blocks[parent] = blocks
            # line_blocks[record.key] = line_block_dict
        diffs = []
        for key in keys_order:
            target = lines[key]
            parents = parent_map[key] or []
            # Note that filtering knit_keys can lead to a parent difference
            # between the creation and the application of the mpdiff.
            parent_lines = [lines[p] for p in parents if p in knit_keys]
            if len(parent_lines) > 0:
                left_parent_blocks = self._extract_blocks(key, parent_lines[0],
                    target)
            else:
                left_parent_blocks = None
            diffs.append(multiparent.MultiParent.from_lines(target,
                parent_lines, left_parent_blocks))
        return diffs

    missing_keys = index._missing_keys_from_parent_map

    def _extract_blocks(self, version_id, source, target):
        return None


class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.

    Currently no implementation allows the graph of different key prefixes to
    intersect, but the API does allow such implementations in the future.
    """

    def __init__(self, transport, file_factory, mapper, is_locked):
        """Create a ThunkedVersionedFiles."""
        self._transport = transport
        self._file_factory = file_factory
        self._mapper = mapper
        self._is_locked = is_locked

    def add_lines(self, key, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """See VersionedFiles.add_lines()."""
        path = self._mapper.map(key)
        version_id = key[-1]
        parents = [parent[-1] for parent in parents]
        vf = self._get_vf(path)
        try:
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
        except errors.NoSuchFile:
            # parent directory may be missing, try again.
            self._transport.mkdir(osutils.dirname(path))
            try:
                return vf.add_lines_with_ghosts(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)
            except NotImplementedError:
                return vf.add_lines(version_id, parents, lines,
                    parent_texts=parent_texts,
                    left_matching_blocks=left_matching_blocks,
                    nostore_sha=nostore_sha, random_id=random_id,
                    check_content=check_content)

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        prefix = key[:-1]
        path = self._mapper.map(prefix)
        vf = self._get_vf(path)
        origins = vf.annotate(key[-1])
        result = []
        for origin, line in origins:
            result.append((prefix + (origin,), line))
        return result

    def check(self, progress_bar=None):
        """See VersionedFiles.check()."""
        for prefix, vf in self._iter_all_components():
            vf.check()

    def get_parent_map(self, keys):
        """Get a map of the parents of keys.

        :param keys: The keys to look up parents for.
        :return: A mapping from keys to parents. Absent keys are absent from
            the mapping.
        """
        prefixes = self._partition_keys(keys)
        result = {}
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            parent_map = vf.get_parent_map(suffixes)
            for key, parents in parent_map.items():
                result[prefix + (key,)] = tuple(
                    prefix + (parent,) for parent in parents)
        return result

    def _get_vf(self, path):
        if not self._is_locked():
            raise errors.ObjectNotLocked(self)
        return self._file_factory(path, self._transport, create=True,
            get_scope=lambda:None)

    def _partition_keys(self, keys):
        """Turn keys into a dict of prefix:suffix_list."""
        result = {}
        for key in keys:
            prefix_keys = result.setdefault(key[:-1], [])
            prefix_keys.append(key[-1])
        return result
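
    # Behaviour sketch, not part of the original API: keys sharing a prefix
    # collapse into one suffix list, preserving input order.
    def _example_partition_keys(self):
        partitions = self._partition_keys(
            [('f1', 'a'), ('f1', 'b'), ('f2', 'a')])
        assert partitions == {('f1',): ['a', 'b'], ('f2',): ['a']}
        return partitions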

    def _get_all_prefixes(self):
        # Identify all key prefixes.
        # XXX: A bit hacky, needs polish.
        if type(self._mapper) == ConstantMapper:
            paths = [self._mapper.map(())]
            prefixes = [()]
        else:
            relpaths = set()
            for quoted_relpath in self._transport.iter_files_recursive():
                path, ext = os.path.splitext(quoted_relpath)
                relpaths.add(path)
            paths = list(relpaths)
            prefixes = [self._mapper.unmap(path) for path in paths]
        return zip(paths, prefixes)

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream()."""
        # Ordering will be taken care of by each partitioned store; group keys
        # by partition.
        keys = sorted(keys)
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            suffixes = [(suffix,) for suffix in suffixes]
            for record in vf.get_record_stream(suffixes, ordering,
                    include_delta_closure):
                if record.parents is not None:
                    record.parents = tuple(
                        prefix + parent for parent in record.parents)
                record.key = prefix + record.key
                yield record

    def _iter_keys_vf(self, keys):
        prefixes = self._partition_keys(keys)
        for prefix, suffixes in prefixes.items():
            path = self._mapper.map(prefix)
            vf = self._get_vf(path)
            yield prefix, suffixes, vf

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s()."""
        sha1s = {}
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            vf_sha1s = vf.get_sha1s(suffixes)
            for suffix, sha1 in vf_sha1s.iteritems():
                sha1s[prefix + (suffix,)] = sha1
        return sha1s

    def insert_record_stream(self, stream):
        """Insert a record stream into this container.

        :param stream: A stream of records to insert.
        :return: None
        :seealso VersionedFile.get_record_stream:
        """
        for record in stream:
            prefix = record.key[:-1]
            key = record.key[-1:]
            if record.parents is not None:
                parents = [parent[-1:] for parent in record.parents]
            else:
                parents = None
            thunk_record = AdapterFactory(key, parents, record)
            path = self._mapper.map(prefix)
            # Note that this parses the file many times; we can do better but
            # as this only impacts weaves in terms of performance, it is
            # minor.
            vf = self._get_vf(path)
            vf.insert_record_stream([thunk_record])

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """Iterate over the lines in the versioned files from keys.

        This may return lines from other keys. Each item the returned
        iterator yields is a tuple of a line and a text version that that line
        is present in (not introduced in).

        Ordering of results is in whatever order is most suitable for the
        underlying storage format.

        If a progress bar is supplied, it may be used to indicate progress.
        The caller is responsible for cleaning up progress bars (because this
        is a destructor).

        NOTES:
         * Lines are normalised by the underlying store: they will all have \n
           terminators.
         * Lines are returned in arbitrary order.

        :return: An iterator over (line, key).
        """
        for prefix, suffixes, vf in self._iter_keys_vf(keys):
            for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
                yield line, prefix + (version,)

    def _iter_all_components(self):
        for path, prefix in self._get_all_prefixes():
            yield prefix, self._get_vf(path)

    def keys(self):
        """See VersionedFiles.keys()."""
        result = set()
        for prefix, vf in self._iter_all_components():
            for suffix in vf.versions():
                result.add(prefix + (suffix,))
        return result


class _PlanMergeVersionedFile(VersionedFiles):
    """A VersionedFile for uncommitted and committed texts.

    It is intended to allow merges to be planned with working tree texts.
    It implements only the small part of the VersionedFiles interface used by
    PlanMerge. It falls back to multiple versionedfiles for data not stored in
    _PlanMergeVersionedFile itself.

    :ivar: fallback_versionedfiles a list of VersionedFiles objects that can be
        queried for missing texts.
    """

    def __init__(self, file_id):
        """Create a _PlanMergeVersionedFile.

        :param file_id: Used with _PlanMerge code which is not yet fully
            tuple-keyspace aware.
        """
        self._file_id = file_id
        # fallback locations
        self.fallback_versionedfiles = []
        # Parents for locally held keys.
        self._parents = {}
        # line data for locally held keys.
        self._lines = {}
        # key lookup providers
        self._providers = [DictParentsProvider(self._parents)]

    def plan_merge(self, ver_a, ver_b, base=None):
        """See VersionedFile.plan_merge"""
        from bzrlib.merge import _PlanMerge
        if base is None:
            return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
        old_plan = list(_PlanMerge(ver_a, base, self, (self._file_id,)).plan_merge())
        new_plan = list(_PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge())
        return _PlanMerge._subtract_plans(old_plan, new_plan)

    def plan_lca_merge(self, ver_a, ver_b, base=None):
        from bzrlib.merge import _PlanLCAMerge
        graph = Graph(self)
        new_plan = _PlanLCAMerge(ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
        if base is None:
            return new_plan
        old_plan = _PlanLCAMerge(ver_a, base, self, (self._file_id,), graph).plan_merge()
        return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))

    def add_lines(self, key, parents, lines):
        """See VersionedFiles.add_lines

        Lines are added locally, not to fallback versionedfiles. Also, ghosts
        are permitted. Only reserved ids are permitted.
        """
        if type(key) is not tuple:
            raise TypeError(key)
        if not revision.is_reserved_id(key[-1]):
            raise ValueError('Only reserved ids may be used')
        if parents is None:
            raise ValueError('Parents may not be None')
        if lines is None:
            raise ValueError('Lines may not be None')
        self._parents[key] = tuple(parents)
        self._lines[key] = lines
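
    # Usage sketch, not part of the original API: only reserved ids such as
    # 'current:' (revision.CURRENT_REVISION, the working-tree text) may be
    # stored locally; all other texts come from fallback_versionedfiles.
    def _example_add_working_tree_text(self, lines):
        key = (self._file_id, 'current:')
        self.add_lines(key, [], lines)
        return key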

    def get_record_stream(self, keys, ordering, include_delta_closure):
        pending = set(keys)
        for key in keys:
            if key in self._lines:
                lines = self._lines[key]
                parents = self._parents[key]
                pending.remove(key)
                yield ChunkedContentFactory(key, parents, None, lines)
        for versionedfile in self.fallback_versionedfiles:
            for record in versionedfile.get_record_stream(
                    pending, 'unordered', True):
                if record.storage_kind == 'absent':
                    continue
                else:
                    pending.remove(record.key)
                    yield record
            if not pending:
                return
        # report absent entries
        for key in pending:
            yield AbsentContentFactory(key)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map"""
        # We create a new provider because a fallback may have been added.
        # If we make fallbacks private we can update a stack list and avoid
        # object creation thrashing.
        keys = set(keys)
        result = {}
        if revision.NULL_REVISION in keys:
            keys.remove(revision.NULL_REVISION)
            result[revision.NULL_REVISION] = ()
        self._providers = self._providers[:1] + self.fallback_versionedfiles
        result.update(
            _StackedParentsProvider(self._providers).get_parent_map(keys))
        for key, parents in result.iteritems():
            if parents == ():
                result[key] = (revision.NULL_REVISION,)
        return result


class PlanWeaveMerge(TextMerge):
    """Weave merge that takes a plan as its input.

    This exists so that VersionedFile.plan_merge is implementable.
    Most callers will want to use WeaveMerge instead.
    """

    def __init__(self, plan, a_marker=TextMerge.A_MARKER,
                 b_marker=TextMerge.B_MARKER):
        TextMerge.__init__(self, a_marker, b_marker)
        self.plan = plan

    def _merge_struct(self):
        lines_a = []
        lines_b = []
        ch_a = ch_b = False

        def outstanding_struct():
            if not lines_a and not lines_b:
                return
            elif ch_a and not ch_b:
                # one-sided change
                yield (lines_a,)
            elif ch_b and not ch_a:
                # one-sided change
                yield (lines_b,)
            elif lines_a == lines_b:
                yield (lines_a,)
            else:
                yield (lines_a, lines_b)

        # We previously considered either 'unchanged' or 'killed-both' lines
        # to be possible places to resynchronize. However, assuming agreement
        # on killed-both lines may be too aggressive. -- mbp 20060324
        for state, line in self.plan:
            if state == 'unchanged':
                # resync and flush queued conflicts changes if any
                for struct in outstanding_struct():
                    yield struct
                lines_a = []
                lines_b = []
                ch_a = ch_b = False

            if state == 'unchanged':
                if line:
                    yield ([line],)
            elif state == 'killed-a':
                ch_a = True
                lines_b.append(line)
            elif state == 'killed-b':
                ch_b = True
                lines_a.append(line)
            elif state == 'new-a':
                ch_a = True
                lines_a.append(line)
            elif state == 'new-b':
                ch_b = True
                lines_b.append(line)
            elif state == 'conflicted-a':
                ch_b = ch_a = True
                lines_a.append(line)
            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                        'killed-base', 'killed-both'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct


class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)


class VirtualVersionedFiles(VersionedFiles):
    """Dummy implementation for VersionedFiles that uses other functions for
    obtaining fulltexts and parent maps.

    This is always on the bottom of the stack and uses string keys
    (rather than tuples) internally.
    """

    def __init__(self, get_parent_map, get_lines):
        """Create a VirtualVersionedFiles.

        :param get_parent_map: Same signature as Repository.get_parent_map.
        :param get_lines: Should return lines for specified key or None if
            not available.
        """
        super(VirtualVersionedFiles, self).__init__()
        self._get_parent_map = get_parent_map
        self._get_lines = get_lines

    def check(self, progressbar=None):
        """See VersionedFiles.check.

        :note: Always returns True for VirtualVersionedFiles.
        """
        return True

    def add_mpdiffs(self, records):
        """See VersionedFiles.mpdiffs.

        :note: Not implemented for VirtualVersionedFiles.
        """
        raise NotImplementedError(self.add_mpdiffs)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map."""
        return dict([((k,), tuple([(p,) for p in v]))
            for k, v in self._get_parent_map([k for (k,) in keys]).iteritems()])

    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s."""
        ret = {}
        for (k,) in keys:
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                ret[(k,)] = osutils.sha_strings(lines)
        return ret

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream."""
        for (k,) in list(keys):
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                yield ChunkedContentFactory((k,), None,
                    sha1=osutils.sha_strings(lines),
                    chunks=lines)
            else:
                yield AbsentContentFactory((k,))

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        for i, (key,) in enumerate(keys):
            if pb is not None:
                pb.update("Finding changed lines", i, len(keys))
            for l in self._get_lines(key):
                yield (l, key)


def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off a record kind from the front of network_bytes.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    line_end = network_bytes.find('\n')
    storage_kind = network_bytes[:line_end]
    return storage_kind, line_end + 1
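

# Behaviour sketch, not part of the original file: the storage kind is
# everything before the first newline, and the offset points just past it.
def _example_kind_and_offset():
    kind, offset = network_bytes_to_kind_and_offset('fulltext\npayload')
    assert (kind, offset) == ('fulltext', 9)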


class NetworkRecordStream(object):
    """A record_stream which reconstitutes a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator should have been obtained from a record_streams'
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        self._kind_factory = {'knit-ft-gz':knit.knit_network_to_record,
            'knit-delta-gz':knit.knit_network_to_record,
            'knit-annotated-ft-gz':knit.knit_network_to_record,
            'knit-annotated-delta-gz':knit.knit_network_to_record,
            'knit-delta-closure':knit.knit_delta_closure_to_records,
            'fulltext':fulltext_network_to_record,
            }

    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
        for bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
            for record in self._kind_factory[storage_kind](
                    storage_kind, bytes, line_end):
                yield record


def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record."""
    meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
    record_meta = bytes[line_end+4:line_end+4+meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    if parents == 'nil':
        parents = None
    fulltext = bytes[line_end+4+meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]


def _length_prefix(bytes):
    return struct.pack('!L', len(bytes))


def record_to_fulltext_bytes(record):
    if record.parents is None:
        parents = 'nil'
    else:
        parents = record.parents
    record_meta = bencode.bencode((record.key, parents))
    record_content = record.get_bytes_as('fulltext')
    return "fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)
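

# Round-trip sketch, not part of the original file: serialise a fulltext
# record for the network and parse it back through NetworkRecordStream.
def _example_fulltext_roundtrip():
    record = FulltextContentFactory(('rev-1',), None, None, 'some text\n')
    bytes = record_to_fulltext_bytes(record)
    parsed, = NetworkRecordStream([bytes]).read()
    assert parsed.get_bytes_as('fulltext') == 'some text\n'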


def sort_groupcompress(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the key prefix.

    :return: A sorted-list of keys
    """
    # gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
    per_prefix_map = {}
    for item in parent_map.iteritems():
        key = item[0]
        if isinstance(key, str) or len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]

    present_keys = []
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys
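

# Ordering sketch, not part of the original file: keys are grouped by prefix
# ('f1' before 'f2'), and each group is emitted in reverse topological order
# (children before parents).
def _example_sort_groupcompress():
    parent_map = {
        ('f1', 'rev-1'): (),
        ('f1', 'rev-2'): (('f1', 'rev-1'),),
        ('f2', 'rev-1'): (),
        }
    order = sort_groupcompress(parent_map)
    assert order == [('f1', 'rev-2'), ('f1', 'rev-1'), ('f2', 'rev-1')]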