537
522
inventories.append(rev_tree.root_inventory)
538
523
return present, inventories
541
class RevisionStore1(AbstractRevisionStore):
542
"""A RevisionStore that uses the old breezy Repository API.
544
The old API was present until bzr.dev rev 3510.
547
def _load_texts(self, revision_id, entries, text_provider, parents_provider):
548
"""See RevisionStore._load_texts()."""
549
# Add the texts that are not already present
550
tx = self.repo.get_transaction()
552
# This test is *really* slow: over 50% of import time
553
#w = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx)
554
#if ie.revision in w:
556
# Try another way, realising that this assumes that the
557
# version is not already there. In the general case,
558
# a shared repository might already have the revision but
559
# we arguably don't need that check when importing from
561
if ie.revision != revision_id:
564
text_parents = [(file_id, p) for p in parents_provider(file_id)]
565
lines = text_provider(file_id)
566
vfile = self.repo.weave_store.get_weave_or_empty(file_id, tx)
567
import pdb; pdb.set_trace()
568
vfile.add_lines(revision_id, text_parents, lines)
570
def get_file_lines(self, revision_id, file_id):
    """Return the lines the weave for file_id holds at revision_id."""
    transaction = self.repo.get_transaction()
    weave = self.repo.weave_store.get_weave(file_id, transaction)
    return weave.get_lines(revision_id)
575
def _add_revision(self, rev, inv):
    """Record rev in the repository's revision store.

    Only the essentials are done here: the revision id is checked
    against the reserved ids and the revision is then added within the
    repository's current transaction. ``inv`` is accepted but not used.
    """
    # Doing everything repo.add_revision does is unnecessary and (since
    # bzr.dev 3392) can be pretty slow for long delta chains on
    # inventories.
    _mod_revision.check_not_reserved_id(rev.revision_id)
    store_revision = self.repo._revision_store.add_revision
    store_revision(rev, self.repo.get_transaction())
583
class RevisionStore2(AbstractRevisionStore):
584
"""A RevisionStore that uses the new breezy Repository API."""
586
def _load_texts(self, revision_id, entries, text_provider, parents_provider):
587
"""See RevisionStore._load_texts()."""
525
def _load_texts(self, revision_id, entries, text_provider, parents_provider):
526
"""Load texts to a repository for inventory entries.
528
This method is provided for subclasses to use or override.
530
:param revision_id: the revision identifier
531
:param entries: iterator over the inventory entries
532
:param text_provider: a callable expecting a file_id parameter
533
that returns the text for that file-id
534
:param parents_provider: a callable expecting a file_id parameter
535
that returns the list of parent-ids for that file-id
589
538
for ie in entries:
590
539
text_keys[(ie.file_id, ie.revision)] = ie
591
540
text_parent_map = self.repo.texts.get_parent_map(text_keys)
592
541
missing_texts = set(text_keys) - set(text_parent_map)
593
self._load_texts_for_file_rev_ids(missing_texts, text_provider,
596
def _load_texts_for_file_rev_ids(self, file_rev_ids, text_provider,
598
"""Load texts to a repository for file-ids, revision-id tuples.
600
:param file_rev_ids: iterator over the (file_id, revision_id) tuples
601
:param text_provider: a callable expecting a file_id parameter
602
that returns the text for that file-id
603
:param parents_provider: a callable expecting a file_id parameter
604
that returns the list of parent-ids for that file-id
606
for file_id, revision_id in file_rev_ids:
542
for file_id, revision_id in missing_texts:
607
543
text_key = (file_id, revision_id)
608
544
text_parents = [(file_id, p) for p in parents_provider(file_id)]
609
545
lines = text_provider(file_id)
624
560
# # delta chains on inventories. Just do the essentials here ...
625
561
# _mod_revision.check_not_reserved_id(rev.revision_id)
626
562
# self.repo._add_revision(rev)
629
class ImportRevisionStore1(RevisionStore1):
630
"""A RevisionStore (old Repository API) optimised for importing.
632
This implementation caches serialised inventory texts and provides
633
fine-grained control over when inventories are stored as fulltexts.
636
def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
638
"""See AbstractRevisionStore.__init__.
640
:param repo: the target repository
641
:param parent_texts_to_cache: the number of parent texts to cache
642
:param fulltext_when: if non None, a function to call to decide
643
whether to fulltext the inventory or not. The revision count
644
is passed as a parameter and the result is treated as a boolean.
646
RevisionStore1.__init__(self, repo)
647
self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
648
self.fulltext_when = fulltext_when
649
self.random_ids = random_ids
650
self.revision_count = 0
652
def _add_inventory(self, revision_id, inv, parents, parent_invs):
653
"""See RevisionStore._add_inventory."""
654
# Code taken from breezy.repository.add_inventory
655
assert self.repo.is_in_write_group()
656
_mod_revision.check_not_reserved_id(revision_id)
657
assert inv.revision_id is None or inv.revision_id == revision_id, \
658
"Mismatch between inventory revision" \
659
" id and insertion revid (%r, %r)" % (inv.revision_id, revision_id)
660
assert inv.root is not None
661
inv_lines = self.repo._serialise_inventory_to_lines(inv)
662
inv_vf = self.repo.get_inventory_weave()
663
sha1, num_bytes, parent_text = self._inventory_add_lines(inv_vf,
664
revision_id, parents, inv_lines, self.inv_parent_texts)
665
self.inv_parent_texts[revision_id] = parent_text
668
def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
670
"""See Repository._inventory_add_lines()."""
671
# setup parameters used in original code but not this API
672
self.revision_count += 1
673
if self.fulltext_when is not None:
674
delta = not self.fulltext_when(self.revision_count)
677
left_matching_blocks = None
678
random_id = self.random_ids
679
check_content = False
681
# breezy.knit.add_lines() but error checking optimised
682
inv_vf._check_add(version_id, lines, random_id, check_content)
684
####################################################################
685
# breezy.knit._add() but skip checking if fulltext better than delta
686
####################################################################
688
line_bytes = b''.join(lines)
689
digest = osutils.sha_string(line_bytes)
691
for parent in parents:
692
if inv_vf.has_version(parent):
693
present_parents.append(parent)
694
if parent_texts is None:
697
# can only compress against the left most present parent.
699
(len(present_parents) == 0 or
700
present_parents[0] != parents[0])):
703
text_length = len(line_bytes)
706
if not lines[-1].endswith(b'\n'):
707
# copy the contents of lines.
709
options.append(b'no-eol')
710
lines[-1] = lines[-1] + b'\n'
714
# # To speed the extract of texts the delta chain is limited
715
# # to a fixed number of deltas. This should minimize both
716
# # I/O and the time spent applying deltas.
717
# delta = inv_vf._check_should_delta(present_parents)
719
assert isinstance(version_id, str)
720
content = inv_vf.factory.make(lines, version_id)
721
if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
722
# Merge annotations from parent texts if needed.
723
delta_hunks = inv_vf._merge_annotations(content, present_parents,
724
parent_texts, delta, inv_vf.factory.annotated,
725
left_matching_blocks)
728
options.append(b'line-delta')
729
store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
730
size, bytes = inv_vf._data._record_to_data(version_id, digest,
733
options.append(b'fulltext')
734
# isinstance is slower and we have no hierarchy.
735
if inv_vf.factory.__class__ == knit.KnitPlainFactory:
736
# Use the already joined bytes saving iteration time in
738
size, bytes = inv_vf._data._record_to_data(version_id, digest,
741
# get mixed annotation + content and feed it into the
743
store_lines = inv_vf.factory.lower_fulltext(content)
744
size, bytes = inv_vf._data._record_to_data(version_id, digest,
747
access_memo = inv_vf._data.add_raw_records([size], bytes)[0]
748
inv_vf._index.add_versions(
749
((version_id, options, access_memo, parents),),
751
return digest, text_length, content