/brz/remove-bazaar : contents of revisionloader.py at revision 0.64.84

: (revision 0.64.84)

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Parameterised loading of revisions into a repository."""


from bzrlib import errors, knit, lru_cache, osutils
from bzrlib import revision as _mod_revision


class AbstractRevisionLoader(object):
    """Base class for objects that load revisions into a repository.

    Subclasses must implement _load_texts(); the other hooks
    (_add_inventory, _add_revision) have default implementations that
    delegate to the repository and may be overridden.
    """
    # NOTE: This is effectively bzrlib.repository._install_revision
    # refactored to be a class. When importing, we want more flexibility
    # in how previous revisions are cached, data is fed in, etc.

    def __init__(self, repo):
        """An object responsible for loading revisions into a repository.

        NOTE: Repository locking is not managed by this class. Clients
        should take a write lock, call load() multiple times, then release
        the lock.

        :param repo: the target repository
        """
        self.repo = repo

    def load(self, rev, inv, signature, text_provider,
        inventories_provider=None):
        """Load a revision into a repository.

        The steps are performed in this order: file texts, inventory,
        signature (if any), then the revision itself.

        :param rev: the Revision
        :param inv: the inventory
        :param signature: signing information, or None if there is none
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :param inventories_provider: a callable expecting a list of
            revision-ids, that returns:
              * the list of revision-ids present in the repository
              * the list of inventories for the revision-id's,
                including an empty inventory for the missing revisions
            If None, a default implementation is provided.
        """
        if inventories_provider is None:
            inventories_provider = self._default_inventories_provider
        present_parents, parent_invs = inventories_provider(rev.parent_ids)
        self._load_texts(rev.revision_id, inv.iter_entries(), parent_invs,
            text_provider)
        try:
            rev.inventory_sha1 = self._add_inventory(rev.revision_id,
                inv, present_parents)
        except errors.RevisionAlreadyPresent:
            # Deliberately ignored: rev.inventory_sha1 is left as the
            # caller supplied it and loading continues.
            pass
        if signature is not None:
            self.repo.add_signature_text(rev.revision_id, signature)
        self._add_revision(rev, inv)

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """Load texts to a repository for inventory entries.

        This method is provided for subclasses to use or override.

        :param revision_id: the revision identifier
        :param entries: iterator over the inventory entries
        :param parent_invs: the parent inventories
        :param text_provider: a callable expecting a file_id parameter
            that returns the text for that file-id
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(self._load_texts)

    def _add_inventory(self, revision_id, inv, parents):
        """Add the inventory inv to the repository as revision_id.

        :param parents: The revision ids of the parents that revision_id
                        is known to have and are in the repository already.

        :returns: The validator(which is a sha1 digest, though what is sha'd is
            repository format specific) of the serialized inventory.
        """
        return self.repo.add_inventory(revision_id, inv, parents)

    def _add_revision(self, rev, inv):
        """Add a revision and its inventory to a repository.

        :param rev: the Revision
        :param inv: the inventory
        """
        # The repository is an attribute of the loader; there is no
        # module-level or local name 'repo' to fall back on.
        self.repo.add_revision(rev.revision_id, rev, inv)

    def _default_inventories_provider(self, revision_ids):
        """An inventories provider that queries the repository.

        :param revision_ids: the revision-ids to look up
        :returns: a (present, inventories) tuple. present lists the
            revision-ids the repository has; inventories has one entry
            per requested revision-id, in order. For a revision the
            repository lacks, the inventory of revision_tree(None) is used.
        """
        present = []
        inventories = []
        for revision_id in revision_ids:
            if self.repo.has_revision(revision_id):
                present.append(revision_id)
                rev_tree = self.repo.revision_tree(revision_id)
            else:
                rev_tree = self.repo.revision_tree(None)
            inventories.append(rev_tree.inventory)
        return present, inventories


class RevisionLoader1(AbstractRevisionLoader):
    """A RevisionLoader that uses the old bzrlib Repository API.

    The old API was present until bzr.dev rev 3510.
    """

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """Store the texts introduced by revision_id in per-file weaves.

        Only entries whose revision equals revision_id are stored; see
        AbstractRevisionLoader._load_texts() for the parameters.

        :raises errors.IncompatibleRevision: if the repository does not
            support rich roots and the root entry's revision is not
            revision_id
        """
        repo = self.repo
        if not repo.supports_rich_root():
            # Backwards compatibility hack: consume (and so skip) the
            # root entry, which comes first from the iterator.
            _root_path, root_entry = entries.next()
            if root_entry.revision != revision_id:
                raise errors.IncompatibleRevision(repr(repo))
        transaction = repo.get_transaction()
        for _path, entry in entries:
            # Asking the weave whether it already holds entry.revision
            # was measured as really slow (over 50% of import time), so
            # that test is not done. Instead we assume a text is new
            # exactly when the entry was last changed by this revision.
            # In the general case a shared repository might already have
            # the text, but we arguably don't need that check when
            # importing from a foreign system.
            if entry.revision != revision_id:
                continue
            file_id = entry.file_id
            # Collect each distinct parent revision of this file, in
            # parent-inventory order.
            text_parents = []
            for parent_inv in parent_invs:
                if file_id in parent_inv:
                    parent_revision = parent_inv[file_id].revision
                    if parent_revision not in text_parents:
                        text_parents.append(parent_revision)
            lines = text_provider(file_id)
            weave = repo.weave_store.get_weave_or_empty(file_id, transaction)
            weave.add_lines(revision_id, text_parents, lines)

    def _add_revision(self, rev, inv):
        """Record rev in the revision store; inv is not used here."""
        # There's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        revision_store = self.repo._revision_store
        revision_store.add_revision(rev, self.repo.get_transaction())


class RevisionLoader2(AbstractRevisionLoader):
    """A RevisionLoader that uses the new bzrlib Repository API."""

    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
        """Add to repo.texts every text that it does not already hold.

        Texts are keyed by (file_id, revision) tuples; see
        AbstractRevisionLoader._load_texts() for the parameters.

        :raises errors.IncompatibleRevision: if the repository does not
            support rich roots and the root entry's revision is not
            revision_id
        """
        # Backwards compatibility hack: skip the root id.
        if not self.repo.supports_rich_root():
            path, root = entries.next()
            if root.revision != revision_id:
                raise errors.IncompatibleRevision(repr(self.repo))
        text_keys = {}
        for path, ie in entries:
            text_keys[(ie.file_id, ie.revision)] = ie
        # Keys absent from the returned parent map are treated as texts
        # the repository does not have yet.
        text_parent_map = self.repo.texts.get_parent_map(text_keys)
        missing_texts = set(text_keys) - set(text_parent_map)
        # Add the texts that are not already present
        for text_key in missing_texts:
            ie = text_keys[text_key]
            text_parents = []
            for parent_inv in parent_invs:
                if ie.file_id not in parent_inv:
                    continue
                # Build the key before testing for duplicates: the list
                # holds (file_id, revision) tuples, so comparing a bare
                # revision id against it could never match.
                parent_key = (ie.file_id, parent_inv[ie.file_id].revision)
                if parent_key in text_parents:
                    continue
                text_parents.append(parent_key)
            lines = text_provider(ie.file_id)
            self.repo.texts.add_lines(text_key, text_parents, lines)

    def _add_revision(self, rev, inv):
        """Add rev to the repository; inv is not used here."""
        # There's no need to do everything repo.add_revision does and
        # doing so (since bzr.dev 3392) can be pretty slow for long
        # delta chains on inventories. Just do the essentials here ...
        _mod_revision.check_not_reserved_id(rev.revision_id)
        self.repo._add_revision(rev)
 

class ImportRevisionLoader1(RevisionLoader1):
    """A RevisionLoader (old Repository API) optimised for importing.

    This implementation caches serialised inventory texts and provides
    fine-grained control over when inventories are stored as fulltexts.
    """

    def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
        random_ids=True):
        """See AbstractRevisionLoader.__init__.

        :param repo: the target repository
        :param parent_texts_to_cache: the number of parent texts to cache
        :param fulltext_when: if non None, a function to call to decide
          whether to fulltext the inventory or not. The revision count
          is passed as a parameter and the result is treated as a boolean.
        :param random_ids: passed as random_id to the inventory
          versioned file's _check_add() and to its index's add_versions()
        """
        RevisionLoader1.__init__(self, repo)
        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
        self.fulltext_when = fulltext_when
        self.random_ids = random_ids
        self.revision_count = 0

    def _add_inventory(self, revision_id, inv, parents):
        """See RevisionLoader._add_inventory.

        :raises AssertionError: if the repository is not in a write
            group, if inv.revision_id is set to something other than
            revision_id, or if inv has no root
        """
        # Code taken from bzrlib.repository.add_inventory
        # The preconditions are raised explicitly (rather than with the
        # assert statement) so they still hold when Python runs with -O.
        repo = self.repo
        if not repo.is_in_write_group():
            raise AssertionError("%r not in write group" % (repo,))
        _mod_revision.check_not_reserved_id(revision_id)
        if not (inv.revision_id is None or inv.revision_id == revision_id):
            raise AssertionError(
                "Mismatch between inventory revision"
                " id and insertion revid (%r, %r)"
                % (inv.revision_id, revision_id))
        if inv.root is None:
            raise AssertionError("inventory has no root")
        inv_lines = repo._serialise_inventory_to_lines(inv)
        inv_vf = repo.get_inventory_weave()
        sha1, num_bytes, parent_text = self._inventory_add_lines(inv_vf,
            revision_id, parents, inv_lines, self.inv_parent_texts)
        # Remember the content object so a later child inventory can be
        # handed it via parent_texts.
        self.inv_parent_texts[revision_id] = parent_text
        return sha1

    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
            parent_texts):
        """See Repository._inventory_add_lines().

        :param inv_vf: the inventory versioned file to add to
        :param version_id: the id to store the lines under; must be a str
        :param parents: the parent version ids
        :param lines: the serialised inventory, as a list of lines
        :param parent_texts: a mapping of version id to cached content
            objects, or None
        :returns: a (sha1 digest, text length, content object) tuple. The
            digest and length are taken before any trailing newline is
            appended.
        :raises AssertionError: if version_id is not a str
        """
        # setup parameters used in original code but not this API
        self.revision_count += 1
        if self.fulltext_when is not None:
            delta = not self.fulltext_when(self.revision_count)
        else:
            delta = inv_vf.delta
        left_matching_blocks = None
        random_id = self.random_ids
        check_content = False

        # bzrlib.knit.add_lines() but error checking optimised
        inv_vf._check_add(version_id, lines, random_id, check_content)

        ####################################################################
        # bzrlib.knit._add() but skip checking if fulltext better than delta
        ####################################################################

        line_bytes = ''.join(lines)
        digest = osutils.sha_string(line_bytes)
        present_parents = []
        for parent in parents:
            if inv_vf.has_version(parent):
                present_parents.append(parent)
        if parent_texts is None:
            parent_texts = {}

        # can only compress against the left most present parent.
        if (delta and
            (len(present_parents) == 0 or
             present_parents[0] != parents[0])):
            delta = False

        text_length = len(line_bytes)
        options = []
        if lines:
            if lines[-1][-1] != '\n':
                # copy the contents of lines.
                lines = lines[:]
                options.append('no-eol')
                lines[-1] = lines[-1] + '\n'
                line_bytes += '\n'

        #if delta:
        #    # To speed the extract of texts the delta chain is limited
        #    # to a fixed number of deltas.  This should minimize both
        #    # I/O and the time spend applying deltas.
        #    delta = inv_vf._check_should_delta(present_parents)

        # Raised explicitly so the check is not stripped under -O.
        if not isinstance(version_id, str):
            raise AssertionError(
                "version_id must be a str, not %r" % (version_id,))
        content = inv_vf.factory.make(lines, version_id)
        if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
            # Merge annotations from parent texts if needed.
            delta_hunks = inv_vf._merge_annotations(content, present_parents,
                parent_texts, delta, inv_vf.factory.annotated,
                left_matching_blocks)

        if delta:
            # delta_hunks is always bound here: the branch above runs
            # whenever delta is true.
            options.append('line-delta')
            store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
            size, data = inv_vf._data._record_to_data(version_id, digest,
                store_lines)
        else:
            options.append('fulltext')
            # isinstance is slower and we have no hierarchy.
            if inv_vf.factory.__class__ == knit.KnitPlainFactory:
                # Use the already joined bytes saving iteration time in
                # _record_to_data.
                size, data = inv_vf._data._record_to_data(version_id, digest,
                    lines, [line_bytes])
            else:
                # get mixed annotation + content and feed it into the
                # serialiser.
                store_lines = inv_vf.factory.lower_fulltext(content)
                size, data = inv_vf._data._record_to_data(version_id, digest,
                    store_lines)

        access_memo = inv_vf._data.add_raw_records([size], data)[0]
        inv_vf._index.add_versions(
            ((version_id, options, access_memo, parents),),
            random_id=random_id)
        return digest, text_length, content


class ImportRevisionLoader2(RevisionLoader2):
    """A RevisionLoader (new Repository API) optimised for importing.

    The value returned alongside the sha1 when an inventory is added is
    kept in an LRU cache keyed by revision id. The fulltext_when and
    random_ids settings are stored but not consulted by _add_inventory:
    fine-grained control over when inventories are stored as fulltexts
    IS PLANNED LATER.
    """

    def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
        random_ids=True):
        """See AbstractRevisionLoader.__init__.

        :param repo: the target repository
        :param parent_texts_to_cache: the number of parent texts to cache
        :param fulltext_when: if non None, a function to call to decide
          whether to fulltext the inventory or not. The revision count
          is passed as a parameter and the result is treated as a boolean.
        :param random_ids: stored on the loader as self.random_ids
        """
        RevisionLoader2.__init__(self, repo)
        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
        self.revision_count = 0
        self.random_ids = random_ids
        self.fulltext_when = fulltext_when

    def _add_inventory(self, revision_id, inv, parents):
        """Serialise inv and add it to repo.inventories as (revision_id,).

        :param parents: revision ids of the parents; each is wrapped in
            a 1-tuple key before being handed to the repository
        :returns: the sha1 reported by repo.inventories.add_lines()
        :raises AssertionError: if the repository is not in a write
            group, if inv.revision_id is set to something other than
            revision_id, or if inv has no root
        """
        # Code taken from bzrlib.repository.add_inventory
        target = self.repo
        if not target.is_in_write_group():
            raise AssertionError("%r not in write group" % (target,))
        _mod_revision.check_not_reserved_id(revision_id)
        if inv.revision_id is not None and inv.revision_id != revision_id:
            raise AssertionError(
                "Mismatch between inventory revision"
                " id and insertion revid (%r, %r)"
                % (inv.revision_id, revision_id))
        if inv.root is None:
            raise AssertionError()
        serialised_lines = target._serialise_inventory_to_lines(inv)
        parent_keys = [(parent_id,) for parent_id in parents]
        sha1, _num_bytes, cached_text = target.inventories.add_lines(
            (revision_id,), parent_keys, serialised_lines,
            check_content=False)
        self.inv_parent_texts[revision_id] = cached_text
        return sha1

0.64.5 by Ian Clatworthy first cut at generic processing method	1	# Copyright (C) 2008 Canonical Ltd
	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
	15	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	16
	17	"""Parameterised loading of revisions into a repository."""
	18
	19
0.64.49 by Ian Clatworthy skip check re fulltext storage better than delta for inventories when in experimental mode	20	from bzrlib import errors, knit, lru_cache, osutils
0.64.44 by Ian Clatworthy smart caching of serialised inventories	21	from bzrlib import revision as _mod_revision
0.64.6 by Ian Clatworthy generic processing method working for one revision in one branch	22
	23
0.64.79 by Ian Clatworthy support new Repository API	24	class AbstractRevisionLoader(object):
0.64.6 by Ian Clatworthy generic processing method working for one revision in one branch	25	# NOTE: This is effectively bzrlib.repository._install_revision
	26	# refactored to be a class. When importing, we want more flexibility
	27	# in how previous revisions are cached, data is feed in, etc.
0.64.5 by Ian Clatworthy first cut at generic processing method	28
0.64.48 by Ian Clatworthy one revision loader instance	29	def __init__(self, repo):
0.64.5 by Ian Clatworthy first cut at generic processing method	30	"""An object responsible for loading revisions into a repository.
	31
	32	NOTE: Repository locking is not managed by this class. Clients
	33	should take a write lock, call load() multiple times, then release
	34	the lock.
	35
	36	:param repository: the target repository
0.64.48 by Ian Clatworthy one revision loader instance	37	"""
	38	self.repo = repo
	39
	40	def load(self, rev, inv, signature, text_provider,
	41	inventories_provider=None):
	42	"""Load a revision into a repository.
	43
	44	:param rev: the Revision
	45	:param inv: the inventory
	46	:param signature: signing information
	47	:param text_provider: a callable expecting a file_id parameter
	48	that returns the text for that file-id
0.64.5 by Ian Clatworthy first cut at generic processing method	49	:param inventories_provider: a callable expecting a repository and
	50	a list of revision-ids, that returns:
	51	* the list of revision-ids present in the repository
	52	* the list of inventories for the revision-id's,
	53	including an empty inventory for the missing revisions
	54	If None, a default implementation is provided.
	55	"""
0.64.48 by Ian Clatworthy one revision loader instance	56	if inventories_provider is None:
	57	inventories_provider = self._default_inventories_provider
	58	present_parents, parent_invs = inventories_provider(rev.parent_ids)
0.64.6 by Ian Clatworthy generic processing method working for one revision in one branch	59	self._load_texts(rev.revision_id, inv.iter_entries(), parent_invs,
0.64.5 by Ian Clatworthy first cut at generic processing method	60	text_provider)
	61	try:
0.64.44 by Ian Clatworthy smart caching of serialised inventories	62	rev.inventory_sha1 = self._add_inventory(rev.revision_id,
0.64.6 by Ian Clatworthy generic processing method working for one revision in one branch	63	inv, present_parents)
0.64.5 by Ian Clatworthy first cut at generic processing method	64	except errors.RevisionAlreadyPresent:
	65	pass
	66	if signature is not None:
0.64.79 by Ian Clatworthy support new Repository API	67	self.repo.add_signature_text(rev.revision_id, signature)
0.64.79 by Ian Clatworthy support new Repository API	68	self._add_revision(rev, inv)
0.64.5 by Ian Clatworthy first cut at generic processing method	69
	70	def _load_texts(self, revision_id, entries, parent_invs, text_provider):
	71	"""Load texts to a repository for inventory entries.
	72
	73	This method is provided for subclasses to use or override.
	74
	75	:param revision_id: the revision identifier
	76	:param entries: iterator over the inventory entries
	77	:param parent_inv: the parent inventories
	78	:param text_provider: a callable expecting a file_id parameter
	79	that returns the text for that file-id
	80	"""
0.64.79 by Ian Clatworthy support new Repository API	81	raise NotImplementedError(self._load_texts)
0.64.5 by Ian Clatworthy first cut at generic processing method	82
0.64.44 by Ian Clatworthy smart caching of serialised inventories	83	def _add_inventory(self, revision_id, inv, parents):
	84	"""Add the inventory inv to the repository as revision_id.
	85
	86	:param parents: The revision ids of the parents that revision_id
	87	is known to have and are in the repository already.
	88
	89	:returns: The validator(which is a sha1 digest, though what is sha'd is
	90	repository format specific) of the serialized inventory.
	91	"""
	92	return self.repo.add_inventory(revision_id, inv, parents)
	93
0.64.79 by Ian Clatworthy support new Repository API	94	def _add_revision(self, rev, inv):
	95	"""Add a revision and its inventory to a repository.
	96
	97	:param rev: the Revision
	98	:param inv: the inventory
	99	"""
	100	repo.add_revision(rev.revision_id, rev, inv)
	101
0.64.5 by Ian Clatworthy first cut at generic processing method	102	def _default_inventories_provider(self, revision_ids):
	103	"""An inventories provider that queries the repository."""
	104	present = []
	105	inventories = []
	106	for revision_id in revision_ids:
	107	if self.repo.has_revision(revision_id):
	108	present.append(revision_id)
	109	rev_tree = self.repo.revision_tree(revision_id)
	110	else:
	111	rev_tree = self.repo.revision_tree(None)
	112	inventories.append(rev_tree.inventory)
	113	return present, inventories
0.64.44 by Ian Clatworthy smart caching of serialised inventories	114
	115
0.64.79 by Ian Clatworthy support new Repository API	116	class RevisionLoader1(AbstractRevisionLoader):
	117	"""A RevisionLoader that uses the old bzrlib Repository API.
	118
	119	The old API was present until bzr.dev rev 3510.
	120	"""
	121
	122	def _load_texts(self, revision_id, entries, parent_invs, text_provider):
	123	"""See RevisionLoader._load_texts()."""
	124	# Backwards compatibility hack: skip the root id.
	125	if not self.repo.supports_rich_root():
	126	path, root = entries.next()
	127	if root.revision != revision_id:
	128	raise errors.IncompatibleRevision(repr(self.repo))
	129	# Add the texts that are not already present
	130	tx = self.repo.get_transaction()
	131	for path, ie in entries:
	132	# This test is really slow: over 50% of import time
	133	#w = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx)
	134	#if ie.revision in w:
	135	# continue
	136	# Try another way, realising that this assumes that the
	137	# version is not already there. In the general case,
	138	# a shared repository might already have the revision but
	139	# we arguably don't need that check when importing from
	140	# a foreign system.
	141	if ie.revision != revision_id:
	142	continue
	143	text_parents = []
	144	for parent_inv in parent_invs:
	145	if ie.file_id not in parent_inv:
	146	continue
	147	parent_id = parent_inv[ie.file_id].revision
	148	if parent_id in text_parents:
	149	continue
	150	text_parents.append(parent_id)
	151	lines = text_provider(ie.file_id)
	152	vfile = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx)
	153	vfile.add_lines(revision_id, text_parents, lines)
	154
	155	def _add_revision(self, rev, inv):
	156	# There's no need to do everything repo.add_revision does and
	157	# doing so (since bzr.dev 3392) can be pretty slow for long
	158	# delta chains on inventories. Just do the essentials here ...
	159	_mod_revision.check_not_reserved_id(rev.revision_id)
	160	self.repo._revision_store.add_revision(rev, self.repo.get_transaction())
	161
	162
	163	class RevisionLoader2(AbstractRevisionLoader):
	164	"""A RevisionLoader that uses the new bzrlib Repository API."""
	165
	166	def _load_texts(self, revision_id, entries, parent_invs, text_provider):
	167	"""See RevisionLoader._load_texts()."""
	168	# Backwards compatibility hack: skip the root id.
	169	if not self.repo.supports_rich_root():
	170	path, root = entries.next()
	171	if root.revision != revision_id:
	172	raise errors.IncompatibleRevision(repr(self.repo))
	173	text_keys = {}
	174	for path, ie in entries:
	175	text_keys[(ie.file_id, ie.revision)] = ie
	176	text_parent_map = self.repo.texts.get_parent_map(text_keys)
	177	missing_texts = set(text_keys) - set(text_parent_map)
	178	# Add the texts that are not already present
	179	for text_key in missing_texts:
180	ie = text_keys[text_key]
181	text_parents = []
182	for parent_inv in parent_invs:
183	if ie.file_id not in parent_inv:
184	continue
185	parent_id = parent_inv[ie.file_id].revision
186	if parent_id in text_parents:
187	continue
188	text_parents.append((ie.file_id, parent_id))
189	lines = text_provider(ie.file_id)
190	self.repo.texts.add_lines(text_key, text_parents, lines)
191
192	def _add_revision(self, rev, inv):
193	# There's no need to do everything repo.add_revision does and
194	# doing so (since bzr.dev 3392) can be pretty slow for long
195	# delta chains on inventories. Just do the essentials here ...
196	_mod_revision.check_not_reserved_id(rev.revision_id)
197	self.repo._add_revision(rev)
198
199
200	class ImportRevisionLoader1(RevisionLoader1):
201	"""A RevisionLoader (old Repository API) optimised for importing.
202
203	This implementation caches serialised inventory texts and provides
204	fine-grained control over when inventories are stored as fulltexts.
205	"""
206
207	def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
208	random_ids=True):
209	"""See AbstractRevisionLoader.__init__.
0.64.48 by Ian Clatworthy one revision loader instance	210
	211	:param repository: the target repository
	212	:param parent_text_to_cache: the number of parent texts to cache
0.64.79 by Ian Clatworthy support new Repository API	213	:para fulltext_when: if non None, a function to call to decide
	214	whether to fulltext the inventory or not. The revision count
	215	is passed as a parameter and the result is treated as a boolean.
0.64.48 by Ian Clatworthy one revision loader instance	216	"""
0.64.79 by Ian Clatworthy support new Repository API	217	RevisionLoader1.__init__(self, repo)
0.64.48 by Ian Clatworthy one revision loader instance	218	self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
0.64.79 by Ian Clatworthy support new Repository API	219	self.fulltext_when = fulltext_when
0.64.49 by Ian Clatworthy skip check re fulltext storage better than delta for inventories when in experimental mode	220	self.random_ids = random_ids
0.64.79 by Ian Clatworthy support new Repository API	221	self.revision_count = 0
0.64.44 by Ian Clatworthy smart caching of serialised inventories	222
	223	def _add_inventory(self, revision_id, inv, parents):
	224	"""See RevisionLoader._add_inventory."""
	225	# Code taken from bzrlib.repository.add_inventory
	226	assert self.repo.is_in_write_group()
	227	_mod_revision.check_not_reserved_id(revision_id)
	228	assert inv.revision_id is None or inv.revision_id == revision_id, \
	229	"Mismatch between inventory revision" \
	230	" id and insertion revid (%r, %r)" % (inv.revision_id, revision_id)
	231	assert inv.root is not None
	232	inv_lines = self.repo._serialise_inventory_to_lines(inv)
	233	inv_vf = self.repo.get_inventory_weave()
0.64.49 by Ian Clatworthy skip check re fulltext storage better than delta for inventories when in experimental mode	234	sha1, num_bytes, parent_text = self._inventory_add_lines(inv_vf,
	235	revision_id, parents, inv_lines, self.inv_parent_texts)
	236	self.inv_parent_texts[revision_id] = parent_text
	237	return sha1
	238
	239	def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
	240	parent_texts):
	241	"""See Repository._inventory_add_lines()."""
	242	# setup parameters used in original code but not this API
	243	self.revision_count += 1
0.64.77 by Ian Clatworthy add inv-fulltext option and improve speed	244	if self.fulltext_when is not None:
	245	delta = not self.fulltext_when(self.revision_count)
0.64.49 by Ian Clatworthy skip check re fulltext storage better than delta for inventories when in experimental mode	246	else:
	247	delta = inv_vf.delta
	248	left_matching_blocks = None
	249	random_id = self.random_ids
	250	check_content = False
	251
	252	# bzrlib.knit.add_lines() but error checking optimised
	253	inv_vf._check_add(version_id, lines, random_id, check_content)
	254
	255	####################################################################
	256	# bzrlib.knit._add() but skip checking if fulltext better than delta
	257	####################################################################
	258
	259	line_bytes = ''.join(lines)
	260	digest = osutils.sha_string(line_bytes)
	261	present_parents = []
	262	for parent in parents:
	263	if inv_vf.has_version(parent):
	264	present_parents.append(parent)
	265	if parent_texts is None:
	266	parent_texts = {}
	267
	268	# can only compress against the left most present parent.
	269	if (delta and
	270	(len(present_parents) == 0 or
	271	present_parents[0] != parents[0])):
	272	delta = False
	273
	274	text_length = len(line_bytes)
	275	options = []
	276	if lines:
	277	if lines[-1][-1] != '\n':
	278	# copy the contents of lines.
	279	lines = lines[:]
	280	options.append('no-eol')
	281	lines[-1] = lines[-1] + '\n'
	282	line_bytes += '\n'
	283
	284	#if delta:
	285	# # To speed the extract of texts the delta chain is limited
	286	# # to a fixed number of deltas. This should minimize both
	287	# # I/O and the time spend applying deltas.
	288	# delta = inv_vf._check_should_delta(present_parents)
	289
	290	assert isinstance(version_id, str)
	291	content = inv_vf.factory.make(lines, version_id)
	292	if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
	293	# Merge annotations from parent texts if needed.
	294	delta_hunks = inv_vf._merge_annotations(content, present_parents,
	295	parent_texts, delta, inv_vf.factory.annotated,
	296	left_matching_blocks)
	297
	298	if delta:
	299	options.append('line-delta')
	300	store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
	301	size, bytes = inv_vf._data._record_to_data(version_id, digest,
	302	store_lines)
	303	else:
	304	options.append('fulltext')
	305	# isinstance is slower and we have no hierarchy.
	306	if inv_vf.factory.__class__ == knit.KnitPlainFactory:
	307	# Use the already joined bytes saving iteration time in
	308	# _record_to_data.
	309	size, bytes = inv_vf._data._record_to_data(version_id, digest,
310	lines, [line_bytes])
311	else:
312	# get mixed annotation + content and feed it into the
313	# serialiser.
314	store_lines = inv_vf.factory.lower_fulltext(content)
315	size, bytes = inv_vf._data._record_to_data(version_id, digest,
316	store_lines)
317
318	access_memo = inv_vf._data.add_raw_records([size], bytes)[0]
319	inv_vf._index.add_versions(
320	((version_id, options, access_memo, parents),),
321	random_id=random_id)
322	return digest, text_length, content
0.64.79 by Ian Clatworthy support new Repository API	323
	324
	325	class ImportRevisionLoader2(RevisionLoader2):
	326	"""A RevisionLoader (new Repository API) optimised for importing.
	327
	328	This implementation caches serialised inventory texts.
	329	Fine-grained control over when inventories are stored as fulltexts
	330	IS PLANNED LATER.
	331	"""
	332
	333	def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
	334	random_ids=True):
	335	"""See AbstractRevisionLoader.__init__.
	336
	337	:param repository: the target repository
	338	:param parent_text_to_cache: the number of parent texts to cache
	339	:para fulltext_when: if non None, a function to call to decide
	340	whether to fulltext the inventory or not. The revision count
	341	is passed as a parameter and the result is treated as a boolean.
	342	"""
	343	RevisionLoader2.__init__(self, repo)
	344	self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
	345	self.fulltext_when = fulltext_when
	346	self.random_ids = random_ids
	347	self.revision_count = 0
	348
	349	def _add_inventory(self, revision_id, inv, parents):
	350	"""See RevisionLoader._add_inventory."""
	351	# Code taken from bzrlib.repository.add_inventory
	352	repo = self.repo
	353	if not repo.is_in_write_group():
	354	raise AssertionError("%r not in write group" % (repo,))
	355	_mod_revision.check_not_reserved_id(revision_id)
	356	if not (inv.revision_id is None or inv.revision_id == revision_id):
	357	raise AssertionError(
	358	"Mismatch between inventory revision"
	359	" id and insertion revid (%r, %r)"
	360	% (inv.revision_id, revision_id))
	361	if inv.root is None:
	362	raise AssertionError()
	363	inv_lines = repo._serialise_inventory_to_lines(inv)
	364	parents = [(parent,) for parent in parents]
	365	sha1, num_bytes, parent_text = repo.inventories.add_lines(
	366	(revision_id,), parents, inv_lines, check_content=False)
	367	self.inv_parent_texts[revision_id] = parent_text
	368	return sha1