/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
14
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
15
16
"""CommitHandlers that build and save revisions & their inventories."""
17
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
18
from __future__ import absolute_import
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
19
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
20
from ... import (
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
21
    debug,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
22
    errors,
23
    generate_ids,
24
    osutils,
25
    revision,
26
    )
6670.4.3 by Jelmer Vernooij
Fix more imports.
27
from ...bzr import (
28
    inventory,
6670.4.10 by Jelmer Vernooij
Move serializer to bzr.
29
    serializer,
6670.4.3 by Jelmer Vernooij
Fix more imports.
30
    )
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
31
from ...trace import (
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
32
    mutter,
33
    note,
34
    warning,
35
    )
0.123.2 by Jelmer Vernooij
Split out fastimport, import it from the system.
36
from fastimport import (
0.123.1 by Jelmer Vernooij
Move pure-fastimport code into its own directory, in preparation of splitting it into a separate package.
37
    helpers,
38
    processor,
39
    )
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
40
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
41
from .helpers import (
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
42
    mode_to_kind,
43
    )
44
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
45
0.64.192 by Ian Clatworthy
delegate commit message escaping to the serializer if it's a modern one
46
# True when the serializer class advertises that it squashes XML-invalid
# characters itself; build_revision() only escapes commit messages by hand
# when this is False.
_serializer_handles_escaping = hasattr(
    serializer.Serializer, 'squashes_xml_invalid_characters')
48
0.64.318 by Jelmer Vernooij
Avoid Inventory.copy, which has disappeared in newer versions of Bazaar.
49
0.84.3 by Ian Clatworthy
fix inventory copying when using deltas
50
def copy_inventory(inv):
    """Return a new Inventory containing copies of all entries of inv.

    The copy is assembled entry by entry (in iter_entries_by_dir() order)
    instead of relying on an Inventory.copy method.

    :param inv: the inventory to duplicate
    :return: a new inventory.Inventory carrying inv's revision_id
    """
    source_entries = inv.iter_entries_by_dir()
    duplicate = inventory.Inventory(None, inv.revision_id)
    for _path, entry in source_entries:
        duplicate.add(entry.copy())
    return duplicate
0.84.3 by Ian Clatworthy
fix inventory copying when using deltas
56
57
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
58
class GenericCommitHandler(processor.CommitHandler):
59
    """Base class for Bazaar CommitHandlers."""
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
60
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
61
    def __init__(self, command, cache_mgr, rev_store, verbose=False,
                 prune_empty_dirs=True):
        """Create a handler for a single commit command.

        :param command: the commit command to process; its ``ref`` is kept
            as ``branch_ref``
        :param cache_mgr: the cache manager shared across commits
        :param rev_store: the revision store revisions are loaded into
        :param verbose: if True, inventory cache misses are reported
        :param prune_empty_dirs: stored on the handler as-is; presumably
            controls pruning of directories emptied by a delete or rename
            (not used in the code visible here)
        """
        super(GenericCommitHandler, self).__init__(command)
        self.branch_ref = command.ref
        self.cache_mgr = cache_mgr
        self.rev_store = rev_store
        self.verbose = verbose
        self.prune_empty_dirs = prune_empty_dirs

        # path -> file-id for everything created by this commit. A path
        # created more than once is only added once (with a warning), and
        # a path that is added and then renamed or copied needs handling.
        self._new_file_ids = {}

        # path -> file-id for everything modified by this commit. When a
        # modified path is later renamed or copied, the new content has to
        # be the one that is picked up.
        self._modified_file_ids = {}

        # Paths deleted by this commit. If such a path is added again, or
        # becomes the destination of a rename say, it needs a fresh file-id.
        self._paths_deleted_this_commit = set()
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
82
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
83
    def mutter(self, msg, *args):
        """Mutter msg, suffixed with the id of the current command.

        :param msg: the message (may contain %-placeholders for args)
        :param args: arguments passed through to the trace function
        """
        mutter("%s (%s)" % (msg, self.command.id), *args)
87
88
    def debug(self, msg, *args):
        """Mutter msg with command context if "fast-import" debugging is on.

        Nothing is output unless "fast-import" is in debug.debug_flags.

        :param msg: the message (may contain %-placeholders for args)
        :param args: arguments passed through to the trace function
        """
        if "fast-import" not in debug.debug_flags:
            return
        mutter("%s (%s)" % (msg, self.command.id), *args)
93
94
    def note(self, msg, *args):
        """Emit msg as a note, suffixed with the id of the current command.

        :param msg: the message (may contain %-placeholders for args)
        :param args: arguments passed through to the trace function
        """
        note("%s (%s)" % (msg, self.command.id), *args)
98
99
    def warning(self, msg, *args):
        """Emit msg as a warning, suffixed with the id of the current command.

        :param msg: the message (may contain %-placeholders for args)
        :param args: arguments passed through to the trace function
        """
        warning("%s (%s)" % (msg, self.command.id), *args)
103
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
104
    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, resolves the parent revisions, builds
        the Revision object, tells the revision store that a new revision
        is starting and selects the basis inventory.
        """
        self.revision_id = self.gen_revision_id()

        # Texts for this commit, indexed by file-id. The root always gets
        # an entry.
        self.data_for_commit = {inventory.ROOT_ID: []}

        # Track the heads to obtain the real parent list (commit-ids),
        # then convert those to bzr revision-ids.
        commit_parents = self.cache_mgr.reftracker.track_heads(self.command)
        if not commit_parents:
            self.parents = []
        else:
            self.parents = [self.cache_mgr.lookup_committish(commit_id)
                            for commit_id in commit_parents]
        self.debug("%s id: %s, parents: %s", self.command.id,
                   self.revision_id, str(self.parents))

        # Tell the RevisionStore we're starting a new commit
        self.revision = self.build_revision()
        self.parent_invs = [self.get_inventory(parent_id)
                            for parent_id in self.parents]
        self.rev_store.start_new_revision(
            self.revision, self.parents, self.parent_invs)

        # Per-file parents for this commit, indexed by file-id
        self.per_file_parents_for_commit = {}
        if self.rev_store.expects_rich_root():
            self.per_file_parents_for_commit[inventory.ROOT_ID] = ()

        # Keep the basis inventory. This needs to be treated as read-only.
        if self.parents:
            self.basis_inventory = self.get_inventory(self.parents[0])
        else:
            self.basis_inventory = self._init_inventory()

        # Some inventories expose root_id directly; for the others the id
        # is read off the root entry.
        basis = self.basis_inventory
        if hasattr(basis, "root_id"):
            self.inventory_root_id = basis.root_id
        else:
            self.inventory_root_id = basis.root.file_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = {}
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
147
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
148
    def _init_inventory(self):
149
        return self.rev_store.init_inventory(self.revision_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
150
151
    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The inventory cache is consulted first. On a miss the inventory is
        fetched from the revision store and added to the cache.

        :param revision_id: the revision whose inventory is wanted
        :return: the inventory
        """
        try:
            return self.cache_mgr.inventories[revision_id]
        except KeyError:
            pass
        if self.verbose:
            self.mutter("get_inventory cache miss for %s", revision_id)
        # Not cached so reconstruct from the RevisionStore
        loaded = self.rev_store.get_inventory(revision_id)
        self.cache_mgr.inventories[revision_id] = loaded
        return loaded
162
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
163
    def _get_data(self, file_id):
164
        """Get the data bytes for a file-id."""
165
        return self.data_for_commit[file_id]
166
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
167
    def _get_lines(self, file_id):
        """Return the data recorded for file_id, split into lines."""
        data = self._get_data(file_id)
        return osutils.split_lines(data)
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
170
0.85.2 by Ian Clatworthy
improve per-file graph generation
171
    def _get_per_file_parents(self, file_id):
172
        """Get the lines for a file-id."""
173
        return self.per_file_parents_for_commit[file_id]
174
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
175
    def _get_inventories(self, revision_ids):
176
        """Get the inventories for revision-ids.
177
        
178
        This is a callback used by the RepositoryStore to
179
        speed up inventory reconstruction.
180
        """
181
        present = []
182
        inventories = []
183
        # If an inventory is in the cache, we assume it was
184
        # successfully loaded into the revision store
185
        for revision_id in revision_ids:
186
            try:
187
                inv = self.cache_mgr.inventories[revision_id]
188
                present.append(revision_id)
189
            except KeyError:
190
                if self.verbose:
191
                    self.note("get_inventories cache miss for %s", revision_id)
192
                # Not cached so reconstruct from the revision store
193
                try:
194
                    inv = self.get_inventory(revision_id)
195
                    present.append(revision_id)
196
                except:
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
197
                    inv = self._init_inventory()
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
198
                self.cache_mgr.inventories[revision_id] = inv
199
            inventories.append(inv)
200
        return present, inventories
201
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
202
    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        Unless the path was deleted earlier in this commit, an existing
        file-id is searched for, in order, among the ids modified in this
        commit, the basis inventory and the other parent inventories.
        If none is found, a new id is generated from the path's basename
        and remembered in _new_file_ids.

        :param path: the path to find or create a file-id for
        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        if path not in self._paths_deleted_this_commit:
            # Try file-ids renamed in this commit
            file_id = self._modified_file_ids.get(path)
            if file_id is not None:
                return file_id, False

            # Try the basis inventory
            file_id = self.basis_inventory.path2id(path)
            if file_id is not None:
                return file_id, False

            # Try the other inventories. Each parent inventory must be
            # queried itself; asking the basis inventory again (as this
            # loop used to) can never find anything new.
            if len(self.parents) > 1:
                for inv in self.parent_invs[1:]:
                    file_id = inv.path2id(path)
                    if file_id is not None:
                        return file_id, False

        # Doesn't exist yet so create it
        basename = osutils.split(path)[1]
        file_id = generate_ids.gen_file_id(basename)
        self.debug("Generated new file id %s for '%s' in revision-id '%s'",
                   file_id, path, self.revision_id)
        self._new_file_ids[path] = file_id
        return file_id, True
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
233
234
    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path.

        Same lookup as bzr_file_id_and_new(), with the is_new flag dropped.
        """
        file_id_and_flag = self.bzr_file_id_and_new(path)
        return file_id_and_flag[0]
237
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
238
    def _utf8_decode(self, field, value):
239
        try:
0.64.332 by Jelmer Vernooij
Cope with non-utf8 characters in paths when importing.
240
            return value.decode('utf-8')
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
241
        except UnicodeDecodeError:
242
            # The spec says fields are *typically* utf8 encoded
243
            # but that isn't enforced by git-fast-export (at least)
244
            self.warning("%s not in utf8 - replacing unknown "
245
                "characters" % (field,))
0.64.332 by Jelmer Vernooij
Cope with non-utf8 characters in paths when importing.
246
            return value.decode('utf-8', 'replace')
247
248
    def _decode_path(self, path):
249
        try:
250
            return path.decode('utf-8')
251
        except UnicodeDecodeError:
252
            # The spec says fields are *typically* utf8 encoded
253
            # but that isn't enforced by git-fast-export (at least)
254
            self.warning("path %r not in utf8 - replacing unknown "
255
                "characters" % (path,))
256
            return path.decode('utf-8', 'replace')
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
257
258
    def _format_name_email(self, section, name, email):
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
259
        """Format name & email as a string."""
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
260
        name = self._utf8_decode("%s name" % section, name)
261
        email = self._utf8_decode("%s email" % section, email)
262
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
263
        if email:
264
            return "%s <%s>" % (name, email)
265
        else:
266
            return name
267
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
268
    def gen_revision_id(self):
        """Generate a revision id.

        The id is derived from the formatted committer and the committer
        timestamp of the command.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps using the person running the import as 'who' is ok? If
        # so, it might be a bit quicker and give slightly better compression?
        return generate_ids.gen_revision_id(
            self._format_name_email("committer", committer[0], committer[1]),
            committer[2])
279
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
280
    def build_revision(self):
        """Build the Revision object for the commit being processed.

        Revision properties come from the command (sanitised), with
        'branch-nick' defaulting to the mapped branch ref and author info
        added where worth storing. The commit message is decoded from
        UTF-8, replacing undecodable bytes with a warning, and is escaped
        here only if the serializer doesn't handle that itself.

        :return: a revision.Revision
        """
        props = self._legal_revision_properties(self.command.properties)
        if 'branch-nick' not in props:
            nick = self.cache_mgr.branch_mapper.git_to_bzr(self.branch_ref)
            props['branch-nick'] = nick
        self._save_author_info(props)

        committer = self.command.committer
        who = self._format_name_email("committer", committer[0], committer[1])

        raw_message = self.command.message
        try:
            message = raw_message.decode("utf-8")
        except UnicodeDecodeError:
            self.warning(
                "commit message not in utf8 - replacing unknown characters")
            message = raw_message.decode('utf-8', 'replace')

        if not _serializer_handles_escaping:
            # We need to assume the bad ol' days
            message = helpers.escape_commit_message(message)

        return revision.Revision(
            timestamp=committer[2],
            timezone=committer[3],
            committer=who,
            message=message,
            revision_id=self.revision_id,
            properties=props,
            parent_ids=self.parents)
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
306
0.64.235 by Ian Clatworthy
Sanitize None revision properties to empty string
307
    def _legal_revision_properties(self, props):
308
        """Clean-up any revision properties we can't handle."""
309
        # For now, we just check for None because that's not allowed in 2.0rc1
310
        result = {}
311
        if props is not None:
312
            for name, value in props.items():
313
                if value is None:
314
                    self.warning(
315
                        "converting None to empty string for property %s"
316
                        % (name,))
317
                    result[name] = ''
318
                else:
319
                    result[name] = value
320
        return result
321
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
322
    def _save_author_info(self, rev_props):
323
        author = self.command.author
324
        if author is None:
325
            return
326
        if self.command.more_authors:
327
            authors = [author] + self.command.more_authors
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
328
            author_ids = [self._format_name_email("author", a[0], a[1]) for a in authors]
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
329
        elif author != self.command.committer:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
330
            author_ids = [self._format_name_email("author", author[0], author[1])]
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
331
        else:
332
            return
333
        # If we reach here, there are authors worth storing
334
        rev_props['authors'] = "\n".join(author_ids)
335
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
336
    def _modify_item(self, path, kind, is_executable, data, inv):
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
337
        """Add to or change an item in the inventory."""
0.99.5 by Ian Clatworthy
handle adding the same file twice in the one commit
338
        # If we've already added this, warn the user that we're ignoring it.
339
        # In the future, it might be nice to double check that the new data
340
        # is the same as the old but, frankly, exporters should be fixed
341
        # not to produce bad data streams in the first place ...
342
        existing = self._new_file_ids.get(path)
343
        if existing:
0.102.18 by Ian Clatworthy
Tweak some diagnostic messages
344
            # We don't warn about directories because it's fine for them
345
            # to be created already by a previous rename
346
            if kind != 'directory':
347
                self.warning("%s already added in this commit - ignoring" %
348
                    (path,))
0.99.5 by Ian Clatworthy
handle adding the same file twice in the one commit
349
            return
350
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
351
        # Create the new InventoryEntry
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
352
        basename, parent_id = self._ensure_directory(path, inv)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
353
        file_id = self.bzr_file_id(path)
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
354
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
355
        ie.revision = self.revision_id
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
356
        if kind == 'file':
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
357
            ie.executable = is_executable
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
358
            # lines = osutils.split_lines(data)
359
            ie.text_sha1 = osutils.sha_string(data)
360
            ie.text_size = len(data)
361
            self.data_for_commit[file_id] = data
0.102.14 by Ian Clatworthy
export and import empty directories
362
        elif kind == 'directory':
363
            self.directory_entries[path] = ie
364
            # There are no lines stored for a directory so
365
            # make sure the cache used by get_lines knows that
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
366
            self.data_for_commit[file_id] = ''
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
367
        elif kind == 'symlink':
0.64.332 by Jelmer Vernooij
Cope with non-utf8 characters in paths when importing.
368
            ie.symlink_target = self._decode_path(data)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
369
            # There are no lines stored for a symlink so
370
            # make sure the cache used by get_lines knows that
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
371
            self.data_for_commit[file_id] = ''
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
372
        else:
0.64.229 by Ian Clatworthy
Handle git submodules in the stream by warning about + ignoring them
373
            self.warning("Cannot import items of kind '%s' yet - ignoring '%s'"
374
                % (kind, path))
375
            return
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
376
        # Record it
6883.7.11 by Jelmer Vernooij
Avoid has_id.
377
        try:
6915.4.2 by Jelmer Vernooij
Remove __getitem__ and __iter__ from Inventory.
378
            old_ie = inv.get_entry(file_id)
6883.7.11 by Jelmer Vernooij
Avoid has_id.
379
        except errors.NoSuchId:
0.64.165 by Ian Clatworthy
handle adding a file to a dir deleted in the same commit
380
            try:
381
                self.record_new(path, ie)
382
            except:
6855.3.1 by Jelmer Vernooij
Several more fixes.
383
                print("failed to add path '%s' with entry '%s' in command %s" \
384
                    % (path, ie, self.command.id))
385
                print("parent's children are:\n%r\n" % (ie.parent_id.children,))
0.64.165 by Ian Clatworthy
handle adding a file to a dir deleted in the same commit
386
                raise
6883.7.11 by Jelmer Vernooij
Avoid has_id.
387
        else:
388
            if old_ie.kind == 'directory':
389
                self.record_delete(path, old_ie)
390
            self.record_changed(path, ie, parent_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
391
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
392
    def _ensure_directory(self, path, inv):
        """Make sure the directory containing 'path' exists.

        If the containing directory is not known yet, an inventory entry is
        created and recorded for it, after recursively ensuring its own
        parent exists.

        :param path: the path whose containing directory is required
        :param inv: the inventory used to look up existing directories
        :return: (basename, parent_id) where basename is the last component
            of path and parent_id is the file-id of its containing directory
        """
        containing_dir, basename = osutils.split(path)
        if containing_dir == '':
            # the root node doesn't get updated
            return basename, self.inventory_root_id

        try:
            known_dir = self._get_directory_entry(inv, containing_dir)
            return basename, known_dir.file_id
        except KeyError:
            # The directory doesn't exist yet - fall through and create it
            pass

        # Make sure the parent of the new directory exists first
        new_dir_name, new_dir_parent_id = self._ensure_directory(
            containing_dir, inv)
        new_dir_id = self.bzr_file_id(containing_dir)
        new_dir = inventory.entry_factory['directory'](
            new_dir_id, new_dir_name, new_dir_parent_id)
        new_dir.revision = self.revision_id
        self.directory_entries[containing_dir] = new_dir
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.data_for_commit[new_dir_id] = ''

        # A file or symlink may already be using that file-id.
        # When that is the case, it has to be deleted first.
        if inv.has_id(new_dir_id):
            self.record_delete(containing_dir, new_dir)
        self.record_new(containing_dir, new_dir)
        return basename, new_dir.file_id
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
424
0.84.12 by Ian Clatworthy
lookup directories on demand in CHKInventories, not all upfront
425
    def _get_directory_entry(self, inv, dirname):
426
        """Get the inventory entry for a directory.
427
        
428
        Raises KeyError if dirname is not a directory in inv.
429
        """
430
        result = self.directory_entries.get(dirname)
431
        if result is None:
0.99.21 by Ian Clatworthy
Handle deleting a directory then adding a file within it in the same commit
432
            if dirname in self._paths_deleted_this_commit:
433
                raise KeyError
0.64.146 by Ian Clatworthy
fix first file is in a subdirectory bug for chk formats
434
            try:
435
                file_id = inv.path2id(dirname)
436
            except errors.NoSuchId:
437
                # In a CHKInventory, this is raised if there's no root yet
438
                raise KeyError
0.84.12 by Ian Clatworthy
lookup directories on demand in CHKInventories, not all upfront
439
            if file_id is None:
440
                raise KeyError
6915.4.2 by Jelmer Vernooij
Remove __getitem__ and __iter__ from Inventory.
441
            result = inv.get_entry(file_id)
0.84.12 by Ian Clatworthy
lookup directories on demand in CHKInventories, not all upfront
442
            # dirname must be a directory for us to return it
443
            if result.kind == 'directory':
444
                self.directory_entries[dirname] = result
445
            else:
446
                raise KeyError
447
        return result
448
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
449
    def _delete_item(self, path, inv):
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
450
        newly_added = self._new_file_ids.get(path)
451
        if newly_added:
452
            # We've only just added this path earlier in this commit.
453
            file_id = newly_added
454
            # note: delta entries look like (old, new, file-id, ie)
455
            ie = self._delta_entries_by_fileid[file_id][3]
0.64.145 by Ian Clatworthy
handle delete of missing files for chk formats
456
        else:
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
457
            file_id = inv.path2id(path)
458
            if file_id is None:
459
                self.mutter("ignoring delete of %s as not in inventory", path)
460
                return
461
            try:
6915.4.2 by Jelmer Vernooij
Remove __getitem__ and __iter__ from Inventory.
462
                ie = inv.get_entry(file_id)
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
463
            except errors.NoSuchId:
464
                self.mutter("ignoring delete of %s as not in inventory", path)
465
                return
466
        self.record_delete(path, ie)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
467
468
    def _copy_item(self, src_path, dest_path, inv):
0.99.18 by Ian Clatworthy
Handle copy of a file/symlink already modified in this commit
469
        newly_changed = self._new_file_ids.get(src_path) or \
470
            self._modified_file_ids.get(src_path)
471
        if newly_changed:
472
            # We've only just added/changed this path earlier in this commit.
473
            file_id = newly_changed
0.99.8 by Ian Clatworthy
handle copy of a newly added file
474
            # note: delta entries look like (old, new, file-id, ie)
475
            ie = self._delta_entries_by_fileid[file_id][3]
476
        else:
477
            file_id = inv.path2id(src_path)
478
            if file_id is None:
479
                self.warning("ignoring copy of %s to %s - source does not exist",
480
                    src_path, dest_path)
481
                return
6915.4.2 by Jelmer Vernooij
Remove __getitem__ and __iter__ from Inventory.
482
            ie = inv.get_entry(file_id)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
483
        kind = ie.kind
484
        if kind == 'file':
0.99.18 by Ian Clatworthy
Handle copy of a file/symlink already modified in this commit
485
            if newly_changed:
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
486
                content = self.data_for_commit[file_id]
0.99.8 by Ian Clatworthy
handle copy of a newly added file
487
            else:
488
                content = self.rev_store.get_file_text(self.parents[0], file_id)
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
489
            self._modify_item(dest_path, kind, ie.executable, content, inv)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
490
        elif kind == 'symlink':
0.64.332 by Jelmer Vernooij
Cope with non-utf8 characters in paths when importing.
491
            self._modify_item(dest_path, kind, False,
492
                ie.symlink_target.encode("utf-8"), inv)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
493
        else:
494
            self.warning("ignoring copy of %s %s - feature not yet supported",
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
495
                kind, dest_path)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
496
497
    def _rename_item(self, old_path, new_path, inv):
0.99.17 by Ian Clatworthy
Handle rename of a file/symlink modified already in this commit
498
        existing = self._new_file_ids.get(old_path) or \
499
            self._modified_file_ids.get(old_path)
0.99.6 by Ian Clatworthy
Handle rename of a just added file
500
        if existing:
0.99.17 by Ian Clatworthy
Handle rename of a file/symlink modified already in this commit
501
            # We've only just added/modified this path earlier in this commit.
502
            # Change the add/modify of old_path to an add of new_path
503
            self._rename_pending_change(old_path, new_path, existing)
0.99.6 by Ian Clatworthy
Handle rename of a just added file
504
            return
505
0.81.8 by Ian Clatworthy
refactor rename_item
506
        file_id = inv.path2id(old_path)
0.64.167 by Ian Clatworthy
incremental packing for chk formats
507
        if file_id is None:
508
            self.warning(
509
                "ignoring rename of %s to %s - old path does not exist" %
510
                (old_path, new_path))
511
            return
6915.4.2 by Jelmer Vernooij
Remove __getitem__ and __iter__ from Inventory.
512
        ie = inv.get_entry(file_id)
0.81.8 by Ian Clatworthy
refactor rename_item
513
        rev_id = ie.revision
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
514
        new_file_id = inv.path2id(new_path)
515
        if new_file_id is not None:
6915.4.2 by Jelmer Vernooij
Remove __getitem__ and __iter__ from Inventory.
516
            self.record_delete(new_path, inv.get_entry(new_file_id))
0.81.8 by Ian Clatworthy
refactor rename_item
517
        self.record_rename(old_path, new_path, file_id, ie)
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
518
0.81.8 by Ian Clatworthy
refactor rename_item
519
        # The revision-id for this entry will be/has been updated and
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
520
        # that means the loader then needs to know what the "new" text is.
521
        # We therefore must go back to the revision store to get it.
0.81.8 by Ian Clatworthy
refactor rename_item
522
        lines = self.rev_store.get_file_lines(rev_id, file_id)
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
523
        self.data_for_commit[file_id] = ''.join(lines)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
524
525
    def _delete_all_items(self, inv):
0.64.320 by Jelmer Vernooij
Fix deleteall handler.
526
        if len(inv) == 0:
527
            return
528
        for path, ie in inv.iter_entries_by_dir():
529
            if path != "":
530
                self.record_delete(path, ie)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
531
0.64.145 by Ian Clatworthy
handle delete of missing files for chk formats
532
    def _warn_unless_in_merges(self, fileid, path):
533
        if len(self.parents) <= 1:
534
            return
535
        for parent in self.parents[1:]:
536
            if fileid in self.get_inventory(parent):
537
                return
538
        self.warning("ignoring delete of %s as not in parent inventories", path)
539
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
540
541
class InventoryCommitHandler(GenericCommitHandler):
    """A CommitHandler that builds and saves Inventory objects.

    Each file command is applied directly to self.inventory, a full
    inventory object that is handed to the revision store once all the
    commands of the commit have been processed.
    """

    def pre_process_files(self):
        """Set up the inventory to be mutated by this commit."""
        super(InventoryCommitHandler, self).pre_process_files()

        # Seed the inventory from the previous one. Note that
        # the parent class version of pre_process_files() has
        # already set the right basis_inventory for this branch
        # but we need to copy it in order to mutate it safely
        # without corrupting the cached inventory value.
        if self.parents:
            self.inventory = copy_inventory(self.basis_inventory)
        else:
            self.inventory = self.basis_inventory
        self.inventory_root = self.inventory.root

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

        # Initialise the inventory revision info as required
        if not self.rev_store.expects_rich_root():
            # In this revision store, root entries have no knit or weave.
            # When serializing out to disk and back in, root.revision is
            # always the new revision_id.
            self.inventory.root.revision = self.revision_id
        else:
            self.inventory.revision_id = self.revision_id

    def post_process_files(self):
        """Save the revision."""
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        self.rev_store.load(
            self.revision, self.inventory, None,
            self._get_data,
            self._get_per_file_parents,
            self._get_inventories)

    def record_new(self, path, ie):
        """Add the entry 'ie' to the inventory.

        :param path: the path of the new entry (not used here)
        :param ie: the inventory entry to add
        """
        try:
            # If this is a merge, the file was most likely added already.
            # The per-file parent(s) must therefore be calculated and
            # we can't assume there are none.
            parents_and_revision = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            per_file_parents, ie.revision = parents_and_revision
            self.per_file_parents_for_commit[ie.file_id] = per_file_parents
            self.inventory.add(ie)
        except errors.DuplicateFileId:
            # Directory already exists as a file or symlink:
            # drop the old entry and try again
            del self.inventory[ie.file_id]
            self.inventory.add(ie)

    def record_changed(self, path, ie, parent_id):
        """Replace the inventory's entry for ie.file_id with 'ie'.

        :param path: the path of the changed entry (not used here)
        :param ie: the new inventory entry
        :param parent_id: the file-id of the directory containing ie
        """
        parents_and_revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        per_file_parents, ie.revision = parents_and_revision
        self.per_file_parents_for_commit[ie.file_id] = per_file_parents
        # HACK: no API for this (del+add does more than it needs to)
        entries_by_id = self.inventory._byid
        entries_by_id[ie.file_id] = ie
        entries_by_id[parent_id].children[ie.name] = ie

    def record_delete(self, path, ie):
        """Remove 'ie' from the inventory via remove_recursive_id().

        :param path: the path of the deleted entry (not used here)
        :param ie: the inventory entry to remove
        """
        self.inventory.remove_recursive_id(ie.file_id)

    def record_rename(self, old_path, new_path, file_id, ie):
        """Move the entry 'ie' to new_path within the inventory.

        :param old_path: the path being renamed (not used here)
        :param new_path: the path to rename to
        :param file_id: the file-id of the entry being renamed
        :param ie: the inventory entry being renamed
        """
        # For a rename, the revision-id is always the new one, so it is
        # set before the per-file parents are calculated and the revision
        # returned by that calculation is discarded.
        ie.revision = self.revision_id
        per_file_parents, _ = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[file_id] = per_file_parents
        new_basename, new_parent_id = self._ensure_directory(
            new_path, self.inventory)
        self.inventory.rename(file_id, new_parent_id, new_basename)

    def modify_handler(self, filecmd):
        """Apply a file-modify command to the inventory."""
        # The content is either referenced (dataref) or given inline
        data = (self.cache_mgr.fetch_blob(filecmd.dataref)
                if filecmd.dataref is not None else filecmd.data)
        self.debug("modifying %s", filecmd.path)
        kind, is_executable = mode_to_kind(filecmd.mode)
        path = self._decode_path(filecmd.path)
        self._modify_item(path, kind, is_executable, data, self.inventory)

    def delete_handler(self, filecmd):
        """Apply a file-delete command to the inventory."""
        self.debug("deleting %s", filecmd.path)
        path = self._decode_path(filecmd.path)
        self._delete_item(path, self.inventory)

    def copy_handler(self, filecmd):
        """Apply a file-copy command to the inventory."""
        source = self._decode_path(filecmd.src_path)
        destination = self._decode_path(filecmd.dest_path)
        self.debug("copying %s to %s", source, destination)
        self._copy_item(source, destination, self.inventory)

    def rename_handler(self, filecmd):
        """Apply a file-rename command to the inventory."""
        source = self._decode_path(filecmd.old_path)
        destination = self._decode_path(filecmd.new_path)
        self.debug("renaming %s to %s", source, destination)
        self._rename_item(source, destination, self.inventory)

    def deleteall_handler(self, filecmd):
        """Apply a delete-all command to the inventory."""
        self.debug("deleting all files (and also all directories)")
        self._delete_all_items(self.inventory)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
645
646
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
647
class InventoryDeltaCommitHandler(GenericCommitHandler):
648
    """A CommitHandler that builds Inventories by applying a delta."""
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
649
650
    def pre_process_files(self):
        """Reset the per-commit delta state, adding a root entry if needed."""
        super(InventoryDeltaCommitHandler, self).pre_process_files()
        self._dirs_that_might_become_empty = set()

        # A given file-id can only appear once so we accumulate
        # the entries in a dict then build the actual delta at the end
        self._delta_entries_by_fileid = {}

        # The root entry must be added explicitly for the first revision
        # and for non rich-root inventories
        root_needed = (len(self.parents) == 0 or
                       not self.rev_store.expects_rich_root())
        if not root_needed:
            return

        # With parents the root already has a path (''); without, it is new
        old_path = '' if self.parents else None
        root_id = inventory.ROOT_ID
        root_ie = inventory.InventoryDirectory(root_id, u'', None)
        root_ie.revision = self.revision_id
        self._add_entry((old_path, '', root_id, root_ie))
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
668
669
    def post_process_files(self):
        """Save the revision.

        The final delta is applied against the basis inventory by the
        revision store and the inventory it returns is cached under the
        new revision-id.
        """
        final_delta = self._get_final_delta()
        new_inventory = self.rev_store.load_using_delta(
            self.revision, self.basis_inventory, final_delta, None,
            self._get_data,
            self._get_per_file_parents,
            self._get_inventories)
        self.cache_mgr.inventories[self.revision_id] = new_inventory
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
679
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
680
    def _get_final_delta(self):
681
        """Generate the final delta.
682
683
        Smart post-processing of changes, e.g. pruning of directories
684
        that would become empty, goes here.
685
        """
686
        delta = list(self._delta_entries_by_fileid.values())
687
        if self.prune_empty_dirs and self._dirs_that_might_become_empty:
0.101.2 by Tom Widmer
Update pruning code to operate in multiple passes, with subsequent passes operating on the parent dirs of dirs pruned in the previous pass.
688
            candidates = self._dirs_that_might_become_empty
689
            while candidates:
690
                never_born = set()
691
                parent_dirs_that_might_become_empty = set()
692
                for path, file_id in self._empty_after_delta(delta, candidates):
693
                    newly_added = self._new_file_ids.get(path)
694
                    if newly_added:
695
                        never_born.add(newly_added)
696
                    else:
697
                        delta.append((path, None, file_id, None))
698
                    parent_dir = osutils.dirname(path)
699
                    if parent_dir:
700
                        parent_dirs_that_might_become_empty.add(parent_dir)
701
                candidates = parent_dirs_that_might_become_empty
0.101.5 by Tom Widmer
Add missing tab characters to ensure that never born dirs are correctly removed during each pass of parent directory pruning.
702
                # Clean up entries that got deleted before they were ever added
703
                if never_born:
704
                    delta = [de for de in delta if de[2] not in never_born]
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
705
        return delta
706
707
    def _empty_after_delta(self, delta, candidates):
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
708
        #self.mutter("delta so far is:\n%s" % "\n".join([str(de) for de in delta]))
709
        #self.mutter("candidates for deletion are:\n%s" % "\n".join([c for c in candidates]))
710
        new_inv = self._get_proposed_inventory(delta)
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
711
        result = []
712
        for dir in candidates:
713
            file_id = new_inv.path2id(dir)
0.64.219 by Ian Clatworthy
More robust implicit delete logic when file-id not found
714
            if file_id is None:
715
                continue
6915.4.2 by Jelmer Vernooij
Remove __getitem__ and __iter__ from Inventory.
716
            ie = new_inv.get_entry(file_id)
0.101.2 by Tom Widmer
Update pruning code to operate in multiple passes, with subsequent passes operating on the parent dirs of dirs pruned in the previous pass.
717
            if ie.kind != 'directory':
718
                continue
0.96.2 by Ian Clatworthy
test and fix for implicit directory delete recursing up
719
            if len(ie.children) == 0:
720
                result.append((dir, file_id))
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
721
                if self.verbose:
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
722
                    self.note("pruning empty directory %s" % (dir,))
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
723
        return result
724
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
725
    def _get_proposed_inventory(self, delta):
726
        if len(self.parents):
0.114.1 by John Arbash Meinel
When post-processing the delta stream, don't ask to generate a full inventory to check for deletions.
727
            # new_inv = self.basis_inventory._get_mutable_inventory()
728
            # Note that this will create unreferenced chk pages if we end up
729
            # deleting entries, because this 'test' inventory won't end up
730
            # used. However, it is cheaper than having to create a full copy of
731
            # the inventory for every commit.
732
            new_inv = self.basis_inventory.create_by_apply_delta(delta,
733
                'not-a-valid-revision-id:')
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
734
        else:
735
            new_inv = inventory.Inventory(revision_id=self.revision_id)
736
            # This is set in the delta so remove it to prevent a duplicate
6915.4.1 by Jelmer Vernooij
Inventory.__delitem__ => Inventory.delete.
737
            new_inv.delete(inventory.ROOT_ID)
0.114.1 by John Arbash Meinel
When post-processing the delta stream, don't ask to generate a full inventory to check for deletions.
738
            try:
739
                new_inv.apply_delta(delta)
740
            except errors.InconsistentDelta:
741
                self.mutter("INCONSISTENT DELTA IS:\n%s" % "\n".join([str(de) for de in delta]))
742
                raise
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
743
        return new_inv
744
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
745
    def _add_entry(self, entry):
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
746
        # We need to combine the data if multiple entries have the same file-id.
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
747
        # For example, a rename followed by a modification looks like:
748
        #
749
        # (x, y, f, e) & (y, y, f, g) => (x, y, f, g)
750
        #
751
        # Likewise, a modification followed by a rename looks like:
752
        #
753
        # (x, x, f, e) & (x, y, f, g) => (x, y, f, g)
754
        #
755
        # Here's a rename followed by a delete and a modification followed by
756
        # a delete:
757
        #
758
        # (x, y, f, e) & (y, None, f, None) => (x, None, f, None)
759
        # (x, x, f, e) & (x, None, f, None) => (x, None, f, None)
760
        #
761
        # In summary, we use the original old-path, new new-path and new ie
762
        # when combining entries.
0.85.2 by Ian Clatworthy
improve per-file graph generation
763
        old_path = entry[0]
764
        new_path = entry[1]
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
765
        file_id = entry[2]
0.85.2 by Ian Clatworthy
improve per-file graph generation
766
        ie = entry[3]
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
767
        existing = self._delta_entries_by_fileid.get(file_id, None)
768
        if existing is not None:
0.85.2 by Ian Clatworthy
improve per-file graph generation
769
            old_path = existing[0]
770
            entry = (old_path, new_path, file_id, ie)
0.99.6 by Ian Clatworthy
Handle rename of a just added file
771
        if new_path is None and old_path is None:
772
            # This is a delete cancelling a previous add
773
            del self._delta_entries_by_fileid[file_id]
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
774
            parent_dir = osutils.dirname(existing[1])
775
            self.mutter("cancelling add of %s with parent %s" % (existing[1], parent_dir))
776
            if parent_dir:
777
                self._dirs_that_might_become_empty.add(parent_dir)
0.99.6 by Ian Clatworthy
Handle rename of a just added file
778
            return
779
        else:
780
            self._delta_entries_by_fileid[file_id] = entry
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
781
0.99.6 by Ian Clatworthy
Handle rename of a just added file
782
        # Collect parent directories that might become empty
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
783
        if new_path is None:
784
            # delete
785
            parent_dir = osutils.dirname(old_path)
786
            # note: no need to check the root
787
            if parent_dir:
788
                self._dirs_that_might_become_empty.add(parent_dir)
789
        elif old_path is not None and old_path != new_path:
790
            # rename
791
            old_parent_dir = osutils.dirname(old_path)
792
            new_parent_dir = osutils.dirname(new_path)
793
            if old_parent_dir and old_parent_dir != new_parent_dir:
794
                self._dirs_that_might_become_empty.add(old_parent_dir)
795
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
796
        # Calculate the per-file parents, if not already done
797
        if file_id in self.per_file_parents_for_commit:
798
            return
0.85.2 by Ian Clatworthy
improve per-file graph generation
799
        if old_path is None:
800
            # add
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
801
            # If this is a merge, the file was most likely added already.
802
            # The per-file parent(s) must therefore be calculated and
803
            # we can't assume there are none.
804
            per_file_parents, ie.revision = \
805
                self.rev_store.get_parents_and_revision_for_entry(ie)
806
            self.per_file_parents_for_commit[file_id] = per_file_parents
0.85.2 by Ian Clatworthy
improve per-file graph generation
807
        elif new_path is None:
808
            # delete
809
            pass
810
        elif old_path != new_path:
811
            # rename
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
812
            per_file_parents, _ = \
813
                self.rev_store.get_parents_and_revision_for_entry(ie)
814
            self.per_file_parents_for_commit[file_id] = per_file_parents
0.85.2 by Ian Clatworthy
improve per-file graph generation
815
        else:
816
            # modify
817
            per_file_parents, ie.revision = \
818
                self.rev_store.get_parents_and_revision_for_entry(ie)
819
            self.per_file_parents_for_commit[file_id] = per_file_parents
820
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
821
    def record_new(self, path, ie):
        """Record ie as newly added at path (no old path in the delta)."""
        addition = (None, path, ie.file_id, ie)
        self._add_entry(addition)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
823
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
824
    def record_changed(self, path, ie, parent_id=None):
        """Record ie as modified in place at path.

        The path is also remembered in the modified-file-id map.
        parent_id is accepted but not used by this implementation.
        """
        file_id = ie.file_id
        self._add_entry((path, path, file_id, ie))
        self._modified_file_ids[path] = file_id
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
827
0.81.9 by Ian Clatworthy
refactor delete_item
828
    def record_delete(self, path, ie):
        """Record the deletion of ie at path.

        For a directory, the entries the basis inventory yields beneath it
        are recorded as deleted too, and every deleted directory is
        forgotten from the directory-entry cache.
        """
        self._add_entry((path, None, ie.file_id, None))
        self._paths_deleted_this_commit.add(path)
        if ie.kind != 'directory':
            return

        # The directory may never have been cached; that's fine.
        self.directory_entries.pop(path, None)
        descendants = self.basis_inventory.iter_entries_by_dir(from_dir=ie)
        for child_relpath, child_ie in descendants:
            child_path = osutils.pathjoin(path, child_relpath)
            self._add_entry((child_path, None, child_ie.file_id, None))
            self._paths_deleted_this_commit.add(child_path)
            if child_ie.kind == 'directory':
                self.directory_entries.pop(child_path, None)
0.81.8 by Ian Clatworthy
refactor rename_item
846
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
847
    def record_rename(self, old_path, new_path, file_id, old_ie):
        """Record a rename of file_id from old_path to new_path.

        A copy of old_ie is given the new basename, the parent id returned
        by _ensure_directory() and this commit's revision id, then added to
        the delta. The new path is tracked as modified, is no longer
        treated as deleted in this commit, and - for directories - is
        registered in the directory-entry cache.
        """
        renamed_ie = old_ie.copy()
        basename, parent_id = self._ensure_directory(
            new_path, self.basis_inventory)
        renamed_ie.name = basename
        renamed_ie.parent_id = parent_id
        renamed_ie.revision = self.revision_id
        self._add_entry((old_path, new_path, file_id, renamed_ie))

        self._modified_file_ids[new_path] = file_id
        self._paths_deleted_this_commit.discard(new_path)
        if renamed_ie.kind == 'directory':
            self.directory_entries[new_path] = renamed_ie
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
859
0.99.17 by Ian Clatworthy
Handle rename of a file/symlink modified already in this commit
860
    def _rename_pending_change(self, old_path, new_path, file_id):
        """Instead of adding/modifying old-path, add new-path instead.

        The pending entry for file_id is deleted at old_path and a fresh
        entry of the same kind, carrying over the file or symlink details,
        is recorded as new at new_path.
        """
        # note: delta entries look like (old, new, file-id, ie)
        pending_ie = self._delta_entries_by_fileid[file_id][3]

        # Delete the old path. Note that this might trigger implicit
        # deletion of newly created parents that could now become empty.
        self.record_delete(old_path, pending_ie)

        # Update the dictionaries used for tracking new file-ids
        if old_path in self._new_file_ids:
            tracker = self._new_file_ids
        else:
            tracker = self._modified_file_ids
        del tracker[old_path]
        self._new_file_ids[new_path] = file_id

        # Create the new InventoryEntry
        kind = pending_ie.kind
        basename, parent_id = self._ensure_directory(
            new_path, self.basis_inventory)
        replacement = inventory.make_entry(kind, basename, parent_id, file_id)
        replacement.revision = self.revision_id
        if kind == 'file':
            for attr in ('executable', 'text_sha1', 'text_size'):
                setattr(replacement, attr, getattr(pending_ie, attr))
        elif kind == 'symlink':
            replacement.symlink_target = pending_ie.symlink_target

        # Record it
        self.record_new(new_path, replacement)
891
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
892
    def modify_handler(self, filecmd):
        """Handle a file-modify command.

        The kind and executable flag are derived from the command's mode.
        The content is taken from the command itself when it has no
        dataref; otherwise no data is used for directories, the dataref
        itself is used for tree-references, and the blob is fetched from
        the cache manager for everything else.
        """
        kind, executable = mode_to_kind(filecmd.mode)
        dataref = filecmd.dataref
        if dataref is None:
            data = filecmd.data
        elif kind == "directory":
            data = None
        elif kind == "tree-reference":
            data = dataref
        else:
            data = self.cache_mgr.fetch_blob(dataref)
        self.debug("modifying %s", filecmd.path)
        target_path = self._decode_path(filecmd.path)
        self._modify_item(target_path, kind, executable, data,
            self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
907
908
    def delete_handler(self, filecmd):
        """Handle a file-delete command: log it, then delete the decoded
        path relative to the basis inventory."""
        self.debug("deleting %s", filecmd.path)
        doomed_path = self._decode_path(filecmd.path)
        self._delete_item(doomed_path, self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
912
913
    def copy_handler(self, filecmd):
        """Handle a file-copy command: decode both paths, log them, then
        copy the source to the destination relative to the basis
        inventory."""
        source = self._decode_path(filecmd.src_path)
        destination = self._decode_path(filecmd.dest_path)
        self.debug("copying %s to %s", source, destination)
        self._copy_item(source, destination, self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
918
919
    def rename_handler(self, filecmd):
        """Handle a file-rename command: decode both paths, log them, then
        rename the old path to the new one relative to the basis
        inventory."""
        source = self._decode_path(filecmd.old_path)
        destination = self._decode_path(filecmd.new_path)
        self.debug("renaming %s to %s", source, destination)
        self._rename_item(source, destination, self.basis_inventory)
924
925
    def deleteall_handler(self, filecmd):
        """Handle a delete-all command: log it, then delete every item
        relative to the basis inventory. filecmd itself is not inspected."""
        self.debug("deleting all files (and also all directories)")
        basis = self.basis_inventory
        self._delete_all_items(basis)