/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
14
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
15
16
"""CommitHandlers that build and save revisions & their inventories."""
17
18
19
from bzrlib import (
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
20
    debug,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
21
    errors,
22
    generate_ids,
23
    inventory,
24
    osutils,
25
    revision,
0.64.192 by Ian Clatworthy
delegate commit message escaping to the serializer if it's a modern one
26
    serializer,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
27
    )
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
28
from bzrlib.trace import (
29
    mutter,
30
    note,
31
    warning,
32
    )
0.123.2 by Jelmer Vernooij
Split out fastimport, import it from the system.
33
from fastimport import (
0.123.1 by Jelmer Vernooij
Move pure-fastimport code into its own directory, in preparation of splitting it into a separate package.
34
    helpers,
35
    processor,
36
    )
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
37
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
38
from bzrlib.plugins.fastimport.helpers import (
39
    mode_to_kind,
40
    )
41
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
42
0.64.192 by Ian Clatworthy
delegate commit message escaping to the serializer if it's a modern one
43
# When the serializer class advertises 'squashes_xml_invalid_characters',
# build_revision() leaves commit message escaping to it.
_serializer_handles_escaping = hasattr(
    serializer.Serializer, 'squashes_xml_invalid_characters')
45
0.64.318 by Jelmer Vernooij
Avoid Inventory.copy, which has disappeared in newer versions of Bazaar.
46
0.84.3 by Ian Clatworthy
fix inventory copying when using deltas
47
def copy_inventory(inv):
    """Return a new Inventory containing a copy of every entry in inv.

    The new inventory is created with the same revision_id as inv and
    each entry yielded by iter_entries_by_dir() is copied before being
    added to it.
    """
    source_entries = inv.iter_entries_by_dir()
    duplicate = inventory.Inventory(None, inv.revision_id)
    for _path, entry in source_entries:
        duplicate.add(entry.copy())
    return duplicate
0.84.3 by Ian Clatworthy
fix inventory copying when using deltas
53
54
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
55
class GenericCommitHandler(processor.CommitHandler):
56
    """Base class for Bazaar CommitHandlers."""
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
57
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
58
    def __init__(self, command, cache_mgr, rev_store, verbose=False,
        prune_empty_dirs=True):
        """Create a handler for one commit command.

        :param command: the commit command; passed to the base class and
          its ref is remembered as branch_ref
        :param cache_mgr: kept as self.cache_mgr
        :param rev_store: kept as self.rev_store
        :param verbose: kept as self.verbose
        :param prune_empty_dirs: kept as self.prune_empty_dirs
        """
        super(GenericCommitHandler, self).__init__(command)
        self.branch_ref = command.ref
        self.cache_mgr = cache_mgr
        self.rev_store = rev_store
        self.verbose = verbose
        self.prune_empty_dirs = prune_empty_dirs

        # path -> file-id for things created in this commit.
        # If the same path is created multiple times, we need to warn the
        # user and add it just once.
        # If a path is added then renamed or copied, we need to handle that.
        self._new_file_ids = {}

        # path -> file-id for things modified in this commit.
        # If a path is modified then renamed or copied, we need to make
        # sure we grab the new content.
        self._modified_file_ids = {}

        # Paths deleted in this commit.
        # If the same path is added or the destination of a rename say,
        # then a fresh file-id is required.
        self._paths_deleted_this_commit = set()
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
79
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
80
    def mutter(self, msg, *args):
        """Mutter msg with the current command id appended as context."""
        mutter("%s (%s)" % (msg, self.command.id), *args)
84
85
    def debug(self, msg, *args):
        """Mutter msg with context, but only if -Dfast-import is active."""
        if "fast-import" not in debug.debug_flags:
            return
        mutter("%s (%s)" % (msg, self.command.id), *args)
90
91
    def note(self, msg, *args):
        """Emit msg as a note with the current command id appended."""
        note("%s (%s)" % (msg, self.command.id), *args)
95
96
    def warning(self, msg, *args):
        """Emit msg as a warning with the current command id appended."""
        warning("%s (%s)" % (msg, self.command.id), *args)
100
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
101
    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, resolves the parents to bzr
        revision-ids, builds the Revision, tells the RevisionStore that
        a new revision is starting and picks the basis inventory.
        """
        self.revision_id = self.gen_revision_id()
        # Cache of texts for this commit, indexed by file-id. The root
        # always gets an entry, whether or not the store expects rich roots.
        self.data_for_commit = {inventory.ROOT_ID: []}

        # Track the heads and get the real parent list
        parent_commit_ids = self.cache_mgr.reftracker.track_heads(
            self.command)

        # Convert the parent commit-ids to bzr revision-ids
        self.parents = [self.cache_mgr.lookup_committish(commit_id)
            for commit_id in (parent_commit_ids or ())]
        self.debug("%s id: %s, parents: %s", self.command.id,
            self.revision_id, str(self.parents))

        # Tell the RevisionStore we're starting a new commit
        self.revision = self.build_revision()
        self.parent_invs = [self.get_inventory(parent_id)
            for parent_id in self.parents]
        self.rev_store.start_new_revision(
            self.revision, self.parents, self.parent_invs)

        # Cache of per-file parents for this commit, indexed by file-id
        self.per_file_parents_for_commit = {}
        if self.rev_store.expects_rich_root():
            self.per_file_parents_for_commit[inventory.ROOT_ID] = ()

        # Keep the basis inventory. This needs to be treated as read-only.
        if self.parents:
            self.basis_inventory = self.get_inventory(self.parents[0])
        else:
            self.basis_inventory = self._init_inventory()

        # Inventories without a root_id attribute expose the id via root.
        basis = self.basis_inventory
        if hasattr(basis, "root_id"):
            self.inventory_root_id = basis.root_id
        else:
            self.inventory_root_id = basis.root.file_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = {}
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
144
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
145
    def _init_inventory(self):
146
        return self.rev_store.init_inventory(self.revision_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
147
148
    def get_inventory(self, revision_id):
149
        """Get the inventory for a revision id."""
150
        try:
151
            inv = self.cache_mgr.inventories[revision_id]
152
        except KeyError:
153
            if self.verbose:
0.64.148 by Ian Clatworthy
handle delete of unknown file in chk formats & reduce noise
154
                self.mutter("get_inventory cache miss for %s", revision_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
155
            # Not cached so reconstruct from the RevisionStore
156
            inv = self.rev_store.get_inventory(revision_id)
157
            self.cache_mgr.inventories[revision_id] = inv
158
        return inv
159
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
160
    def _get_data(self, file_id):
161
        """Get the data bytes for a file-id."""
162
        return self.data_for_commit[file_id]
163
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
164
    def _get_lines(self, file_id):
        """Return the data for file_id split into lines."""
        data = self._get_data(file_id)
        return osutils.split_lines(data)
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
167
0.85.2 by Ian Clatworthy
improve per-file graph generation
168
    def _get_per_file_parents(self, file_id):
169
        """Get the lines for a file-id."""
170
        return self.per_file_parents_for_commit[file_id]
171
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
172
    def _get_inventories(self, revision_ids):
173
        """Get the inventories for revision-ids.
174
        
175
        This is a callback used by the RepositoryStore to
176
        speed up inventory reconstruction.
177
        """
178
        present = []
179
        inventories = []
180
        # If an inventory is in the cache, we assume it was
181
        # successfully loaded into the revision store
182
        for revision_id in revision_ids:
183
            try:
184
                inv = self.cache_mgr.inventories[revision_id]
185
                present.append(revision_id)
186
            except KeyError:
187
                if self.verbose:
188
                    self.note("get_inventories cache miss for %s", revision_id)
189
                # Not cached so reconstruct from the revision store
190
                try:
191
                    inv = self.get_inventory(revision_id)
192
                    present.append(revision_id)
193
                except:
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
194
                    inv = self._init_inventory()
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
195
                self.cache_mgr.inventories[revision_id] = inv
196
            inventories.append(inv)
197
        return present, inventories
198
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
199
    def bzr_file_id_and_new(self, path):
200
        """Get a Bazaar file identifier and new flag for a path.
201
        
202
        :return: file_id, is_new where
203
          is_new = True if the file_id is newly created
204
        """
0.99.13 by Ian Clatworthy
Handle delete then add of a file/symlink in the one commit
205
        if path not in self._paths_deleted_this_commit:
0.99.19 by Ian Clatworthy
Handle rename then modification of the new path
206
            # Try file-ids renamed in this commit
207
            id = self._modified_file_ids.get(path)
208
            if id is not None:
209
                return id, False
210
0.99.13 by Ian Clatworthy
Handle delete then add of a file/symlink in the one commit
211
            # Try the basis inventory
212
            id = self.basis_inventory.path2id(path)
213
            if id is not None:
214
                return id, False
215
            
216
            # Try the other inventories
217
            if len(self.parents) > 1:
218
                for inv in self.parent_invs[1:]:
219
                    id = self.basis_inventory.path2id(path)
220
                    if id is not None:
221
                        return id, False
0.99.1 by Ian Clatworthy
lookup file-ids in inventories instead of a cache
222
223
        # Doesn't exist yet so create it
0.64.247 by Ian Clatworthy
base file-ids on the basename, not path, as jam suggested. This improves the samba import from 565M to 353M.
224
        dirname, basename = osutils.split(path)
225
        id = generate_ids.gen_file_id(basename)
0.99.1 by Ian Clatworthy
lookup file-ids in inventories instead of a cache
226
        self.debug("Generated new file id %s for '%s' in revision-id '%s'",
227
            id, path, self.revision_id)
0.99.5 by Ian Clatworthy
handle adding the same file twice in the one commit
228
        self._new_file_ids[path] = id
0.99.1 by Ian Clatworthy
lookup file-ids in inventories instead of a cache
229
        return id, True
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
230
231
    def bzr_file_id(self, path):
232
        """Get a Bazaar file identifier for a path."""
233
        return self.bzr_file_id_and_new(path)[0]
234
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
235
    def _utf8_decode(self, field, value):
236
        try:
0.64.332 by Jelmer Vernooij
Cope with non-utf8 characters in paths when importing.
237
            return value.decode('utf-8')
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
238
        except UnicodeDecodeError:
239
            # The spec says fields are *typically* utf8 encoded
240
            # but that isn't enforced by git-fast-export (at least)
241
            self.warning("%s not in utf8 - replacing unknown "
242
                "characters" % (field,))
0.64.332 by Jelmer Vernooij
Cope with non-utf8 characters in paths when importing.
243
            return value.decode('utf-8', 'replace')
244
245
    def _decode_path(self, path):
246
        try:
247
            return path.decode('utf-8')
248
        except UnicodeDecodeError:
249
            # The spec says fields are *typically* utf8 encoded
250
            # but that isn't enforced by git-fast-export (at least)
251
            self.warning("path %r not in utf8 - replacing unknown "
252
                "characters" % (path,))
253
            return path.decode('utf-8', 'replace')
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
254
255
    def _format_name_email(self, section, name, email):
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
256
        """Format name & email as a string."""
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
257
        name = self._utf8_decode("%s name" % section, name)
258
        email = self._utf8_decode("%s email" % section, email)
259
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
260
        if email:
261
            return "%s <%s>" % (name, email)
262
        else:
263
            return name
264
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
265
    def gen_revision_id(self):
        """Generate a revision id.

        The id is derived from the formatted committer and the committer
        timestamp of the command.

        Subclasses may override this to produce deterministic ids say.
        """
        committer_info = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = self._format_name_email(
            "committer", committer_info[0], committer_info[1])
        return generate_ids.gen_revision_id(who, committer_info[2])
276
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
277
    def build_revision(self):
        """Build the Revision object for the commit being processed.

        Revision properties are sanitised, given a default branch-nick
        taken from the mapped branch ref, and extended with author info.
        The commit message is UTF-8 decoded, replacing undecodable bytes.
        """
        properties = self._legal_revision_properties(self.command.properties)
        if 'branch-nick' not in properties:
            mapper = self.cache_mgr.branch_mapper
            properties['branch-nick'] = mapper.git_to_bzr(self.branch_ref)
        self._save_author_info(properties)

        committer_info = self.command.committer
        who = self._format_name_email(
            "committer", committer_info[0], committer_info[1])

        try:
            message = self.command.message.decode("utf-8")
        except UnicodeDecodeError:
            self.warning(
                "commit message not in utf8 - replacing unknown characters")
            message = self.command.message.decode('utf-8', 'replace')
        if not _serializer_handles_escaping:
            # We need to assume the bad ol' days
            message = helpers.escape_commit_message(message)

        return revision.Revision(
            timestamp=committer_info[2],
            timezone=committer_info[3],
            committer=who,
            message=message,
            revision_id=self.revision_id,
            properties=properties,
            parent_ids=self.parents)
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
303
0.64.235 by Ian Clatworthy
Sanitize None revision properties to empty string
304
    def _legal_revision_properties(self, props):
305
        """Clean-up any revision properties we can't handle."""
306
        # For now, we just check for None because that's not allowed in 2.0rc1
307
        result = {}
308
        if props is not None:
309
            for name, value in props.items():
310
                if value is None:
311
                    self.warning(
312
                        "converting None to empty string for property %s"
313
                        % (name,))
314
                    result[name] = ''
315
                else:
316
                    result[name] = value
317
        return result
318
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
319
    def _save_author_info(self, rev_props):
320
        author = self.command.author
321
        if author is None:
322
            return
323
        if self.command.more_authors:
324
            authors = [author] + self.command.more_authors
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
325
            author_ids = [self._format_name_email("author", a[0], a[1]) for a in authors]
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
326
        elif author != self.command.committer:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
327
            author_ids = [self._format_name_email("author", author[0], author[1])]
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
328
        else:
329
            return
330
        # If we reach here, there are authors worth storing
331
        rev_props['authors'] = "\n".join(author_ids)
332
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
333
    def _modify_item(self, path, kind, is_executable, data, inv):
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
334
        """Add to or change an item in the inventory."""
0.99.5 by Ian Clatworthy
handle adding the same file twice in the one commit
335
        # If we've already added this, warn the user that we're ignoring it.
336
        # In the future, it might be nice to double check that the new data
337
        # is the same as the old but, frankly, exporters should be fixed
338
        # not to produce bad data streams in the first place ...
339
        existing = self._new_file_ids.get(path)
340
        if existing:
0.102.18 by Ian Clatworthy
Tweak some diagnostic messages
341
            # We don't warn about directories because it's fine for them
342
            # to be created already by a previous rename
343
            if kind != 'directory':
344
                self.warning("%s already added in this commit - ignoring" %
345
                    (path,))
0.99.5 by Ian Clatworthy
handle adding the same file twice in the one commit
346
            return
347
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
348
        # Create the new InventoryEntry
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
349
        basename, parent_id = self._ensure_directory(path, inv)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
350
        file_id = self.bzr_file_id(path)
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
351
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
352
        ie.revision = self.revision_id
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
353
        if kind == 'file':
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
354
            ie.executable = is_executable
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
355
            # lines = osutils.split_lines(data)
356
            ie.text_sha1 = osutils.sha_string(data)
357
            ie.text_size = len(data)
358
            self.data_for_commit[file_id] = data
0.102.14 by Ian Clatworthy
export and import empty directories
359
        elif kind == 'directory':
360
            self.directory_entries[path] = ie
361
            # There are no lines stored for a directory so
362
            # make sure the cache used by get_lines knows that
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
363
            self.data_for_commit[file_id] = ''
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
364
        elif kind == 'symlink':
0.64.332 by Jelmer Vernooij
Cope with non-utf8 characters in paths when importing.
365
            ie.symlink_target = self._decode_path(data)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
366
            # There are no lines stored for a symlink so
367
            # make sure the cache used by get_lines knows that
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
368
            self.data_for_commit[file_id] = ''
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
369
        else:
0.64.229 by Ian Clatworthy
Handle git submodules in the stream by warning about + ignoring them
370
            self.warning("Cannot import items of kind '%s' yet - ignoring '%s'"
371
                % (kind, path))
372
            return
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
373
        # Record it
0.64.323 by Jelmer Vernooij
Avoid deprecated Inventory.__contains__.
374
        if inv.has_id(file_id):
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
375
            old_ie = inv[file_id]
376
            if old_ie.kind == 'directory':
377
                self.record_delete(path, old_ie)
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
378
            self.record_changed(path, ie, parent_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
379
        else:
0.64.165 by Ian Clatworthy
handle adding a file to a dir deleted in the same commit
380
            try:
381
                self.record_new(path, ie)
382
            except:
0.64.167 by Ian Clatworthy
incremental packing for chk formats
383
                print "failed to add path '%s' with entry '%s' in command %s" \
384
                    % (path, ie, self.command.id)
385
                print "parent's children are:\n%r\n" % (ie.parent_id.children,)
0.64.165 by Ian Clatworthy
handle adding a file to a dir deleted in the same commit
386
                raise
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
387
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
388
    def _ensure_directory(self, path, inv):
        """Make sure the directory containing 'path' has an inventory entry.

        If the containing directory is not already known, an entry is
        created for it (after recursively ensuring its own parent) and
        recorded as new.

        :param path: the path whose containing directory is required
        :param inv: the inventory used to look up existing directories
        :return: a (basename, parent_id) tuple: the last component of
            'path' and the file-id of the directory that contains it
        """
        parent_path, basename = osutils.split(path)
        if parent_path == '':
            # Top-level item: it sits directly under the root, and the
            # root node doesn't get updated here.
            return basename, self.inventory_root_id

        try:
            known_dir = self._get_directory_entry(inv, parent_path)
        except KeyError:
            # Not a known directory, so it gets created below.
            pass
        else:
            return basename, known_dir.file_id

        # The containing directory is missing. Its own parent has to
        # exist before an entry can be made for it.
        dir_name, grandparent_id = self._ensure_directory(parent_path, inv)
        dir_id = self.bzr_file_id(parent_path)
        make_directory = inventory.entry_factory['directory']
        new_dir = make_directory(dir_id, dir_name, grandparent_id)
        new_dir.revision = self.revision_id
        self.directory_entries[parent_path] = new_dir
        # A directory has no text, so store an empty string to make sure
        # the cache used by get_lines knows that.
        self.data_for_commit[dir_id] = ''

        # A file or symlink may already be using this file-id; if so it
        # has to be deleted before the directory is recorded.
        if inv.has_id(dir_id):
            self.record_delete(parent_path, new_dir)
        self.record_new(parent_path, new_dir)
        return basename, new_dir.file_id
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
420
0.84.12 by Ian Clatworthy
lookup directories on demand in CHKInventories, not all upfront
421
    def _get_directory_entry(self, inv, dirname):
        """Return the inventory entry for the directory 'dirname'.

        Entries already held in self.directory_entries are returned
        directly; otherwise the entry is looked up in 'inv' and, when it
        is a directory, remembered in self.directory_entries.

        Raises KeyError if dirname is not a directory in inv, or if it
        was deleted earlier in this commit.
        """
        cached = self.directory_entries.get(dirname)
        if cached is not None:
            return cached

        if dirname in self._paths_deleted_this_commit:
            raise KeyError
        try:
            file_id = inv.path2id(dirname)
        except errors.NoSuchId:
            # In a CHKInventory, this is raised if there's no root yet
            raise KeyError
        if file_id is None:
            raise KeyError

        entry = inv[file_id]
        # Only directories are acceptable answers (and worth caching)
        if entry.kind != 'directory':
            raise KeyError
        self.directory_entries[dirname] = entry
        return entry
444
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
445
    def _delete_item(self, path, inv):
        """Record the deletion of 'path'.

        A path added earlier in this commit is resolved through the
        pending delta entries; anything else is looked up in 'inv'.
        Deleting a path that cannot be found is logged via mutter and
        otherwise ignored.
        """
        pending_id = self._new_file_ids.get(path)
        if pending_id:
            # We've only just added this path earlier in this commit.
            # note: delta entries look like (old, new, file-id, ie)
            entry = self._delta_entries_by_fileid[pending_id][3]
            self.record_delete(path, entry)
            return

        file_id = inv.path2id(path)
        if file_id is None:
            self.mutter("ignoring delete of %s as not in inventory", path)
            return
        try:
            entry = inv[file_id]
        except errors.NoSuchId:
            self.mutter("ignoring delete of %s as not in inventory", path)
            return
        self.record_delete(path, entry)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
463
464
    def _copy_item(self, src_path, dest_path, inv):
        """Copy the file or symlink at 'src_path' to 'dest_path'.

        The source is taken from the pending changes of this commit when
        it was added or modified earlier in it, otherwise from 'inv'.
        The copy itself is made by calling _modify_item on 'dest_path'.
        A missing source, or a source that is neither a file nor a
        symlink, produces a warning and nothing else.
        """
        pending_id = (self._new_file_ids.get(src_path)
            or self._modified_file_ids.get(src_path))
        if pending_id:
            # We've only just added/changed this path earlier in this commit.
            file_id = pending_id
            # note: delta entries look like (old, new, file-id, ie)
            src_ie = self._delta_entries_by_fileid[file_id][3]
        else:
            file_id = inv.path2id(src_path)
            if file_id is None:
                self.warning("ignoring copy of %s to %s - source does not exist",
                    src_path, dest_path)
                return
            src_ie = inv[file_id]

        kind = src_ie.kind
        if kind not in ('file', 'symlink'):
            self.warning("ignoring copy of %s %s - feature not yet supported",
                kind, dest_path)
            return

        if kind == 'symlink':
            executable = False
            content = src_ie.symlink_target.encode("utf-8")
        else:
            if pending_id:
                # The text only exists in this commit's pending data
                content = self.data_for_commit[file_id]
            else:
                content = self.rev_store.get_file_text(self.parents[0],
                    file_id)
            executable = src_ie.executable
        self._modify_item(dest_path, kind, executable, content, inv)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
492
493
    def _rename_item(self, old_path, new_path, inv):
        """Rename 'old_path' to 'new_path'.

        A path added or modified earlier in this commit is handled by
        rewriting that pending change. Otherwise the entry is looked up
        in 'inv', anything already at 'new_path' is recorded as deleted,
        and the rename is recorded. A missing 'old_path' produces a
        warning and nothing else.
        """
        pending_id = (self._new_file_ids.get(old_path)
            or self._modified_file_ids.get(old_path))
        if pending_id:
            # We've only just added/modified this path earlier in this
            # commit, so turn that add/modify of old_path into an add of
            # new_path instead.
            self._rename_pending_change(old_path, new_path, pending_id)
            return

        file_id = inv.path2id(old_path)
        if file_id is None:
            self.warning(
                "ignoring rename of %s to %s - old path does not exist" %
                (old_path, new_path))
            return

        renamed_ie = inv[file_id]
        # Remember the revision now: it is read here, before
        # record_rename() runs, because it is needed to fetch the text
        # below.
        text_revision = renamed_ie.revision
        displaced_id = inv.path2id(new_path)
        if displaced_id is not None:
            self.record_delete(new_path, inv[displaced_id])
        self.record_rename(old_path, new_path, file_id, renamed_ie)

        # The revision-id for this entry will be/has been updated and
        # that means the loader then needs to know what the "new" text is.
        # We therefore must go back to the revision store to get it.
        text_lines = self.rev_store.get_file_lines(text_revision, file_id)
        self.data_for_commit[file_id] = ''.join(text_lines)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
520
521
    def _delete_all_items(self, inv):
        """Record a delete for every entry in 'inv' except the root.

        Nothing is done when 'inv' is empty.
        """
        if len(inv) == 0:
            return
        for path, ie in inv.iter_entries_by_dir():
            if path == "":
                # The empty path is the tree root, which is kept
                continue
            self.record_delete(path, ie)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
527
0.64.145 by Ian Clatworthy
handle delete of missing files for chk formats
528
    def _warn_unless_in_merges(self, fileid, path):
        """Warn about a delete of 'path' that no merge parent knows about.

        Only the merge parents (every parent after the first) are
        consulted. With no merge parents nothing is reported; otherwise
        a warning is issued unless at least one of their inventories
        contains 'fileid'.

        :param fileid: the file-id being deleted
        :param path: the path being deleted (used in the warning only)
        """
        merge_parents = self.parents[1:]
        if not merge_parents:
            return
        for parent in merge_parents:
            # Use has_id() rather than the deprecated ``in`` operator
            # (Inventory.__contains__), like the rest of this module.
            if self.get_inventory(parent).has_id(fileid):
                return
        self.warning("ignoring delete of %s as not in parent inventories", path)
535
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
536
537
class InventoryCommitHandler(GenericCommitHandler):
    """A CommitHandler that builds and saves Inventory objects."""

    def pre_process_files(self):
        """Set up the working inventory before file commands are applied."""
        super(InventoryCommitHandler, self).pre_process_files()

        # Seed the inventory from the previous one. The parent class
        # version of pre_process_files() has already set the right
        # basis_inventory for this branch. When there are parents it is
        # copied, so that it can be mutated safely without corrupting
        # the cached inventory value.
        if self.parents:
            self.inventory = copy_inventory(self.basis_inventory)
        else:
            self.inventory = self.basis_inventory
        self.inventory_root = self.inventory.root

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

        # Initialise the inventory revision info as required
        if self.rev_store.expects_rich_root():
            self.inventory.revision_id = self.revision_id
        else:
            # In this revision store, root entries have no knit or weave.
            # When serializing out to disk and back in, root.revision is
            # always the new revision_id.
            self.inventory.root.revision = self.revision_id

    def post_process_files(self):
        """Save the revision."""
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        self.rev_store.load(
            self.revision,
            self.inventory,
            None,
            self._get_data,
            self._get_per_file_parents,
            self._get_inventories)

    def record_new(self, path, ie):
        """Add the entry 'ie' to the inventory being built."""
        try:
            # If this is a merge, the file was most likely added already.
            # The per-file parent(s) must therefore be calculated and
            # we can't assume there are none.
            parents_and_revision = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            file_parents, ie.revision = parents_and_revision
            self.per_file_parents_for_commit[ie.file_id] = file_parents
            self.inventory.add(ie)
        except errors.DuplicateFileId:
            # Directory already exists as a file or symlink: drop the
            # old entry and try again
            del self.inventory[ie.file_id]
            self.inventory.add(ie)

    def record_changed(self, path, ie, parent_id):
        """Replace the inventory's entry for ie.file_id with 'ie'."""
        # HACK: no API for this (del+add does more than it needs to)
        parents_and_revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        file_parents, ie.revision = parents_and_revision
        self.per_file_parents_for_commit[ie.file_id] = file_parents
        by_id = self.inventory._byid
        by_id[ie.file_id] = ie
        by_id[parent_id].children[ie.name] = ie

    def record_delete(self, path, ie):
        """Remove 'ie', and everything below it, from the inventory."""
        self.inventory.remove_recursive_id(ie.file_id)

    def record_rename(self, old_path, new_path, file_id, ie):
        """Move the entry 'ie' to 'new_path' in the inventory."""
        # For a rename, the revision-id is always the new one, so it is
        # set unconditionally and the revision computed by the revision
        # store is ignored.
        ie.revision = self.revision_id
        file_parents, _ignored_revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[file_id] = file_parents
        new_basename, new_parent_id = self._ensure_directory(
            new_path, self.inventory)
        self.inventory.rename(file_id, new_parent_id, new_basename)

    def modify_handler(self, filecmd):
        """Apply a file-modify command to the inventory."""
        if filecmd.dataref is None:
            data = filecmd.data
        else:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        self.debug("modifying %s", filecmd.path)
        kind, is_executable = mode_to_kind(filecmd.mode)
        path = self._decode_path(filecmd.path)
        self._modify_item(path, kind, is_executable, data, self.inventory)

    def delete_handler(self, filecmd):
        """Apply a file-delete command to the inventory."""
        self.debug("deleting %s", filecmd.path)
        path = self._decode_path(filecmd.path)
        self._delete_item(path, self.inventory)

    def copy_handler(self, filecmd):
        """Apply a file-copy command to the inventory."""
        src_path = self._decode_path(filecmd.src_path)
        dest_path = self._decode_path(filecmd.dest_path)
        self.debug("copying %s to %s", src_path, dest_path)
        self._copy_item(src_path, dest_path, self.inventory)

    def rename_handler(self, filecmd):
        """Apply a file-rename command to the inventory."""
        old_path = self._decode_path(filecmd.old_path)
        new_path = self._decode_path(filecmd.new_path)
        self.debug("renaming %s to %s", old_path, new_path)
        self._rename_item(old_path, new_path, self.inventory)

    def deleteall_handler(self, filecmd):
        """Apply a delete-all command to the inventory."""
        self.debug("deleting all files (and also all directories)")
        self._delete_all_items(self.inventory)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
641
642
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
643
class InventoryDeltaCommitHandler(GenericCommitHandler):
644
    """A CommitHandler that builds Inventories by applying a delta."""
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
645
646
    def pre_process_files(self):
        """Reset the per-commit delta state before file commands run.

        A delta entry for the tree root is queued for the first revision
        and for stores that do not expect rich roots.
        """
        super(InventoryDeltaCommitHandler, self).pre_process_files()
        self._dirs_that_might_become_empty = set()

        # A given file-id can only appear once so we accumulate
        # the entries in a dict then build the actual delta at the end
        self._delta_entries_by_fileid = {}

        if self.parents and self.rev_store.expects_rich_root():
            # No explicit root entry is needed in this case
            return

        # Need to explicitly add the root entry for the first revision
        # and for non rich-root inventories. With parents the old path
        # of the root is '', without any it is None.
        if self.parents:
            previous_root_path = ''
        else:
            previous_root_path = None
        root_id = inventory.ROOT_ID
        root_ie = inventory.InventoryDirectory(root_id, u'', None)
        root_ie.revision = self.revision_id
        self._add_entry((previous_root_path, '', root_id, root_ie))
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
664
665
    def post_process_files(self):
        """Save the revision.

        The final delta is handed to the revision store together with
        the basis inventory; the inventory the store returns is cached
        under this revision's id.
        """
        final_delta = self._get_final_delta()
        new_inventory = self.rev_store.load_using_delta(
            self.revision,
            self.basis_inventory,
            final_delta,
            None,
            self._get_data,
            self._get_per_file_parents,
            self._get_inventories)
        self.cache_mgr.inventories[self.revision_id] = new_inventory
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
675
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
676
    def _get_final_delta(self):
        """Generate the final delta.

        The accumulated per-file-id entries form the starting delta. When
        pruning is enabled and some directories might have become empty,
        the delta is post-processed in passes: each pass removes the
        candidate directories found to be empty, and the parents of those
        directories become the candidates for the next pass.

        :return: a list of (old-path, new-path, file-id, ie) delta entries
        """
        delta = list(self._delta_entries_by_fileid.values())
        if not (self.prune_empty_dirs and self._dirs_that_might_become_empty):
            return delta

        pending = self._dirs_that_might_become_empty
        while pending:
            unborn_ids = set()
            next_pending = set()
            for dir_path, dir_id in self._empty_after_delta(delta, pending):
                added_id = self._new_file_ids.get(dir_path)
                if added_id:
                    # Added in this very commit: drop the add rather than
                    # emit a delete for something that never existed
                    unborn_ids.add(added_id)
                else:
                    delta.append((dir_path, None, dir_id, None))
                parent = osutils.dirname(dir_path)
                if parent:
                    next_pending.add(parent)
            pending = next_pending
            # Clean up entries that got deleted before they were ever added
            if unborn_ids:
                delta = [de for de in delta if de[2] not in unborn_ids]
        return delta
702
703
    def _empty_after_delta(self, delta, candidates):
        """Find the candidate directories that are empty once delta applies.

        :param delta: the delta entries accumulated so far
        :param candidates: paths of directories that might have become empty
        :return: a list of (path, file-id) tuples, one for each candidate
          that is a directory without children in the proposed inventory
        """
        proposed_inv = self._get_proposed_inventory(delta)
        empty_dirs = []
        for dir_path in candidates:
            dir_id = proposed_inv.path2id(dir_path)
            if dir_id is None:
                # The path is not in the proposed inventory at all
                continue
            dir_entry = proposed_inv[dir_id]
            if dir_entry.kind != 'directory' or len(dir_entry.children) != 0:
                continue
            empty_dirs.append((dir_path, dir_id))
            if self.verbose:
                self.note("pruning empty directory %s" % (dir_path,))
        return empty_dirs
720
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
721
    def _get_proposed_inventory(self, delta):
        """Build the inventory that results from applying delta.

        :param delta: the list of delta entries to apply
        :return: the inventory obtained by applying delta to the basis
          inventory, or to a fresh inventory when there are no parents
        """
        if not self.parents:
            proposed = inventory.Inventory(revision_id=self.revision_id)
            # The root is set in the delta, so remove the default one to
            # prevent a duplicate
            del proposed[inventory.ROOT_ID]
            try:
                proposed.apply_delta(delta)
            except errors.InconsistentDelta:
                delta_text = "\n".join([str(de) for de in delta])
                self.mutter("INCONSISTENT DELTA IS:\n%s" % delta_text)
                raise
            return proposed

        # Note that this will create unreferenced chk pages if we end up
        # deleting entries, because this 'test' inventory won't end up
        # used. However, it is cheaper than having to create a full copy of
        # the inventory for every commit.
        return self.basis_inventory.create_by_apply_delta(
            delta, 'not-a-valid-revision-id:')
740
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
741
    def _add_entry(self, entry):
        """Record a delta entry, merging it with any earlier one.

        A file-id may appear only once in the delta, so an entry for a
        file-id that already has one is combined with it: the original
        old-path is kept while the new new-path and new ie are used.
        For example::

          rename then modify:  (x, y, f, e) & (y, y, f, g) => (x, y, f, g)
          modify then rename:  (x, x, f, e) & (x, y, f, g) => (x, y, f, g)
          rename then delete:  (x, y, f, e) & (y, None, f, None)
                                                     => (x, None, f, None)
          modify then delete:  (x, x, f, e) & (x, None, f, None)
                                                     => (x, None, f, None)

        A delete of something added earlier in this commit cancels the add.
        Parent directories that might become empty are remembered, and the
        per-file parents are calculated if that has not been done yet.

        :param entry: an (old-path, new-path, file-id, ie) tuple
        """
        old_path, new_path, file_id, ie = entry[:4]
        previous = self._delta_entries_by_fileid.get(file_id)
        if previous is not None:
            old_path = previous[0]
            entry = (old_path, new_path, file_id, ie)

        if old_path is None and new_path is None:
            # This is a delete cancelling a previous add
            del self._delta_entries_by_fileid[file_id]
            added_path = previous[1]
            parent_dir = osutils.dirname(added_path)
            self.mutter("cancelling add of %s with parent %s" % (added_path, parent_dir))
            if parent_dir:
                self._dirs_that_might_become_empty.add(parent_dir)
            return
        self._delta_entries_by_fileid[file_id] = entry

        # Classify the combined entry. Anything that is neither a delete
        # nor a rename is an add (no old-path) or an in-place modification.
        is_delete = new_path is None
        is_rename = (not is_delete and old_path is not None
                     and old_path != new_path)

        # Collect parent directories that might become empty
        if is_delete:
            vacated_dir = osutils.dirname(old_path)
            # note: no need to check the root
            if vacated_dir:
                self._dirs_that_might_become_empty.add(vacated_dir)
        elif is_rename:
            source_dir = osutils.dirname(old_path)
            target_dir = osutils.dirname(new_path)
            if source_dir and source_dir != target_dir:
                self._dirs_that_might_become_empty.add(source_dir)

        # Calculate the per-file parents, if not already done
        if file_id in self.per_file_parents_for_commit or is_delete:
            return
        # Even for an add the parents must be looked up: if this is a
        # merge, the file was most likely added already, so we can't
        # assume there are none.
        per_file_parents, entry_revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        if not is_rename:
            # A rename leaves the revision already set on the entry alone
            ie.revision = entry_revision
        self.per_file_parents_for_commit[file_id] = per_file_parents
816
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
817
    def record_new(self, path, ie):
        """Record ie as an addition at path.

        :param path: the path of the new item
        :param ie: the inventory entry for the new item
        """
        addition = (None, path, ie.file_id, ie)
        self._add_entry(addition)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
819
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
820
    def record_changed(self, path, ie, parent_id=None):
        """Record ie as an in-place modification at path.

        :param path: the path of the changed item
        :param ie: the inventory entry holding the new state
        :param parent_id: not used by this implementation
        """
        changed_id = ie.file_id
        self._add_entry((path, path, changed_id, ie))
        # Remember which file-id was modified at this path
        self._modified_file_ids[path] = changed_id
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
823
0.81.9 by Ian Clatworthy
refactor delete_item
824
    def record_delete(self, path, ie):
        """Record the deletion of ie at path.

        For a directory, a delete is also recorded for every entry that
        iterating the basis inventory from ie yields, and the affected
        directories are dropped from the directory-entry lookup.

        :param path: the path being deleted
        :param ie: the inventory entry being deleted
        """
        def forget_directory(dir_path):
            # The directory may never have been registered; that is fine
            try:
                del self.directory_entries[dir_path]
            except KeyError:
                pass

        self._add_entry((path, None, ie.file_id, None))
        self._paths_deleted_this_commit.add(path)
        if ie.kind != 'directory':
            return

        forget_directory(path)
        descendants = self.basis_inventory.iter_entries_by_dir(from_dir=ie)
        for relpath, child in descendants:
            child_path = osutils.pathjoin(path, relpath)
            self._add_entry((child_path, None, child.file_id, None))
            self._paths_deleted_this_commit.add(child_path)
            if child.kind == 'directory':
                forget_directory(child_path)
0.81.8 by Ian Clatworthy
refactor rename_item
842
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
843
    def record_rename(self, old_path, new_path, file_id, old_ie):
        """Record a rename of file_id from old_path to new_path.

        A copy of old_ie is given the new name and parent and this
        commit's revision-id, and is recorded in the delta. The tracking
        structures for modified file-ids, deleted paths and directory
        entries are updated to match.

        :param old_path: the path being renamed from
        :param new_path: the path being renamed to
        :param file_id: the file-id of the item being renamed
        :param old_ie: the inventory entry at old_path
        """
        renamed_ie = old_ie.copy()
        basename, parent_id = self._ensure_directory(
            new_path, self.basis_inventory)
        renamed_ie.name = basename
        renamed_ie.parent_id = parent_id
        renamed_ie.revision = self.revision_id
        self._add_entry((old_path, new_path, file_id, renamed_ie))
        self._modified_file_ids[new_path] = file_id
        # The destination path is in use again, whatever happened earlier
        self._paths_deleted_this_commit.discard(new_path)
        if renamed_ie.kind == 'directory':
            self.directory_entries[new_path] = renamed_ie
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
855
0.99.17 by Ian Clatworthy
Handle rename of a file/symlink modified already in this commit
856
    def _rename_pending_change(self, old_path, new_path, file_id):
        """Move a change pending at old-path so it is an add at new-path.

        The pending delta entry for file_id is deleted at old_path and a
        fresh inventory entry of the same kind, carrying the same content
        details, is recorded as new at new_path.

        :param old_path: the path the pending change was recorded against
        :param new_path: the path the item should be added at instead
        :param file_id: the file-id of the item
        """
        # note: delta entries look like (old, new, file-id, ie)
        pending_ie = self._delta_entries_by_fileid[file_id][3]

        # Delete the old path. Note that this might trigger implicit
        # deletion of newly created parents that could now become empty.
        self.record_delete(old_path, pending_ie)

        # Update the dictionaries used for tracking new file-ids
        if old_path in self._new_file_ids:
            del self._new_file_ids[old_path]
        else:
            del self._modified_file_ids[old_path]
        self._new_file_ids[new_path] = file_id

        # Create the new InventoryEntry
        kind = pending_ie.kind
        basename, parent_id = self._ensure_directory(
            new_path, self.basis_inventory)
        moved_ie = inventory.make_entry(kind, basename, parent_id, file_id)
        moved_ie.revision = self.revision_id
        if kind == 'file':
            moved_ie.executable = pending_ie.executable
            moved_ie.text_sha1 = pending_ie.text_sha1
            moved_ie.text_size = pending_ie.text_size
        elif kind == 'symlink':
            moved_ie.symlink_target = pending_ie.symlink_target

        # Record it
        self.record_new(new_path, moved_ie)
887
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
888
    def modify_handler(self, filecmd):
        """Apply a file-modify command to this commit.

        The kind and executable flag come from the command's mode. When
        the command has a dataref, a directory gets no data, a
        tree-reference uses the dataref itself, and anything else has its
        blob fetched from the cache manager. Without a dataref, the
        command's own data is used.

        :param filecmd: the modify command to apply
        """
        kind, executable = mode_to_kind(filecmd.mode)
        dataref = filecmd.dataref
        if dataref is None:
            data = filecmd.data
        elif kind == "directory":
            data = None
        elif kind == "tree-reference":
            data = dataref
        else:
            data = self.cache_mgr.fetch_blob(dataref)
        self.debug("modifying %s", filecmd.path)
        path = self._decode_path(filecmd.path)
        self._modify_item(path, kind, executable, data, self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
903
904
    def delete_handler(self, filecmd):
        """Apply a file-delete command to this commit.

        :param filecmd: the delete command; its path is decoded before use
        """
        self.debug("deleting %s", filecmd.path)
        path = self._decode_path(filecmd.path)
        self._delete_item(path, self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
908
909
    def copy_handler(self, filecmd):
        """Apply a file-copy command to this commit.

        :param filecmd: the copy command; its source and destination paths
          are decoded before use
        """
        source = self._decode_path(filecmd.src_path)
        destination = self._decode_path(filecmd.dest_path)
        self.debug("copying %s to %s", source, destination)
        self._copy_item(source, destination, self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
914
915
    def rename_handler(self, filecmd):
        """Apply a file-rename command to this commit.

        :param filecmd: the rename command; its old and new paths are
          decoded before use
        """
        source = self._decode_path(filecmd.old_path)
        destination = self._decode_path(filecmd.new_path)
        self.debug("renaming %s to %s", source, destination)
        self._rename_item(source, destination, self.basis_inventory)
920
921
    def deleteall_handler(self, filecmd):
        """Apply a delete-all command to this commit.

        :param filecmd: the delete-all command; it is not inspected here
        """
        self.debug("deleting all files (and also all directories)")
        basis = self.basis_inventory
        self._delete_all_items(basis)