/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""CommitHandlers that build and save revisions & their inventories."""
18
19
20
from bzrlib import (
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
21
    debug,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
22
    errors,
23
    generate_ids,
24
    inventory,
25
    osutils,
26
    revision,
0.64.192 by Ian Clatworthy
delegate commit message escaping to the serializer if it's a modern one
27
    serializer,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
28
    )
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
29
from bzrlib.trace import (
30
    mutter,
31
    note,
32
    warning,
33
    )
0.123.2 by Jelmer Vernooij
Split out fastimport, import it from the system.
34
from fastimport import (
0.123.1 by Jelmer Vernooij
Move pure-fastimport code into its own directory, in preparation of splitting it into a separate package.
35
    helpers,
36
    processor,
37
    )
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
38
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
39
from bzrlib.plugins.fastimport.helpers import (
40
    mode_to_kind,
41
    )
42
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
43
0.64.192 by Ian Clatworthy
delegate commit message escaping to the serializer if it's a modern one
44
# True when bzrlib's Serializer class exposes the
# 'squashes_xml_invalid_characters' attribute; build_revision() only
# escapes commit messages itself when this flag is False.
_serializer_handles_escaping = hasattr(
    serializer.Serializer, 'squashes_xml_invalid_characters')
46
0.84.3 by Ian Clatworthy
fix inventory copying when using deltas
47
def copy_inventory(inv):
    """Return a copy of ``inv`` produced by its own ``copy()`` method."""
    # NOTE: returning inv._get_mutable_inventory() when the inventory
    # offers it is deliberately not done here - it currently breaks
    # revision-id matching. (It would also want to become a public API
    # on inventory first.)

    # TODO: Shallow copy - deep inventory copying is expensive
    duplicate = inv.copy()
    return duplicate
0.84.3 by Ian Clatworthy
fix inventory copying when using deltas
55
56
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
57
class GenericCommitHandler(processor.CommitHandler):
58
    """Base class for Bazaar CommitHandlers."""
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
59
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
60
    def __init__(self, command, cache_mgr, rev_store, verbose=False,
        prune_empty_dirs=True):
        """Remember the collaborators and reset the per-commit bookkeeping.

        :param command: the commit command being handled; passed to the
          parent class and its ``ref`` is kept as ``branch_ref``
        :param cache_mgr: kept as ``self.cache_mgr``
        :param rev_store: kept as ``self.rev_store``
        :param verbose: kept as ``self.verbose``
        :param prune_empty_dirs: kept as ``self.prune_empty_dirs``
        """
        super(GenericCommitHandler, self).__init__(command)
        self.cache_mgr, self.rev_store = cache_mgr, rev_store
        self.verbose = verbose
        self.branch_ref = command.ref
        self.prune_empty_dirs = prune_empty_dirs

        # path -> file-id for everything created by this commit.
        # Creating the same path more than once must warn the user and
        # add it only once; a path that is added and then renamed or
        # copied also needs to be found here.
        self._new_file_ids = {}

        # path -> file-id for everything modified by this commit.
        # If a path is modified then renamed or copied, we need to make
        # sure we grab the new content.
        self._modified_file_ids = {}

        # Paths deleted by this commit. If such a path is added again,
        # or becomes the destination of a rename say, it needs a fresh
        # file-id.
        self._paths_deleted_this_commit = set()
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
81
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
82
    def mutter(self, msg, *args):
        """Mutter ``msg`` with the current command id appended as context."""
        with_context = "%s (%s)" % (msg, self.command.id)
        mutter(with_context, *args)
86
87
    def debug(self, msg, *args):
        """Mutter ``msg`` with context, but only under -Dfast-import.

        Nothing is emitted unless "fast-import" is among bzrlib's debug
        flags.
        """
        if "fast-import" not in debug.debug_flags:
            return
        with_context = "%s (%s)" % (msg, self.command.id)
        mutter(with_context, *args)
92
93
    def note(self, msg, *args):
        """Emit ``msg`` as a note with the current command id appended."""
        with_context = "%s (%s)" % (msg, self.command.id)
        note(with_context, *args)
97
98
    def warning(self, msg, *args):
        """Emit ``msg`` as a warning with the current command id appended."""
        with_context = "%s (%s)" % (msg, self.command.id)
        warning(with_context, *args)
102
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
103
    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, resolves the parents, builds the
        Revision, tells the revision store a new revision is starting and
        resets the per-commit caches (texts, per-file parents, basis
        inventory, directory entries).
        """
        self.revision_id = self.gen_revision_id()

        # Texts for this commit, indexed by file-id. The root entry is
        # always seeded; the commented-out expects_rich_root() guard is
        # not applied here.
        self.data_for_commit = {inventory.ROOT_ID: []}

        # Track the heads and get the real parent list, then convert the
        # parent commit-ids to bzr revision-ids.
        head_ids = self.cache_mgr.reftracker.track_heads(self.command)
        if not head_ids:
            self.parents = []
        else:
            self.parents = [self.cache_mgr.lookup_committish(head)
                for head in head_ids]
        self.debug("%s id: %s, parents: %s", self.command.id,
            self.revision_id, str(self.parents))

        # Tell the RevisionStore we're starting a new commit
        self.revision = self.build_revision()
        self.parent_invs = [self.get_inventory(parent_id)
            for parent_id in self.parents]
        self.rev_store.start_new_revision(
            self.revision, self.parents, self.parent_invs)

        # Per-file parents for this commit, indexed by file-id
        self.per_file_parents_for_commit = {}
        if self.rev_store.expects_rich_root():
            self.per_file_parents_for_commit[inventory.ROOT_ID] = ()

        # Keep the basis inventory. This needs to be treated as read-only.
        if self.parents:
            basis = self.get_inventory(self.parents[0])
        else:
            basis = self._init_inventory()
        self.basis_inventory = basis
        # Inventories either expose root_id directly or via root.file_id
        if hasattr(basis, "root_id"):
            self.inventory_root_id = basis.root_id
        else:
            self.inventory_root_id = basis.root.file_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = {}
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
146
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
147
    def _init_inventory(self):
148
        return self.rev_store.init_inventory(self.revision_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
149
150
    def get_inventory(self, revision_id):
151
        """Get the inventory for a revision id."""
152
        try:
153
            inv = self.cache_mgr.inventories[revision_id]
154
        except KeyError:
155
            if self.verbose:
0.64.148 by Ian Clatworthy
handle delete of unknown file in chk formats & reduce noise
156
                self.mutter("get_inventory cache miss for %s", revision_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
157
            # Not cached so reconstruct from the RevisionStore
158
            inv = self.rev_store.get_inventory(revision_id)
159
            self.cache_mgr.inventories[revision_id] = inv
160
        return inv
161
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
162
    def _get_data(self, file_id):
163
        """Get the data bytes for a file-id."""
164
        return self.data_for_commit[file_id]
165
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
166
    def _get_lines(self, file_id):
        """Return the data recorded for ``file_id`` split into lines."""
        data = self._get_data(file_id)
        return osutils.split_lines(data)
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
169
0.85.2 by Ian Clatworthy
improve per-file graph generation
170
    def _get_per_file_parents(self, file_id):
171
        """Get the lines for a file-id."""
172
        return self.per_file_parents_for_commit[file_id]
173
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
174
    def _get_inventories(self, revision_ids):
175
        """Get the inventories for revision-ids.
176
        
177
        This is a callback used by the RepositoryStore to
178
        speed up inventory reconstruction.
179
        """
180
        present = []
181
        inventories = []
182
        # If an inventory is in the cache, we assume it was
183
        # successfully loaded into the revision store
184
        for revision_id in revision_ids:
185
            try:
186
                inv = self.cache_mgr.inventories[revision_id]
187
                present.append(revision_id)
188
            except KeyError:
189
                if self.verbose:
190
                    self.note("get_inventories cache miss for %s", revision_id)
191
                # Not cached so reconstruct from the revision store
192
                try:
193
                    inv = self.get_inventory(revision_id)
194
                    present.append(revision_id)
195
                except:
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
196
                    inv = self._init_inventory()
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
197
                self.cache_mgr.inventories[revision_id] = inv
198
            inventories.append(inv)
199
        return present, inventories
200
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
201
    def bzr_file_id_and_new(self, path):
202
        """Get a Bazaar file identifier and new flag for a path.
203
        
204
        :return: file_id, is_new where
205
          is_new = True if the file_id is newly created
206
        """
0.99.13 by Ian Clatworthy
Handle delete then add of a file/symlink in the one commit
207
        if path not in self._paths_deleted_this_commit:
0.99.19 by Ian Clatworthy
Handle rename then modification of the new path
208
            # Try file-ids renamed in this commit
209
            id = self._modified_file_ids.get(path)
210
            if id is not None:
211
                return id, False
212
0.99.13 by Ian Clatworthy
Handle delete then add of a file/symlink in the one commit
213
            # Try the basis inventory
214
            id = self.basis_inventory.path2id(path)
215
            if id is not None:
216
                return id, False
217
            
218
            # Try the other inventories
219
            if len(self.parents) > 1:
220
                for inv in self.parent_invs[1:]:
221
                    id = self.basis_inventory.path2id(path)
222
                    if id is not None:
223
                        return id, False
0.99.1 by Ian Clatworthy
lookup file-ids in inventories instead of a cache
224
225
        # Doesn't exist yet so create it
0.64.247 by Ian Clatworthy
base file-ids on the basename, not path, as jam suggested. This improves the samba import from 565M to 353M.
226
        dirname, basename = osutils.split(path)
227
        id = generate_ids.gen_file_id(basename)
0.99.1 by Ian Clatworthy
lookup file-ids in inventories instead of a cache
228
        self.debug("Generated new file id %s for '%s' in revision-id '%s'",
229
            id, path, self.revision_id)
0.99.5 by Ian Clatworthy
handle adding the same file twice in the one commit
230
        self._new_file_ids[path] = id
0.99.1 by Ian Clatworthy
lookup file-ids in inventories instead of a cache
231
        return id, True
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
232
233
    def bzr_file_id(self, path):
234
        """Get a Bazaar file identifier for a path."""
235
        return self.bzr_file_id_and_new(path)[0]
236
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
237
    def _utf8_decode(self, field, value):
238
        try:
239
            return value.decode('utf_8')
240
        except UnicodeDecodeError:
241
            # The spec says fields are *typically* utf8 encoded
242
            # but that isn't enforced by git-fast-export (at least)
243
            self.warning("%s not in utf8 - replacing unknown "
244
                "characters" % (field,))
245
            return value.decode('utf_8', 'replace')
246
247
    def _format_name_email(self, section, name, email):
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
248
        """Format name & email as a string."""
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
249
        name = self._utf8_decode("%s name" % section, name)
250
        email = self._utf8_decode("%s email" % section, email)
251
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
252
        if email:
253
            return "%s <%s>" % (name, email)
254
        else:
255
            return name
256
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
257
    def gen_revision_id(self):
        """Generate a revision id.

        The id is derived from the formatted committer name/email and
        the committer timestamp. Subclasses may override this to produce
        deterministic ids say.
        """
        committer_info = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = self._format_name_email(
            "committer", committer_info[0], committer_info[1])
        return generate_ids.gen_revision_id(who, committer_info[2])
268
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
269
    def build_revision(self):
        """Build the Revision object for the commit being processed.

        Revision properties are sanitised, given a default branch-nick
        (mapped from the git ref) and any author information. The commit
        message is decoded from UTF-8, replacing undecodable bytes with a
        warning, and escaped here only when the serializer does not
        handle escaping itself.
        """
        properties = self._legal_revision_properties(self.command.properties)
        if 'branch-nick' not in properties:
            mapper = self.cache_mgr.branch_mapper
            properties['branch-nick'] = mapper.git_to_bzr(self.branch_ref)
        self._save_author_info(properties)

        committer_info = self.command.committer
        who = self._format_name_email(
            "committer", committer_info[0], committer_info[1])

        raw_message = self.command.message
        try:
            message = raw_message.decode("utf-8")
        except UnicodeDecodeError:
            self.warning(
                "commit message not in utf8 - replacing unknown characters")
            message = raw_message.decode('utf-8', 'replace')
        if not _serializer_handles_escaping:
            # We need to assume the bad ol' days
            message = helpers.escape_commit_message(message)

        return revision.Revision(
            revision_id=self.revision_id,
            parent_ids=self.parents,
            committer=who,
            timestamp=committer_info[2],
            timezone=committer_info[3],
            message=message,
            properties=properties)
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
295
0.64.235 by Ian Clatworthy
Sanitize None revision properties to empty string
296
    def _legal_revision_properties(self, props):
297
        """Clean-up any revision properties we can't handle."""
298
        # For now, we just check for None because that's not allowed in 2.0rc1
299
        result = {}
300
        if props is not None:
301
            for name, value in props.items():
302
                if value is None:
303
                    self.warning(
304
                        "converting None to empty string for property %s"
305
                        % (name,))
306
                    result[name] = ''
307
                else:
308
                    result[name] = value
309
        return result
310
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
311
    def _save_author_info(self, rev_props):
312
        author = self.command.author
313
        if author is None:
314
            return
315
        if self.command.more_authors:
316
            authors = [author] + self.command.more_authors
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
317
            author_ids = [self._format_name_email("author", a[0], a[1]) for a in authors]
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
318
        elif author != self.command.committer:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
319
            author_ids = [self._format_name_email("author", author[0], author[1])]
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
320
        else:
321
            return
322
        # If we reach here, there are authors worth storing
323
        rev_props['authors'] = "\n".join(author_ids)
324
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
325
    def _modify_item(self, path, kind, is_executable, data, inv):
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
326
        """Add to or change an item in the inventory."""
0.99.5 by Ian Clatworthy
handle adding the same file twice in the one commit
327
        # If we've already added this, warn the user that we're ignoring it.
328
        # In the future, it might be nice to double check that the new data
329
        # is the same as the old but, frankly, exporters should be fixed
330
        # not to produce bad data streams in the first place ...
331
        existing = self._new_file_ids.get(path)
332
        if existing:
0.102.18 by Ian Clatworthy
Tweak some diagnostic messages
333
            # We don't warn about directories because it's fine for them
334
            # to be created already by a previous rename
335
            if kind != 'directory':
336
                self.warning("%s already added in this commit - ignoring" %
337
                    (path,))
0.99.5 by Ian Clatworthy
handle adding the same file twice in the one commit
338
            return
339
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
340
        # Create the new InventoryEntry
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
341
        basename, parent_id = self._ensure_directory(path, inv)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
342
        file_id = self.bzr_file_id(path)
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
343
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
344
        ie.revision = self.revision_id
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
345
        if kind == 'file':
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
346
            ie.executable = is_executable
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
347
            # lines = osutils.split_lines(data)
348
            ie.text_sha1 = osutils.sha_string(data)
349
            ie.text_size = len(data)
350
            self.data_for_commit[file_id] = data
0.102.14 by Ian Clatworthy
export and import empty directories
351
        elif kind == 'directory':
352
            self.directory_entries[path] = ie
353
            # There are no lines stored for a directory so
354
            # make sure the cache used by get_lines knows that
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
355
            self.data_for_commit[file_id] = ''
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
356
        elif kind == 'symlink':
0.124.1 by Daniel Clemente
pass unicode object (rather than str) to match CHKInventory._entry_to_bytes requirements
357
            ie.symlink_target = data.decode('utf8')
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
358
            # There are no lines stored for a symlink so
359
            # make sure the cache used by get_lines knows that
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
360
            self.data_for_commit[file_id] = ''
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
361
        else:
0.64.229 by Ian Clatworthy
Handle git submodules in the stream by warning about + ignoring them
362
            self.warning("Cannot import items of kind '%s' yet - ignoring '%s'"
363
                % (kind, path))
364
            return
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
365
        # Record it
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
366
        if file_id in inv:
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
367
            old_ie = inv[file_id]
368
            if old_ie.kind == 'directory':
369
                self.record_delete(path, old_ie)
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
370
            self.record_changed(path, ie, parent_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
371
        else:
0.64.165 by Ian Clatworthy
handle adding a file to a dir deleted in the same commit
372
            try:
373
                self.record_new(path, ie)
374
            except:
0.64.167 by Ian Clatworthy
incremental packing for chk formats
375
                print "failed to add path '%s' with entry '%s' in command %s" \
376
                    % (path, ie, self.command.id)
377
                print "parent's children are:\n%r\n" % (ie.parent_id.children,)
0.64.165 by Ian Clatworthy
handle adding a file to a dir deleted in the same commit
378
                raise
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
379
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
380
    def _ensure_directory(self, path, inv):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created (recursively, parents first) and
        recorded as new entries.

        :return: (basename, parent_id) - the last component of ``path``
          and the file-id of the directory containing it
        """
        parent_path, leaf = osutils.split(path)
        if parent_path == '':
            # the root node doesn't get updated
            return leaf, self.inventory_root_id

        try:
            known_dir = self._get_directory_entry(inv, parent_path)
        except KeyError:
            # We will create this entry, since it doesn't exist
            known_dir = None
        if known_dir is not None:
            return leaf, known_dir.file_id

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_name, dir_parent_id = self._ensure_directory(parent_path, inv)
        new_dir_id = self.bzr_file_id(parent_path)
        make_directory = inventory.entry_factory['directory']
        new_dir = make_directory(new_dir_id, dir_name, dir_parent_id)
        new_dir.revision = self.revision_id
        self.directory_entries[parent_path] = new_dir
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.data_for_commit[new_dir_id] = ''

        # It's possible that a file or symlink with that file-id
        # already exists. If it does, we need to delete it.
        if new_dir_id in inv:
            self.record_delete(parent_path, new_dir)
        self.record_new(parent_path, new_dir)
        return leaf, new_dir.file_id
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
412
0.84.12 by Ian Clatworthy
lookup directories on demand in CHKInventories, not all upfront
413
    def _get_directory_entry(self, inv, dirname):
414
        """Get the inventory entry for a directory.
415
        
416
        Raises KeyError if dirname is not a directory in inv.
417
        """
418
        result = self.directory_entries.get(dirname)
419
        if result is None:
0.99.21 by Ian Clatworthy
Handle deleting a directory then adding a file within it in the same commit
420
            if dirname in self._paths_deleted_this_commit:
421
                raise KeyError
0.64.146 by Ian Clatworthy
fix first file is in a subdirectory bug for chk formats
422
            try:
423
                file_id = inv.path2id(dirname)
424
            except errors.NoSuchId:
425
                # In a CHKInventory, this is raised if there's no root yet
426
                raise KeyError
0.84.12 by Ian Clatworthy
lookup directories on demand in CHKInventories, not all upfront
427
            if file_id is None:
428
                raise KeyError
429
            result = inv[file_id]
430
            # dirname must be a directory for us to return it
431
            if result.kind == 'directory':
432
                self.directory_entries[dirname] = result
433
            else:
434
                raise KeyError
435
        return result
436
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
437
    def _delete_item(self, path, inv):
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
438
        newly_added = self._new_file_ids.get(path)
439
        if newly_added:
440
            # We've only just added this path earlier in this commit.
441
            file_id = newly_added
442
            # note: delta entries look like (old, new, file-id, ie)
443
            ie = self._delta_entries_by_fileid[file_id][3]
0.64.145 by Ian Clatworthy
handle delete of missing files for chk formats
444
        else:
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
445
            file_id = inv.path2id(path)
446
            if file_id is None:
447
                self.mutter("ignoring delete of %s as not in inventory", path)
448
                return
449
            try:
450
                ie = inv[file_id]
451
            except errors.NoSuchId:
452
                self.mutter("ignoring delete of %s as not in inventory", path)
453
                return
454
        self.record_delete(path, ie)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
455
456
    def _copy_item(self, src_path, dest_path, inv):
0.99.18 by Ian Clatworthy
Handle copy of a file/symlink already modified in this commit
457
        newly_changed = self._new_file_ids.get(src_path) or \
458
            self._modified_file_ids.get(src_path)
459
        if newly_changed:
460
            # We've only just added/changed this path earlier in this commit.
461
            file_id = newly_changed
0.99.8 by Ian Clatworthy
handle copy of a newly added file
462
            # note: delta entries look like (old, new, file-id, ie)
463
            ie = self._delta_entries_by_fileid[file_id][3]
464
        else:
465
            file_id = inv.path2id(src_path)
466
            if file_id is None:
467
                self.warning("ignoring copy of %s to %s - source does not exist",
468
                    src_path, dest_path)
469
                return
470
            ie = inv[file_id]
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
471
        kind = ie.kind
472
        if kind == 'file':
0.99.18 by Ian Clatworthy
Handle copy of a file/symlink already modified in this commit
473
            if newly_changed:
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
474
                content = self.data_for_commit[file_id]
0.99.8 by Ian Clatworthy
handle copy of a newly added file
475
            else:
476
                content = self.rev_store.get_file_text(self.parents[0], file_id)
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
477
            self._modify_item(dest_path, kind, ie.executable, content, inv)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
478
        elif kind == 'symlink':
0.64.289 by Jelmer Vernooij
Cope with non-ascii characters in symbolic links.
479
            self._modify_item(dest_path, kind, False, ie.symlink_target.encode("utf-8"), inv)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
480
        else:
481
            self.warning("ignoring copy of %s %s - feature not yet supported",
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
482
                kind, dest_path)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
483
484
    def _rename_item(self, old_path, new_path, inv):
0.99.17 by Ian Clatworthy
Handle rename of a file/symlink modified already in this commit
485
        existing = self._new_file_ids.get(old_path) or \
486
            self._modified_file_ids.get(old_path)
0.99.6 by Ian Clatworthy
Handle rename of a just added file
487
        if existing:
0.99.17 by Ian Clatworthy
Handle rename of a file/symlink modified already in this commit
488
            # We've only just added/modified this path earlier in this commit.
489
            # Change the add/modify of old_path to an add of new_path
490
            self._rename_pending_change(old_path, new_path, existing)
0.99.6 by Ian Clatworthy
Handle rename of a just added file
491
            return
492
0.81.8 by Ian Clatworthy
refactor rename_item
493
        file_id = inv.path2id(old_path)
0.64.167 by Ian Clatworthy
incremental packing for chk formats
494
        if file_id is None:
495
            self.warning(
496
                "ignoring rename of %s to %s - old path does not exist" %
497
                (old_path, new_path))
498
            return
0.81.8 by Ian Clatworthy
refactor rename_item
499
        ie = inv[file_id]
500
        rev_id = ie.revision
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
501
        new_file_id = inv.path2id(new_path)
502
        if new_file_id is not None:
0.81.9 by Ian Clatworthy
refactor delete_item
503
            self.record_delete(new_path, inv[new_file_id])
0.81.8 by Ian Clatworthy
refactor rename_item
504
        self.record_rename(old_path, new_path, file_id, ie)
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
505
0.81.8 by Ian Clatworthy
refactor rename_item
506
        # The revision-id for this entry will be/has been updated and
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
507
        # that means the loader then needs to know what the "new" text is.
508
        # We therefore must go back to the revision store to get it.
0.81.8 by Ian Clatworthy
refactor rename_item
509
        lines = self.rev_store.get_file_lines(rev_id, file_id)
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
510
        self.data_for_commit[file_id] = ''.join(lines)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
511
512
    def _delete_all_items(self, inv):
513
        for name, root_item in inv.root.children.iteritems():
514
            inv.remove_recursive_id(root_item.file_id)
515
0.64.145 by Ian Clatworthy
handle delete of missing files for chk formats
516
    def _warn_unless_in_merges(self, fileid, path):
517
        if len(self.parents) <= 1:
518
            return
519
        for parent in self.parents[1:]:
520
            if fileid in self.get_inventory(parent):
521
                return
522
        self.warning("ignoring delete of %s as not in parent inventories", path)
523
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
524
525
class InventoryCommitHandler(GenericCommitHandler):
    """A CommitHandler that builds and saves Inventory objects."""

    def pre_process_files(self):
        """Prepare self.inventory for the file commands of this commit."""
        super(InventoryCommitHandler, self).pre_process_files()

        # Seed the inventory from the previous one. Note that
        # the parent class version of pre_process_files() has
        # already set the right basis_inventory for this branch
        # but we need to copy it in order to mutate it safely
        # without corrupting the cached inventory value.
        if not self.parents:
            self.inventory = self.basis_inventory
        else:
            self.inventory = copy_inventory(self.basis_inventory)
        self.inventory_root = self.inventory.root

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

        # Initialise the inventory revision info as required
        if self.rev_store.expects_rich_root():
            self.inventory.revision_id = self.revision_id
        else:
            # In this revision store, root entries have no knit or weave.
            # When serializing out to disk and back in, root.revision is
            # always the new revision_id.
            self.inventory.root.revision = self.revision_id

    def post_process_files(self):
        """Save the revision."""
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        self.rev_store.load(self.revision, self.inventory, None,
            self._get_data,
            self._get_per_file_parents,
            self._get_inventories)

    def record_new(self, path, ie):
        """Add the new entry ie to self.inventory.

        :param path: the path of the new entry (not used here)
        :param ie: the inventory entry to add; its revision is set from
            the revision store
        """
        # If this is a merge, the file was most likely added already.
        # The per-file parent(s) must therefore be calculated and
        # we can't assume there are none.
        per_file_parents, ie.revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[ie.file_id] = per_file_parents
        try:
            self.inventory.add(ie)
        except errors.DuplicateFileId:
            # Directory already exists as a file or symlink
            del self.inventory[ie.file_id]
            # Try again
            self.inventory.add(ie)

    def record_changed(self, path, ie, parent_id):
        """Replace the entry for ie.file_id in self.inventory with ie.

        :param path: the path of the changed entry (not used here)
        :param ie: the updated inventory entry; its revision is set from
            the revision store
        :param parent_id: the file-id of the directory containing ie
        """
        # HACK: no API for this (del+add does more than it needs to)
        per_file_parents, ie.revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[ie.file_id] = per_file_parents
        self.inventory._byid[ie.file_id] = ie
        parent_ie = self.inventory._byid[parent_id]
        parent_ie.children[ie.name] = ie

    def record_delete(self, path, ie):
        """Remove ie, and everything below it, from self.inventory."""
        self.inventory.remove_recursive_id(ie.file_id)

    def record_rename(self, old_path, new_path, file_id, ie):
        """Move the entry ie to new_path within self.inventory.

        :param old_path: the previous path (not used here)
        :param new_path: the path to move the entry to
        :param file_id: the file-id of the entry being renamed
        :param ie: the inventory entry being renamed
        """
        # For a rename, the revision-id is always the new one, so set it
        # up front and ignore the revision the store works out.
        ie.revision = self.revision_id
        per_file_parents, _ = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[file_id] = per_file_parents
        new_basename, new_parent_id = self._ensure_directory(new_path,
            self.inventory)
        self.inventory.rename(file_id, new_parent_id, new_basename)

    def modify_handler(self, filecmd):
        """Process a file modify command."""
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self.debug("modifying %s", filecmd.path)
        (kind, is_executable) = mode_to_kind(filecmd.mode)
        self._modify_item(filecmd.path.decode('utf8'), kind,
            is_executable, data, self.inventory)

    def delete_handler(self, filecmd):
        """Process a file delete command."""
        self.debug("deleting %s", filecmd.path)
        self._delete_item(filecmd.path.decode('utf8'), self.inventory)

    def copy_handler(self, filecmd):
        """Process a file copy command."""
        src_path = filecmd.src_path.decode('utf8')
        dest_path = filecmd.dest_path.decode('utf8')
        self.debug("copying %s to %s", src_path, dest_path)
        self._copy_item(src_path, dest_path, self.inventory)

    def rename_handler(self, filecmd):
        """Process a file rename command."""
        old_path = filecmd.old_path.decode('utf8')
        new_path = filecmd.new_path.decode('utf8')
        self.debug("renaming %s to %s", old_path, new_path)
        self._rename_item(old_path, new_path, self.inventory)

    def deleteall_handler(self, filecmd):
        """Process a delete-all command."""
        self.debug("deleting all files (and also all directories)")
        self._delete_all_items(self.inventory)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
629
630
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
631
class InventoryDeltaCommitHandler(GenericCommitHandler):
632
    """A CommitHandler that builds Inventories by applying a delta."""
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
633
634
    def pre_process_files(self):
        """Reset the per-commit delta state, seeding a root entry if needed."""
        super(InventoryDeltaCommitHandler, self).pre_process_files()
        self._dirs_that_might_become_empty = set()

        # A given file-id can only appear once so we accumulate
        # the entries in a dict then build the actual delta at the end
        self._delta_entries_by_fileid = {}
        if not self.parents or not self.rev_store.expects_rich_root():
            # Need to explicitly add the root entry for the first revision
            # and for non rich-root inventories
            if self.parents:
                old_path = ''
            else:
                old_path = None
            root_id = inventory.ROOT_ID
            root_ie = inventory.InventoryDirectory(root_id, u'', None)
            root_ie.revision = self.revision_id
            self._add_entry((old_path, '', root_id, root_ie))
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
652
653
    def post_process_files(self):
        """Save the revision.

        The final delta is applied on top of self.basis_inventory by the
        revision store and the inventory it returns is cached under
        self.revision_id.
        """
        delta = self._get_final_delta()
        inv = self.rev_store.load_using_delta(self.revision,
            self.basis_inventory, delta, None,
            self._get_data,
            self._get_per_file_parents,
            self._get_inventories)
        self.cache_mgr.inventories[self.revision_id] = inv
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
663
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
664
    def _get_final_delta(self):
665
        """Generate the final delta.
666
667
        Smart post-processing of changes, e.g. pruning of directories
668
        that would become empty, goes here.
669
        """
670
        delta = list(self._delta_entries_by_fileid.values())
671
        if self.prune_empty_dirs and self._dirs_that_might_become_empty:
0.101.2 by Tom Widmer
Update pruning code to operate in multiple passes, with subsequent passes operating on the parent dirs of dirs pruned in the previous pass.
672
            candidates = self._dirs_that_might_become_empty
673
            while candidates:
674
                never_born = set()
675
                parent_dirs_that_might_become_empty = set()
676
                for path, file_id in self._empty_after_delta(delta, candidates):
677
                    newly_added = self._new_file_ids.get(path)
678
                    if newly_added:
679
                        never_born.add(newly_added)
680
                    else:
681
                        delta.append((path, None, file_id, None))
682
                    parent_dir = osutils.dirname(path)
683
                    if parent_dir:
684
                        parent_dirs_that_might_become_empty.add(parent_dir)
685
                candidates = parent_dirs_that_might_become_empty
0.101.5 by Tom Widmer
Add missing tab characters to ensure that never born dirs are correctly removed during each pass of parent directory pruning.
686
                # Clean up entries that got deleted before they were ever added
687
                if never_born:
688
                    delta = [de for de in delta if de[2] not in never_born]
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
689
        return delta
690
691
    def _empty_after_delta(self, delta, candidates):
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
692
        #self.mutter("delta so far is:\n%s" % "\n".join([str(de) for de in delta]))
693
        #self.mutter("candidates for deletion are:\n%s" % "\n".join([c for c in candidates]))
694
        new_inv = self._get_proposed_inventory(delta)
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
695
        result = []
696
        for dir in candidates:
697
            file_id = new_inv.path2id(dir)
0.64.219 by Ian Clatworthy
More robust implicit delete logic when file-id not found
698
            if file_id is None:
699
                continue
0.96.2 by Ian Clatworthy
test and fix for implicit directory delete recursing up
700
            ie = new_inv[file_id]
0.101.2 by Tom Widmer
Update pruning code to operate in multiple passes, with subsequent passes operating on the parent dirs of dirs pruned in the previous pass.
701
            if ie.kind != 'directory':
702
                continue
0.96.2 by Ian Clatworthy
test and fix for implicit directory delete recursing up
703
            if len(ie.children) == 0:
704
                result.append((dir, file_id))
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
705
                if self.verbose:
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
706
                    self.note("pruning empty directory %s" % (dir,))
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
707
        return result
708
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
709
    def _get_proposed_inventory(self, delta):
710
        if len(self.parents):
0.114.1 by John Arbash Meinel
When post-processing the delta stream, don't ask to generate a full inventory to check for deletions.
711
            # new_inv = self.basis_inventory._get_mutable_inventory()
712
            # Note that this will create unreferenced chk pages if we end up
713
            # deleting entries, because this 'test' inventory won't end up
714
            # used. However, it is cheaper than having to create a full copy of
715
            # the inventory for every commit.
716
            new_inv = self.basis_inventory.create_by_apply_delta(delta,
717
                'not-a-valid-revision-id:')
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
718
        else:
719
            new_inv = inventory.Inventory(revision_id=self.revision_id)
720
            # This is set in the delta so remove it to prevent a duplicate
721
            del new_inv[inventory.ROOT_ID]
0.114.1 by John Arbash Meinel
When post-processing the delta stream, don't ask to generate a full inventory to check for deletions.
722
            try:
723
                new_inv.apply_delta(delta)
724
            except errors.InconsistentDelta:
725
                self.mutter("INCONSISTENT DELTA IS:\n%s" % "\n".join([str(de) for de in delta]))
726
                raise
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
727
        return new_inv
728
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
729
    def _add_entry(self, entry):
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
730
        # We need to combine the data if multiple entries have the same file-id.
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
731
        # For example, a rename followed by a modification looks like:
732
        #
733
        # (x, y, f, e) & (y, y, f, g) => (x, y, f, g)
734
        #
735
        # Likewise, a modification followed by a rename looks like:
736
        #
737
        # (x, x, f, e) & (x, y, f, g) => (x, y, f, g)
738
        #
739
        # Here's a rename followed by a delete and a modification followed by
740
        # a delete:
741
        #
742
        # (x, y, f, e) & (y, None, f, None) => (x, None, f, None)
743
        # (x, x, f, e) & (x, None, f, None) => (x, None, f, None)
744
        #
745
        # In summary, we use the original old-path, new new-path and new ie
746
        # when combining entries.
0.85.2 by Ian Clatworthy
improve per-file graph generation
747
        old_path = entry[0]
748
        new_path = entry[1]
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
749
        file_id = entry[2]
0.85.2 by Ian Clatworthy
improve per-file graph generation
750
        ie = entry[3]
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
751
        existing = self._delta_entries_by_fileid.get(file_id, None)
752
        if existing is not None:
0.85.2 by Ian Clatworthy
improve per-file graph generation
753
            old_path = existing[0]
754
            entry = (old_path, new_path, file_id, ie)
0.99.6 by Ian Clatworthy
Handle rename of a just added file
755
        if new_path is None and old_path is None:
756
            # This is a delete cancelling a previous add
757
            del self._delta_entries_by_fileid[file_id]
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
758
            parent_dir = osutils.dirname(existing[1])
759
            self.mutter("cancelling add of %s with parent %s" % (existing[1], parent_dir))
760
            if parent_dir:
761
                self._dirs_that_might_become_empty.add(parent_dir)
0.99.6 by Ian Clatworthy
Handle rename of a just added file
762
            return
763
        else:
764
            self._delta_entries_by_fileid[file_id] = entry
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
765
0.99.6 by Ian Clatworthy
Handle rename of a just added file
766
        # Collect parent directories that might become empty
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
767
        if new_path is None:
768
            # delete
769
            parent_dir = osutils.dirname(old_path)
770
            # note: no need to check the root
771
            if parent_dir:
772
                self._dirs_that_might_become_empty.add(parent_dir)
773
        elif old_path is not None and old_path != new_path:
774
            # rename
775
            old_parent_dir = osutils.dirname(old_path)
776
            new_parent_dir = osutils.dirname(new_path)
777
            if old_parent_dir and old_parent_dir != new_parent_dir:
778
                self._dirs_that_might_become_empty.add(old_parent_dir)
779
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
780
        # Calculate the per-file parents, if not already done
781
        if file_id in self.per_file_parents_for_commit:
782
            return
0.85.2 by Ian Clatworthy
improve per-file graph generation
783
        if old_path is None:
784
            # add
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
785
            # If this is a merge, the file was most likely added already.
786
            # The per-file parent(s) must therefore be calculated and
787
            # we can't assume there are none.
788
            per_file_parents, ie.revision = \
789
                self.rev_store.get_parents_and_revision_for_entry(ie)
790
            self.per_file_parents_for_commit[file_id] = per_file_parents
0.85.2 by Ian Clatworthy
improve per-file graph generation
791
        elif new_path is None:
792
            # delete
793
            pass
794
        elif old_path != new_path:
795
            # rename
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
796
            per_file_parents, _ = \
797
                self.rev_store.get_parents_and_revision_for_entry(ie)
798
            self.per_file_parents_for_commit[file_id] = per_file_parents
0.85.2 by Ian Clatworthy
improve per-file graph generation
799
        else:
800
            # modify
801
            per_file_parents, ie.revision = \
802
                self.rev_store.get_parents_and_revision_for_entry(ie)
803
            self.per_file_parents_for_commit[file_id] = per_file_parents
804
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
805
    def record_new(self, path, ie):
        """Record the addition of the entry ie at path.

        :param path: the path of the new entry
        :param ie: the new inventory entry
        """
        # An add has no old path
        self._add_entry((None, path, ie.file_id, ie))
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
807
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
808
    def record_changed(self, path, ie, parent_id=None):
        """Record a change to the entry at path.

        :param path: the path of the changed entry
        :param ie: the updated inventory entry
        :param parent_id: not used by this handler
        """
        # A modification keeps the same old and new path
        self._add_entry((path, path, ie.file_id, ie))
        self._modified_file_ids[path] = ie.file_id
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
811
0.81.9 by Ian Clatworthy
refactor delete_item
812
    def record_delete(self, path, ie):
        """Record the deletion of ie at path.

        Deleting a directory also records the deletion of everything
        self.basis_inventory lists below it.

        :param path: the path being deleted
        :param ie: the inventory entry being deleted
        """
        self._add_entry((path, None, ie.file_id, None))
        self._paths_deleted_this_commit.add(path)
        if ie.kind == 'directory':
            # Forget the directory so it is not found by later lookups
            self.directory_entries.pop(path, None)
            for child_relpath, entry in \
                self.basis_inventory.iter_entries_by_dir(from_dir=ie):
                child_path = osutils.pathjoin(path, child_relpath)
                self._add_entry((child_path, None, entry.file_id, None))
                self._paths_deleted_this_commit.add(child_path)
                if entry.kind == 'directory':
                    self.directory_entries.pop(child_path, None)
0.81.8 by Ian Clatworthy
refactor rename_item
830
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
831
    def record_rename(self, old_path, new_path, file_id, old_ie):
        """Record the rename of old_ie from old_path to new_path.

        :param old_path: the path the entry is being moved from
        :param new_path: the path the entry is being moved to
        :param file_id: the file-id of the entry being renamed
        :param old_ie: the existing inventory entry; it is left untouched
            and a modified copy goes into the delta
        """
        new_ie = old_ie.copy()
        new_basename, new_parent_id = self._ensure_directory(new_path,
            self.basis_inventory)
        new_ie.name = new_basename
        new_ie.parent_id = new_parent_id
        new_ie.revision = self.revision_id
        self._add_entry((old_path, new_path, file_id, new_ie))
        self._modified_file_ids[new_path] = file_id
        # new_path exists again, even if it was deleted earlier in this commit
        self._paths_deleted_this_commit.discard(new_path)
        if new_ie.kind == 'directory':
            self.directory_entries[new_path] = new_ie
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
843
0.99.17 by Ian Clatworthy
Handle rename of a file/symlink modified already in this commit
844
    def _rename_pending_change(self, old_path, new_path, file_id):
        """Instead of adding/modifying old-path, add new-path instead.

        :param old_path: the path added or modified earlier in this commit
        :param new_path: the path the item should end up at
        :param file_id: the file-id of the pending entry for old_path
        """
        # note: delta entries look like (old, new, file-id, ie)
        old_ie = self._delta_entries_by_fileid[file_id][3]

        # Delete the old path. Note that this might trigger implicit
        # deletion of newly created parents that could now become empty.
        self.record_delete(old_path, old_ie)

        # Update the dictionaries used for tracking new file-ids
        if old_path in self._new_file_ids:
            del self._new_file_ids[old_path]
        else:
            del self._modified_file_ids[old_path]
        self._new_file_ids[new_path] = file_id

        # Create the new InventoryEntry
        kind = old_ie.kind
        basename, parent_id = self._ensure_directory(new_path,
            self.basis_inventory)
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
        ie.revision = self.revision_id
        # Carry over the kind-specific details of the old entry
        if kind == 'file':
            ie.executable = old_ie.executable
            ie.text_sha1 = old_ie.text_sha1
            ie.text_size = old_ie.text_size
        elif kind == 'symlink':
            ie.symlink_target = old_ie.symlink_target

        # Record it
        self.record_new(new_path, ie)
875
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
876
    def modify_handler(self, filecmd):
        """Process a file modify command.

        :param filecmd: the command; its mode, dataref, data and path
            attributes are used
        """
        (kind, executable) = mode_to_kind(filecmd.mode)
        if filecmd.dataref is None:
            # The content came inline with the command
            data = filecmd.data
        elif kind == "directory":
            data = None
        elif kind == "tree-reference":
            data = filecmd.dataref
        else:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        self.debug("modifying %s", filecmd.path)
        self._modify_item(filecmd.path.decode('utf8'), kind,
            executable, data, self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
890
891
    def delete_handler(self, filecmd):
        """Handle a file-delete command by delegating to _delete_item().

        :param filecmd: the delete command; only its ``path`` attribute
            is read.
        """
        # The raw path is what gets logged; the UTF-8 decoded form is what
        # is handed to _delete_item() together with the basis inventory.
        self.debug("deleting %s", filecmd.path)
        self._delete_item(filecmd.path.decode('utf8'), self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
894
895
    def copy_handler(self, filecmd):
        """Handle a file-copy command by delegating to _copy_item().

        :param filecmd: the copy command; its ``src_path`` and
            ``dest_path`` attributes are read and decoded as UTF-8.
        """
        # Both paths are decoded up front so that the log message and the
        # call to _copy_item() see the same decoded values.
        src_path = filecmd.src_path.decode("utf8")
        dest_path = filecmd.dest_path.decode("utf8")
        self.debug("copying %s to %s", src_path, dest_path)
        self._copy_item(src_path, dest_path, self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
900
901
    def rename_handler(self, filecmd):
        """Handle a file-rename command by delegating to _rename_item().

        :param filecmd: the rename command; its ``old_path`` and
            ``new_path`` attributes are read and decoded as UTF-8.
        """
        # Both paths are decoded up front so that the log message and the
        # call to _rename_item() see the same decoded values.
        old_path = filecmd.old_path.decode("utf8")
        new_path = filecmd.new_path.decode("utf8")
        self.debug("renaming %s to %s", old_path, new_path)
        self._rename_item(old_path, new_path, self.basis_inventory)
906
907
    def deleteall_handler(self, filecmd):
        """Handle a delete-all command by emptying the basis inventory.

        :param filecmd: the delete-all command; none of its attributes
            are read here.
        """
        self.debug("deleting all files (and also all directories)")
        # I'm not 100% sure this will work in the delta case.
        # But clearing out the basis inventory so that everything
        # is added sounds ok in theory ...
        # We grab a copy as the basis is likely to be cached and
        # we don't want to destroy the cached version.  The copy
        # replaces self.basis_inventory, so it is the copy that
        # _delete_all_items() is then given.
        self.basis_inventory = copy_inventory(self.basis_inventory)
        self._delete_all_items(self.basis_inventory)