/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""CommitHandlers that build and save revisions & their inventories."""
18
19
20
from bzrlib import (
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
21
    debug,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
22
    errors,
23
    generate_ids,
24
    inventory,
25
    osutils,
26
    revision,
0.64.192 by Ian Clatworthy
delegate commit message escaping to the serializer if it's a modern one
27
    serializer,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
28
    )
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
29
from bzrlib.trace import (
30
    mutter,
31
    note,
32
    warning,
33
    )
0.123.2 by Jelmer Vernooij
Split out fastimport, import it from the system.
34
from fastimport import (
0.123.1 by Jelmer Vernooij
Move pure-fastimport code into its own directory, in preparation of splitting it into a separate package.
35
    helpers,
36
    processor,
37
    )
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
38
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
39
from bzrlib.plugins.fastimport.helpers import (
40
    mode_to_kind,
41
    )
42
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
43
0.64.192 by Ian Clatworthy
delegate commit message escaping to the serializer if it's a modern one
44
_serializer_handles_escaping = hasattr(serializer.Serializer,
45
    'squashes_xml_invalid_characters')
46
0.64.318 by Jelmer Vernooij
Avoid Inventory.copy, which has disappeared in newer versions of Bazaar.
47
0.84.3 by Ian Clatworthy
fix inventory copying when using deltas
48
def copy_inventory(inv):
    """Build a fresh Inventory holding copies of every entry in inv.

    The new inventory is created with a root id of None and the same
    revision_id as inv. Entries are taken from inv.iter_entries_by_dir()
    and each one is duplicated with its copy() method before being added.

    :param inv: the inventory to duplicate
    :return: the newly built inventory
    """
    source_entries = inv.iter_entries_by_dir()
    duplicate = inventory.Inventory(None, inv.revision_id)
    for _path, entry in source_entries:
        duplicate.add(entry.copy())
    return duplicate
0.84.3 by Ian Clatworthy
fix inventory copying when using deltas
54
55
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
56
class GenericCommitHandler(processor.CommitHandler):
57
    """Base class for Bazaar CommitHandlers."""
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
58
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
59
    def __init__(self, command, cache_mgr, rev_store, verbose=False,
        prune_empty_dirs=True):
        """Remember the collaborators and reset the per-commit tracking.

        :param command: the commit command being handled; its ref is
          remembered as branch_ref
        :param cache_mgr: stored as self.cache_mgr
        :param rev_store: stored as self.rev_store
        :param verbose: stored as self.verbose
        :param prune_empty_dirs: stored as self.prune_empty_dirs
        """
        super(GenericCommitHandler, self).__init__(command)
        self.branch_ref = command.ref
        self.cache_mgr = cache_mgr
        self.rev_store = rev_store
        self.verbose = verbose
        self.prune_empty_dirs = prune_empty_dirs

        # path -> file-id for everything created by this commit.
        # A path created more than once is added a single time (with a
        # warning to the user), and a path that is added and then renamed
        # or copied has to be handled as well.
        self._new_file_ids = {}

        # path -> file-id for everything modified by this commit.
        # When a path is modified and then renamed or copied, we need to
        # make sure the new content is the one picked up.
        self._modified_file_ids = {}

        # Paths deleted by this commit. If one of them is added again, or
        # becomes the destination of a rename say, it needs a fresh file-id.
        self._paths_deleted_this_commit = set()
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
80
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
81
    def mutter(self, msg, *args):
        """Mutter msg with the id of the current command appended."""
        mutter("%s (%s)" % (msg, self.command.id), *args)
85
86
    def debug(self, msg, *args):
        """Mutter msg, with command context, only under -Dfast-import.

        Nothing is output unless "fast-import" is in debug.debug_flags.
        """
        if "fast-import" not in debug.debug_flags:
            return
        mutter("%s (%s)" % (msg, self.command.id), *args)
91
92
    def note(self, msg, *args):
        """Emit msg as a note with the id of the current command appended."""
        note("%s (%s)" % (msg, self.command.id), *args)
96
97
    def warning(self, msg, *args):
        """Emit msg as a warning with the id of the current command appended."""
        warning("%s (%s)" % (msg, self.command.id), *args)
101
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
102
    def pre_process_files(self):
        """Set up the per-commit state before the file commands are handled.

        This generates the revision id, resolves the parent revision ids,
        builds the Revision object, tells the revision store a new revision
        is starting, and initialises the caches and basis inventory used
        while the commit's files are processed.
        """
        self.revision_id = self.gen_revision_id()

        # Texts for this commit, keyed by file-id. The root entry is
        # registered unconditionally (a rich-root check used to guard it).
        self.data_for_commit = {}
        self.data_for_commit[inventory.ROOT_ID] = []

        # Update the head tracking and obtain the real parent list, then
        # map those parent commit-ids onto bzr revision-ids.
        head_ids = self.cache_mgr.reftracker.track_heads(self.command)
        self.parents = [self.cache_mgr.lookup_committish(head)
            for head in (head_ids or ())]
        self.debug("%s id: %s, parents: %s", self.command.id,
            self.revision_id, str(self.parents))

        # Let the RevisionStore know a new commit is starting
        self.revision = self.build_revision()
        self.parent_invs = [self.get_inventory(parent_id)
            for parent_id in self.parents]
        self.rev_store.start_new_revision(
            self.revision, self.parents, self.parent_invs)

        # Per-file parents for this commit, keyed by file-id
        self.per_file_parents_for_commit = {}
        if self.rev_store.expects_rich_root():
            self.per_file_parents_for_commit[inventory.ROOT_ID] = ()

        # The basis inventory is kept around and must be treated as
        # read-only. With no parents we start from a fresh inventory.
        if self.parents:
            self.basis_inventory = self.get_inventory(self.parents[0])
        else:
            self.basis_inventory = self._init_inventory()

        # Some inventories expose root_id directly; otherwise go via root.
        basis = self.basis_inventory
        if hasattr(basis, "root_id"):
            self.inventory_root_id = basis.root_id
        else:
            self.inventory_root_id = basis.root.file_id

        # directory-path -> inventory-entry for the current inventory
        self.directory_entries = {}
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
145
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
146
    def _init_inventory(self):
147
        return self.rev_store.init_inventory(self.revision_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
148
149
    def get_inventory(self, revision_id):
150
        """Get the inventory for a revision id."""
151
        try:
152
            inv = self.cache_mgr.inventories[revision_id]
153
        except KeyError:
154
            if self.verbose:
0.64.148 by Ian Clatworthy
handle delete of unknown file in chk formats & reduce noise
155
                self.mutter("get_inventory cache miss for %s", revision_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
156
            # Not cached so reconstruct from the RevisionStore
157
            inv = self.rev_store.get_inventory(revision_id)
158
            self.cache_mgr.inventories[revision_id] = inv
159
        return inv
160
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
161
    def _get_data(self, file_id):
162
        """Get the data bytes for a file-id."""
163
        return self.data_for_commit[file_id]
164
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
165
    def _get_lines(self, file_id):
        """Return the cached data for file_id split into lines."""
        data = self._get_data(file_id)
        return osutils.split_lines(data)
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
168
0.85.2 by Ian Clatworthy
improve per-file graph generation
169
    def _get_per_file_parents(self, file_id):
170
        """Get the lines for a file-id."""
171
        return self.per_file_parents_for_commit[file_id]
172
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
173
    def _get_inventories(self, revision_ids):
174
        """Get the inventories for revision-ids.
175
        
176
        This is a callback used by the RepositoryStore to
177
        speed up inventory reconstruction.
178
        """
179
        present = []
180
        inventories = []
181
        # If an inventory is in the cache, we assume it was
182
        # successfully loaded into the revision store
183
        for revision_id in revision_ids:
184
            try:
185
                inv = self.cache_mgr.inventories[revision_id]
186
                present.append(revision_id)
187
            except KeyError:
188
                if self.verbose:
189
                    self.note("get_inventories cache miss for %s", revision_id)
190
                # Not cached so reconstruct from the revision store
191
                try:
192
                    inv = self.get_inventory(revision_id)
193
                    present.append(revision_id)
194
                except:
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
195
                    inv = self._init_inventory()
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
196
                self.cache_mgr.inventories[revision_id] = inv
197
            inventories.append(inv)
198
        return present, inventories
199
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
200
    def bzr_file_id_and_new(self, path):
201
        """Get a Bazaar file identifier and new flag for a path.
202
        
203
        :return: file_id, is_new where
204
          is_new = True if the file_id is newly created
205
        """
0.99.13 by Ian Clatworthy
Handle delete then add of a file/symlink in the one commit
206
        if path not in self._paths_deleted_this_commit:
0.99.19 by Ian Clatworthy
Handle rename then modification of the new path
207
            # Try file-ids renamed in this commit
208
            id = self._modified_file_ids.get(path)
209
            if id is not None:
210
                return id, False
211
0.99.13 by Ian Clatworthy
Handle delete then add of a file/symlink in the one commit
212
            # Try the basis inventory
213
            id = self.basis_inventory.path2id(path)
214
            if id is not None:
215
                return id, False
216
            
217
            # Try the other inventories
218
            if len(self.parents) > 1:
219
                for inv in self.parent_invs[1:]:
220
                    id = self.basis_inventory.path2id(path)
221
                    if id is not None:
222
                        return id, False
0.99.1 by Ian Clatworthy
lookup file-ids in inventories instead of a cache
223
224
        # Doesn't exist yet so create it
0.64.247 by Ian Clatworthy
base file-ids on the basename, not path, as jam suggested. This improves the samba import from 565M to 353M.
225
        dirname, basename = osutils.split(path)
226
        id = generate_ids.gen_file_id(basename)
0.99.1 by Ian Clatworthy
lookup file-ids in inventories instead of a cache
227
        self.debug("Generated new file id %s for '%s' in revision-id '%s'",
228
            id, path, self.revision_id)
0.99.5 by Ian Clatworthy
handle adding the same file twice in the one commit
229
        self._new_file_ids[path] = id
0.99.1 by Ian Clatworthy
lookup file-ids in inventories instead of a cache
230
        return id, True
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
231
232
    def bzr_file_id(self, path):
233
        """Get a Bazaar file identifier for a path."""
234
        return self.bzr_file_id_and_new(path)[0]
235
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
236
    def _utf8_decode(self, field, value):
237
        try:
238
            return value.decode('utf_8')
239
        except UnicodeDecodeError:
240
            # The spec says fields are *typically* utf8 encoded
241
            # but that isn't enforced by git-fast-export (at least)
242
            self.warning("%s not in utf8 - replacing unknown "
243
                "characters" % (field,))
244
            return value.decode('utf_8', 'replace')
245
246
    def _format_name_email(self, section, name, email):
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
247
        """Format name & email as a string."""
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
248
        name = self._utf8_decode("%s name" % section, name)
249
        email = self._utf8_decode("%s email" % section, email)
250
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
251
        if email:
252
            return "%s <%s>" % (name, email)
253
        else:
254
            return name
255
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
256
    def gen_revision_id(self):
        """Generate the revision id for this commit.

        The id is derived from the formatted committer and the commit
        timestamp. Subclasses may override this, to produce deterministic
        ids say.
        """
        committer = self.command.committer
        timestamp = committer[2]
        # Perhaps using the person running the import as 'who' is ok? If
        # so, it might be a bit quicker and compress slightly better?
        who = self._format_name_email("committer", committer[0], committer[1])
        return generate_ids.gen_revision_id(who, timestamp)
267
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
268
    def build_revision(self):
        """Build the Revision object describing this commit.

        Revision properties come from the command (sanitised), with
        'branch-nick' defaulting to the mapped branch ref and author
        information added when relevant. The commit message is utf8
        decoded, replacing undecodable bytes with a warning, and escaped
        here only if the serializer doesn't do that itself.
        """
        rev_props = self._legal_revision_properties(self.command.properties)
        if 'branch-nick' not in rev_props:
            nick = self.cache_mgr.branch_mapper.git_to_bzr(self.branch_ref)
            rev_props['branch-nick'] = nick
        self._save_author_info(rev_props)

        committer = self.command.committer
        who = self._format_name_email("committer", committer[0], committer[1])

        raw_message = self.command.message
        try:
            message = raw_message.decode("utf-8")
        except UnicodeDecodeError:
            self.warning(
                "commit message not in utf8 - replacing unknown characters")
            message = raw_message.decode('utf-8', 'replace')
        if not _serializer_handles_escaping:
            # We need to assume the bad ol' days
            message = helpers.escape_commit_message(message)

        return revision.Revision(
            timestamp=committer[2],
            timezone=committer[3],
            committer=who,
            message=message,
            revision_id=self.revision_id,
            properties=rev_props,
            parent_ids=self.parents)
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
294
0.64.235 by Ian Clatworthy
Sanitize None revision properties to empty string
295
    def _legal_revision_properties(self, props):
296
        """Clean-up any revision properties we can't handle."""
297
        # For now, we just check for None because that's not allowed in 2.0rc1
298
        result = {}
299
        if props is not None:
300
            for name, value in props.items():
301
                if value is None:
302
                    self.warning(
303
                        "converting None to empty string for property %s"
304
                        % (name,))
305
                    result[name] = ''
306
                else:
307
                    result[name] = value
308
        return result
309
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
310
    def _save_author_info(self, rev_props):
311
        author = self.command.author
312
        if author is None:
313
            return
314
        if self.command.more_authors:
315
            authors = [author] + self.command.more_authors
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
316
            author_ids = [self._format_name_email("author", a[0], a[1]) for a in authors]
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
317
        elif author != self.command.committer:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
318
            author_ids = [self._format_name_email("author", author[0], author[1])]
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
319
        else:
320
            return
321
        # If we reach here, there are authors worth storing
322
        rev_props['authors'] = "\n".join(author_ids)
323
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
324
    def _modify_item(self, path, kind, is_executable, data, inv):
        """Add to or change an item in the inventory.

        :param path: the path of the item
        :param kind: 'file', 'directory' or 'symlink'; items of any other
          kind are ignored with a warning
        :param is_executable: executable flag, used for files only
        :param data: the file content, or the utf8 encoded target for a
          symlink
        :param inv: the inventory used to look up existing entries
        """
        # If we've already added this, warn the user that we're ignoring it.
        # In the future, it might be nice to double check that the new data
        # is the same as the old but, frankly, exporters should be fixed
        # not to produce bad data streams in the first place ...
        existing = self._new_file_ids.get(path)
        if existing:
            # We don't warn about directories because it's fine for them
            # to be created already by a previous rename
            if kind != 'directory':
                self.warning("%s already added in this commit - ignoring" %
                    (path,))
            return

        # Create the new InventoryEntry
        basename, parent_id = self._ensure_directory(path, inv)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
        ie.revision = self.revision_id
        if kind == 'file':
            ie.executable = is_executable
            ie.text_sha1 = osutils.sha_string(data)
            ie.text_size = len(data)
            self.data_for_commit[file_id] = data
        elif kind == 'directory':
            self.directory_entries[path] = ie
            # There are no lines stored for a directory so
            # make sure the cache used by get_lines knows that
            self.data_for_commit[file_id] = ''
        elif kind == 'symlink':
            ie.symlink_target = data.decode('utf8')
            # There are no lines stored for a symlink so
            # make sure the cache used by get_lines knows that
            self.data_for_commit[file_id] = ''
        else:
            self.warning("Cannot import items of kind '%s' yet - ignoring '%s'"
                % (kind, path))
            return

        # Record it
        if inv.has_id(file_id):
            old_ie = inv[file_id]
            if old_ie.kind == 'directory':
                self.record_delete(path, old_ie)
            self.record_changed(path, ie, parent_id)
        else:
            try:
                self.record_new(path, ie)
            except:
                # Diagnostics only - the original error is re-raised below,
                # so nothing in here may raise or handle another exception.
                # ie.parent_id is a file-id, not an entry, so the parent
                # directory entry is taken from directory_entries (None
                # for the root or if it isn't known).
                parent_ie = self.directory_entries.get(osutils.split(path)[0])
                print("failed to add path '%s' with entry '%s' in command %s"
                    % (path, ie, self.command.id))
                print("parent's children are:\n%r\n"
                    % (getattr(parent_ie, 'children', None),))
                raise
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
378
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
379
    def _ensure_directory(self, path, inv):
        """Make sure the directory containing path exists, creating it if not.

        Missing ancestors are created recursively and each new directory
        is recorded via record_new().

        :param path: the path whose containing directory is wanted
        :param inv: the inventory to look existing directories up in
        :return: basename, parent_id where basename is the last component
          of path and parent_id is the file-id of its containing directory
        """
        parent_path, basename = osutils.split(path)
        if parent_path == '':
            # the root node doesn't get updated
            return basename, self.inventory_root_id
        try:
            known_dir = self._get_directory_entry(inv, parent_path)
        except KeyError:
            # It doesn't exist, so it gets created below
            pass
        else:
            return basename, known_dir.file_id

        # No such directory yet. Make sure its own parent exists first,
        # then create the entry for it.
        dir_basename, grandparent_id = self._ensure_directory(parent_path, inv)
        dir_file_id = self.bzr_file_id(parent_path)
        make_directory = inventory.entry_factory['directory']
        new_dir = make_directory(dir_file_id, dir_basename, grandparent_id)
        new_dir.revision = self.revision_id
        self.directory_entries[parent_path] = new_dir
        # Directories have no lines, so tell the cache used by
        # get_lines about that
        self.data_for_commit[dir_file_id] = ''

        # A file or symlink with that file-id may already exist.
        # If so, it needs to be deleted.
        if inv.has_id(dir_file_id):
            self.record_delete(parent_path, new_dir)
        self.record_new(parent_path, new_dir)
        return basename, new_dir.file_id
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
411
0.84.12 by Ian Clatworthy
lookup directories on demand in CHKInventories, not all upfront
412
    def _get_directory_entry(self, inv, dirname):
413
        """Get the inventory entry for a directory.
414
        
415
        Raises KeyError if dirname is not a directory in inv.
416
        """
417
        result = self.directory_entries.get(dirname)
418
        if result is None:
0.99.21 by Ian Clatworthy
Handle deleting a directory then adding a file within it in the same commit
419
            if dirname in self._paths_deleted_this_commit:
420
                raise KeyError
0.64.146 by Ian Clatworthy
fix first file is in a subdirectory bug for chk formats
421
            try:
422
                file_id = inv.path2id(dirname)
423
            except errors.NoSuchId:
424
                # In a CHKInventory, this is raised if there's no root yet
425
                raise KeyError
0.84.12 by Ian Clatworthy
lookup directories on demand in CHKInventories, not all upfront
426
            if file_id is None:
427
                raise KeyError
428
            result = inv[file_id]
429
            # dirname must be a directory for us to return it
430
            if result.kind == 'directory':
431
                self.directory_entries[dirname] = result
432
            else:
433
                raise KeyError
434
        return result
435
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
436
    def _delete_item(self, path, inv):
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
437
        newly_added = self._new_file_ids.get(path)
438
        if newly_added:
439
            # We've only just added this path earlier in this commit.
440
            file_id = newly_added
441
            # note: delta entries look like (old, new, file-id, ie)
442
            ie = self._delta_entries_by_fileid[file_id][3]
0.64.145 by Ian Clatworthy
handle delete of missing files for chk formats
443
        else:
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
444
            file_id = inv.path2id(path)
445
            if file_id is None:
446
                self.mutter("ignoring delete of %s as not in inventory", path)
447
                return
448
            try:
449
                ie = inv[file_id]
450
            except errors.NoSuchId:
451
                self.mutter("ignoring delete of %s as not in inventory", path)
452
                return
453
        self.record_delete(path, ie)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
454
455
    def _copy_item(self, src_path, dest_path, inv):
0.99.18 by Ian Clatworthy
Handle copy of a file/symlink already modified in this commit
456
        newly_changed = self._new_file_ids.get(src_path) or \
457
            self._modified_file_ids.get(src_path)
458
        if newly_changed:
459
            # We've only just added/changed this path earlier in this commit.
460
            file_id = newly_changed
0.99.8 by Ian Clatworthy
handle copy of a newly added file
461
            # note: delta entries look like (old, new, file-id, ie)
462
            ie = self._delta_entries_by_fileid[file_id][3]
463
        else:
464
            file_id = inv.path2id(src_path)
465
            if file_id is None:
466
                self.warning("ignoring copy of %s to %s - source does not exist",
467
                    src_path, dest_path)
468
                return
469
            ie = inv[file_id]
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
470
        kind = ie.kind
471
        if kind == 'file':
0.99.18 by Ian Clatworthy
Handle copy of a file/symlink already modified in this commit
472
            if newly_changed:
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
473
                content = self.data_for_commit[file_id]
0.99.8 by Ian Clatworthy
handle copy of a newly added file
474
            else:
475
                content = self.rev_store.get_file_text(self.parents[0], file_id)
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
476
            self._modify_item(dest_path, kind, ie.executable, content, inv)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
477
        elif kind == 'symlink':
0.64.289 by Jelmer Vernooij
Cope with non-ascii characters in symbolic links.
478
            self._modify_item(dest_path, kind, False, ie.symlink_target.encode("utf-8"), inv)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
479
        else:
480
            self.warning("ignoring copy of %s %s - feature not yet supported",
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
481
                kind, dest_path)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
482
483
    def _rename_item(self, old_path, new_path, inv):
0.99.17 by Ian Clatworthy
Handle rename of a file/symlink modified already in this commit
484
        existing = self._new_file_ids.get(old_path) or \
485
            self._modified_file_ids.get(old_path)
0.99.6 by Ian Clatworthy
Handle rename of a just added file
486
        if existing:
0.99.17 by Ian Clatworthy
Handle rename of a file/symlink modified already in this commit
487
            # We've only just added/modified this path earlier in this commit.
488
            # Change the add/modify of old_path to an add of new_path
489
            self._rename_pending_change(old_path, new_path, existing)
0.99.6 by Ian Clatworthy
Handle rename of a just added file
490
            return
491
0.81.8 by Ian Clatworthy
refactor rename_item
492
        file_id = inv.path2id(old_path)
0.64.167 by Ian Clatworthy
incremental packing for chk formats
493
        if file_id is None:
494
            self.warning(
495
                "ignoring rename of %s to %s - old path does not exist" %
496
                (old_path, new_path))
497
            return
0.81.8 by Ian Clatworthy
refactor rename_item
498
        ie = inv[file_id]
499
        rev_id = ie.revision
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
500
        new_file_id = inv.path2id(new_path)
501
        if new_file_id is not None:
0.81.9 by Ian Clatworthy
refactor delete_item
502
            self.record_delete(new_path, inv[new_file_id])
0.81.8 by Ian Clatworthy
refactor rename_item
503
        self.record_rename(old_path, new_path, file_id, ie)
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
504
0.81.8 by Ian Clatworthy
refactor rename_item
505
        # The revision-id for this entry will be/has been updated and
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
506
        # that means the loader then needs to know what the "new" text is.
507
        # We therefore must go back to the revision store to get it.
0.81.8 by Ian Clatworthy
refactor rename_item
508
        lines = self.rev_store.get_file_lines(rev_id, file_id)
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
509
        self.data_for_commit[file_id] = ''.join(lines)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
510
511
    def _delete_all_items(self, inv):
0.64.320 by Jelmer Vernooij
Fix deleteall handler.
512
        if len(inv) == 0:
513
            return
514
        for path, ie in inv.iter_entries_by_dir():
515
            if path != "":
516
                self.record_delete(path, ie)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
517
0.64.145 by Ian Clatworthy
handle delete of missing files for chk formats
518
    def _warn_unless_in_merges(self, fileid, path):
519
        if len(self.parents) <= 1:
520
            return
521
        for parent in self.parents[1:]:
522
            if fileid in self.get_inventory(parent):
523
                return
524
        self.warning("ignoring delete of %s as not in parent inventories", path)
525
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
526
527
class InventoryCommitHandler(GenericCommitHandler):
    """A CommitHandler that builds and saves Inventory objects.

    Changes are applied directly to a working inventory
    (``self.inventory``) which is handed to the revision store once all
    file commands of the commit have been processed.
    """

    def pre_process_files(self):
        """Prepare the working inventory before file commands are handled."""
        super(InventoryCommitHandler, self).pre_process_files()

        # Seed the inventory from the previous one. Note that
        # the parent class version of pre_process_files() has
        # already set the right basis_inventory for this branch
        # but we need to copy it in order to mutate it safely
        # without corrupting the cached inventory value.
        if len(self.parents) == 0:
            self.inventory = self.basis_inventory
        else:
            self.inventory = copy_inventory(self.basis_inventory)
        self.inventory_root = self.inventory.root

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

        # Initialise the inventory revision info as required
        if self.rev_store.expects_rich_root():
            self.inventory.revision_id = self.revision_id
        else:
            # In this revision store, root entries have no knit or weave.
            # When serializing out to disk and back in, root.revision is
            # always the new revision_id.
            self.inventory.root.revision = self.revision_id

    def post_process_files(self):
        """Save the revision."""
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        self.rev_store.load(self.revision, self.inventory, None,
            self._get_data,
            self._get_per_file_parents,
            self._get_inventories)

    def record_new(self, path, ie):
        """Add ie to the working inventory.

        :param path: path of the new entry (not used here)
        :param ie: inventory entry to add; its revision is updated
        """
        # If this is a merge, the file was most likely added already.
        # The per-file parent(s) must therefore be calculated and
        # we can't assume there are none.
        per_file_parents, ie.revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[ie.file_id] = per_file_parents
        try:
            self.inventory.add(ie)
        except errors.DuplicateFileId:
            # Directory already exists as a file or symlink
            del self.inventory[ie.file_id]
            # Try again
            self.inventory.add(ie)

    def record_changed(self, path, ie, parent_id):
        """Replace the entry for ie.file_id in the working inventory.

        :param path: path of the changed entry (not used here)
        :param ie: the new inventory entry; its revision is updated
        :param parent_id: file-id of the directory containing ie
        """
        # HACK: no API for this (del+add does more than it needs to)
        per_file_parents, ie.revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[ie.file_id] = per_file_parents
        self.inventory._byid[ie.file_id] = ie
        parent_ie = self.inventory._byid[parent_id]
        parent_ie.children[ie.name] = ie

    def record_delete(self, path, ie):
        """Remove ie from the working inventory.

        :param path: path of the deleted entry (not used here)
        :param ie: inventory entry to remove via ``remove_recursive_id``
        """
        self.inventory.remove_recursive_id(ie.file_id)

    def record_rename(self, old_path, new_path, file_id, ie):
        """Rename the entry file_id to new_path in the working inventory.

        :param old_path: previous path (not used here)
        :param new_path: path the entry is moved to
        :param file_id: file-id of the entry being renamed
        :param ie: inventory entry being renamed; its revision is set to
            this commit's revision id
        """
        # For a rename, the revision-id is always the new one, so set it
        # up front and ignore the revision returned by the store.
        ie.revision = self.revision_id
        per_file_parents, _ = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[file_id] = per_file_parents
        new_basename, new_parent_id = self._ensure_directory(new_path,
            self.inventory)
        self.inventory.rename(file_id, new_parent_id, new_basename)

    def modify_handler(self, filecmd):
        """Handle a file modification command.

        Content is taken from the referenced blob or from the inline data.
        As in InventoryDeltaCommitHandler, a dataref on a directory is
        ignored and a dataref on a tree-reference is passed through rather
        than being looked up as a blob.
        """
        (kind, is_executable) = mode_to_kind(filecmd.mode)
        if filecmd.dataref is not None:
            if kind == "directory":
                data = None
            elif kind == "tree-reference":
                data = filecmd.dataref
            else:
                data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self.debug("modifying %s", filecmd.path)
        self._modify_item(filecmd.path.decode('utf8'), kind,
            is_executable, data, self.inventory)

    def delete_handler(self, filecmd):
        """Handle a file delete command."""
        self.debug("deleting %s", filecmd.path)
        self._delete_item(filecmd.path.decode('utf8'), self.inventory)

    def copy_handler(self, filecmd):
        """Handle a file copy command."""
        src_path = filecmd.src_path.decode('utf8')
        dest_path = filecmd.dest_path.decode('utf8')
        self.debug("copying %s to %s", src_path, dest_path)
        self._copy_item(src_path, dest_path, self.inventory)

    def rename_handler(self, filecmd):
        """Handle a file rename command."""
        old_path = filecmd.old_path.decode('utf8')
        new_path = filecmd.new_path.decode('utf8')
        self.debug("renaming %s to %s", old_path, new_path)
        self._rename_item(old_path, new_path, self.inventory)

    def deleteall_handler(self, filecmd):
        """Handle a delete-all command."""
        self.debug("deleting all files (and also all directories)")
        self._delete_all_items(self.inventory)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
631
632
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
633
class InventoryDeltaCommitHandler(GenericCommitHandler):
634
    """A CommitHandler that builds Inventories by applying a delta."""
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
635
636
    def pre_process_files(self):
        """Reset the per-commit delta state before file commands are handled.

        A root entry is explicitly added to the delta for the first
        revision and for stores that do not expect rich roots.
        """
        super(InventoryDeltaCommitHandler, self).pre_process_files()
        self._dirs_that_might_become_empty = set()

        # A given file-id can only appear once so we accumulate
        # the entries in a dict then build the actual delta at the end
        self._delta_entries_by_fileid = {}

        if self.parents and self.rev_store.expects_rich_root():
            return

        # Need to explicitly add the root entry for the first revision
        # and for non rich-root inventories
        root_old_path = '' if self.parents else None
        root_id = inventory.ROOT_ID
        root_ie = inventory.InventoryDirectory(root_id, u'', None)
        root_ie.revision = self.revision_id
        self._add_entry((root_old_path, '', root_id, root_ie))
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
654
655
    def post_process_files(self):
        """Save the revision.

        The final delta is applied on top of the basis inventory by the
        revision store and the inventory it returns is cached under this
        commit's revision id.
        """
        final_delta = self._get_final_delta()
        store = self.rev_store
        new_inv = store.load_using_delta(
            self.revision,
            self.basis_inventory,
            final_delta,
            None,
            self._get_data,
            self._get_per_file_parents,
            self._get_inventories)
        self.cache_mgr.inventories[self.revision_id] = new_inv
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
665
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
666
    def _get_final_delta(self):
667
        """Generate the final delta.
668
669
        Smart post-processing of changes, e.g. pruning of directories
670
        that would become empty, goes here.
671
        """
672
        delta = list(self._delta_entries_by_fileid.values())
673
        if self.prune_empty_dirs and self._dirs_that_might_become_empty:
0.101.2 by Tom Widmer
Update pruning code to operate in multiple passes, with subsequent passes operating on the parent dirs of dirs pruned in the previous pass.
674
            candidates = self._dirs_that_might_become_empty
675
            while candidates:
676
                never_born = set()
677
                parent_dirs_that_might_become_empty = set()
678
                for path, file_id in self._empty_after_delta(delta, candidates):
679
                    newly_added = self._new_file_ids.get(path)
680
                    if newly_added:
681
                        never_born.add(newly_added)
682
                    else:
683
                        delta.append((path, None, file_id, None))
684
                    parent_dir = osutils.dirname(path)
685
                    if parent_dir:
686
                        parent_dirs_that_might_become_empty.add(parent_dir)
687
                candidates = parent_dirs_that_might_become_empty
0.101.5 by Tom Widmer
Add missing tab characters to ensure that never born dirs are correctly removed during each pass of parent directory pruning.
688
                # Clean up entries that got deleted before they were ever added
689
                if never_born:
690
                    delta = [de for de in delta if de[2] not in never_born]
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
691
        return delta
692
693
    def _empty_after_delta(self, delta, candidates):
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
694
        #self.mutter("delta so far is:\n%s" % "\n".join([str(de) for de in delta]))
695
        #self.mutter("candidates for deletion are:\n%s" % "\n".join([c for c in candidates]))
696
        new_inv = self._get_proposed_inventory(delta)
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
697
        result = []
698
        for dir in candidates:
699
            file_id = new_inv.path2id(dir)
0.64.219 by Ian Clatworthy
More robust implicit delete logic when file-id not found
700
            if file_id is None:
701
                continue
0.96.2 by Ian Clatworthy
test and fix for implicit directory delete recursing up
702
            ie = new_inv[file_id]
0.101.2 by Tom Widmer
Update pruning code to operate in multiple passes, with subsequent passes operating on the parent dirs of dirs pruned in the previous pass.
703
            if ie.kind != 'directory':
704
                continue
0.96.2 by Ian Clatworthy
test and fix for implicit directory delete recursing up
705
            if len(ie.children) == 0:
706
                result.append((dir, file_id))
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
707
                if self.verbose:
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
708
                    self.note("pruning empty directory %s" % (dir,))
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
709
        return result
710
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
711
    def _get_proposed_inventory(self, delta):
        """Build the inventory that would result from applying delta.

        :param delta: list of delta entries to apply
        :return: a new inventory; the basis inventory with delta applied
            when there are parents, otherwise a fresh inventory built
            from delta alone
        """
        if self.parents:
            # Note that this will create unreferenced chk pages if we end up
            # deleting entries, because this 'test' inventory won't end up
            # used. However, it is cheaper than having to create a full copy of
            # the inventory for every commit.
            return self.basis_inventory.create_by_apply_delta(delta,
                'not-a-valid-revision-id:')

        fresh_inv = inventory.Inventory(revision_id=self.revision_id)
        # This is set in the delta so remove it to prevent a duplicate
        del fresh_inv[inventory.ROOT_ID]
        try:
            fresh_inv.apply_delta(delta)
        except errors.InconsistentDelta:
            entry_dump = "\n".join(str(de) for de in delta)
            self.mutter("INCONSISTENT DELTA IS:\n%s" % entry_dump)
            raise
        return fresh_inv
730
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
731
    def _add_entry(self, entry):
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
732
        # We need to combine the data if multiple entries have the same file-id.
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
733
        # For example, a rename followed by a modification looks like:
734
        #
735
        # (x, y, f, e) & (y, y, f, g) => (x, y, f, g)
736
        #
737
        # Likewise, a modification followed by a rename looks like:
738
        #
739
        # (x, x, f, e) & (x, y, f, g) => (x, y, f, g)
740
        #
741
        # Here's a rename followed by a delete and a modification followed by
742
        # a delete:
743
        #
744
        # (x, y, f, e) & (y, None, f, None) => (x, None, f, None)
745
        # (x, x, f, e) & (x, None, f, None) => (x, None, f, None)
746
        #
747
        # In summary, we use the original old-path, new new-path and new ie
748
        # when combining entries.
0.85.2 by Ian Clatworthy
improve per-file graph generation
749
        old_path = entry[0]
750
        new_path = entry[1]
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
751
        file_id = entry[2]
0.85.2 by Ian Clatworthy
improve per-file graph generation
752
        ie = entry[3]
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
753
        existing = self._delta_entries_by_fileid.get(file_id, None)
754
        if existing is not None:
0.85.2 by Ian Clatworthy
improve per-file graph generation
755
            old_path = existing[0]
756
            entry = (old_path, new_path, file_id, ie)
0.99.6 by Ian Clatworthy
Handle rename of a just added file
757
        if new_path is None and old_path is None:
758
            # This is a delete cancelling a previous add
759
            del self._delta_entries_by_fileid[file_id]
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
760
            parent_dir = osutils.dirname(existing[1])
761
            self.mutter("cancelling add of %s with parent %s" % (existing[1], parent_dir))
762
            if parent_dir:
763
                self._dirs_that_might_become_empty.add(parent_dir)
0.99.6 by Ian Clatworthy
Handle rename of a just added file
764
            return
765
        else:
766
            self._delta_entries_by_fileid[file_id] = entry
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
767
0.99.6 by Ian Clatworthy
Handle rename of a just added file
768
        # Collect parent directories that might become empty
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
769
        if new_path is None:
770
            # delete
771
            parent_dir = osutils.dirname(old_path)
772
            # note: no need to check the root
773
            if parent_dir:
774
                self._dirs_that_might_become_empty.add(parent_dir)
775
        elif old_path is not None and old_path != new_path:
776
            # rename
777
            old_parent_dir = osutils.dirname(old_path)
778
            new_parent_dir = osutils.dirname(new_path)
779
            if old_parent_dir and old_parent_dir != new_parent_dir:
780
                self._dirs_that_might_become_empty.add(old_parent_dir)
781
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
782
        # Calculate the per-file parents, if not already done
783
        if file_id in self.per_file_parents_for_commit:
784
            return
0.85.2 by Ian Clatworthy
improve per-file graph generation
785
        if old_path is None:
786
            # add
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
787
            # If this is a merge, the file was most likely added already.
788
            # The per-file parent(s) must therefore be calculated and
789
            # we can't assume there are none.
790
            per_file_parents, ie.revision = \
791
                self.rev_store.get_parents_and_revision_for_entry(ie)
792
            self.per_file_parents_for_commit[file_id] = per_file_parents
0.85.2 by Ian Clatworthy
improve per-file graph generation
793
        elif new_path is None:
794
            # delete
795
            pass
796
        elif old_path != new_path:
797
            # rename
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
798
            per_file_parents, _ = \
799
                self.rev_store.get_parents_and_revision_for_entry(ie)
800
            self.per_file_parents_for_commit[file_id] = per_file_parents
0.85.2 by Ian Clatworthy
improve per-file graph generation
801
        else:
802
            # modify
803
            per_file_parents, ie.revision = \
804
                self.rev_store.get_parents_and_revision_for_entry(ie)
805
            self.per_file_parents_for_commit[file_id] = per_file_parents
806
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
807
    def record_new(self, path, ie):
        """Record the addition of ie at path as a delta entry.

        :param path: path of the new entry
        :param ie: inventory entry being added
        """
        add_entry = (None, path, ie.file_id, ie)
        self._add_entry(add_entry)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
809
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
810
    def record_changed(self, path, ie, parent_id=None):
        """Record a modification of the entry at path as a delta entry.

        :param path: path of the changed entry (unchanged by this operation)
        :param ie: the new inventory entry
        :param parent_id: not used by this handler
        """
        file_id = ie.file_id
        self._add_entry((path, path, file_id, ie))
        # Remember the path so later commands in this commit can find it
        self._modified_file_ids[path] = file_id
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
813
0.81.9 by Ian Clatworthy
refactor delete_item
814
    def record_delete(self, path, ie):
        """Record the deletion of ie at path as a delta entry.

        For a directory, a delete is also recorded for every entry the
        basis inventory yields beneath it, and the affected directories
        are dropped from the ``directory_entries`` cache.

        :param path: path of the entry being deleted
        :param ie: inventory entry being deleted
        """
        self._add_entry((path, None, ie.file_id, None))
        self._paths_deleted_this_commit.add(path)
        if ie.kind != 'directory':
            return

        self.directory_entries.pop(path, None)
        children = self.basis_inventory.iter_entries_by_dir(from_dir=ie)
        for child_relpath, child_ie in children:
            child_path = osutils.pathjoin(path, child_relpath)
            self._add_entry((child_path, None, child_ie.file_id, None))
            self._paths_deleted_this_commit.add(child_path)
            if child_ie.kind == 'directory':
                self.directory_entries.pop(child_path, None)
0.81.8 by Ian Clatworthy
refactor rename_item
832
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
833
    def record_rename(self, old_path, new_path, file_id, old_ie):
        """Record the rename of old_path to new_path as a delta entry.

        A copy of old_ie is given the new name, parent and this commit's
        revision id; old_ie itself is left untouched.

        :param old_path: previous path of the entry
        :param new_path: path the entry is renamed to
        :param file_id: file-id of the entry being renamed
        :param old_ie: inventory entry as it was before the rename
        """
        basename, parent_id = self._ensure_directory(new_path,
            self.basis_inventory)
        renamed_ie = old_ie.copy()
        renamed_ie.name = basename
        renamed_ie.parent_id = parent_id
        renamed_ie.revision = self.revision_id
        self._add_entry((old_path, new_path, file_id, renamed_ie))

        # Keep the per-commit lookup tables in step with the new path
        self._modified_file_ids[new_path] = file_id
        self._paths_deleted_this_commit.discard(new_path)
        if renamed_ie.kind == 'directory':
            self.directory_entries[new_path] = renamed_ie
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
845
0.99.17 by Ian Clatworthy
Handle rename of a file/symlink modified already in this commit
846
    def _rename_pending_change(self, old_path, new_path, file_id):
        """Instead of adding/modifying old-path, add new-path instead.

        :param old_path: path added or modified earlier in this commit
        :param new_path: path it is being renamed to
        :param file_id: file-id of the pending entry
        """
        # note: delta entries look like (old, new, file-id, ie)
        pending_ie = self._delta_entries_by_fileid[file_id][3]

        # Delete the old path. Note that this might trigger implicit
        # deletion of newly created parents that could now become empty.
        self.record_delete(old_path, pending_ie)

        # Update the dictionaries used for tracking new file-ids
        if old_path in self._new_file_ids:
            tracker = self._new_file_ids
        else:
            tracker = self._modified_file_ids
        del tracker[old_path]
        self._new_file_ids[new_path] = file_id

        # Create the new InventoryEntry
        kind = pending_ie.kind
        basename, parent_id = self._ensure_directory(new_path,
            self.basis_inventory)
        new_ie = inventory.make_entry(kind, basename, parent_id, file_id)
        new_ie.revision = self.revision_id
        if kind == 'file':
            new_ie.executable = pending_ie.executable
            new_ie.text_sha1 = pending_ie.text_sha1
            new_ie.text_size = pending_ie.text_size
        elif kind == 'symlink':
            new_ie.symlink_target = pending_ie.symlink_target

        # Record it
        self.record_new(new_path, new_ie)
877
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
878
    def modify_handler(self, filecmd):
        """Handle a file modification command.

        The content comes from the inline data when there is no dataref.
        With a dataref, a directory gets no content, a tree-reference
        gets the dataref itself, and anything else gets the referenced
        blob.
        """
        kind, executable = mode_to_kind(filecmd.mode)
        dataref = filecmd.dataref
        if dataref is None:
            data = filecmd.data
        elif kind == "directory":
            data = None
        elif kind == "tree-reference":
            data = dataref
        else:
            data = self.cache_mgr.fetch_blob(dataref)
        self.debug("modifying %s", filecmd.path)
        path = filecmd.path.decode('utf8')
        self._modify_item(path, kind, executable, data, self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
892
893
    def delete_handler(self, filecmd):
        """Process a file-delete command against the basis inventory.

        Logs the path as given, then delegates to ``_delete_item`` with
        the path decoded from UTF-8.

        :param filecmd: the delete command; only its ``path`` is read.
        """
        raw_path = filecmd.path
        self.debug("deleting %s", raw_path)
        self._delete_item(raw_path.decode('utf8'), self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
896
897
    def copy_handler(self, filecmd):
        """Process a file-copy command against the basis inventory.

        Both paths are decoded from UTF-8 before being logged and handed
        to ``_copy_item``.

        :param filecmd: the copy command; its ``src_path`` and
            ``dest_path`` attributes are read.
        """
        # (source, destination), in the order _copy_item expects them.
        paths = (
            filecmd.src_path.decode("utf8"),
            filecmd.dest_path.decode("utf8"),
            )
        self.debug("copying %s to %s", *paths)
        self._copy_item(paths[0], paths[1], self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
902
903
    def rename_handler(self, filecmd):
        """Process a file-rename command against the basis inventory.

        Both paths are decoded from UTF-8 before being logged and handed
        to ``_rename_item``.

        :param filecmd: the rename command; its ``old_path`` and
            ``new_path`` attributes are read.
        """
        source = filecmd.old_path.decode("utf8")
        target = filecmd.new_path.decode("utf8")
        self.debug("renaming %s to %s", source, target)
        self._rename_item(source, target, self.basis_inventory)
908
909
    def deleteall_handler(self, filecmd):
        """Process a delete-all command against the basis inventory.

        Logs the operation and delegates to ``_delete_all_items``.

        :param filecmd: the delete-all command; it is not inspected here.
        """
        self.debug("deleting all files (and also all directories)")
        basis = self.basis_inventory
        self._delete_all_items(basis)