/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""CommitHandlers that build and save revisions & their inventories."""
18
19
20
from bzrlib import (
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
21
    debug,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
22
    errors,
23
    generate_ids,
24
    inventory,
25
    osutils,
26
    revision,
0.64.192 by Ian Clatworthy
delegate commit message escaping to the serializer if it's a modern one
27
    serializer,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
28
    )
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
29
from bzrlib.trace import (
30
    mutter,
31
    note,
32
    warning,
33
    )
0.123.2 by Jelmer Vernooij
Split out fastimport, import it from the system.
34
from fastimport import (
0.123.1 by Jelmer Vernooij
Move pure-fastimport code into its own directory, in preparation of splitting it into a separate package.
35
    helpers,
36
    processor,
37
    )
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
38
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
39
from bzrlib.plugins.fastimport.helpers import (
40
    mode_to_kind,
41
    )
42
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
43
0.64.192 by Ian Clatworthy
delegate commit message escaping to the serializer if it's a modern one
44
# True when the serializer class advertises that it squashes XML-invalid
# characters itself; build_revision() only escapes the commit message by
# hand when this is False.
_serializer_handles_escaping = hasattr(
    serializer.Serializer, 'squashes_xml_invalid_characters')
46
0.64.318 by Jelmer Vernooij
Avoid Inventory.copy, which has disappeared in newer versions of Bazaar.
47
0.84.3 by Ian Clatworthy
fix inventory copying when using deltas
48
def copy_inventory(inv):
    """Return a new Inventory holding a copy of every entry in ``inv``.

    This stands in for Inventory.copy(), which is not available in newer
    versions of Bazaar.

    :param inv: the inventory to duplicate; it is not modified
    :return: a fresh inventory.Inventory carrying the same revision id
        as ``inv`` and a copy of each of its entries
    """
    # The revision id must come from the inventory being copied, not from
    # the ``inventory`` module (which has no such attribute).
    duplicate = inventory.Inventory(None, inv.revision_id)
    # iter_entries_by_dir() yields (path, entry) pairs; only the entry
    # itself is needed to rebuild the tree.
    for _path, inv_entry in inv.iter_entries_by_dir():
        duplicate.add(inv_entry.copy())
    return duplicate
0.84.3 by Ian Clatworthy
fix inventory copying when using deltas
54
55
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
56
class GenericCommitHandler(processor.CommitHandler):
57
    """Base class for Bazaar CommitHandlers."""
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
58
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
59
    def __init__(self, command, cache_mgr, rev_store, verbose=False,
        prune_empty_dirs=True):
        """Remember the collaborators and reset the per-commit bookkeeping.

        :param command: the commit command being handled; its ``ref`` is
            kept as ``branch_ref``
        :param cache_mgr: cache manager stored as ``self.cache_mgr``
        :param rev_store: revision store stored as ``self.rev_store``
        :param verbose: stored as ``self.verbose``
        :param prune_empty_dirs: stored as ``self.prune_empty_dirs``
        """
        super(GenericCommitHandler, self).__init__(command)
        self.branch_ref = command.ref
        self.cache_mgr = cache_mgr
        self.rev_store = rev_store
        self.verbose = verbose
        self.prune_empty_dirs = prune_empty_dirs

        # path -> file-id for everything created in this commit. A path
        # created more than once is added only once (with a warning), and
        # a path that is added and then renamed or copied is resolved
        # through this map.
        self._new_file_ids = dict()

        # path -> file-id for everything modified in this commit, so that
        # a later rename or copy of the same path picks up the new content.
        self._modified_file_ids = dict()

        # Paths deleted in this commit. If one of them is added again, or
        # becomes the destination of a rename say, it needs a fresh file-id.
        self._paths_deleted_this_commit = set()
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
80
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
81
    def mutter(self, msg, *args):
        """Mutter ``msg`` with the current command id appended as context."""
        mutter("%s (%s)" % (msg, self.command.id), *args)
85
86
    def debug(self, msg, *args):
        """Mutter ``msg`` with context, but only if "fast-import" debugging
        was requested via the appropriate -D option."""
        if "fast-import" not in debug.debug_flags:
            return
        mutter("%s (%s)" % (msg, self.command.id), *args)
91
92
    def note(self, msg, *args):
        """Emit a note with the current command id appended as context."""
        note("%s (%s)" % (msg, self.command.id), *args)
96
97
    def warning(self, msg, *args):
        """Emit a warning with the current command id appended as context."""
        warning("%s (%s)" % (msg, self.command.id), *args)
101
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
102
    def pre_process_files(self):
        """Set up the per-commit state before file commands are processed.

        Generates the revision id, resolves the parents, builds the
        Revision, tells the revision store a new revision is starting and
        selects the basis inventory.
        """
        self.revision_id = self.gen_revision_id()

        # Texts for this commit, keyed by file-id. The root always gets an
        # (empty) entry; the rich-root check that once guarded this is
        # disabled.
        self.data_for_commit = {inventory.ROOT_ID: []}

        # Let the ref tracker record the heads; it returns the real parent
        # list as commit-ids, which are then mapped to bzr revision-ids.
        parent_commit_ids = self.cache_mgr.reftracker.track_heads(self.command)
        if not parent_commit_ids:
            self.parents = []
        else:
            self.parents = [self.cache_mgr.lookup_committish(commit_id)
                for commit_id in parent_commit_ids]
        self.debug("%s id: %s, parents: %s", self.command.id,
            self.revision_id, str(self.parents))

        # Tell the RevisionStore we're starting a new commit
        self.revision = self.build_revision()
        self.parent_invs = [self.get_inventory(parent_id)
            for parent_id in self.parents]
        self.rev_store.start_new_revision(self.revision, self.parents,
            self.parent_invs)

        # Per-file parents for this commit, keyed by file-id
        self.per_file_parents_for_commit = {}
        if self.rev_store.expects_rich_root():
            self.per_file_parents_for_commit[inventory.ROOT_ID] = ()

        # The basis inventory is that of the first parent, or a freshly
        # initialised one when there are no parents. Treat it as read-only.
        if self.parents:
            self.basis_inventory = self.get_inventory(self.parents[0])
        else:
            self.basis_inventory = self._init_inventory()
        basis = self.basis_inventory
        # Some inventories expose root_id directly; others only via .root
        if hasattr(basis, "root_id"):
            self.inventory_root_id = basis.root_id
        else:
            self.inventory_root_id = basis.root.file_id

        # directory-path -> inventory-entry for the current inventory
        self.directory_entries = {}
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
145
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
146
    def _init_inventory(self):
147
        return self.rev_store.init_inventory(self.revision_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
148
149
    def get_inventory(self, revision_id):
150
        """Get the inventory for a revision id."""
151
        try:
152
            inv = self.cache_mgr.inventories[revision_id]
153
        except KeyError:
154
            if self.verbose:
0.64.148 by Ian Clatworthy
handle delete of unknown file in chk formats & reduce noise
155
                self.mutter("get_inventory cache miss for %s", revision_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
156
            # Not cached so reconstruct from the RevisionStore
157
            inv = self.rev_store.get_inventory(revision_id)
158
            self.cache_mgr.inventories[revision_id] = inv
159
        return inv
160
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
161
    def _get_data(self, file_id):
162
        """Get the data bytes for a file-id."""
163
        return self.data_for_commit[file_id]
164
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
165
    def _get_lines(self, file_id):
        """Return the data recorded for ``file_id``, split into lines."""
        data = self._get_data(file_id)
        return osutils.split_lines(data)
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
168
0.85.2 by Ian Clatworthy
improve per-file graph generation
169
    def _get_per_file_parents(self, file_id):
170
        """Get the lines for a file-id."""
171
        return self.per_file_parents_for_commit[file_id]
172
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
173
    def _get_inventories(self, revision_ids):
174
        """Get the inventories for revision-ids.
175
        
176
        This is a callback used by the RepositoryStore to
177
        speed up inventory reconstruction.
178
        """
179
        present = []
180
        inventories = []
181
        # If an inventory is in the cache, we assume it was
182
        # successfully loaded into the revision store
183
        for revision_id in revision_ids:
184
            try:
185
                inv = self.cache_mgr.inventories[revision_id]
186
                present.append(revision_id)
187
            except KeyError:
188
                if self.verbose:
189
                    self.note("get_inventories cache miss for %s", revision_id)
190
                # Not cached so reconstruct from the revision store
191
                try:
192
                    inv = self.get_inventory(revision_id)
193
                    present.append(revision_id)
194
                except:
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
195
                    inv = self._init_inventory()
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
196
                self.cache_mgr.inventories[revision_id] = inv
197
            inventories.append(inv)
198
        return present, inventories
199
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
200
    def bzr_file_id_and_new(self, path):
201
        """Get a Bazaar file identifier and new flag for a path.
202
        
203
        :return: file_id, is_new where
204
          is_new = True if the file_id is newly created
205
        """
0.99.13 by Ian Clatworthy
Handle delete then add of a file/symlink in the one commit
206
        if path not in self._paths_deleted_this_commit:
0.99.19 by Ian Clatworthy
Handle rename then modification of the new path
207
            # Try file-ids renamed in this commit
208
            id = self._modified_file_ids.get(path)
209
            if id is not None:
210
                return id, False
211
0.99.13 by Ian Clatworthy
Handle delete then add of a file/symlink in the one commit
212
            # Try the basis inventory
213
            id = self.basis_inventory.path2id(path)
214
            if id is not None:
215
                return id, False
216
            
217
            # Try the other inventories
218
            if len(self.parents) > 1:
219
                for inv in self.parent_invs[1:]:
220
                    id = self.basis_inventory.path2id(path)
221
                    if id is not None:
222
                        return id, False
0.99.1 by Ian Clatworthy
lookup file-ids in inventories instead of a cache
223
224
        # Doesn't exist yet so create it
0.64.247 by Ian Clatworthy
base file-ids on the basename, not path, as jam suggested. This improves the samba import from 565M to 353M.
225
        dirname, basename = osutils.split(path)
226
        id = generate_ids.gen_file_id(basename)
0.99.1 by Ian Clatworthy
lookup file-ids in inventories instead of a cache
227
        self.debug("Generated new file id %s for '%s' in revision-id '%s'",
228
            id, path, self.revision_id)
0.99.5 by Ian Clatworthy
handle adding the same file twice in the one commit
229
        self._new_file_ids[path] = id
0.99.1 by Ian Clatworthy
lookup file-ids in inventories instead of a cache
230
        return id, True
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
231
232
    def bzr_file_id(self, path):
233
        """Get a Bazaar file identifier for a path."""
234
        return self.bzr_file_id_and_new(path)[0]
235
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
236
    def _utf8_decode(self, field, value):
237
        try:
238
            return value.decode('utf_8')
239
        except UnicodeDecodeError:
240
            # The spec says fields are *typically* utf8 encoded
241
            # but that isn't enforced by git-fast-export (at least)
242
            self.warning("%s not in utf8 - replacing unknown "
243
                "characters" % (field,))
244
            return value.decode('utf_8', 'replace')
245
246
    def _format_name_email(self, section, name, email):
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
247
        """Format name & email as a string."""
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
248
        name = self._utf8_decode("%s name" % section, name)
249
        email = self._utf8_decode("%s email" % section, email)
250
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
251
        if email:
252
            return "%s <%s>" % (name, email)
253
        else:
254
            return name
255
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
256
    def gen_revision_id(self):
        """Generate a revision id.

        The id is derived from the formatted committer and the commit
        timestamp. Subclasses may override this to produce deterministic
        ids say.
        """
        name, email, timestamp = self.command.committer[:3]
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = self._format_name_email("committer", name, email)
        return generate_ids.gen_revision_id(who, timestamp)
267
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
268
    def build_revision(self):
        """Create the Revision object for the commit being imported.

        Revision properties are sanitised, given a default 'branch-nick'
        and extended with author information. The commit message is
        decoded from utf8 (replacing undecodable characters with a
        warning) and escaped by hand only when the serializer will not do
        so itself.
        """
        command = self.command
        properties = self._legal_revision_properties(command.properties)
        if 'branch-nick' not in properties:
            mapper = self.cache_mgr.branch_mapper
            properties['branch-nick'] = mapper.git_to_bzr(self.branch_ref)
        self._save_author_info(properties)

        committer = command.committer
        who = self._format_name_email("committer", committer[0], committer[1])

        # Same decode-or-warn handling as every other utf8 field
        message = self._utf8_decode("commit message", command.message)
        if not _serializer_handles_escaping:
            # We need to assume the bad ol' days
            message = helpers.escape_commit_message(message)

        return revision.Revision(
            timestamp=committer[2],
            timezone=committer[3],
            committer=who,
            message=message,
            revision_id=self.revision_id,
            properties=properties,
            parent_ids=self.parents)
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
294
0.64.235 by Ian Clatworthy
Sanitize None revision properties to empty string
295
    def _legal_revision_properties(self, props):
296
        """Clean-up any revision properties we can't handle."""
297
        # For now, we just check for None because that's not allowed in 2.0rc1
298
        result = {}
299
        if props is not None:
300
            for name, value in props.items():
301
                if value is None:
302
                    self.warning(
303
                        "converting None to empty string for property %s"
304
                        % (name,))
305
                    result[name] = ''
306
                else:
307
                    result[name] = value
308
        return result
309
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
310
    def _save_author_info(self, rev_props):
311
        author = self.command.author
312
        if author is None:
313
            return
314
        if self.command.more_authors:
315
            authors = [author] + self.command.more_authors
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
316
            author_ids = [self._format_name_email("author", a[0], a[1]) for a in authors]
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
317
        elif author != self.command.committer:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
318
            author_ids = [self._format_name_email("author", author[0], author[1])]
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
319
        else:
320
            return
321
        # If we reach here, there are authors worth storing
322
        rev_props['authors'] = "\n".join(author_ids)
323
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
324
    def _modify_item(self, path, kind, is_executable, data, inv):
        """Add to or change an item in the inventory.

        :param path: path of the item being added or changed
        :param kind: 'file', 'directory' or 'symlink'; anything else is
            skipped with a warning
        :param is_executable: executable flag, only used for files
        :param data: the content for a file, or the utf8-encoded target
            for a symlink
        :param inv: inventory consulted for existing entries

        The change itself is reported through record_new(),
        record_changed() and record_delete(), which are not defined in
        this part of the class (presumably supplied by subclasses).
        """
        # If we've already added this, warn the user that we're ignoring it.
        # In the future, it might be nice to double check that the new data
        # is the same as the old but, frankly, exporters should be fixed
        # not to produce bad data streams in the first place ...
        existing = self._new_file_ids.get(path)
        if existing:
            # We don't warn about directories because it's fine for them
            # to be created already by a previous rename
            if kind != 'directory':
                self.warning("%s already added in this commit - ignoring" %
                    (path,))
            return

        # Create the new InventoryEntry
        basename, parent_id = self._ensure_directory(path, inv)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
        ie.revision = self.revision_id
        if kind == 'file':
            ie.executable = is_executable
            ie.text_sha1 = osutils.sha_string(data)
            ie.text_size = len(data)
            self.data_for_commit[file_id] = data
        elif kind == 'directory':
            self.directory_entries[path] = ie
            # There are no lines stored for a directory so
            # make sure the cache used by get_lines knows that
            self.data_for_commit[file_id] = ''
        elif kind == 'symlink':
            # The entry needs a unicode target rather than a byte string
            ie.symlink_target = data.decode('utf8')
            # There are no lines stored for a symlink so
            # make sure the cache used by get_lines knows that
            self.data_for_commit[file_id] = ''
        else:
            self.warning("Cannot import items of kind '%s' yet - ignoring '%s'"
                % (kind, path))
            return

        # Record it
        if file_id in inv:
            old_ie = inv[file_id]
            if old_ie.kind == 'directory':
                self.record_delete(path, old_ie)
            self.record_changed(path, ie, parent_id)
        else:
            try:
                self.record_new(path, ie)
            except Exception:
                # Add context and re-raise. ie.parent_id is a file-id, not
                # an entry, so it has no children to dump; trying to do so
                # would replace the real error with an AttributeError.
                # self.warning() appends the command id itself.
                self.warning(
                    "failed to add path '%s' with entry '%s' (parent id %s)"
                    % (path, ie, ie.parent_id))
                raise
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
378
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
379
    def _ensure_directory(self, path, inv):
        """Ensure that the containing directory exists for 'path'

        Missing directories are created recursively, parents first.

        :return: (basename, parent_id) where basename is the last element
            of ``path`` and parent_id is the file-id of its directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # Directly under the root, which doesn't get updated
            return basename, self.inventory_root_id

        try:
            known_dir = self._get_directory_entry(inv, dirname)
        except KeyError:
            # Not there yet - it gets created below
            pass
        else:
            return basename, known_dir.file_id

        # Make sure the directory's own parent exists before creating it
        dir_basename, parent_id = self._ensure_directory(dirname, inv)
        dir_file_id = self.bzr_file_id(dirname)
        make_directory = inventory.entry_factory['directory']
        new_dir = make_directory(dir_file_id, dir_basename, parent_id)
        new_dir.revision = self.revision_id
        self.directory_entries[dirname] = new_dir
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.data_for_commit[dir_file_id] = ''

        # It's possible that a file or symlink with that file-id
        # already exists. If it does, we need to delete it.
        if dir_file_id in inv:
            self.record_delete(dirname, new_dir)
        self.record_new(dirname, new_dir)
        return basename, new_dir.file_id
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
411
0.84.12 by Ian Clatworthy
lookup directories on demand in CHKInventories, not all upfront
412
    def _get_directory_entry(self, inv, dirname):
413
        """Get the inventory entry for a directory.
414
        
415
        Raises KeyError if dirname is not a directory in inv.
416
        """
417
        result = self.directory_entries.get(dirname)
418
        if result is None:
0.99.21 by Ian Clatworthy
Handle deleting a directory then adding a file within it in the same commit
419
            if dirname in self._paths_deleted_this_commit:
420
                raise KeyError
0.64.146 by Ian Clatworthy
fix first file is in a subdirectory bug for chk formats
421
            try:
422
                file_id = inv.path2id(dirname)
423
            except errors.NoSuchId:
424
                # In a CHKInventory, this is raised if there's no root yet
425
                raise KeyError
0.84.12 by Ian Clatworthy
lookup directories on demand in CHKInventories, not all upfront
426
            if file_id is None:
427
                raise KeyError
428
            result = inv[file_id]
429
            # dirname must be a directory for us to return it
430
            if result.kind == 'directory':
431
                self.directory_entries[dirname] = result
432
            else:
433
                raise KeyError
434
        return result
435
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
436
    def _delete_item(self, path, inv):
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
437
        newly_added = self._new_file_ids.get(path)
438
        if newly_added:
439
            # We've only just added this path earlier in this commit.
440
            file_id = newly_added
441
            # note: delta entries look like (old, new, file-id, ie)
442
            ie = self._delta_entries_by_fileid[file_id][3]
0.64.145 by Ian Clatworthy
handle delete of missing files for chk formats
443
        else:
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
444
            file_id = inv.path2id(path)
445
            if file_id is None:
446
                self.mutter("ignoring delete of %s as not in inventory", path)
447
                return
448
            try:
449
                ie = inv[file_id]
450
            except errors.NoSuchId:
451
                self.mutter("ignoring delete of %s as not in inventory", path)
452
                return
453
        self.record_delete(path, ie)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
454
455
    def _copy_item(self, src_path, dest_path, inv):
0.99.18 by Ian Clatworthy
Handle copy of a file/symlink already modified in this commit
456
        newly_changed = self._new_file_ids.get(src_path) or \
457
            self._modified_file_ids.get(src_path)
458
        if newly_changed:
459
            # We've only just added/changed this path earlier in this commit.
460
            file_id = newly_changed
0.99.8 by Ian Clatworthy
handle copy of a newly added file
461
            # note: delta entries look like (old, new, file-id, ie)
462
            ie = self._delta_entries_by_fileid[file_id][3]
463
        else:
464
            file_id = inv.path2id(src_path)
465
            if file_id is None:
466
                self.warning("ignoring copy of %s to %s - source does not exist",
467
                    src_path, dest_path)
468
                return
469
            ie = inv[file_id]
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
470
        kind = ie.kind
471
        if kind == 'file':
0.99.18 by Ian Clatworthy
Handle copy of a file/symlink already modified in this commit
472
            if newly_changed:
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
473
                content = self.data_for_commit[file_id]
0.99.8 by Ian Clatworthy
handle copy of a newly added file
474
            else:
475
                content = self.rev_store.get_file_text(self.parents[0], file_id)
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
476
            self._modify_item(dest_path, kind, ie.executable, content, inv)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
477
        elif kind == 'symlink':
0.64.289 by Jelmer Vernooij
Cope with non-ascii characters in symbolic links.
478
            self._modify_item(dest_path, kind, False, ie.symlink_target.encode("utf-8"), inv)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
479
        else:
480
            self.warning("ignoring copy of %s %s - feature not yet supported",
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
481
                kind, dest_path)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
482
483
    def _rename_item(self, old_path, new_path, inv):
0.99.17 by Ian Clatworthy
Handle rename of a file/symlink modified already in this commit
484
        existing = self._new_file_ids.get(old_path) or \
485
            self._modified_file_ids.get(old_path)
0.99.6 by Ian Clatworthy
Handle rename of a just added file
486
        if existing:
0.99.17 by Ian Clatworthy
Handle rename of a file/symlink modified already in this commit
487
            # We've only just added/modified this path earlier in this commit.
488
            # Change the add/modify of old_path to an add of new_path
489
            self._rename_pending_change(old_path, new_path, existing)
0.99.6 by Ian Clatworthy
Handle rename of a just added file
490
            return
491
0.81.8 by Ian Clatworthy
refactor rename_item
492
        file_id = inv.path2id(old_path)
0.64.167 by Ian Clatworthy
incremental packing for chk formats
493
        if file_id is None:
494
            self.warning(
495
                "ignoring rename of %s to %s - old path does not exist" %
496
                (old_path, new_path))
497
            return
0.81.8 by Ian Clatworthy
refactor rename_item
498
        ie = inv[file_id]
499
        rev_id = ie.revision
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
500
        new_file_id = inv.path2id(new_path)
501
        if new_file_id is not None:
0.81.9 by Ian Clatworthy
refactor delete_item
502
            self.record_delete(new_path, inv[new_file_id])
0.81.8 by Ian Clatworthy
refactor rename_item
503
        self.record_rename(old_path, new_path, file_id, ie)
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
504
0.81.8 by Ian Clatworthy
refactor rename_item
505
        # The revision-id for this entry will be/has been updated and
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
506
        # that means the loader then needs to know what the "new" text is.
507
        # We therefore must go back to the revision store to get it.
0.81.8 by Ian Clatworthy
refactor rename_item
508
        lines = self.rev_store.get_file_lines(rev_id, file_id)
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
509
        self.data_for_commit[file_id] = ''.join(lines)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
510
511
    def _delete_all_items(self, inv):
512
        for name, root_item in inv.root.children.iteritems():
513
            inv.remove_recursive_id(root_item.file_id)
514
0.64.145 by Ian Clatworthy
handle delete of missing files for chk formats
515
    def _warn_unless_in_merges(self, fileid, path):
516
        if len(self.parents) <= 1:
517
            return
518
        for parent in self.parents[1:]:
519
            if fileid in self.get_inventory(parent):
520
                return
521
        self.warning("ignoring delete of %s as not in parent inventories", path)
522
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
523
524
class InventoryCommitHandler(GenericCommitHandler):
    """A CommitHandler that builds and saves Inventory objects.

    File commands are applied directly to a per-commit inventory
    (self.inventory), which post_process_files() hands to the revision
    store.
    """

    def pre_process_files(self):
        """Set up the inventory that this commit's file commands mutate."""
        super(InventoryCommitHandler, self).pre_process_files()

        # The parent class version of pre_process_files() has already
        # set the right basis_inventory for this branch. When there are
        # parents it must be copied, so that mutating it cannot corrupt
        # the cached inventory value.
        if len(self.parents) != 0:
            self.inventory = copy_inventory(self.basis_inventory)
        else:
            self.inventory = self.basis_inventory
        self.inventory_root = self.inventory.root

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

        # Initialise the inventory revision info as required
        if not self.rev_store.expects_rich_root():
            # In this revision store, root entries have no knit or weave.
            # When serializing out to disk and back in, root.revision is
            # always the new revision_id.
            self.inventory.root.revision = self.revision_id
        else:
            self.inventory.revision_id = self.revision_id

    def post_process_files(self):
        """Save the revision."""
        def text_provider(file_id):
            return self._get_data(file_id)

        def parents_provider(file_id):
            return self._get_per_file_parents(file_id)

        def inventories_provider(revision_ids):
            return self._get_inventories(revision_ids)

        self.cache_mgr.inventories[self.revision_id] = self.inventory
        self.rev_store.load(self.revision, self.inventory, None,
            text_provider, parents_provider, inventories_provider)

    def record_new(self, path, ie):
        """Add ie to the inventory, replacing an entry with the same file-id.

        :param path: path of the new item (not used by this handler)
        :param ie: inventory entry to add; its revision is updated
        """
        try:
            # If this is a merge, the file was most likely added already.
            # The per-file parent(s) must therefore be calculated and
            # we can't assume there are none.
            parents, ie.revision = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            self.per_file_parents_for_commit[ie.file_id] = parents
            self.inventory.add(ie)
        except errors.DuplicateFileId:
            # Directory already exists as a file or symlink:
            # drop the old entry and try again
            del self.inventory[ie.file_id]
            self.inventory.add(ie)

    def record_changed(self, path, ie, parent_id):
        """Replace the inventory's entry for ie.file_id with ie.

        :param path: path of the changed item (not used by this handler)
        :param ie: the updated inventory entry; its revision is updated
        :param parent_id: file-id of the directory containing the item
        """
        parents, ie.revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[ie.file_id] = parents
        # HACK: no API for this (del+add does more than it needs to)
        by_id = self.inventory._byid
        by_id[ie.file_id] = ie
        by_id[parent_id].children[ie.name] = ie

    def record_delete(self, path, ie):
        """Remove ie from the inventory via remove_recursive_id().

        :param path: path of the deleted item (not used by this handler)
        :param ie: inventory entry to remove
        """
        self.inventory.remove_recursive_id(ie.file_id)

    def record_rename(self, old_path, new_path, file_id, ie):
        """Move the entry with file_id to new_path in the inventory.

        :param old_path: previous path (not used by this handler)
        :param new_path: path the item is moved to
        :param file_id: file-id of the item
        :param ie: inventory entry of the item; its revision is updated
        """
        # For a rename, the revision-id is always the new one, so it is
        # set directly and the revision returned by the store is ignored.
        ie.revision = self.revision_id
        parents, _ = self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[file_id] = parents
        basename, parent_id = self._ensure_directory(new_path,
            self.inventory)
        self.inventory.rename(file_id, parent_id, basename)

    def modify_handler(self, filecmd):
        """Apply a file-modify command to the inventory."""
        if filecmd.dataref is None:
            data = filecmd.data
        else:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        self.debug("modifying %s", filecmd.path)
        (kind, is_executable) = mode_to_kind(filecmd.mode)
        path = filecmd.path.decode('utf8')
        self._modify_item(path, kind, is_executable, data, self.inventory)

    def delete_handler(self, filecmd):
        """Apply a file-delete command to the inventory."""
        self.debug("deleting %s", filecmd.path)
        path = filecmd.path.decode('utf8')
        self._delete_item(path, self.inventory)

    def copy_handler(self, filecmd):
        """Apply a file-copy command to the inventory."""
        source = filecmd.src_path.decode('utf8')
        target = filecmd.dest_path.decode('utf8')
        self.debug("copying %s to %s", source, target)
        self._copy_item(source, target, self.inventory)

    def rename_handler(self, filecmd):
        """Apply a file-rename command to the inventory."""
        source = filecmd.old_path.decode('utf8')
        target = filecmd.new_path.decode('utf8')
        self.debug("renaming %s to %s", source, target)
        self._rename_item(source, target, self.inventory)

    def deleteall_handler(self, filecmd):
        """Apply a delete-all command to the inventory."""
        self.debug("deleting all files (and also all directories)")
        self._delete_all_items(self.inventory)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
628
629
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
630
class InventoryDeltaCommitHandler(GenericCommitHandler):
631
    """A CommitHandler that builds Inventories by applying a delta."""
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
632
633
    def pre_process_files(self):
        """Reset the per-commit delta state and seed the root entry."""
        super(InventoryDeltaCommitHandler, self).pre_process_files()
        self._dirs_that_might_become_empty = set()

        # A given file-id can only appear once so we accumulate
        # the entries in a dict then build the actual delta at the end
        self._delta_entries_by_fileid = {}

        # The root entry only needs adding explicitly for the first
        # revision and for non rich-root inventories.
        if len(self.parents) != 0 and self.rev_store.expects_rich_root():
            return
        if self.parents:
            previous_root_path = ''
        else:
            previous_root_path = None
        root_id = inventory.ROOT_ID
        root_ie = inventory.InventoryDirectory(root_id, u'', None)
        root_ie.revision = self.revision_id
        self._add_entry((previous_root_path, '', root_id, root_ie))
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
651
652
    def post_process_files(self):
        """Save the revision.

        The final delta is applied to the basis inventory by the
        revision store and the inventory it returns is cached under
        this commit's revision-id.
        """
        final_delta = self._get_final_delta()
        new_inv = self.rev_store.load_using_delta(
            self.revision, self.basis_inventory, final_delta, None,
            self._get_data, self._get_per_file_parents,
            self._get_inventories)
        self.cache_mgr.inventories[self.revision_id] = new_inv
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
662
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
663
    def _get_final_delta(self):
        """Generate the final delta.

        Smart post-processing of changes, e.g. pruning of directories
        that would become empty, goes here.

        Pruning runs in passes: each pass examines the current
        candidates and the next pass examines the parents of whatever
        was pruned, until there are no candidates left.

        :return: a list of (old-path, new-path, file-id, entry) tuples
        """
        delta = list(self._delta_entries_by_fileid.values())
        if not self.prune_empty_dirs:
            return delta
        pending = self._dirs_that_might_become_empty
        while pending:
            cancelled_ids = set()
            next_round = set()
            for path, file_id in self._empty_after_delta(delta, pending):
                added_id = self._new_file_ids.get(path)
                if not added_id:
                    delta.append((path, None, file_id, None))
                else:
                    # Added in this very commit: cancel the add instead
                    cancelled_ids.add(added_id)
                parent = osutils.dirname(path)
                if parent:
                    next_round.add(parent)
            # Clean up entries that got deleted before they were ever added
            if cancelled_ids:
                delta = [item for item in delta
                    if item[2] not in cancelled_ids]
            pending = next_round
        return delta
689
690
    def _empty_after_delta(self, delta, candidates):
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
691
        #self.mutter("delta so far is:\n%s" % "\n".join([str(de) for de in delta]))
692
        #self.mutter("candidates for deletion are:\n%s" % "\n".join([c for c in candidates]))
693
        new_inv = self._get_proposed_inventory(delta)
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
694
        result = []
695
        for dir in candidates:
696
            file_id = new_inv.path2id(dir)
0.64.219 by Ian Clatworthy
More robust implicit delete logic when file-id not found
697
            if file_id is None:
698
                continue
0.96.2 by Ian Clatworthy
test and fix for implicit directory delete recursing up
699
            ie = new_inv[file_id]
0.101.2 by Tom Widmer
Update pruning code to operate in multiple passes, with subsequent passes operating on the parent dirs of dirs pruned in the previous pass.
700
            if ie.kind != 'directory':
701
                continue
0.96.2 by Ian Clatworthy
test and fix for implicit directory delete recursing up
702
            if len(ie.children) == 0:
703
                result.append((dir, file_id))
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
704
                if self.verbose:
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
705
                    self.note("pruning empty directory %s" % (dir,))
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
706
        return result
707
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
708
    def _get_proposed_inventory(self, delta):
709
        if len(self.parents):
0.114.1 by John Arbash Meinel
When post-processing the delta stream, don't ask to generate a full inventory to check for deletions.
710
            # new_inv = self.basis_inventory._get_mutable_inventory()
711
            # Note that this will create unreferenced chk pages if we end up
712
            # deleting entries, because this 'test' inventory won't end up
713
            # used. However, it is cheaper than having to create a full copy of
714
            # the inventory for every commit.
715
            new_inv = self.basis_inventory.create_by_apply_delta(delta,
716
                'not-a-valid-revision-id:')
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
717
        else:
718
            new_inv = inventory.Inventory(revision_id=self.revision_id)
719
            # This is set in the delta so remove it to prevent a duplicate
720
            del new_inv[inventory.ROOT_ID]
0.114.1 by John Arbash Meinel
When post-processing the delta stream, don't ask to generate a full inventory to check for deletions.
721
            try:
722
                new_inv.apply_delta(delta)
723
            except errors.InconsistentDelta:
724
                self.mutter("INCONSISTENT DELTA IS:\n%s" % "\n".join([str(de) for de in delta]))
725
                raise
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
726
        return new_inv
727
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
728
    def _add_entry(self, entry):
        """Merge a delta entry into the pending delta for this commit.

        :param entry: an (old-path, new-path, file-id, inventory-entry)
            tuple
        """
        # Only one entry may exist per file-id, so an entry for a file-id
        # that is already pending is combined with the earlier one,
        # keeping the original old-path and taking the new new-path and
        # new ie:
        #
        #   rename then modify:  (x, y, f, e) & (y, y, f, g) => (x, y, f, g)
        #   modify then rename:  (x, x, f, e) & (x, y, f, g) => (x, y, f, g)
        #   rename then delete:  (x, y, f, e) & (y, None, f, None)
        #                                             => (x, None, f, None)
        #   modify then delete:  (x, x, f, e) & (x, None, f, None)
        #                                             => (x, None, f, None)
        old_path, new_path, file_id, ie = entry
        pending = self._delta_entries_by_fileid
        existing = pending.get(file_id, None)
        if existing is not None:
            old_path = existing[0]
            entry = (old_path, new_path, file_id, ie)

        if old_path is None and new_path is None:
            # This is a delete cancelling a previous add
            del pending[file_id]
            added_path = existing[1]
            parent_dir = osutils.dirname(added_path)
            self.mutter("cancelling add of %s with parent %s" % (added_path, parent_dir))
            if parent_dir:
                self._dirs_that_might_become_empty.add(parent_dir)
            return
        pending[file_id] = entry

        # Collect parent directories that might become empty
        if new_path is None:
            # delete (note: no need to check the root)
            vacated = osutils.dirname(old_path)
            if vacated:
                self._dirs_that_might_become_empty.add(vacated)
        elif old_path is not None and old_path != new_path:
            # rename
            vacated = osutils.dirname(old_path)
            if vacated and vacated != osutils.dirname(new_path):
                self._dirs_that_might_become_empty.add(vacated)

        # Calculate the per-file parents, if not already done.
        # Deletes need none.
        if file_id in self.per_file_parents_for_commit or new_path is None:
            return
        # For an add: if this is a merge, the file was most likely added
        # already. The per-file parent(s) must therefore be calculated
        # and we can't assume there are none.
        per_file_parents, entry_revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        if old_path is None or old_path == new_path:
            # add or modify: take the revision worked out by the store.
            # A rename keeps the revision already set on the entry.
            ie.revision = entry_revision
        self.per_file_parents_for_commit[file_id] = per_file_parents
803
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
804
    def record_new(self, path, ie):
        """Queue a delta entry that adds ie at path."""
        # note: delta entries look like (old, new, file-id, ie)
        addition = (None, path, ie.file_id, ie)
        self._add_entry(addition)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
806
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
807
    def record_changed(self, path, ie, parent_id=None):
        """Queue a delta entry that modifies the item at path in place.

        The path is also remembered in _modified_file_ids. parent_id is
        accepted but not used by this handler.
        """
        file_id = ie.file_id
        # note: delta entries look like (old, new, file-id, ie)
        self._add_entry((path, path, file_id, ie))
        self._modified_file_ids[path] = file_id
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
810
0.81.9 by Ian Clatworthy
refactor delete_item
811
    def record_delete(self, path, ie):
        """Queue the deletion of ie and, for a directory, of its contents.

        Every deleted path is added to _paths_deleted_this_commit and
        deleted directories are dropped from directory_entries. The
        contents of a directory are taken from the basis inventory.

        :param path: path of the item being deleted
        :param ie: inventory entry of the item being deleted
        """
        deleted_paths = self._paths_deleted_this_commit
        self._add_entry((path, None, ie.file_id, None))
        deleted_paths.add(path)
        if ie.kind != 'directory':
            return
        self.directory_entries.pop(path, None)
        children = self.basis_inventory.iter_entries_by_dir(from_dir=ie)
        for child_relpath, child_ie in children:
            child_path = osutils.pathjoin(path, child_relpath)
            self._add_entry((child_path, None, child_ie.file_id, None))
            deleted_paths.add(child_path)
            if child_ie.kind == 'directory':
                self.directory_entries.pop(child_path, None)
0.81.8 by Ian Clatworthy
refactor rename_item
829
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
830
    def record_rename(self, old_path, new_path, file_id, old_ie):
        """Queue a delta entry that moves old_ie from old_path to new_path.

        A copy of old_ie is renamed and re-parented; old_ie itself is
        left untouched.

        :param old_path: current path of the item
        :param new_path: path the item is moved to
        :param file_id: file-id of the item
        :param old_ie: inventory entry of the item before the rename
        """
        renamed = old_ie.copy()
        renamed.name, renamed.parent_id = self._ensure_directory(
            new_path, self.basis_inventory)
        renamed.revision = self.revision_id
        self._add_entry((old_path, new_path, file_id, renamed))
        self._modified_file_ids[new_path] = file_id
        # The destination exists again, even if deleted earlier on
        self._paths_deleted_this_commit.discard(new_path)
        if renamed.kind == 'directory':
            self.directory_entries[new_path] = renamed
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
842
0.99.17 by Ian Clatworthy
Handle rename of a file/symlink modified already in this commit
843
    def _rename_pending_change(self, old_path, new_path, file_id):
        """Instead of adding/modifying old-path, add new-path instead.

        :param old_path: path added or modified earlier in this commit
        :param new_path: path the pending change is moved to
        :param file_id: file-id of the item
        """
        # note: delta entries look like (old, new, file-id, ie)
        previous_ie = self._delta_entries_by_fileid[file_id][3]

        # Delete the old path. Note that this might trigger implicit
        # deletion of newly created parents that could now become empty.
        self.record_delete(old_path, previous_ie)

        # Update the dictionaries used for tracking new file-ids
        if old_path not in self._new_file_ids:
            del self._modified_file_ids[old_path]
        else:
            del self._new_file_ids[old_path]
        self._new_file_ids[new_path] = file_id

        # Create the new InventoryEntry, carrying over the kind-specific
        # details of the old one
        kind = previous_ie.kind
        basename, parent_id = self._ensure_directory(new_path,
            self.basis_inventory)
        replacement = inventory.make_entry(kind, basename, parent_id, file_id)
        replacement.revision = self.revision_id
        if kind == 'file':
            replacement.executable = previous_ie.executable
            replacement.text_sha1 = previous_ie.text_sha1
            replacement.text_size = previous_ie.text_size
        elif kind == 'symlink':
            replacement.symlink_target = previous_ie.symlink_target

        # Record it
        self.record_new(new_path, replacement)
874
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
875
    def modify_handler(self, filecmd):
        """Apply a file-modify command against the basis inventory.

        The data passed on is the inline data when the command has no
        dataref; otherwise None for a directory, the dataref itself for
        a tree-reference and the referenced blob for any other kind.
        """
        (kind, executable) = mode_to_kind(filecmd.mode)
        dataref = filecmd.dataref
        if dataref is None:
            data = filecmd.data
        elif kind == "directory":
            data = None
        elif kind == "tree-reference":
            data = dataref
        else:
            data = self.cache_mgr.fetch_blob(dataref)
        self.debug("modifying %s", filecmd.path)
        path = filecmd.path.decode('utf8')
        self._modify_item(path, kind, executable, data, self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
889
890
    def delete_handler(self, filecmd):
        """Delete the path named by a file-delete command.

        :param filecmd: the delete command; its ``path`` is logged as
            given, then decoded as UTF-8 and passed to _delete_item()
            together with the basis inventory.
        """
        raw_path = filecmd.path
        self.debug("deleting %s", raw_path)
        self._delete_item(raw_path.decode('utf8'), self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
893
894
    def copy_handler(self, filecmd):
        """Copy one path to another as directed by a file-copy command.

        :param filecmd: the copy command; ``src_path`` and ``dest_path``
            are decoded as UTF-8 before being logged and passed to
            _copy_item() together with the basis inventory.
        """
        source = filecmd.src_path.decode("utf8")
        target = filecmd.dest_path.decode("utf8")
        self.debug("copying %s to %s", source, target)
        self._copy_item(source, target, self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
899
900
    def rename_handler(self, filecmd):
        """Rename a path as directed by a file-rename command.

        :param filecmd: the rename command; ``old_path`` and ``new_path``
            are decoded as UTF-8 before being logged and passed to
            _rename_item() together with the basis inventory.
        """
        source = filecmd.old_path.decode("utf8")
        target = filecmd.new_path.decode("utf8")
        self.debug("renaming %s to %s", source, target)
        self._rename_item(source, target, self.basis_inventory)
905
906
    def deleteall_handler(self, filecmd):
        """Handle a delete-all command by emptying the basis inventory.

        The basis inventory is replaced with a copy before its items are
        deleted.

        :param filecmd: the delete-all command; not inspected here.
        """
        self.debug("deleting all files (and also all directories)")
        # It is not certain that this works in the delta case, but
        # clearing out the basis inventory so that everything gets
        # added afterwards sounds fine in theory.
        # Work on a copy: the basis is likely to be cached, and the
        # cached version must not be destroyed.
        private_basis = copy_inventory(self.basis_inventory)
        self.basis_inventory = private_basis
        self._delete_all_items(private_basis)