/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""CommitHandlers that build and save revisions & their inventories."""
18
19
20
from bzrlib import (
21
    errors,
22
    generate_ids,
23
    inventory,
24
    osutils,
25
    revision,
0.64.192 by Ian Clatworthy
delegate commit message escaping to the serializer if it's a modern one
26
    serializer,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
27
    )
28
from bzrlib.plugins.fastimport import helpers, processor
29
30
0.64.192 by Ian Clatworthy
delegate commit message escaping to the serializer if it's a modern one
31
# When the serializer advertises 'squashes_xml_invalid_characters',
# build_revision() hands it the commit message unescaped; otherwise the
# message is escaped here first.
_serializer_handles_escaping = hasattr(
    serializer.Serializer, 'squashes_xml_invalid_characters')
33
34
0.84.3 by Ian Clatworthy
fix inventory copying when using deltas
35
def copy_inventory(inv):
    """Return a copy of ``inv`` as produced by ``inv.copy()``.

    ``inv._get_mutable_inventory()`` is deliberately not used, even when
    it is available, because it currently breaks revision-id matching.
    (TODO: make that a public API on inventory.)
    """
    # TODO: Shallow copy - deep inventory copying is expensive
    duplicate = inv.copy()
    return duplicate
0.84.3 by Ian Clatworthy
fix inventory copying when using deltas
43
44
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
45
class GenericCommitHandler(processor.CommitHandler):
46
    """Base class for Bazaar CommitHandlers."""
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
47
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
48
    def __init__(self, command, cache_mgr, rev_store, verbose=False,
        prune_empty_dirs=True):
        """Remember the collaborators needed to process ``command``.

        :param command: the commit command being handled; its ``ref`` is
          kept as ``self.branch_ref``
        :param cache_mgr: cache manager, used for head tracking and for
          the revision-id and inventory caches
        :param rev_store: the revision store that revisions and
          inventories are read from and loaded into
        :param verbose: if True, inventory cache misses are reported
        :param prune_empty_dirs: stored as ``self.prune_empty_dirs``
        """
        super(GenericCommitHandler, self).__init__(command)
        self.branch_ref = command.ref
        self.cache_mgr = cache_mgr
        self.rev_store = rev_store
        self.prune_empty_dirs = prune_empty_dirs
        self.verbose = verbose
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
56
57
    def pre_process_files(self):
        """Prepare for committing.

        Sets up the per-commit state: the new revision-id and Revision
        object, the parent revision-ids and their inventories, the
        (read-only) basis inventory, and the per-commit caches of
        texts, per-file parents and directory entries.
        """
        self.revision_id = self.gen_revision_id()

        # cache of texts for this commit, indexed by file-id. The root
        # gets an empty text whether or not the store expects rich roots.
        self.lines_for_commit = {inventory.ROOT_ID: []}

        # Track the heads and get the real parent list, then convert
        # the parent commit-ids to bzr revision-ids
        parent_commit_ids = self.cache_mgr.track_heads(self.command)
        if parent_commit_ids:
            known_revision_ids = self.cache_mgr.revision_ids
            self.parents = [known_revision_ids[commit_id]
                for commit_id in parent_commit_ids]
        else:
            self.parents = []
        self.debug("%s id: %s, parents: %s", self.command.id,
            self.revision_id, str(self.parents))

        # Tell the RevisionStore we're starting a new commit
        self.revision = self.build_revision()
        self.parent_invs = [self.get_inventory(parent)
            for parent in self.parents]
        self.rev_store.start_new_revision(self.revision, self.parents,
            self.parent_invs)

        # cache of per-file parents for this commit, indexed by file-id
        self.per_file_parents_for_commit = {}
        if self.rev_store.expects_rich_root():
            self.per_file_parents_for_commit[inventory.ROOT_ID] = ()

        # Keep the basis inventory. This needs to be treated as read-only.
        if self.parents:
            self.basis_inventory = self.get_inventory(self.parents[0])
        else:
            self.basis_inventory = self._init_inventory()
        basis = self.basis_inventory
        # Some inventories expose root_id directly; others only via root
        if hasattr(basis, "root_id"):
            self.inventory_root_id = basis.root_id
        else:
            self.inventory_root_id = basis.root.file_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = {}
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
100
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
101
    def _init_inventory(self):
102
        return self.rev_store.init_inventory(self.revision_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
103
104
    def get_inventory(self, revision_id):
        """Get the inventory for a revision id.

        The cache manager's inventory cache is tried first. On a miss the
        inventory is fetched from the revision store and added to the
        cache before being returned.
        """
        try:
            return self.cache_mgr.inventories[revision_id]
        except KeyError:
            pass
        if self.verbose:
            self.mutter("get_inventory cache miss for %s", revision_id)
        # Not cached so reconstruct from the RevisionStore
        loaded = self.rev_store.get_inventory(revision_id)
        self.cache_mgr.inventories[revision_id] = loaded
        return loaded
115
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
116
    def _get_lines(self, file_id):
117
        """Get the lines for a file-id."""
118
        return self.lines_for_commit[file_id]
119
0.85.2 by Ian Clatworthy
improve per-file graph generation
120
    def _get_per_file_parents(self, file_id):
121
        """Get the lines for a file-id."""
122
        return self.per_file_parents_for_commit[file_id]
123
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
124
    def _get_inventories(self, revision_ids):
125
        """Get the inventories for revision-ids.
126
        
127
        This is a callback used by the RepositoryStore to
128
        speed up inventory reconstruction.
129
        """
130
        present = []
131
        inventories = []
132
        # If an inventory is in the cache, we assume it was
133
        # successfully loaded into the revision store
134
        for revision_id in revision_ids:
135
            try:
136
                inv = self.cache_mgr.inventories[revision_id]
137
                present.append(revision_id)
138
            except KeyError:
139
                if self.verbose:
140
                    self.note("get_inventories cache miss for %s", revision_id)
141
                # Not cached so reconstruct from the revision store
142
                try:
143
                    inv = self.get_inventory(revision_id)
144
                    present.append(revision_id)
145
                except:
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
146
                    inv = self._init_inventory()
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
147
                self.cache_mgr.inventories[revision_id] = inv
148
            inventories.append(inv)
149
        return present, inventories
150
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
151
    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        The basis inventory is consulted first, then the inventories of
        the other (merged) parents. A new file-id is generated only when
        none of them knows the path.

        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        # Try the basis inventory
        file_id = self.basis_inventory.path2id(path)
        if file_id is not None:
            return file_id, False

        # Try the other inventories. Each merged parent must be asked
        # itself; asking the basis inventory again can never succeed.
        if len(self.parents) > 1:
            for inv in self.parent_invs[1:]:
                file_id = inv.path2id(path)
                if file_id is not None:
                    return file_id, False

        # Doesn't exist yet so create it
        file_id = generate_ids.gen_file_id(path)
        self.debug("Generated new file id %s for '%s' in revision-id '%s'",
            file_id, path, self.revision_id)
        return file_id, True
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
174
175
    def bzr_file_id(self, path):
        """Get a Bazaar file identifier for a path.

        Same lookup as bzr_file_id_and_new(), without the is_new flag.
        """
        file_id, _is_new = self.bzr_file_id_and_new(path)
        return file_id
178
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
179
    def _format_name_email(self, name, email):
180
        """Format name & email as a string."""
181
        if email:
182
            return "%s <%s>" % (name, email)
183
        else:
184
            return name
185
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
186
    def gen_revision_id(self):
        """Generate a revision id.

        The id is derived from the committer's name & email and the
        commit timestamp.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        return generate_ids.gen_revision_id(
            self._format_name_email(committer[0], committer[1]),
            committer[2])
197
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
198
    def build_revision(self):
        """Build the Revision object for the commit being processed.

        Relies on self.revision_id and self.parents having been set.
        The 'author' revision property is only recorded when the author
        differs from the committer.
        """
        committer = self.command.committer
        who = self._format_name_email(committer[0], committer[1])

        rev_props = {}
        author = self.command.author
        if author is not None:
            author_id = self._format_name_email(author[0], author[1])
            if author_id != who:
                rev_props['author'] = author_id

        if _serializer_handles_escaping:
            message = self.command.message
        else:
            # We need to assume the bad ol' days
            message = helpers.escape_commit_message(self.command.message)

        return revision.Revision(
           revision_id=self.revision_id,
           parent_ids=self.parents,
           committer=who,
           timestamp=committer[2],
           timezone=committer[3],
           message=message,
           properties=rev_props)
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
219
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
220
    def _modify_item(self, path, kind, is_executable, data, inv):
        """Add to or change an item in the inventory.

        :param path: path of the item; missing parent directories are
          created via _ensure_directory()
        :param kind: 'file' or 'symlink'; anything else raises BzrError
        :param is_executable: executable flag, used for files only
        :param data: the file content for a file, or the link target
          for a symlink
        :param inv: the inventory used to decide whether the item is
          new or changed
        """
        # Create the new InventoryEntry
        basename, parent_id = self._ensure_directory(path, inv)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
        ie.revision = self.revision_id
        if kind == 'file':
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            self.lines_for_commit[file_id] = lines
        elif kind == 'symlink':
            ie.symlink_target = data.encode('utf8')
            # There are no lines stored for a symlink so
            # make sure the cache used by get_lines knows that
            self.lines_for_commit[file_id] = []
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))
        # Record it
        if file_id in inv:
            old_ie = inv[file_id]
            if old_ie.kind == 'directory':
                # A directory is being replaced by a file or symlink
                self.record_delete(path, old_ie)
            self.record_changed(path, ie, parent_id)
        else:
            try:
                self.record_new(path, ie)
            except Exception:
                print("failed to add path '%s' with entry '%s' in command %s"
                    % (path, ie, self.command.id))
                # ie.parent_id is a file-id, not an entry, so the parent
                # entry has to be looked up before its children can be
                # shown. Stay defensive here so that the diagnostic can
                # never mask the error that is about to be re-raised.
                parent_ie = None
                if ie.parent_id in inv:
                    parent_ie = inv[ie.parent_id]
                print("parent's children are:\n%r\n"
                    % (getattr(parent_ie, 'children', None),))
                raise
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
255
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
256
    def _ensure_directory(self, path, inv):
        """Ensure that the containing directory exists for 'path'

        Missing directories are created (recursively, parents first)
        and recorded as new.

        :return: basename, parent_id where
          basename = the last component of path
          parent_id = the file-id of the containing directory
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory_root_id

        try:
            existing = self._get_directory_entry(inv, dirname)
        except KeyError:
            # We will create this entry, since it doesn't exist
            existing = None
        if existing is not None:
            return basename, existing.file_id

        # No directory existed, we will just create one, first, make sure
        # the parent exists
        dir_basename, parent_id = self._ensure_directory(dirname, inv)
        dir_file_id = self.bzr_file_id(dirname)
        make_directory = inventory.entry_factory['directory']
        new_dir = make_directory(dir_file_id, dir_basename, parent_id)
        new_dir.revision = self.revision_id
        self.directory_entries[dirname] = new_dir
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []

        # It's possible that a file or symlink with that file-id
        # already exists. If it does, we need to delete it.
        if dir_file_id in inv:
            self.record_delete(dirname, new_dir)
        self.record_new(dirname, new_dir)
        return basename, new_dir.file_id
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
288
0.84.12 by Ian Clatworthy
lookup directories on demand in CHKInventories, not all upfront
289
    def _get_directory_entry(self, inv, dirname):
290
        """Get the inventory entry for a directory.
291
        
292
        Raises KeyError if dirname is not a directory in inv.
293
        """
294
        result = self.directory_entries.get(dirname)
295
        if result is None:
0.64.146 by Ian Clatworthy
fix first file is in a subdirectory bug for chk formats
296
            try:
297
                file_id = inv.path2id(dirname)
298
            except errors.NoSuchId:
299
                # In a CHKInventory, this is raised if there's no root yet
300
                raise KeyError
0.84.12 by Ian Clatworthy
lookup directories on demand in CHKInventories, not all upfront
301
            if file_id is None:
302
                raise KeyError
303
            result = inv[file_id]
304
            # dirname must be a directory for us to return it
305
            if result.kind == 'directory':
306
                self.directory_entries[dirname] = result
307
            else:
308
                raise KeyError
309
        return result
310
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
311
    def _delete_item(self, path, inv):
312
        file_id = inv.path2id(path)
0.64.148 by Ian Clatworthy
handle delete of unknown file in chk formats & reduce noise
313
        if file_id is None:
314
            self.mutter("ignoring delete of %s as not in inventory", path)
315
            return
0.64.145 by Ian Clatworthy
handle delete of missing files for chk formats
316
        try:
317
            ie = inv[file_id]
318
        except errors.NoSuchId:
0.64.148 by Ian Clatworthy
handle delete of unknown file in chk formats & reduce noise
319
            self.mutter("ignoring delete of %s as not in inventory", path)
0.64.145 by Ian Clatworthy
handle delete of missing files for chk formats
320
        else:
321
            self.record_delete(path, ie)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
322
323
    def _copy_item(self, src_path, dest_path, inv):
324
        if not self.parents:
325
            self.warning("ignoring copy of %s to %s - no parent revisions",
326
                src_path, dest_path)
327
            return
328
        file_id = inv.path2id(src_path)
329
        if file_id is None:
330
            self.warning("ignoring copy of %s to %s - source does not exist",
331
                src_path, dest_path)
332
            return
333
        ie = inv[file_id]
334
        kind = ie.kind
335
        if kind == 'file':
336
            content = self.rev_store.get_file_text(self.parents[0], file_id)
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
337
            self._modify_item(dest_path, kind, ie.executable, content, inv)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
338
        elif kind == 'symlink':
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
339
            self._modify_item(dest_path, kind, False, ie.symlink_target, inv)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
340
        else:
341
            self.warning("ignoring copy of %s %s - feature not yet supported",
342
                kind, path)
343
344
    def _rename_item(self, old_path, new_path, inv):
0.81.8 by Ian Clatworthy
refactor rename_item
345
        file_id = inv.path2id(old_path)
0.64.167 by Ian Clatworthy
incremental packing for chk formats
346
        if file_id is None:
347
            self.warning(
348
                "ignoring rename of %s to %s - old path does not exist" %
349
                (old_path, new_path))
350
            return
0.81.8 by Ian Clatworthy
refactor rename_item
351
        ie = inv[file_id]
352
        rev_id = ie.revision
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
353
        new_file_id = inv.path2id(new_path)
354
        if new_file_id is not None:
0.81.9 by Ian Clatworthy
refactor delete_item
355
            self.record_delete(new_path, inv[new_file_id])
0.81.8 by Ian Clatworthy
refactor rename_item
356
        self.record_rename(old_path, new_path, file_id, ie)
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
357
0.81.8 by Ian Clatworthy
refactor rename_item
358
        # The revision-id for this entry will be/has been updated and
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
359
        # that means the loader then needs to know what the "new" text is.
360
        # We therefore must go back to the revision store to get it.
0.81.8 by Ian Clatworthy
refactor rename_item
361
        lines = self.rev_store.get_file_lines(rev_id, file_id)
362
        self.lines_for_commit[file_id] = lines
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
363
364
    def _delete_all_items(self, inv):
365
        for name, root_item in inv.root.children.iteritems():
366
            inv.remove_recursive_id(root_item.file_id)
367
0.64.145 by Ian Clatworthy
handle delete of missing files for chk formats
368
    def _warn_unless_in_merges(self, fileid, path):
369
        if len(self.parents) <= 1:
370
            return
371
        for parent in self.parents[1:]:
372
            if fileid in self.get_inventory(parent):
373
                return
374
        self.warning("ignoring delete of %s as not in parent inventories", path)
375
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
376
377
class InventoryCommitHandler(GenericCommitHandler):
0.84.7 by Ian Clatworthy
CHKInventory support for non rich-root repos working, for simple imports at least
378
    """A CommitHandler that builds and saves Inventory objects."""
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
379
380
    def pre_process_files(self):
        """Prepare for committing, seeding the inventory to be mutated."""
        super(InventoryCommitHandler, self).pre_process_files()

        # Seed the inventory from the previous one. Note that
        # the parent class version of pre_process_files() has
        # already set the right basis_inventory for this branch
        # but we need to copy it in order to mutate it safely
        # without corrupting the cached inventory value.
        if self.parents:
            self.inventory = copy_inventory(self.basis_inventory)
        else:
            self.inventory = self.basis_inventory
        working_inv = self.inventory
        self.inventory_root = working_inv.root

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(working_inv.directories())

        # Initialise the inventory revision info as required
        if self.rev_store.expects_rich_root():
            working_inv.revision_id = self.revision_id
        else:
            # In this revision store, root entries have no knit or weave.
            # When serializing out to disk and back in, root.revision is
            # always the new revision_id.
            working_inv.root.revision = self.revision_id
405
406
    def post_process_files(self):
        """Save the revision.

        The finished inventory is cached under the new revision-id and
        then handed, with the revision, to the revision store. The store
        is given callbacks for fetching texts, per-file parents and
        inventories.
        """
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        text_provider = self._get_lines
        parents_provider = self._get_per_file_parents
        inventories_provider = self._get_inventories
        self.rev_store.load(self.revision, self.inventory, None,
            text_provider, parents_provider, inventories_provider)
413
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
414
    def record_new(self, path, ie):
        """Add the entry ie to the inventory being built.

        If the file-id is already taken, the existing entry is removed
        and the add is retried.
        """
        try:
            # If this is a merge, the file was most likely added already.
            # The per-file parent(s) must therefore be calculated and
            # we can't assume there are none.
            parents_and_revision = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            per_file_parents, ie.revision = parents_and_revision
            self.per_file_parents_for_commit[ie.file_id] = per_file_parents
            self.inventory.add(ie)
        except errors.DuplicateFileId:
            # Directory already exists as a file or symlink
            del self.inventory[ie.file_id]
            # Try again
            self.inventory.add(ie)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
428
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
429
    def record_changed(self, path, ie, parent_id):
        """Replace the inventory's existing entry for ie.file_id with ie.

        :param parent_id: file-id of the directory containing the entry
        """
        # HACK: no API for this (del+add does more than it needs to)
        store = self.rev_store
        per_file_parents, new_revision = \
            store.get_parents_and_revision_for_entry(ie)
        ie.revision = new_revision
        self.per_file_parents_for_commit[ie.file_id] = per_file_parents
        entries_by_id = self.inventory._byid
        entries_by_id[ie.file_id] = ie
        entries_by_id[parent_id].children[ie.name] = ie
437
0.81.9 by Ian Clatworthy
refactor delete_item
438
    def record_delete(self, path, ie):
        """Remove ie from the inventory via remove_recursive_id()."""
        doomed_id = ie.file_id
        self.inventory.remove_recursive_id(doomed_id)
0.81.8 by Ian Clatworthy
refactor rename_item
440
441
    def record_rename(self, old_path, new_path, file_id, ie):
        """Move the entry for file_id to new_path in the inventory.

        Missing parent directories of new_path are created first.
        """
        # For a rename, the revision-id is always the new one, so the
        # revision returned by the revision store is not used.
        ie.revision = self.revision_id
        parents_and_revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[file_id] = parents_and_revision[0]
        target = self._ensure_directory(new_path, self.inventory)
        new_basename, new_parent_id = target
        self.inventory.rename(file_id, new_parent_id, new_basename)
451
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
452
    def modify_handler(self, filecmd):
        """Apply a modify command to the inventory.

        The data is fetched from the blob cache when the command has a
        dataref; otherwise the command's inline data is used.
        """
        dataref = filecmd.dataref
        if dataref is None:
            data = filecmd.data
        else:
            data = self.cache_mgr.fetch_blob(dataref)
        self.debug("modifying %s", filecmd.path)
        self._modify_item(filecmd.path, filecmd.kind,
            filecmd.is_executable, data, self.inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
460
461
    def delete_handler(self, filecmd):
        """Apply a delete command to the inventory."""
        doomed_path = filecmd.path
        self.debug("deleting %s", doomed_path)
        self._delete_item(doomed_path, self.inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
464
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
465
    def copy_handler(self, filecmd):
        """Apply a copy command to the inventory."""
        source, destination = filecmd.src_path, filecmd.dest_path
        self.debug("copying %s to %s", source, destination)
        self._copy_item(source, destination, self.inventory)
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
470
471
    def rename_handler(self, filecmd):
        """Apply a rename command to the inventory."""
        source, destination = filecmd.old_path, filecmd.new_path
        self.debug("renaming %s to %s", source, destination)
        self._rename_item(source, destination, self.inventory)
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
476
477
    def deleteall_handler(self, filecmd):
        """Apply a deleteall command: empty the inventory being built."""
        working_inv = self.inventory
        self.debug("deleting all files (and also all directories)")
        self._delete_all_items(working_inv)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
480
481
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
482
class InventoryDeltaCommitHandler(GenericCommitHandler):
483
    """A CommitHandler that builds Inventories by applying a delta."""
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
484
485
    def pre_process_files(self):
        """Reset the per-commit delta state before file commands are handled.

        After delegating to the base class, this clears the set of
        directories that may need pruning and the accumulated delta
        entries, then queues a root entry when this is the first revision
        or the repository does not expect rich roots.
        """
        super(InventoryDeltaCommitHandler, self).pre_process_files()
        self._dirs_that_might_become_empty = set()

        # A given file-id can only appear once so we accumulate
        # the entries in a dict then build the actual delta at the end
        self._delta_entries_by_fileid = {}
        if not self.parents or not self.rev_store.expects_rich_root():
            # Need to explicitly add the root entry for the first revision
            # and for non rich-root inventories
            if self.parents:
                # The root already exists in the basis: record a change
                old_path = ''
            else:
                # No parents, so the root is being added
                old_path = None
            root_id = inventory.ROOT_ID
            root_ie = inventory.InventoryDirectory(root_id, u'', None)
            root_ie.revision = self.revision_id
            self._add_entry((old_path, '', root_id, root_ie))
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
503
504
    def post_process_files(self):
        """Save the revision.

        The final delta is handed to the revision store together with
        callbacks for fetching file lines, per-file parents and
        inventories; the inventory it returns is cached under this
        revision's id.
        """
        delta = self._get_final_delta()
        inv = self.rev_store.load_using_delta(self.revision,
            self.basis_inventory, delta, None,
            lambda file_id: self._get_lines(file_id),
            lambda file_id: self._get_per_file_parents(file_id),
            lambda revision_ids: self._get_inventories(revision_ids))
        self.cache_mgr.inventories[self.revision_id] = inv
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
514
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
515
    def _get_final_delta(self):
        """Generate the final delta.

        Smart post-processing of changes, e.g. pruning of directories
        that would become empty, goes here.

        :return: a list of (old_path, new_path, file_id, entry) tuples
            built from the accumulated entries, followed by a delete
            entry for each directory selected for pruning.
        """
        delta = list(self._delta_entries_by_fileid.values())
        if self.prune_empty_dirs and self._dirs_that_might_become_empty:
            candidates = osutils.minimum_path_selection(
                self._dirs_that_might_become_empty)
            # The list of prunable directories is computed in full before
            # any delete entry is appended to the delta.
            prunable = self._empty_after_delta(delta, candidates)
            delta.extend((path, None, file_id, None)
                for path, file_id in prunable)
        return delta
529
530
    def _empty_after_delta(self, delta, candidates):
        """Find the directories that are empty once ``delta`` is applied.

        The delta is applied to a mutable copy of the basis inventory and
        each candidate is looked up in the result.

        :param delta: list of (old_path, new_path, file_id, entry) tuples.
        :param candidates: paths of directories that may have lost
            children.
        :return: a list of (path, file_id) pairs for the directories to
            prune, including parents left holding at most one child.
        """
        new_inv = self.basis_inventory._get_mutable_inventory()
        new_inv.apply_delta(delta)
        result = []
        for dir_path in candidates:
            file_id = new_inv.path2id(dir_path)
            if file_id is None:
                # The candidate itself was removed by the delta
                continue
            ie = new_inv[file_id]
            if ie.kind != 'directory':
                # Only directories have children to count
                continue
            if len(ie.children) != 0:
                continue
            result.append((dir_path, file_id))
            if self.verbose:
                self.note("pruning empty directory %s" % (dir_path,))
            # Check parents in case deleting this dir makes *them* empty
            while True:
                file_id = ie.parent_id
                if file_id is None or file_id == inventory.ROOT_ID:
                    # We've reached the root
                    break
                try:
                    ie = new_inv[file_id]
                except errors.NoSuchId:
                    break
                if ie.parent_id is None:
                    # A root whose id is not ROOT_ID: never prune the root
                    break
                if len(ie.children) > 1:
                    break
                parent_path = new_inv.id2path(file_id)
                result.append((parent_path, file_id))
                if self.verbose:
                    self.note("pruning empty directory parent %s" %
                        (parent_path,))
        return result
558
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
559
    def _add_entry(self, entry):
        """Record a delta entry, merging it with any earlier one for the file.

        Besides storing the entry, this notes parent directories that may
        become empty and works out the per-file parents for the commit.

        :param entry: an (old_path, new_path, file_id, inventory_entry)
            tuple. old_path is None for an add; new_path and the
            inventory entry are None for a delete.
        """
        # We need to combine the data if multiple entries have the same file-id.
        # For example, a rename followed by a modification looks like:
        #
        # (x, y, f, e) & (y, y, f, g) => (x, y, f, g)
        #
        # Likewise, a modification followed by a rename looks like:
        #
        # (x, x, f, e) & (x, y, f, g) => (x, y, f, g)
        #
        # Here's a rename followed by a delete and a modification followed by
        # a delete:
        #
        # (x, y, f, e) & (y, None, f, None) => (x, None, f, None)
        # (x, x, f, e) & (x, None, f, None) => (x, None, f, None)
        #
        # In summary, we use the original old-path, new new-path and new ie
        # when combining entries.
        old_path, new_path, file_id, ie = entry
        existing = self._delta_entries_by_fileid.get(file_id, None)
        if existing is not None:
            old_path = existing[0]
            entry = (old_path, new_path, file_id, ie)

        if old_path is None and new_path is None:
            # A delete cancelling an add made earlier in this commit:
            #
            # (None, y, f, e) & (y, None, f, None) => nothing
            #
            # The file is not in the basis, so there is nothing to put in
            # the delta. The directory it was added to may now be empty.
            self._delta_entries_by_fileid.pop(file_id, None)
            if existing is not None and existing[1] is not None:
                parent_dir = osutils.dirname(existing[1])
                if parent_dir:
                    self._dirs_that_might_become_empty.add(parent_dir)
            return
        self._delta_entries_by_fileid[file_id] = entry

        # Collect parent directories that might become empty
        if new_path is None:
            # delete
            parent_dir = osutils.dirname(old_path)
            # note: no need to check the root
            if parent_dir:
                self._dirs_that_might_become_empty.add(parent_dir)
        elif old_path is not None and old_path != new_path:
            # rename
            old_parent_dir = osutils.dirname(old_path)
            new_parent_dir = osutils.dirname(new_path)
            if old_parent_dir and old_parent_dir != new_parent_dir:
                self._dirs_that_might_become_empty.add(old_parent_dir)

        # Calculate the per-file parents, if not already done
        if file_id in self.per_file_parents_for_commit:
            return
        if new_path is None:
            # delete: there are no per-file parents to record
            return
        per_file_parents, entry_revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        if old_path is None or old_path == new_path:
            # add or modify: take the revision calculated by the store.
            # If this is a merge, an added file was most likely added
            # already, so its per-file parent(s) must be calculated and
            # we can't assume there are none.
            ie.revision = entry_revision
        # For a rename, the revision already set on the entry is kept.
        self.per_file_parents_for_commit[file_id] = per_file_parents
625
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
626
    def record_new(self, path, ie):
        """Queue a delta entry that adds ``ie`` at ``path``.

        :param path: the path the entry is added at.
        :param ie: the inventory entry; its ``file_id`` keys the delta.
        """
        self._add_entry((None, path, ie.file_id, ie))
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
628
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
629
    def record_changed(self, path, ie, parent_id=None):
        """Queue a delta entry that modifies ``ie`` in place at ``path``.

        :param path: the path of the entry, used as both old and new path.
        :param ie: the inventory entry; its ``file_id`` keys the delta.
        :param parent_id: accepted but not used by this implementation.
        """
        self._add_entry((path, path, ie.file_id, ie))
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
631
0.81.9 by Ian Clatworthy
refactor delete_item
632
    def record_delete(self, path, ie):
        """Queue delta entries that delete ``ie`` and, for a directory,
        everything the basis inventory holds beneath it.

        :param path: the path of the entry being deleted.
        :param ie: the inventory entry being deleted.
        """
        self._add_entry((path, None, ie.file_id, None))
        if ie.kind == 'directory':
            # Child paths yielded by the basis inventory are relative to
            # the directory, so join them onto its path.
            for child_relpath, entry in \
                self.basis_inventory.iter_entries_by_dir(from_dir=ie):
                child_path = osutils.pathjoin(path, child_relpath)
                self._add_entry((child_path, None, entry.file_id, None))
0.81.8 by Ian Clatworthy
refactor rename_item
639
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
640
    def record_rename(self, old_path, new_path, file_id, old_ie):
        """Queue a delta entry that moves a file from old_path to new_path.

        A copy of ``old_ie`` is given the new name, the new parent and
        this commit's revision id; ``old_ie`` itself is left untouched.

        :param old_path: the path the entry currently has.
        :param new_path: the path the entry is moving to.
        :param file_id: the file-id of the entry.
        :param old_ie: the inventory entry as it was before the rename.
        """
        new_ie = old_ie.copy()
        new_basename, new_parent_id = self._ensure_directory(new_path,
            self.basis_inventory)
        new_ie.name = new_basename
        new_ie.parent_id = new_parent_id
        new_ie.revision = self.revision_id
        self._add_entry((old_path, new_path, file_id, new_ie))
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
648
649
    def modify_handler(self, filecmd):
        """Process a modify command against the basis inventory.

        :param filecmd: the modify command. When its ``dataref`` is set
            the content is fetched from the cache manager; otherwise the
            inline ``data`` is used.
        """
        if filecmd.dataref is not None:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        else:
            data = filecmd.data
        self.debug("modifying %s", filecmd.path)
        self._modify_item(filecmd.path, filecmd.kind,
            filecmd.is_executable, data, self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
657
658
    def delete_handler(self, filecmd):
        """Process a delete command against the basis inventory.

        :param filecmd: the delete command; only its ``path`` attribute
            is read.
        """
        self.debug("deleting %s", filecmd.path)
        self._delete_item(filecmd.path, self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
661
662
    def copy_handler(self, filecmd):
        """Process a copy command against the basis inventory.

        :param filecmd: the copy command; its ``src_path`` and
            ``dest_path`` attributes name the source and the destination.
        """
        src_path = filecmd.src_path
        dest_path = filecmd.dest_path
        self.debug("copying %s to %s", src_path, dest_path)
        self._copy_item(src_path, dest_path, self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
667
668
    def rename_handler(self, filecmd):
        """Process a rename command against the basis inventory.

        :param filecmd: the rename command; its ``old_path`` and
            ``new_path`` attributes name the source and the destination.
        """
        old_path = filecmd.old_path
        new_path = filecmd.new_path
        self.debug("renaming %s to %s", old_path, new_path)
        self._rename_item(old_path, new_path, self.basis_inventory)
673
674
    def deleteall_handler(self, filecmd):
        """Process a deleteall command by emptying a copy of the basis.

        :param filecmd: the deleteall command; it is not inspected.
        """
        self.debug("deleting all files (and also all directories)")
        # I'm not 100% sure this will work in the delta case.
        # But clearing out the basis inventory so that everything
        # is added sounds ok in theory ...
        # We grab a copy as the basis is likely to be cached and
        # we don't want to destroy the cached version
        self.basis_inventory = copy_inventory(self.basis_inventory)
        self._delete_all_items(self.basis_inventory)