/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""CommitHandlers that build and save revisions & their inventories."""
18
19
20
from bzrlib import (
21
    errors,
22
    generate_ids,
23
    inventory,
24
    osutils,
25
    revision,
0.64.192 by Ian Clatworthy
delegate commit message escaping to the serializer if it's a modern one
26
    serializer,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
27
    )
28
from bzrlib.plugins.fastimport import helpers, processor
29
30
0.64.192 by Ian Clatworthy
delegate commit message escaping to the serializer if it's a modern one
31
# True when this bzrlib's Serializer advertises that it copes with
# XML-invalid characters itself; build_revision() only escapes commit
# messages by hand when this is False.
_serializer_handles_escaping = hasattr(
    serializer.Serializer, 'squashes_xml_invalid_characters')
33
34
0.84.3 by Ian Clatworthy
fix inventory copying when using deltas
35
def copy_inventory(inv):
    """Return a copy of an inventory.

    :param inv: the inventory to copy
    :return: whatever ``inv.copy()`` returns
    """
    # NOTE: returning inv._get_mutable_inventory() (when the inventory has
    # one) was tried here but currently breaks revision-id matching.
    # TODO: Make _get_mutable_inventory a public API on inventory
    # TODO: Shallow copy - deep inventory copying is expensive
    duplicate = inv.copy()
    return duplicate
0.84.3 by Ian Clatworthy
fix inventory copying when using deltas
43
44
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
45
class GenericCommitHandler(processor.CommitHandler):
46
    """Base class for Bazaar CommitHandlers."""
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
47
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
48
    def __init__(self, command, cache_mgr, rev_store, verbose=False,
        prune_empty_dirs=True):
        """Create a handler for a single commit command.

        :param command: the commit command to process; its ``ref`` is
          remembered as ``branch_ref``
        :param cache_mgr: the cache manager (heads, revision-ids and
          inventories are looked up through it)
        :param rev_store: the revision store that revisions are loaded into
        :param verbose: if True, report cache misses
        :param prune_empty_dirs: stored as given; not consulted by this
          base class
        """
        super(GenericCommitHandler, self).__init__(command)
        self.branch_ref = command.ref
        self.rev_store = rev_store
        self.cache_mgr = cache_mgr
        self.prune_empty_dirs = prune_empty_dirs
        self.verbose = verbose
        # path -> file-id for everything created by this commit. When the
        # same path is created more than once, the user is warned and
        # the path is added just the once.
        self._new_file_ids = {}
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
60
61
    def pre_process_files(self):
        """Prepare for committing.

        Sets up the per-commit state: the new revision id, the parent
        revision-ids, the Revision object, the parent inventories, the
        per-file caches and the (read-only) basis inventory.
        """
        self.revision_id = self.gen_revision_id()

        # Texts for this commit, keyed by file-id. The root always gets
        # an (empty) entry.
        #if self.rev_store.expects_rich_root():
        self.lines_for_commit = {inventory.ROOT_ID: []}

        # Track the heads to learn the real parents, then map those
        # commit-ids onto bzr revision-ids
        commit_parents = self.cache_mgr.track_heads(self.command)
        if commit_parents:
            self.parents = [self.cache_mgr.revision_ids[commit_id]
                for commit_id in commit_parents]
        else:
            self.parents = []
        self.debug("%s id: %s, parents: %s", self.command.id,
            self.revision_id, str(self.parents))

        # Tell the RevisionStore we're starting a new commit
        self.revision = self.build_revision()
        self.parent_invs = [self.get_inventory(parent_id)
            for parent_id in self.parents]
        self.rev_store.start_new_revision(self.revision, self.parents,
            self.parent_invs)

        # Per-file parents for this commit, keyed by file-id
        self.per_file_parents_for_commit = {}
        if self.rev_store.expects_rich_root():
            self.per_file_parents_for_commit[inventory.ROOT_ID] = ()

        # Keep the basis inventory. This needs to be treated as read-only.
        if self.parents:
            self.basis_inventory = self.get_inventory(self.parents[0])
        else:
            self.basis_inventory = self._init_inventory()
        basis = self.basis_inventory
        if hasattr(basis, "root_id"):
            self.inventory_root_id = basis.root_id
        else:
            self.inventory_root_id = basis.root.file_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = {}
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
104
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
105
    def _init_inventory(self):
106
        return self.rev_store.init_inventory(self.revision_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
107
108
    def get_inventory(self, revision_id):
109
        """Get the inventory for a revision id."""
110
        try:
111
            inv = self.cache_mgr.inventories[revision_id]
112
        except KeyError:
113
            if self.verbose:
0.64.148 by Ian Clatworthy
handle delete of unknown file in chk formats & reduce noise
114
                self.mutter("get_inventory cache miss for %s", revision_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
115
            # Not cached so reconstruct from the RevisionStore
116
            inv = self.rev_store.get_inventory(revision_id)
117
            self.cache_mgr.inventories[revision_id] = inv
118
        return inv
119
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
120
    def _get_lines(self, file_id):
121
        """Get the lines for a file-id."""
122
        return self.lines_for_commit[file_id]
123
0.85.2 by Ian Clatworthy
improve per-file graph generation
124
    def _get_per_file_parents(self, file_id):
125
        """Get the lines for a file-id."""
126
        return self.per_file_parents_for_commit[file_id]
127
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
128
    def _get_inventories(self, revision_ids):
129
        """Get the inventories for revision-ids.
130
        
131
        This is a callback used by the RepositoryStore to
132
        speed up inventory reconstruction.
133
        """
134
        present = []
135
        inventories = []
136
        # If an inventory is in the cache, we assume it was
137
        # successfully loaded into the revision store
138
        for revision_id in revision_ids:
139
            try:
140
                inv = self.cache_mgr.inventories[revision_id]
141
                present.append(revision_id)
142
            except KeyError:
143
                if self.verbose:
144
                    self.note("get_inventories cache miss for %s", revision_id)
145
                # Not cached so reconstruct from the revision store
146
                try:
147
                    inv = self.get_inventory(revision_id)
148
                    present.append(revision_id)
149
                except:
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
150
                    inv = self._init_inventory()
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
151
                self.cache_mgr.inventories[revision_id] = inv
152
            inventories.append(inv)
153
        return present, inventories
154
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
155
    def bzr_file_id_and_new(self, path):
156
        """Get a Bazaar file identifier and new flag for a path.
157
        
158
        :return: file_id, is_new where
159
          is_new = True if the file_id is newly created
160
        """
0.99.1 by Ian Clatworthy
lookup file-ids in inventories instead of a cache
161
        # Try the basis inventory
162
        id = self.basis_inventory.path2id(path)
163
        if id is not None:
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
164
            return id, False
0.99.1 by Ian Clatworthy
lookup file-ids in inventories instead of a cache
165
        
166
        # Try the other inventories
167
        if len(self.parents) > 1:
168
            for inv in self.parent_invs[1:]:
169
                id = self.basis_inventory.path2id(path)
170
                if id is not None:
171
                    return id, False
172
173
        # Doesn't exist yet so create it
174
        id = generate_ids.gen_file_id(path)
175
        self.debug("Generated new file id %s for '%s' in revision-id '%s'",
176
            id, path, self.revision_id)
0.99.5 by Ian Clatworthy
handle adding the same file twice in the one commit
177
        self._new_file_ids[path] = id
0.99.1 by Ian Clatworthy
lookup file-ids in inventories instead of a cache
178
        return id, True
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
179
180
    def bzr_file_id(self, path):
181
        """Get a Bazaar file identifier for a path."""
182
        return self.bzr_file_id_and_new(path)[0]
183
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
184
    def _format_name_email(self, name, email):
185
        """Format name & email as a string."""
186
        if email:
187
            return "%s <%s>" % (name, email)
188
        else:
189
            return name
190
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
191
    def gen_revision_id(self):
        """Generate a revision id.

        The id is built from the committer (name & email) and the
        committer timestamp of the command.

        Subclasses may override this to produce deterministic ids say.
        """
        committer = self.command.committer
        timestamp = committer[2]
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = self._format_name_email(committer[0], committer[1])
        return generate_ids.gen_revision_id(who, timestamp)
202
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
203
    def build_revision(self):
        """Build the Revision object for the command being processed.

        The author is recorded as a revision property only when it
        differs from the committer. The message is escaped here only if
        the serializer doesn't do that itself.
        """
        committer = self.command.committer
        who = self._format_name_email(committer[0], committer[1])

        rev_props = {}
        author = self.command.author
        if author is not None:
            author_id = self._format_name_email(author[0], author[1])
            if author_id != who:
                rev_props['author'] = author_id

        message = self.command.message
        if not _serializer_handles_escaping:
            # We need to assume the bad ol' days
            message = helpers.escape_commit_message(message)

        return revision.Revision(
            revision_id=self.revision_id,
            parent_ids=self.parents,
            committer=who,
            timestamp=committer[2],
            timezone=committer[3],
            message=message,
            properties=rev_props)
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
224
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
225
    def _modify_item(self, path, kind, is_executable, data, inv):
        """Add to or change an item in the inventory.

        :param path: path of the item
        :param kind: 'file' or 'symlink'; any other kind raises BzrError
        :param is_executable: the executable flag (only used for files)
        :param data: the file content, or the symlink target
        :param inv: the inventory being updated
        """
        # If we've already added this, warn the user that we're ignoring it.
        # In the future, it might be nice to double check that the new data
        # is the same as the old but, frankly, exporters should be fixed
        # not to produce bad data streams in the first place ...
        if self._new_file_ids.get(path):
            self.warning("%s already added in this commit - ignoring" % (path,))
            return

        # Create the new InventoryEntry
        basename, parent_id = self._ensure_directory(path, inv)
        file_id = self.bzr_file_id(path)
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
        ie.revision = self.revision_id
        if kind == 'file':
            ie.executable = is_executable
            lines = osutils.split_lines(data)
            ie.text_sha1 = osutils.sha_strings(lines)
            ie.text_size = sum(map(len, lines))
            self.lines_for_commit[file_id] = lines
        elif kind == 'symlink':
            ie.symlink_target = data.encode('utf8')
            # There are no lines stored for a symlink so
            # make sure the cache used by get_lines knows that
            self.lines_for_commit[file_id] = []
        else:
            raise errors.BzrError("Cannot import items of kind '%s' yet" %
                (kind,))

        # Record it
        if file_id in inv:
            old_ie = inv[file_id]
            if old_ie.kind == 'directory':
                self.record_delete(path, old_ie)
            self.record_changed(path, ie, parent_id)
        else:
            try:
                self.record_new(path, ie)
            except Exception:
                # The diagnostics live in a helper so that any exception
                # handled while gathering them cannot replace the one
                # re-raised below (a Python 2 pitfall with bare raise).
                self._report_add_failure(path, ie, inv)
                raise

    def _report_add_failure(self, path, ie, inv):
        """Warn about a failed record_new(), listing the parent's children."""
        try:
            # ie.parent_id is a file-id, so go via the inventory to reach
            # the parent entry
            siblings = inv[ie.parent_id].children
        except Exception:
            # Diagnostics only - never let this hide the real failure
            siblings = None
        self.warning("failed to add path '%s' with entry '%s' in command %s",
            path, ie, self.command.id)
        self.warning("parent's children are:\n%r\n", siblings)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
269
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
270
    def _ensure_directory(self, path, inv):
        """Ensure that the containing directory exists for 'path'.

        Missing directories are created (recursively, parents first) and
        recorded via record_new().

        :param path: path of the item whose containing directory is needed
        :param inv: the inventory to look directories up in
        :return: (basename, parent_id) - the last component of path and
          the file-id of the directory containing it
        """
        dirname, basename = osutils.split(path)
        if dirname == '':
            # the root node doesn't get updated
            return basename, self.inventory_root_id
        try:
            existing = self._get_directory_entry(inv, dirname)
        except KeyError:
            # We will create this entry, since it doesn't exist
            pass
        else:
            return basename, existing.file_id

        # No directory existed so create one, after first making sure
        # that its own parent exists
        dir_basename, grandparent_id = self._ensure_directory(dirname, inv)
        dir_file_id = self.bzr_file_id(dirname)
        make_directory = inventory.entry_factory['directory']
        new_dir = make_directory(dir_file_id, dir_basename, grandparent_id)
        new_dir.revision = self.revision_id
        self.directory_entries[dirname] = new_dir
        # There are no lines stored for a directory so
        # make sure the cache used by get_lines knows that
        self.lines_for_commit[dir_file_id] = []

        # It's possible that a file or symlink with that file-id
        # already exists. If it does, we need to delete it.
        if dir_file_id in inv:
            self.record_delete(dirname, new_dir)
        self.record_new(dirname, new_dir)
        return basename, new_dir.file_id
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
302
0.84.12 by Ian Clatworthy
lookup directories on demand in CHKInventories, not all upfront
303
    def _get_directory_entry(self, inv, dirname):
304
        """Get the inventory entry for a directory.
305
        
306
        Raises KeyError if dirname is not a directory in inv.
307
        """
308
        result = self.directory_entries.get(dirname)
309
        if result is None:
0.64.146 by Ian Clatworthy
fix first file is in a subdirectory bug for chk formats
310
            try:
311
                file_id = inv.path2id(dirname)
312
            except errors.NoSuchId:
313
                # In a CHKInventory, this is raised if there's no root yet
314
                raise KeyError
0.84.12 by Ian Clatworthy
lookup directories on demand in CHKInventories, not all upfront
315
            if file_id is None:
316
                raise KeyError
317
            result = inv[file_id]
318
            # dirname must be a directory for us to return it
319
            if result.kind == 'directory':
320
                self.directory_entries[dirname] = result
321
            else:
322
                raise KeyError
323
        return result
324
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
325
    def _delete_item(self, path, inv):
        """Record the deletion of path, if inv knows about it.

        A path that isn't in the inventory is quietly noted (via mutter)
        and otherwise ignored.
        """
        file_id = inv.path2id(path)
        if file_id is not None:
            try:
                ie = inv[file_id]
            except errors.NoSuchId:
                pass
            else:
                self.record_delete(path, ie)
                return
        self.mutter("ignoring delete of %s as not in inventory", path)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
336
337
    def _copy_item(self, src_path, dest_path, inv):
338
        if not self.parents:
339
            self.warning("ignoring copy of %s to %s - no parent revisions",
340
                src_path, dest_path)
341
            return
342
        file_id = inv.path2id(src_path)
343
        if file_id is None:
344
            self.warning("ignoring copy of %s to %s - source does not exist",
345
                src_path, dest_path)
346
            return
347
        ie = inv[file_id]
348
        kind = ie.kind
349
        if kind == 'file':
350
            content = self.rev_store.get_file_text(self.parents[0], file_id)
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
351
            self._modify_item(dest_path, kind, ie.executable, content, inv)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
352
        elif kind == 'symlink':
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
353
            self._modify_item(dest_path, kind, False, ie.symlink_target, inv)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
354
        else:
355
            self.warning("ignoring copy of %s %s - feature not yet supported",
356
                kind, path)
357
358
    def _rename_item(self, old_path, new_path, inv):
        """Rename old_path to new_path, replacing anything already there.

        A rename of a path that isn't in inv is ignored with a warning.
        """
        file_id = inv.path2id(old_path)
        if file_id is None:
            self.warning(
                "ignoring rename of %s to %s - old path does not exist" %
                (old_path, new_path))
            return
        ie = inv[file_id]
        # Grab this now: it is used below, after record_rename() has had
        # the chance to change the entry
        previous_rev_id = ie.revision
        displaced_id = inv.path2id(new_path)
        if displaced_id is not None:
            self.record_delete(new_path, inv[displaced_id])
        self.record_rename(old_path, new_path, file_id, ie)

        # The revision-id for this entry will be/has been updated and
        # that means the loader then needs to know what the "new" text is.
        # We therefore must go back to the revision store to get it.
        self.lines_for_commit[file_id] = self.rev_store.get_file_lines(
            previous_rev_id, file_id)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
377
378
    def _delete_all_items(self, inv):
379
        for name, root_item in inv.root.children.iteritems():
380
            inv.remove_recursive_id(root_item.file_id)
381
0.64.145 by Ian Clatworthy
handle delete of missing files for chk formats
382
    def _warn_unless_in_merges(self, fileid, path):
383
        if len(self.parents) <= 1:
384
            return
385
        for parent in self.parents[1:]:
386
            if fileid in self.get_inventory(parent):
387
                return
388
        self.warning("ignoring delete of %s as not in parent inventories", path)
389
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
390
391
class InventoryCommitHandler(GenericCommitHandler):
0.84.7 by Ian Clatworthy
CHKInventory support for non rich-root repos working, for simple imports at least
392
    """A CommitHandler that builds and saves Inventory objects."""
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
393
394
    def pre_process_files(self):
        """Prepare for committing, seeding the inventory to be mutated."""
        super(InventoryCommitHandler, self).pre_process_files()

        # Seed the inventory from the previous one. Note that
        # the parent class version of pre_process_files() has
        # already set the right basis_inventory for this branch
        # but we need to copy it in order to mutate it safely
        # without corrupting the cached inventory value.
        if self.parents:
            self.inventory = copy_inventory(self.basis_inventory)
        else:
            self.inventory = self.basis_inventory
        self.inventory_root = self.inventory.root

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(self.inventory.directories())

        # Initialise the inventory revision info as required
        if not self.rev_store.expects_rich_root():
            # In this revision store, root entries have no knit or weave.
            # When serializing out to disk and back in, root.revision is
            # always the new revision_id.
            self.inventory.root.revision = self.revision_id
        else:
            self.inventory.revision_id = self.revision_id
419
420
    def post_process_files(self):
421
        """Save the revision."""
422
        self.cache_mgr.inventories[self.revision_id] = self.inventory
0.85.2 by Ian Clatworthy
improve per-file graph generation
423
        self.rev_store.load(self.revision, self.inventory, None,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
424
            lambda file_id: self._get_lines(file_id),
0.85.2 by Ian Clatworthy
improve per-file graph generation
425
            lambda file_id: self._get_per_file_parents(file_id),
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
426
            lambda revision_ids: self._get_inventories(revision_ids))
427
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
428
    def record_new(self, path, ie):
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
429
        try:
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
430
            # If this is a merge, the file was most likely added already.
431
            # The per-file parent(s) must therefore be calculated and
432
            # we can't assume there are none.
433
            per_file_parents, ie.revision = \
434
                self.rev_store.get_parents_and_revision_for_entry(ie)
435
            self.per_file_parents_for_commit[ie.file_id] = per_file_parents
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
436
            self.inventory.add(ie)
437
        except errors.DuplicateFileId:
438
            # Directory already exists as a file or symlink
439
            del self.inventory[ie.file_id]
440
            # Try again
441
            self.inventory.add(ie)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
442
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
443
    def record_changed(self, path, ie, parent_id):
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
444
        # HACK: no API for this (del+add does more than it needs to)
0.85.2 by Ian Clatworthy
improve per-file graph generation
445
        per_file_parents, ie.revision = \
446
            self.rev_store.get_parents_and_revision_for_entry(ie)
447
        self.per_file_parents_for_commit[ie.file_id] = per_file_parents
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
448
        self.inventory._byid[ie.file_id] = ie
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
449
        parent_ie = self.inventory._byid[parent_id]
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
450
        parent_ie.children[ie.name] = ie
451
0.81.9 by Ian Clatworthy
refactor delete_item
452
    def record_delete(self, path, ie):
453
        self.inventory.remove_recursive_id(ie.file_id)
0.81.8 by Ian Clatworthy
refactor rename_item
454
455
    def record_rename(self, old_path, new_path, file_id, ie):
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
456
        # For a rename, the revision-id is always the new one so
457
        # no need to change/set it here
458
        ie.revision = self.revision_id
459
        per_file_parents, _ = \
460
            self.rev_store.get_parents_and_revision_for_entry(ie)
461
        self.per_file_parents_for_commit[file_id] = per_file_parents
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
462
        new_basename, new_parent_id = self._ensure_directory(new_path,
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
463
            self.inventory)
0.81.8 by Ian Clatworthy
refactor rename_item
464
        self.inventory.rename(file_id, new_parent_id, new_basename)
465
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
466
    def modify_handler(self, filecmd):
467
        if filecmd.dataref is not None:
468
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
469
        else:
470
            data = filecmd.data
471
        self.debug("modifying %s", filecmd.path)
472
        self._modify_item(filecmd.path, filecmd.kind,
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
473
            filecmd.is_executable, data, self.inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
474
475
    def delete_handler(self, filecmd):
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
476
        self.debug("deleting %s", filecmd.path)
0.84.10 by Ian Clatworthy
fix TREE_ROOT delta entry after 1st revision & tweak _delete_item usage
477
        self._delete_item(filecmd.path, self.inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
478
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
479
    def copy_handler(self, filecmd):
480
        src_path = filecmd.src_path
481
        dest_path = filecmd.dest_path
482
        self.debug("copying %s to %s", src_path, dest_path)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
483
        self._copy_item(src_path, dest_path, self.inventory)
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
484
485
    def rename_handler(self, filecmd):
486
        old_path = filecmd.old_path
487
        new_path = filecmd.new_path
488
        self.debug("renaming %s to %s", old_path, new_path)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
489
        self._rename_item(old_path, new_path, self.inventory)
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
490
491
    def deleteall_handler(self, filecmd):
492
        self.debug("deleting all files (and also all directories)")
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
493
        self._delete_all_items(self.inventory)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
494
495
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
496
class InventoryDeltaCommitHandler(GenericCommitHandler):
497
    """A CommitHandler that builds Inventories by applying a delta."""
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
498
499
    def pre_process_files(self):
        """Reset the per-commit delta state before file commands are handled.

        After the base class has done its own preparation, this starts the
        commit with an empty set of directories to consider for pruning and
        an empty file-id -> delta-entry map.  A delta entry for the tree
        root is then recorded when the commit has no parents or when the
        revision store does not expect rich roots.
        """
        super(InventoryDeltaCommitHandler, self).pre_process_files()
        self._dirs_that_might_become_empty = set()

        # A file-id may only appear once in a delta, so entries are keyed by
        # file-id here and flattened into the actual delta at the end.
        self._delta_entries_by_fileid = {}

        needs_root_entry = (len(self.parents) == 0
            or not self.rev_store.expects_rich_root())
        if not needs_root_entry:
            return

        # The root entry has to be added explicitly for the first revision
        # and for non rich-root inventories.  When there are parents the
        # root is recorded as already living at ''; otherwise it is new.
        root_old_path = '' if self.parents else None
        root_id = inventory.ROOT_ID
        root_entry = inventory.InventoryDirectory(root_id, u'', None)
        root_entry.revision = self.revision_id
        self._add_entry((root_old_path, '', root_id, root_entry))
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
517
518
    def post_process_files(self):
        """Save the revision and cache the inventory that results.

        The final delta is handed to the revision store together with the
        basis inventory and three callbacks (text lines for a file-id,
        per-file parents for a file-id, inventories for revision-ids).  The
        inventory returned by the store is cached under self.revision_id.
        """
        final_delta = self._get_final_delta()
        # NOTE(review): the meaning of the fourth positional argument (None)
        # is defined by rev_store.load_using_delta, which is not visible
        # here -- confirm against that method before changing it.
        new_inv = self.rev_store.load_using_delta(
            self.revision,
            self.basis_inventory,
            final_delta,
            None,
            lambda file_id: self._get_lines(file_id),
            lambda file_id: self._get_per_file_parents(file_id),
            lambda revision_ids: self._get_inventories(revision_ids))
        self.cache_mgr.inventories[self.revision_id] = new_inv
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
528
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
529
    def _get_final_delta(self):
        """Build the delta list to commit from the accumulated entries.

        This is where smart post-processing of the changes belongs.  At
        present that means appending delete entries for directories that
        would be left empty, when pruning is enabled and there is at least
        one directory to consider.

        :return: a list of (old_path, new_path, file_id, entry) tuples.
        """
        delta = list(self._delta_entries_by_fileid.values())
        if not self.prune_empty_dirs or not self._dirs_that_might_become_empty:
            return delta
        candidates = osutils.minimum_path_selection(
            self._dirs_that_might_become_empty)
        # The list of prunable directories is fully computed before any
        # delete entry is appended to the delta.
        prunable = self._empty_after_delta(delta, candidates)
        delta.extend((path, None, file_id, None) for path, file_id in prunable)
        return delta
543
544
    def _empty_after_delta(self, delta, candidates):
        """Find the directories that are empty once delta has been applied.

        The delta is applied to a mutable copy of the basis inventory and
        each candidate is looked up in the result.  An empty candidate is
        reported, followed by any ancestors for which it was the only child.

        :param delta: the list of delta entries accumulated so far.
        :param candidates: paths of directories that might have become empty.
        :return: a list of (path, file_id) tuples for directories to prune.
        """
        new_inv = self.basis_inventory._get_mutable_inventory()
        new_inv.apply_delta(delta)
        result = []
        for dir_path in candidates:
            file_id = new_inv.path2id(dir_path)
            if file_id is None:
                # The directory is already gone after the delta.
                continue
            ie = new_inv[file_id]
            if ie.kind != 'directory':
                # The path no longer names a directory (e.g. it was replaced
                # by a file in this commit), so there are no children to
                # inspect and nothing to prune.
                continue
            if len(ie.children) != 0:
                continue
            result.append((dir_path, file_id))
            if self.verbose:
                self.note("pruning empty directory %s" % (dir_path,))
            # Check parents in case deleting this dir makes *them* empty
            while True:
                parent_id = ie.parent_id
                if parent_id == inventory.ROOT_ID:
                    # We've reached the root
                    break
                try:
                    ie = new_inv[parent_id]
                except errors.NoSuchId:
                    break
                if ie.parent_id is None:
                    # This is the tree root under a non-default root id;
                    # the root itself must never be pruned.
                    break
                if len(ie.children) > 1:
                    # Something other than the pruned directory remains.
                    break
                parent_path = new_inv.id2path(parent_id)
                result.append((parent_path, parent_id))
                if self.verbose:
                    self.note("pruning empty directory parent %s"
                        % (parent_path,))
        return result
574
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
575
    def _add_entry(self, entry):
        """Record a delta entry, merging it with any earlier one for its file-id.

        Besides storing the (possibly combined) entry, this notes parent
        directories that might have become empty and, unless already known
        for this commit, works out the per-file parents of the entry.

        :param entry: an (old_path, new_path, file_id, inventory_entry)
            tuple.  old_path is None for an add; new_path and the inventory
            entry are None for a delete.
        """
        # We need to combine the data if multiple entries have the same file-id.
        # For example, a rename followed by a modification looks like:
        #
        # (x, y, f, e) & (y, y, f, g) => (x, y, f, g)
        #
        # Likewise, a modification followed by a rename looks like:
        #
        # (x, x, f, e) & (x, y, f, g) => (x, y, f, g)
        #
        # Here's a rename followed by a delete and a modification followed by
        # a delete:
        #
        # (x, y, f, e) & (y, None, f, None) => (x, None, f, None)
        # (x, x, f, e) & (x, None, f, None) => (x, None, f, None)
        #
        # In summary, we use the original old-path, new new-path and new ie
        # when combining entries.
        old_path, new_path, file_id, ie = entry
        existing = self._delta_entries_by_fileid.get(file_id)
        if existing is not None:
            old_path = existing[0]
            entry = (old_path, new_path, file_id, ie)

        if old_path is None and new_path is None:
            # A delete cancelling an add made earlier in this commit:
            #
            # (None, y, f, e) & (y, None, f, None) => no entry at all
            #
            # The file never existed in the basis, so there is nothing to
            # put in the delta.  Storing (None, None, f, None) would be
            # invalid and dirname(None) below would fail.
            self._delta_entries_by_fileid.pop(file_id, None)
            if existing is not None and existing[1] is not None:
                parent_dir = osutils.dirname(existing[1])
                if parent_dir:
                    self._dirs_that_might_become_empty.add(parent_dir)
            return
        self._delta_entries_by_fileid[file_id] = entry

        # Collect parent directories that might become empty
        if new_path is None:
            # delete
            parent_dir = osutils.dirname(old_path)
            # note: no need to check the root
            if parent_dir:
                self._dirs_that_might_become_empty.add(parent_dir)
        elif old_path is not None and old_path != new_path:
            # rename
            old_parent_dir = osutils.dirname(old_path)
            new_parent_dir = osutils.dirname(new_path)
            if old_parent_dir and old_parent_dir != new_parent_dir:
                self._dirs_that_might_become_empty.add(old_parent_dir)

        # Calculate the per-file parents, if not already done
        if file_id in self.per_file_parents_for_commit:
            return
        if new_path is None:
            # delete: there is no entry to calculate parents for
            return
        per_file_parents, entry_revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        if old_path is None or old_path == new_path:
            # add or modify: take the revision the store worked out.
            # If this is a merge, an added file was most likely added
            # already, so the per-file parent(s) must be calculated and
            # we can't assume there are none.
            ie.revision = entry_revision
        # else: rename, which keeps the revision already set on the entry
        self.per_file_parents_for_commit[file_id] = per_file_parents
641
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
642
    def record_new(self, path, ie):
        """Record the addition of inventory entry ie at path.

        An add is expressed as a delta entry with no old path.
        """
        addition = (None, path, ie.file_id, ie)
        self._add_entry(addition)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
644
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
645
    def record_changed(self, path, ie, parent_id=None):
        """Record an in-place change to the entry ie at path.

        A modification is expressed as a delta entry whose old and new
        paths are the same.  parent_id is accepted but not used here.
        """
        modification = (path, path, ie.file_id, ie)
        self._add_entry(modification)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
647
0.81.9 by Ian Clatworthy
refactor delete_item
648
    def record_delete(self, path, ie):
        """Record the deletion of entry ie at path.

        When ie is a directory, a delete entry is also recorded for every
        entry the basis inventory yields beneath it.
        """
        self._add_entry((path, None, ie.file_id, None))
        if ie.kind != 'directory':
            return
        descendants = self.basis_inventory.iter_entries_by_dir(from_dir=ie)
        for relpath, child_ie in descendants:
            # Paths yielded are relative to the deleted directory.
            self._add_entry(
                (osutils.pathjoin(path, relpath), None, child_ie.file_id, None))
0.81.8 by Ian Clatworthy
refactor rename_item
655
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
656
    def record_rename(self, old_path, new_path, file_id, old_ie):
        """Record the move of file_id from old_path to new_path.

        A copy of old_ie is given the new basename and parent id, as
        returned by _ensure_directory for new_path, and is stamped with the
        revision id of this commit before being added to the delta.
        """
        renamed_ie = old_ie.copy()
        basename, parent_id = self._ensure_directory(
            new_path, self.basis_inventory)
        renamed_ie.name = basename
        renamed_ie.parent_id = parent_id
        renamed_ie.revision = self.revision_id
        rename = (old_path, new_path, file_id, renamed_ie)
        self._add_entry(rename)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
664
665
    def modify_handler(self, filecmd):
        """Handle a modify command by delegating to _modify_item.

        The content is taken inline from filecmd.data when there is no
        dataref; otherwise the referenced blob is fetched through the cache
        manager.  The change is made relative to self.basis_inventory.
        """
        if filecmd.dataref is None:
            content = filecmd.data
        else:
            content = self.cache_mgr.fetch_blob(filecmd.dataref)
        self.debug("modifying %s", filecmd.path)
        self._modify_item(
            filecmd.path,
            filecmd.kind,
            filecmd.is_executable,
            content,
            self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
673
674
    def delete_handler(self, filecmd):
        """Handle a delete command by delegating to _delete_item.

        The path named by filecmd is deleted relative to
        self.basis_inventory.
        """
        target = filecmd.path
        self.debug("deleting %s", target)
        self._delete_item(target, self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
677
678
    def copy_handler(self, filecmd):
        """Handle a copy command by delegating to _copy_item.

        The source and destination paths come from filecmd and the copy is
        performed against self.basis_inventory.
        """
        source, destination = filecmd.src_path, filecmd.dest_path
        self.debug("copying %s to %s", source, destination)
        self._copy_item(source, destination, self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
683
684
    def rename_handler(self, filecmd):
        """Handle a rename command by delegating to _rename_item.

        The old and new paths come from filecmd and the rename is performed
        against self.basis_inventory.
        """
        source, destination = filecmd.old_path, filecmd.new_path
        self.debug("renaming %s to %s", source, destination)
        self._rename_item(source, destination, self.basis_inventory)
689
690
    def deleteall_handler(self, filecmd):
        """Handle a deleteall command against a private copy of the basis.

        Nothing is read from filecmd; the parameter only keeps the signature
        in line with the other command handlers.
        """
        self.debug("deleting all files (and also all directories)")
        # It is not 100% certain that this works in the delta case, but
        # clearing out the basis inventory so that everything is added
        # sounds ok in theory.
        # The basis is likely to be cached, so work on a copy rather than
        # destroying the cached version.
        private_basis = copy_inventory(self.basis_inventory)
        self.basis_inventory = private_basis
        self._delete_all_items(private_basis)