/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
1
# Copyright (C) 2008 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
0.64.334 by Jelmer Vernooij
Remove old FSF address. Thanks Dan Callaghan.
14
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
15
16
"""CommitHandlers that build and save revisions & their inventories."""
17
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
18
from __future__ import absolute_import
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
19
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
20
from ... import (
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
21
    debug,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
22
    errors,
23
    generate_ids,
24
    inventory,
25
    osutils,
26
    revision,
0.64.192 by Ian Clatworthy
delegate commit message escaping to the serializer if it's a modern one
27
    serializer,
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
28
    )
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
29
from ...trace import (
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
30
    mutter,
31
    note,
32
    warning,
33
    )
0.123.2 by Jelmer Vernooij
Split out fastimport, import it from the system.
34
from fastimport import (
0.123.1 by Jelmer Vernooij
Move pure-fastimport code into its own directory, in preparation of splitting it into a separate package.
35
    helpers,
36
    processor,
37
    )
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
38
6628.1.2 by Jelmer Vernooij
Fix imports, move exporter.py, drop explorer metadata.
39
from .helpers import (
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
40
    mode_to_kind,
41
    )
42
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
43
0.64.192 by Ian Clatworthy
delegate commit message escaping to the serializer if it's a modern one
44
# True when the serializer class advertises
# 'squashes_xml_invalid_characters'; build_revision() then leaves the
# commit message unescaped.
_serializer_handles_escaping = hasattr(
    serializer.Serializer, 'squashes_xml_invalid_characters')
46
0.64.318 by Jelmer Vernooij
Avoid Inventory.copy, which has disappeared in newer versions of Bazaar.
47
0.84.3 by Ian Clatworthy
fix inventory copying when using deltas
48
def copy_inventory(inv):
    """Return a new Inventory populated with copies of the entries of inv.

    Entries are taken in ``iter_entries_by_dir()`` order and each one is
    copied before being added.  The new inventory is created with a root
    id of None and the revision id of ``inv``.
    """
    source_entries = inv.iter_entries_by_dir()
    duplicate = inventory.Inventory(None, inv.revision_id)
    for _path, entry in source_entries:
        duplicate.add(entry.copy())
    return duplicate
0.84.3 by Ian Clatworthy
fix inventory copying when using deltas
54
55
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
56
class GenericCommitHandler(processor.CommitHandler):
57
    """Base class for Bazaar CommitHandlers."""
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
58
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
59
    def __init__(self, command, cache_mgr, rev_store, verbose=False,
        prune_empty_dirs=True):
        """Set up the per-commit state.

        :param command: the commit command being handled; it is passed to
          the base class and its ``ref`` is remembered as ``branch_ref``
        :param cache_mgr: cache manager, stored as ``self.cache_mgr``
        :param rev_store: revision store, stored as ``self.rev_store``
        :param verbose: when True, inventory cache misses are reported
        :param prune_empty_dirs: stored as ``self.prune_empty_dirs``
          (presumably controls pruning of directories that become empty -
          it is not consulted in this part of the class)
        """
        super(GenericCommitHandler, self).__init__(command)
        self.cache_mgr = cache_mgr
        self.rev_store = rev_store
        self.verbose = verbose
        self.branch_ref = command.ref
        self.prune_empty_dirs = prune_empty_dirs

        # path -> file-id for everything created by this commit.  A path
        # created more than once gets a warning and is added only once;
        # a path that is added and then renamed or copied also needs
        # this record.
        self._new_file_ids = {}

        # path -> file-id for everything modified by this commit.  When a
        # path is modified and then renamed or copied, we need to make
        # sure we grab the new content.
        self._modified_file_ids = {}

        # Paths deleted by this commit.  If such a path is added again,
        # or becomes the destination of a rename say, a fresh file-id
        # is required.
        self._paths_deleted_this_commit = set()
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
80
0.123.9 by Jelmer Vernooij
Provide stubs for logging functions no longer provided by python-fastimport.
81
    def mutter(self, msg, *args):
        """Mutter msg with the id of the current command appended."""
        with_context = "%s (%s)" % (msg, self.command.id)
        mutter(with_context, *args)
85
86
    def debug(self, msg, *args):
        """Mutter msg (plus command id) only if -Dfast-import is active.

        Nothing is output unless "fast-import" is in debug.debug_flags.
        """
        if "fast-import" not in debug.debug_flags:
            return
        with_context = "%s (%s)" % (msg, self.command.id)
        mutter(with_context, *args)
91
92
    def note(self, msg, *args):
        """Emit msg as a note with the id of the current command appended."""
        with_context = "%s (%s)" % (msg, self.command.id)
        note(with_context, *args)
96
97
    def warning(self, msg, *args):
        """Emit msg as a warning with the id of the current command appended."""
        with_context = "%s (%s)" % (msg, self.command.id)
        warning(with_context, *args)
101
0.81.1 by Ian Clatworthy
move GenericCommitHandler into its own module in prep for a delta-based one
102
    def pre_process_files(self):
        """Prepare for committing.

        Generates the revision id, resolves the parents of the commit to
        revision-ids, builds the Revision, tells the revision store that a
        new revision is starting and picks the basis inventory.
        """
        self.revision_id = self.gen_revision_id()

        # Texts for this commit, indexed by file-id.  The root entry is
        # registered unconditionally.
        self.data_for_commit = {inventory.ROOT_ID: []}

        # Track the heads and get the real parent list (as commit-ids)
        commit_parents = self.cache_mgr.reftracker.track_heads(self.command)

        # Convert the parent commit-ids to bzr revision-ids
        if commit_parents:
            lookup = self.cache_mgr.lookup_committish
            self.parents = [lookup(parent) for parent in commit_parents]
        else:
            self.parents = []
        self.debug("%s id: %s, parents: %s", self.command.id,
            self.revision_id, str(self.parents))

        # Tell the RevisionStore we're starting a new commit
        self.revision = self.build_revision()
        self.parent_invs = [
            self.get_inventory(parent) for parent in self.parents]
        self.rev_store.start_new_revision(
            self.revision, self.parents, self.parent_invs)

        # Per-file parents for this commit, indexed by file-id
        self.per_file_parents_for_commit = {}
        if self.rev_store.expects_rich_root():
            self.per_file_parents_for_commit[inventory.ROOT_ID] = ()

        # Keep the basis inventory. This needs to be treated as read-only.
        if self.parents:
            self.basis_inventory = self.get_inventory(self.parents[0])
        else:
            self.basis_inventory = self._init_inventory()
        basis = self.basis_inventory
        # Some inventories expose root_id directly, others only via root
        if hasattr(basis, "root_id"):
            self.inventory_root_id = basis.root_id
        else:
            self.inventory_root_id = basis.root.file_id

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = {}
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
145
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
146
    def _init_inventory(self):
147
        return self.rev_store.init_inventory(self.revision_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
148
149
    def get_inventory(self, revision_id):
        """Return the inventory for revision_id, using the cache if possible.

        On a cache miss the inventory is fetched from the revision store
        and saved in the cache manager before it is returned.  The miss is
        muttered when running verbosely.
        """
        try:
            return self.cache_mgr.inventories[revision_id]
        except KeyError:
            pass
        if self.verbose:
            self.mutter("get_inventory cache miss for %s", revision_id)
        # Not cached so reconstruct from the RevisionStore
        loaded = self.rev_store.get_inventory(revision_id)
        self.cache_mgr.inventories[revision_id] = loaded
        return loaded
160
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
161
    def _get_data(self, file_id):
162
        """Get the data bytes for a file-id."""
163
        return self.data_for_commit[file_id]
164
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
165
    def _get_lines(self, file_id):
        """Return the content recorded for file_id, split into lines."""
        content = self._get_data(file_id)
        return osutils.split_lines(content)
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
168
0.85.2 by Ian Clatworthy
improve per-file graph generation
169
    def _get_per_file_parents(self, file_id):
170
        """Get the lines for a file-id."""
171
        return self.per_file_parents_for_commit[file_id]
172
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
173
    def _get_inventories(self, revision_ids):
174
        """Get the inventories for revision-ids.
175
        
176
        This is a callback used by the RepositoryStore to
177
        speed up inventory reconstruction.
178
        """
179
        present = []
180
        inventories = []
181
        # If an inventory is in the cache, we assume it was
182
        # successfully loaded into the revision store
183
        for revision_id in revision_ids:
184
            try:
185
                inv = self.cache_mgr.inventories[revision_id]
186
                present.append(revision_id)
187
            except KeyError:
188
                if self.verbose:
189
                    self.note("get_inventories cache miss for %s", revision_id)
190
                # Not cached so reconstruct from the revision store
191
                try:
192
                    inv = self.get_inventory(revision_id)
193
                    present.append(revision_id)
194
                except:
0.84.4 by Ian Clatworthy
improved-but-not-yet-working CHKInventory support
195
                    inv = self._init_inventory()
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
196
                self.cache_mgr.inventories[revision_id] = inv
197
            inventories.append(inv)
198
        return present, inventories
199
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
200
    def bzr_file_id_and_new(self, path):
        """Get a Bazaar file identifier and new flag for a path.

        Unless the path was deleted earlier in this commit, an existing
        id is looked for in this order: the ids recorded in
        _modified_file_ids, the basis inventory, then each of the other
        parent inventories.  If none is found (or the path was deleted
        this commit) a new id is generated from the basename of the path
        and recorded in _new_file_ids.

        :param path: the path to find or create a file-id for
        :return: file_id, is_new where
          is_new = True if the file_id is newly created
        """
        if path not in self._paths_deleted_this_commit:
            # Try the file-ids recorded in _modified_file_ids
            file_id = self._modified_file_ids.get(path)
            if file_id is not None:
                return file_id, False

            # Try the basis inventory
            file_id = self.basis_inventory.path2id(path)
            if file_id is not None:
                return file_id, False

            # Try the other inventories. Each one must be asked itself:
            # the first parent is the basis, which was checked above.
            for inv in self.parent_invs[1:]:
                file_id = inv.path2id(path)
                if file_id is not None:
                    return file_id, False

        # Doesn't exist yet so create it
        basename = osutils.split(path)[1]
        file_id = generate_ids.gen_file_id(basename)
        self.debug("Generated new file id %s for '%s' in revision-id '%s'",
            file_id, path, self.revision_id)
        self._new_file_ids[path] = file_id
        return file_id, True
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
231
232
    def bzr_file_id(self, path):
        """Return just the file-id part of bzr_file_id_and_new(path)."""
        file_id, _is_new = self.bzr_file_id_and_new(path)
        return file_id
235
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
236
    def _utf8_decode(self, field, value):
237
        try:
0.64.332 by Jelmer Vernooij
Cope with non-utf8 characters in paths when importing.
238
            return value.decode('utf-8')
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
239
        except UnicodeDecodeError:
240
            # The spec says fields are *typically* utf8 encoded
241
            # but that isn't enforced by git-fast-export (at least)
242
            self.warning("%s not in utf8 - replacing unknown "
243
                "characters" % (field,))
0.64.332 by Jelmer Vernooij
Cope with non-utf8 characters in paths when importing.
244
            return value.decode('utf-8', 'replace')
245
246
    def _decode_path(self, path):
247
        try:
248
            return path.decode('utf-8')
249
        except UnicodeDecodeError:
250
            # The spec says fields are *typically* utf8 encoded
251
            # but that isn't enforced by git-fast-export (at least)
252
            self.warning("path %r not in utf8 - replacing unknown "
253
                "characters" % (path,))
254
            return path.decode('utf-8', 'replace')
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
255
256
    def _format_name_email(self, section, name, email):
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
257
        """Format name & email as a string."""
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
258
        name = self._utf8_decode("%s name" % section, name)
259
        email = self._utf8_decode("%s email" % section, email)
260
0.64.177 by Ian Clatworthy
fix round-tripping of committer & author when name is an email
261
        if email:
262
            return "%s <%s>" % (name, email)
263
        else:
264
            return name
265
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
266
    def gen_revision_id(self):
        """Generate a revision id.

        The id is derived from the committer (name and email) and the
        commit timestamp.  Subclasses may override this to produce
        deterministic ids say.
        """
        committer = self.command.committer
        # Perhaps 'who' being the person running the import is ok? If so,
        # it might be a bit quicker and give slightly better compression?
        who = self._format_name_email("committer", committer[0], committer[1])
        return generate_ids.gen_revision_id(who, committer[2])
277
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
278
    def build_revision(self):
        """Build the Revision object for the commit being processed.

        Revision properties are cleaned up, 'branch-nick' defaults to the
        mapped name of the branch ref and author details are saved.  The
        commit message is decoded as UTF-8 (with replacement characters,
        and a warning, if it is not valid UTF-8) and is only escaped here
        when the serializer doesn't handle escaping itself.
        """
        rev_props = self._legal_revision_properties(self.command.properties)
        if 'branch-nick' not in rev_props:
            mapper = self.cache_mgr.branch_mapper
            rev_props['branch-nick'] = mapper.git_to_bzr(self.branch_ref)
        self._save_author_info(rev_props)

        committer = self.command.committer
        who = self._format_name_email("committer", committer[0], committer[1])

        raw_message = self.command.message
        try:
            message = raw_message.decode("utf-8")
        except UnicodeDecodeError:
            self.warning(
                "commit message not in utf8 - replacing unknown characters")
            message = raw_message.decode('utf-8', 'replace')
        if not _serializer_handles_escaping:
            # We need to assume the bad ol' days
            message = helpers.escape_commit_message(message)

        rev_kwargs = {
            'timestamp': committer[2],
            'timezone': committer[3],
            'committer': who,
            'message': message,
            'revision_id': self.revision_id,
            'properties': rev_props,
            'parent_ids': self.parents,
            }
        return revision.Revision(**rev_kwargs)
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
304
0.64.235 by Ian Clatworthy
Sanitize None revision properties to empty string
305
    def _legal_revision_properties(self, props):
306
        """Clean-up any revision properties we can't handle."""
307
        # For now, we just check for None because that's not allowed in 2.0rc1
308
        result = {}
309
        if props is not None:
310
            for name, value in props.items():
311
                if value is None:
312
                    self.warning(
313
                        "converting None to empty string for property %s"
314
                        % (name,))
315
                    result[name] = ''
316
                else:
317
                    result[name] = value
318
        return result
319
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
320
    def _save_author_info(self, rev_props):
321
        author = self.command.author
322
        if author is None:
323
            return
324
        if self.command.more_authors:
325
            authors = [author] + self.command.more_authors
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
326
            author_ids = [self._format_name_email("author", a[0], a[1]) for a in authors]
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
327
        elif author != self.command.committer:
0.64.299 by Jelmer Vernooij
utf8 decode/encode paths and committer/author email/name, as python-fastimport no longer does so.
328
            author_ids = [self._format_name_email("author", author[0], author[1])]
0.102.10 by Ian Clatworthy
Store multiple authors and revision properties when defined
329
        else:
330
            return
331
        # If we reach here, there are authors worth storing
332
        rev_props['authors'] = "\n".join(author_ids)
333
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
334
    def _modify_item(self, path, kind, is_executable, data, inv):
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
335
        """Add to or change an item in the inventory."""
0.99.5 by Ian Clatworthy
handle adding the same file twice in the one commit
336
        # If we've already added this, warn the user that we're ignoring it.
337
        # In the future, it might be nice to double check that the new data
338
        # is the same as the old but, frankly, exporters should be fixed
339
        # not to produce bad data streams in the first place ...
340
        existing = self._new_file_ids.get(path)
341
        if existing:
0.102.18 by Ian Clatworthy
Tweak some diagnostic messages
342
            # We don't warn about directories because it's fine for them
343
            # to be created already by a previous rename
344
            if kind != 'directory':
345
                self.warning("%s already added in this commit - ignoring" %
346
                    (path,))
0.99.5 by Ian Clatworthy
handle adding the same file twice in the one commit
347
            return
348
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
349
        # Create the new InventoryEntry
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
350
        basename, parent_id = self._ensure_directory(path, inv)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
351
        file_id = self.bzr_file_id(path)
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
352
        ie = inventory.make_entry(kind, basename, parent_id, file_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
353
        ie.revision = self.revision_id
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
354
        if kind == 'file':
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
355
            ie.executable = is_executable
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
356
            # lines = osutils.split_lines(data)
357
            ie.text_sha1 = osutils.sha_string(data)
358
            ie.text_size = len(data)
359
            self.data_for_commit[file_id] = data
0.102.14 by Ian Clatworthy
export and import empty directories
360
        elif kind == 'directory':
361
            self.directory_entries[path] = ie
362
            # There are no lines stored for a directory so
363
            # make sure the cache used by get_lines knows that
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
364
            self.data_for_commit[file_id] = ''
0.81.7 by Ian Clatworthy
merge import tests and tweaks to make them pass
365
        elif kind == 'symlink':
0.64.332 by Jelmer Vernooij
Cope with non-utf8 characters in paths when importing.
366
            ie.symlink_target = self._decode_path(data)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
367
            # There are no lines stored for a symlink so
368
            # make sure the cache used by get_lines knows that
0.115.4 by John Arbash Meinel
(broken) Start working towards using CommitBuilder rather than using a custom implementation.
369
            self.data_for_commit[file_id] = ''
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
370
        else:
0.64.229 by Ian Clatworthy
Handle git submodules in the stream by warning about + ignoring them
371
            self.warning("Cannot import items of kind '%s' yet - ignoring '%s'"
372
                % (kind, path))
373
            return
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
374
        # Record it
0.64.323 by Jelmer Vernooij
Avoid deprecated Inventory.__contains__.
375
        if inv.has_id(file_id):
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
376
            old_ie = inv[file_id]
377
            if old_ie.kind == 'directory':
378
                self.record_delete(path, old_ie)
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
379
            self.record_changed(path, ie, parent_id)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
380
        else:
0.64.165 by Ian Clatworthy
handle adding a file to a dir deleted in the same commit
381
            try:
382
                self.record_new(path, ie)
383
            except:
0.64.167 by Ian Clatworthy
incremental packing for chk formats
384
                print "failed to add path '%s' with entry '%s' in command %s" \
385
                    % (path, ie, self.command.id)
386
                print "parent's children are:\n%r\n" % (ie.parent_id.children,)
0.64.165 by Ian Clatworthy
handle adding a file to a dir deleted in the same commit
387
                raise
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
388
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
389
    def _ensure_directory(self, path, inv):
        """Make sure the directory containing 'path' exists.

        :param path: the path whose containing directory is required
        :param inv: the inventory used to look up existing directories
        :return: a (basename, parent_id) tuple where basename is the last
            component of path and parent_id is the file-id of the
            directory that contains it
        """
        parent_path, leaf_name = osutils.split(path)
        if parent_path == '':
            # Directly under the root: the root node doesn't get updated
            return leaf_name, self.inventory_root_id

        try:
            known_dir = self._get_directory_entry(inv, parent_path)
        except KeyError:
            # Not a known directory, so it is created below
            pass
        else:
            return leaf_name, known_dir.file_id

        # Recurse first so that the parent of the new directory exists
        # before the directory itself is created
        parent_leaf, grandparent_id = self._ensure_directory(parent_path, inv)
        new_dir_id = self.bzr_file_id(parent_path)
        make_directory = inventory.entry_factory['directory']
        new_dir = make_directory(new_dir_id, parent_leaf, grandparent_id)
        new_dir.revision = self.revision_id
        self.directory_entries[parent_path] = new_dir
        # A directory has no text, so store an empty string for it in
        # the per-commit data cache
        self.data_for_commit[new_dir_id] = ''

        # A file or symlink may already be using that file-id; if so it
        # has to be deleted before the directory is recorded.
        if inv.has_id(new_dir_id):
            self.record_delete(parent_path, new_dir)
        self.record_new(parent_path, new_dir)
        return leaf_name, new_dir.file_id
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
421
0.84.12 by Ian Clatworthy
lookup directories on demand in CHKInventories, not all upfront
422
    def _get_directory_entry(self, inv, dirname):
        """Return the inventory entry for the directory at 'dirname'.

        Entries already held in self.directory_entries are returned
        directly; entries found in inv are remembered there before being
        returned.

        Raises KeyError if dirname was deleted in this commit, is not
        present in inv, or is present but is not a directory.
        """
        cached = self.directory_entries.get(dirname)
        if cached is not None:
            return cached

        if dirname in self._paths_deleted_this_commit:
            raise KeyError
        try:
            file_id = inv.path2id(dirname)
        except errors.NoSuchId:
            # In a CHKInventory, this is raised if there's no root yet
            raise KeyError
        if file_id is None:
            raise KeyError

        entry = inv[file_id]
        # Only a real directory is acceptable as a result
        if entry.kind != 'directory':
            raise KeyError
        self.directory_entries[dirname] = entry
        return entry
445
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
446
    def _delete_item(self, path, inv):
        """Record the deletion of 'path'.

        A path added earlier in this commit is deleted using its pending
        delta entry. Otherwise the entry is looked up in inv; if it cannot
        be found the delete is noted via mutter() and ignored.
        """
        pending_id = self._new_file_ids.get(path)
        if pending_id:
            # We've only just added this path earlier in this commit.
            # note: delta entries look like (old, new, file-id, ie)
            pending_ie = self._delta_entries_by_fileid[pending_id][3]
            self.record_delete(path, pending_ie)
            return

        file_id = inv.path2id(path)
        if file_id is not None:
            try:
                ie = inv[file_id]
            except errors.NoSuchId:
                pass
            else:
                self.record_delete(path, ie)
                return
        # Either the path or its file-id is unknown to the inventory
        self.mutter("ignoring delete of %s as not in inventory", path)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
464
465
    def _copy_item(self, src_path, dest_path, inv):
        """Copy the file or symlink at src_path to dest_path.

        The source is taken from the changes pending in this commit when
        it was added or modified there, otherwise from inv. A missing
        source, or a source that is neither a file nor a symlink, is
        warned about and ignored.
        """
        pending_id = (self._new_file_ids.get(src_path) or
            self._modified_file_ids.get(src_path))
        if pending_id:
            # We've only just added/changed this path earlier in this commit.
            # note: delta entries look like (old, new, file-id, ie)
            file_id = pending_id
            src_ie = self._delta_entries_by_fileid[file_id][3]
        else:
            file_id = inv.path2id(src_path)
            if file_id is None:
                self.warning("ignoring copy of %s to %s - source does not exist",
                    src_path, dest_path)
                return
            src_ie = inv[file_id]

        kind = src_ie.kind
        if kind == 'symlink':
            self._modify_item(dest_path, kind, False,
                src_ie.symlink_target.encode("utf-8"), inv)
            return
        if kind != 'file':
            self.warning("ignoring copy of %s %s - feature not yet supported",
                kind, dest_path)
            return

        # Pending content lives in the per-commit cache; anything else is
        # read from the first parent revision.
        if pending_id:
            content = self.data_for_commit[file_id]
        else:
            content = self.rev_store.get_file_text(self.parents[0], file_id)
        self._modify_item(dest_path, kind, src_ie.executable, content, inv)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
493
494
    def _rename_item(self, old_path, new_path, inv):
        """Rename old_path to new_path.

        A path added or modified earlier in this commit has its pending
        change renamed instead. A rename whose old path is not in inv is
        warned about and ignored. Anything already at new_path in inv is
        recorded as deleted before the rename is recorded.
        """
        pending_id = (self._new_file_ids.get(old_path) or
            self._modified_file_ids.get(old_path))
        if pending_id:
            # We've only just added/modified this path earlier in this commit.
            # Change the add/modify of old_path to an add of new_path
            self._rename_pending_change(old_path, new_path, pending_id)
            return

        file_id = inv.path2id(old_path)
        if file_id is None:
            self.warning(
                "ignoring rename of %s to %s - old path does not exist" %
                (old_path, new_path))
            return

        ie = inv[file_id]
        # Remember the entry's revision now, before the rename is recorded
        previous_rev_id = ie.revision
        target_id = inv.path2id(new_path)
        if target_id is not None:
            self.record_delete(new_path, inv[target_id])
        self.record_rename(old_path, new_path, file_id, ie)

        # The revision-id for this entry will be/has been updated and
        # that means the loader then needs to know what the "new" text is.
        # We therefore must go back to the revision store to get it.
        text_lines = self.rev_store.get_file_lines(previous_rev_id, file_id)
        self.data_for_commit[file_id] = ''.join(text_lines)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
521
522
    def _delete_all_items(self, inv):
        """Record a delete for every entry in inv except the empty path.

        Does nothing when inv has no entries.
        """
        if len(inv) != 0:
            for entry_path, entry in inv.iter_entries_by_dir():
                # The entry with an empty path (presumably the tree
                # root) is never deleted
                if entry_path == "":
                    continue
                self.record_delete(entry_path, entry)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
528
0.64.145 by Ian Clatworthy
handle delete of missing files for chk formats
529
    def _warn_unless_in_merges(self, fileid, path):
        """Warn about an ignored delete unless a merged parent has the file.

        Nothing is reported when this commit has at most one parent, or
        when any parent other than the first has 'fileid' in its
        inventory.

        :param fileid: the file-id being deleted
        :param path: the path being deleted; only used in the warning
        """
        if len(self.parents) <= 1:
            return
        for parent in self.parents[1:]:
            # Use has_id() rather than the deprecated Inventory.__contains__,
            # matching the other inventory membership checks in this module.
            if self.get_inventory(parent).has_id(fileid):
                return
        self.warning("ignoring delete of %s as not in parent inventories", path)
536
0.81.2 by Ian Clatworthy
refactor InventoryCommitHandler general stuff into parent class
537
538
class InventoryCommitHandler(GenericCommitHandler):
    """A CommitHandler that builds and saves Inventory objects."""

    def pre_process_files(self):
        """Set up the working inventory before file commands are applied."""
        super(InventoryCommitHandler, self).pre_process_files()

        # The parent class version of pre_process_files() has already
        # set the right basis_inventory for this branch. When there are
        # parents it is copied so that it can be mutated safely without
        # corrupting the cached inventory value.
        if len(self.parents) != 0:
            self.inventory = copy_inventory(self.basis_inventory)
        else:
            self.inventory = self.basis_inventory
        working_inv = self.inventory
        self.inventory_root = working_inv.root

        # directory-path -> inventory-entry for current inventory
        self.directory_entries = dict(working_inv.directories())

        # Initialise the inventory revision info as required
        if not self.rev_store.expects_rich_root():
            # In this revision store, root entries have no knit or weave.
            # When serializing out to disk and back in, root.revision is
            # always the new revision_id.
            working_inv.root.revision = self.revision_id
        else:
            working_inv.revision_id = self.revision_id

    def post_process_files(self):
        """Save the revision."""
        def text_provider(file_id):
            return self._get_data(file_id)

        def parents_provider(file_id):
            return self._get_per_file_parents(file_id)

        def inventories_provider(revision_ids):
            return self._get_inventories(revision_ids)

        # Cache the finished inventory before handing it to the store
        self.cache_mgr.inventories[self.revision_id] = self.inventory
        self.rev_store.load(self.revision, self.inventory, None,
            text_provider, parents_provider, inventories_provider)

    def record_new(self, path, ie):
        """Add the entry ie to the working inventory."""
        try:
            # If this is a merge, the file was most likely added already.
            # The per-file parent(s) must therefore be calculated and
            # we can't assume there are none.
            file_parents, ie.revision = \
                self.rev_store.get_parents_and_revision_for_entry(ie)
            self.per_file_parents_for_commit[ie.file_id] = file_parents
            self.inventory.add(ie)
        except errors.DuplicateFileId:
            # Directory already exists as a file or symlink: drop the
            # old entry, then add again
            del self.inventory[ie.file_id]
            self.inventory.add(ie)

    def record_changed(self, path, ie, parent_id):
        """Replace the working inventory's entry for ie.file_id with ie."""
        # HACK: no API for this (del+add does more than it needs to)
        file_parents, ie.revision = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[ie.file_id] = file_parents
        by_id = self.inventory._byid
        by_id[ie.file_id] = ie
        by_id[parent_id].children[ie.name] = ie

    def record_delete(self, path, ie):
        """Remove ie from the working inventory via remove_recursive_id()."""
        self.inventory.remove_recursive_id(ie.file_id)

    def record_rename(self, old_path, new_path, file_id, ie):
        """Rename the entry with file_id to new_path in the working inventory."""
        # The entry takes this commit's revision-id; the revision returned
        # alongside the per-file parents is not used.
        ie.revision = self.revision_id
        file_parents, _ = \
            self.rev_store.get_parents_and_revision_for_entry(ie)
        self.per_file_parents_for_commit[file_id] = file_parents
        target_name, target_parent_id = self._ensure_directory(new_path,
            self.inventory)
        self.inventory.rename(file_id, target_parent_id, target_name)

    def modify_handler(self, filecmd):
        """Apply a file modify command to the working inventory."""
        if filecmd.dataref is None:
            # No blob reference, so the content comes from the command
            data = filecmd.data
        else:
            data = self.cache_mgr.fetch_blob(filecmd.dataref)
        self.debug("modifying %s", filecmd.path)
        kind, is_executable = mode_to_kind(filecmd.mode)
        decoded_path = self._decode_path(filecmd.path)
        self._modify_item(decoded_path, kind, is_executable, data,
            self.inventory)

    def delete_handler(self, filecmd):
        """Apply a file delete command to the working inventory."""
        self.debug("deleting %s", filecmd.path)
        decoded_path = self._decode_path(filecmd.path)
        self._delete_item(decoded_path, self.inventory)

    def copy_handler(self, filecmd):
        """Apply a file copy command to the working inventory."""
        source = self._decode_path(filecmd.src_path)
        destination = self._decode_path(filecmd.dest_path)
        self.debug("copying %s to %s", source, destination)
        self._copy_item(source, destination, self.inventory)

    def rename_handler(self, filecmd):
        """Apply a file rename command to the working inventory."""
        source = self._decode_path(filecmd.old_path)
        destination = self._decode_path(filecmd.new_path)
        self.debug("renaming %s to %s", source, destination)
        self._rename_item(source, destination, self.inventory)

    def deleteall_handler(self, filecmd):
        """Apply a delete-all command to the working inventory."""
        self.debug("deleting all files (and also all directories)")
        self._delete_all_items(self.inventory)
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
642
643
0.64.171 by Ian Clatworthy
use inv deltas by default for all formats now: --classic to get old algorithm for packs
644
class InventoryDeltaCommitHandler(GenericCommitHandler):
645
    """A CommitHandler that builds Inventories by applying a delta."""
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
646
647
    def pre_process_files(self):
        """Reset the per-commit delta state and seed the root entry if needed."""
        super(InventoryDeltaCommitHandler, self).pre_process_files()
        self._dirs_that_might_become_empty = set()

        # A given file-id can only appear once so we accumulate
        # the entries in a dict then build the actual delta at the end
        self._delta_entries_by_fileid = {}

        if len(self.parents) != 0 and self.rev_store.expects_rich_root():
            # No explicit root entry is added in this case
            return

        # Need to explicitly add the root entry for the first revision
        # and for non rich-root inventories. The old path is '' when
        # there are parents and None when there are none.
        previous_root_path = '' if self.parents else None
        root_id = inventory.ROOT_ID
        root_entry = inventory.InventoryDirectory(root_id, u'', None)
        root_entry.revision = self.revision_id
        self._add_entry((previous_root_path, '', root_id, root_entry))
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
665
666
    def post_process_files(self):
        """Save the revision."""
        final_delta = self._get_final_delta()
        # The store applies the delta to the basis inventory and hands
        # back the resulting inventory, which is then cached by revision-id
        store = self.rev_store
        new_inventory = store.load_using_delta(
            self.revision,
            self.basis_inventory,
            final_delta,
            None,
            self._get_data,
            self._get_per_file_parents,
            self._get_inventories)
        self.cache_mgr.inventories[self.revision_id] = new_inventory
0.84.8 by Ian Clatworthy
ensure the chk stuff is only used on formats actually supporting it
675
        #print "committed %s" % self.revision_id
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
676
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
677
    def _get_final_delta(self):
678
        """Generate the final delta.
679
680
        Smart post-processing of changes, e.g. pruning of directories
681
        that would become empty, goes here.
682
        """
683
        delta = list(self._delta_entries_by_fileid.values())
684
        if self.prune_empty_dirs and self._dirs_that_might_become_empty:
0.101.2 by Tom Widmer
Update pruning code to operate in multiple passes, with subsequent passes operating on the parent dirs of dirs pruned in the previous pass.
685
            candidates = self._dirs_that_might_become_empty
686
            while candidates:
687
                never_born = set()
688
                parent_dirs_that_might_become_empty = set()
689
                for path, file_id in self._empty_after_delta(delta, candidates):
690
                    newly_added = self._new_file_ids.get(path)
691
                    if newly_added:
692
                        never_born.add(newly_added)
693
                    else:
694
                        delta.append((path, None, file_id, None))
695
                    parent_dir = osutils.dirname(path)
696
                    if parent_dir:
697
                        parent_dirs_that_might_become_empty.add(parent_dir)
698
                candidates = parent_dirs_that_might_become_empty
0.101.5 by Tom Widmer
Add missing tab characters to ensure that never born dirs are correctly removed during each pass of parent directory pruning.
699
                # Clean up entries that got deleted before they were ever added
700
                if never_born:
701
                    delta = [de for de in delta if de[2] not in never_born]
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
702
        return delta
703
704
    def _empty_after_delta(self, delta, candidates):
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
705
        #self.mutter("delta so far is:\n%s" % "\n".join([str(de) for de in delta]))
706
        #self.mutter("candidates for deletion are:\n%s" % "\n".join([c for c in candidates]))
707
        new_inv = self._get_proposed_inventory(delta)
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
708
        result = []
709
        for dir in candidates:
710
            file_id = new_inv.path2id(dir)
0.64.219 by Ian Clatworthy
More robust implicit delete logic when file-id not found
711
            if file_id is None:
712
                continue
0.96.2 by Ian Clatworthy
test and fix for implicit directory delete recursing up
713
            ie = new_inv[file_id]
0.101.2 by Tom Widmer
Update pruning code to operate in multiple passes, with subsequent passes operating on the parent dirs of dirs pruned in the previous pass.
714
            if ie.kind != 'directory':
715
                continue
0.96.2 by Ian Clatworthy
test and fix for implicit directory delete recursing up
716
            if len(ie.children) == 0:
717
                result.append((dir, file_id))
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
718
                if self.verbose:
0.123.8 by Jelmer Vernooij
Use modes for FileModifyCommand.
719
                    self.note("pruning empty directory %s" % (dir,))
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
720
        return result
721
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
722
    def _get_proposed_inventory(self, delta):
723
        if len(self.parents):
0.114.1 by John Arbash Meinel
When post-processing the delta stream, don't ask to generate a full inventory to check for deletions.
724
            # new_inv = self.basis_inventory._get_mutable_inventory()
725
            # Note that this will create unreferenced chk pages if we end up
726
            # deleting entries, because this 'test' inventory won't end up
727
            # used. However, it is cheaper than having to create a full copy of
728
            # the inventory for every commit.
729
            new_inv = self.basis_inventory.create_by_apply_delta(delta,
730
                'not-a-valid-revision-id:')
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
731
        else:
732
            new_inv = inventory.Inventory(revision_id=self.revision_id)
733
            # This is set in the delta so remove it to prevent a duplicate
734
            del new_inv[inventory.ROOT_ID]
0.114.1 by John Arbash Meinel
When post-processing the delta stream, don't ask to generate a full inventory to check for deletions.
735
            try:
736
                new_inv.apply_delta(delta)
737
            except errors.InconsistentDelta:
738
                self.mutter("INCONSISTENT DELTA IS:\n%s" % "\n".join([str(de) for de in delta]))
739
                raise
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
740
        return new_inv
741
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
742
    def _add_entry(self, entry):
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
743
        # We need to combine the data if multiple entries have the same file-id.
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
744
        # For example, a rename followed by a modification looks like:
745
        #
746
        # (x, y, f, e) & (y, y, f, g) => (x, y, f, g)
747
        #
748
        # Likewise, a modification followed by a rename looks like:
749
        #
750
        # (x, x, f, e) & (x, y, f, g) => (x, y, f, g)
751
        #
752
        # Here's a rename followed by a delete and a modification followed by
753
        # a delete:
754
        #
755
        # (x, y, f, e) & (y, None, f, None) => (x, None, f, None)
756
        # (x, x, f, e) & (x, None, f, None) => (x, None, f, None)
757
        #
758
        # In summary, we use the original old-path, new new-path and new ie
759
        # when combining entries.
0.85.2 by Ian Clatworthy
improve per-file graph generation
760
        old_path = entry[0]
761
        new_path = entry[1]
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
762
        file_id = entry[2]
0.85.2 by Ian Clatworthy
improve per-file graph generation
763
        ie = entry[3]
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
764
        existing = self._delta_entries_by_fileid.get(file_id, None)
765
        if existing is not None:
0.85.2 by Ian Clatworthy
improve per-file graph generation
766
            old_path = existing[0]
767
            entry = (old_path, new_path, file_id, ie)
0.99.6 by Ian Clatworthy
Handle rename of a just added file
768
        if new_path is None and old_path is None:
769
            # This is a delete cancelling a previous add
770
            del self._delta_entries_by_fileid[file_id]
0.99.7 by Ian Clatworthy
handle a delete of a newly added file
771
            parent_dir = osutils.dirname(existing[1])
772
            self.mutter("cancelling add of %s with parent %s" % (existing[1], parent_dir))
773
            if parent_dir:
774
                self._dirs_that_might_become_empty.add(parent_dir)
0.99.6 by Ian Clatworthy
Handle rename of a just added file
775
            return
776
        else:
777
            self._delta_entries_by_fileid[file_id] = entry
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
778
0.99.6 by Ian Clatworthy
Handle rename of a just added file
779
        # Collect parent directories that might become empty
0.64.195 by Ian Clatworthy
prune directories that become empty after a delete or rename
780
        if new_path is None:
781
            # delete
782
            parent_dir = osutils.dirname(old_path)
783
            # note: no need to check the root
784
            if parent_dir:
785
                self._dirs_that_might_become_empty.add(parent_dir)
786
        elif old_path is not None and old_path != new_path:
787
            # rename
788
            old_parent_dir = osutils.dirname(old_path)
789
            new_parent_dir = osutils.dirname(new_path)
790
            if old_parent_dir and old_parent_dir != new_parent_dir:
791
                self._dirs_that_might_become_empty.add(old_parent_dir)
792
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
793
        # Calculate the per-file parents, if not already done
794
        if file_id in self.per_file_parents_for_commit:
795
            return
0.85.2 by Ian Clatworthy
improve per-file graph generation
796
        if old_path is None:
797
            # add
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
798
            # If this is a merge, the file was most likely added already.
799
            # The per-file parent(s) must therefore be calculated and
800
            # we can't assume there are none.
801
            per_file_parents, ie.revision = \
802
                self.rev_store.get_parents_and_revision_for_entry(ie)
803
            self.per_file_parents_for_commit[file_id] = per_file_parents
0.85.2 by Ian Clatworthy
improve per-file graph generation
804
        elif new_path is None:
805
            # delete
806
            pass
807
        elif old_path != new_path:
808
            # rename
0.64.161 by Ian Clatworthy
fix per-graph parent handling for adds and renames
809
            per_file_parents, _ = \
810
                self.rev_store.get_parents_and_revision_for_entry(ie)
811
            self.per_file_parents_for_commit[file_id] = per_file_parents
0.85.2 by Ian Clatworthy
improve per-file graph generation
812
        else:
813
            # modify
814
            per_file_parents, ie.revision = \
815
                self.rev_store.get_parents_and_revision_for_entry(ie)
816
            self.per_file_parents_for_commit[file_id] = per_file_parents
817
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
818
    def record_new(self, path, ie):
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
819
        self._add_entry((None, path, ie.file_id, ie))
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
820
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
821
    def record_changed(self, path, ie, parent_id=None):
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
822
        self._add_entry((path, path, ie.file_id, ie))
0.99.17 by Ian Clatworthy
Handle rename of a file/symlink modified already in this commit
823
        self._modified_file_ids[path] = ie.file_id
0.81.5 by Ian Clatworthy
basic DeltaCommitHandler generating deltas
824
0.81.9 by Ian Clatworthy
refactor delete_item
825
    def record_delete(self, path, ie):
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
826
        self._add_entry((path, None, ie.file_id, None))
0.99.13 by Ian Clatworthy
Handle delete then add of a file/symlink in the one commit
827
        self._paths_deleted_this_commit.add(path)
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
828
        if ie.kind == 'directory':
0.99.21 by Ian Clatworthy
Handle deleting a directory then adding a file within it in the same commit
829
            try:
830
                del self.directory_entries[path]
831
            except KeyError:
832
                pass
0.64.187 by Ian Clatworthy
fix inv-delta generation when deleting directories
833
            for child_relpath, entry in \
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
834
                self.basis_inventory.iter_entries_by_dir(from_dir=ie):
0.64.187 by Ian Clatworthy
fix inv-delta generation when deleting directories
835
                child_path = osutils.pathjoin(path, child_relpath)
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
836
                self._add_entry((child_path, None, entry.file_id, None))
0.99.13 by Ian Clatworthy
Handle delete then add of a file/symlink in the one commit
837
                self._paths_deleted_this_commit.add(child_path)
0.99.21 by Ian Clatworthy
Handle deleting a directory then adding a file within it in the same commit
838
                if entry.kind == 'directory':
839
                    try:
840
                        del self.directory_entries[child_path]
841
                    except KeyError:
842
                        pass
0.81.8 by Ian Clatworthy
refactor rename_item
843
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
844
    def record_rename(self, old_path, new_path, file_id, old_ie):
845
        new_ie = old_ie.copy()
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
846
        new_basename, new_parent_id = self._ensure_directory(new_path,
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
847
            self.basis_inventory)
848
        new_ie.name = new_basename
0.84.5 by Ian Clatworthy
_ensure_directory to return parent_id, not parent_ie
849
        new_ie.parent_id = new_parent_id
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
850
        new_ie.revision = self.revision_id
0.84.9 by Ian Clatworthy
get non-chk formats working again & combine delta entries when required
851
        self._add_entry((old_path, new_path, file_id, new_ie))
0.99.19 by Ian Clatworthy
Handle rename then modification of the new path
852
        self._modified_file_ids[new_path] = file_id
0.64.233 by Ian Clatworthy
Handle delete, rename then modify all in the one commit
853
        self._paths_deleted_this_commit.discard(new_path)
0.64.234 by Ian Clatworthy
Make sure renamed directories are found in file-id lookups
854
        if new_ie.kind == 'directory':
855
            self.directory_entries[new_path] = new_ie
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
856
0.99.17 by Ian Clatworthy
Handle rename of a file/symlink modified already in this commit
857
    def _rename_pending_change(self, old_path, new_path, file_id):
        """Instead of adding/modifying old-path, add new-path instead.

        The pending delta entry for file_id is withdrawn by deleting old_path,
        the path tracking dictionaries are updated, and a new inventory entry
        carrying the same kind and content details is recorded at new_path.
        """
        # note: delta entries look like (old, new, file-id, ie)
        pending_ie = self._delta_entries_by_fileid[file_id][3]

        # Delete the old path. Note that this might trigger implicit
        # deletion of newly created parents that could now become empty.
        self.record_delete(old_path, pending_ie)

        # Update the dictionaries used for tracking new file-ids
        if old_path in self._new_file_ids:
            tracker = self._new_file_ids
        else:
            tracker = self._modified_file_ids
        del tracker[old_path]
        self._new_file_ids[new_path] = file_id

        # Create the new InventoryEntry
        kind = pending_ie.kind
        basename, parent_id = self._ensure_directory(new_path,
                                                     self.basis_inventory)
        replacement = inventory.make_entry(kind, basename, parent_id, file_id)
        replacement.revision = self.revision_id
        if kind == 'file':
            for attr in ('executable', 'text_sha1', 'text_size'):
                setattr(replacement, attr, getattr(pending_ie, attr))
        elif kind == 'symlink':
            replacement.symlink_target = pending_ie.symlink_target

        # Record it
        self.record_new(new_path, replacement)
888
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
889
    def modify_handler(self, filecmd):
        """Process a file-modify command.

        The kind and executable flag come from the command's mode. The content
        is taken from the command itself when it carries no dataref.
        Otherwise it is None for a directory, the dataref itself for a
        tree-reference, and the blob fetched from the cache manager for
        anything else.
        """
        kind, executable = mode_to_kind(filecmd.mode)
        dataref = filecmd.dataref
        if dataref is None:
            data = filecmd.data
        elif kind == "directory":
            data = None
        elif kind == "tree-reference":
            data = dataref
        else:
            data = self.cache_mgr.fetch_blob(dataref)
        self.debug("modifying %s", filecmd.path)
        decoded_path = self._decode_path(filecmd.path)
        self._modify_item(decoded_path, kind, executable, data,
                          self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
904
905
    def delete_handler(self, filecmd):
906
        self.debug("deleting %s", filecmd.path)
0.64.332 by Jelmer Vernooij
Cope with non-utf8 characters in paths when importing.
907
        self._delete_item(
908
            self._decode_path(filecmd.path), self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
909
910
    def copy_handler(self, filecmd):
0.64.332 by Jelmer Vernooij
Cope with non-utf8 characters in paths when importing.
911
        src_path = self._decode_path(filecmd.src_path)
912
        dest_path = self._decode_path(filecmd.dest_path)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
913
        self.debug("copying %s to %s", src_path, dest_path)
0.81.10 by Ian Clatworthy
get DeltaCommitHandler passing all tests
914
        self._copy_item(src_path, dest_path, self.basis_inventory)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
915
916
    def rename_handler(self, filecmd):
0.64.332 by Jelmer Vernooij
Cope with non-utf8 characters in paths when importing.
917
        old_path = self._decode_path(filecmd.old_path)
918
        new_path = self._decode_path(filecmd.new_path)
0.81.6 by Ian Clatworthy
basic DeltaCommitHandler mostly going bar rename
919
        self.debug("renaming %s to %s", old_path, new_path)
920
        self._rename_item(old_path, new_path, self.basis_inventory)
921
922
    def deleteall_handler(self, filecmd):
923
        self.debug("deleting all files (and also all directories)")
924
        self._delete_all_items(self.basis_inventory)