/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
# Copyright (C) 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6379.6.3 by Jelmer Vernooij
Use absolute_import.
17
from __future__ import absolute_import
3193.8.13 by Aaron Bentley
Update texts
18
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
19
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
20
from . import (
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
21
    osutils,
22
    progress,
3193.8.33 by Aaron Bentley
Add output, emit minimal inventory delta.
23
    trace,
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
24
    )
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
25
from .i18n import gettext
26
from .sixish import (
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
27
    BytesIO,
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
28
    viewitems,
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
29
    )
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
30
from .ui import ui_factory
3193.8.4 by Aaron Bentley
Get rename detection working for files.
31
32
class RenameMap(object):
    """Determine a mapping of renames.

    Texts of known files are indexed by hashes of adjacent line pairs
    ("edges"); candidate texts are then scored against that index to guess
    which known file each candidate path corresponds to.
    """

    def __init__(self, tree):
        """Create an empty map bound to ``tree``.

        :param tree: The tree whose paths are inspected by get_all_hits,
            get_required_parents, _find_missing_files and
            _make_inventory_delta.
        """
        self.tree = tree
        # Maps a truncated edge hash to the set of tags (file-ids) whose
        # text contains that edge.
        self.edge_hashes = {}

    @staticmethod
    def iter_edge_hashes(lines):
        """Iterate through the hashes of line pairs (which make up an edge).

        One hash is produced per line: line ``n`` is hashed together with
        line ``n + 1``.  The final line has no successor, so it is hashed on
        its own.

        The hash is truncated using a modulus to avoid excessive memory
        consumption by the hitscount dict.  A modulus of 10Mi means that the
        maximum number of keys is 10Mi.  (Keys are normally 32 bits, e.g.
        4 Gi)

        :param lines: A sliceable sequence of hashable lines.
        """
        modulus = 1024 * 1024 * 10
        for n in range(len(lines)):
            yield hash(tuple(lines[n:n + 2])) % modulus

    def add_edge_hashes(self, lines, tag):
        """Update edge_hashes to include the given lines.

        :param lines: The lines to update the hashes for.
        :param tag: A tag uniquely associated with these lines (i.e. file-id)
        """
        for my_hash in self.iter_edge_hashes(lines):
            self.edge_hashes.setdefault(my_hash, set()).add(tag)

    def add_file_edge_hashes(self, tree, file_ids):
        """Update to reflect the hashes for files in the tree.

        :param tree: The tree containing the files.
        :param file_ids: A sized collection of file_ids to perform the
            updates for.
        """
        # Each file-id doubles as the identifier that iter_files_bytes
        # hands back alongside the content.
        desired_files = [(f, f) for f in file_ids]
        task = ui_factory.nested_progress_bar()
        try:
            for num, (file_id, contents) in enumerate(
                    tree.iter_files_bytes(desired_files)):
                task.update(gettext('Calculating hashes'), num, len(file_ids))
                # Round-trip through a buffer so that the content is
                # re-split into lines regardless of how it was chunked.
                s = BytesIO()
                s.writelines(contents)
                s.seek(0)
                self.add_edge_hashes(s.readlines(), file_id)
        finally:
            task.finished()

    def hitcounts(self, lines):
        """Count the number of hash hits for each tag, for the given lines.

        Hits are weighted according to the number of tags the hash is
        associated with; more tags means that the hash is less rare and should
        tend to be ignored.

        :param lines: The lines to calculate hashes of.
        :return: a dict of {tag: hitcount}
        """
        hits = {}
        for my_hash in self.iter_edge_hashes(lines):
            tags = self.edge_hashes.get(my_hash)
            if tags is None:
                continue
            # Every tag sharing this hash gets an equal share of one hit.
            weight = 1.0 / len(tags)
            for tag in tags:
                hits[tag] = hits.get(tag, 0) + weight
        return hits

    def get_all_hits(self, paths):
        """Find all the hit counts for the listed paths in the tree.

        :param paths: A sized collection of paths in self.tree.
        :return: A list of tuples of count, path, file_id.
        """
        all_hits = []
        task = ui_factory.nested_progress_bar()
        try:
            for num, path in enumerate(paths):
                task.update(gettext('Determining hash hits'), num, len(paths))
                hits = self.hitcounts(self.tree.get_file_lines(None,
                                                               path=path))
                all_hits.extend((v, path, k) for k, v in viewitems(hits))
        finally:
            task.finished()
        return all_hits

    def file_match(self, paths):
        """Return a mapping from the supplied paths to file_ids.

        :param paths: A sized collection of paths in self.tree.
        :return: A dict of {path: file_id}, as built by _match_hits.
        """
        return self._match_hits(self.get_all_hits(paths))

    @staticmethod
    def _match_hits(hit_list):
        """Using a hit list, determine a path-to-fileid map.

        The hit list is a list of (count, path, file_id), where count is a
        (possibly float) number, with higher numbers indicating stronger
        matches.

        Hits are considered from strongest to weakest; a hit is discarded
        when either its path or its file_id has already been matched.

        :return: A dict of {path: file_id}.
        """
        seen_file_ids = set()
        path_map = {}
        for count, path, file_id in sorted(hit_list, reverse=True):
            if path in path_map or file_id in seen_file_ids:
                continue
            path_map[path] = file_id
            seen_file_ids.add(file_id)
        return path_map

    def get_required_parents(self, matches):
        """Return a dict of all file parents that must be versioned.

        Every ancestor directory of a matched path for which
        self.tree.path2id() returns None is considered required.

        :param matches: A dict of {path: file_id}.
        :return: A dict whose keys are the required parent paths and whose
            values are sets of the file-ids (taken from matches) of their
            direct children.
        """
        required_parents = {}
        for path in matches:
            while True:
                child = path
                path = osutils.dirname(path)
                if self.tree.path2id(path) is not None:
                    break
                if path == child:
                    # dirname() made no progress and the path is still
                    # unversioned; stop rather than loop forever.
                    break
                required_parents.setdefault(path, []).append(child)
        require_ids = {}
        for parent, children in viewitems(required_parents):
            child_file_ids = set()
            for child in children:
                # Children that are themselves only required directories
                # have no entry in matches and contribute nothing.
                file_id = matches.get(child)
                if file_id is not None:
                    child_file_ids.add(file_id)
            require_ids[parent] = child_file_ids
        return require_ids

    def match_parents(self, required_parents, missing_parents):
        """Map parent directories to file-ids.

        This is done by finding similarity between the file-ids of children of
        required parent directories and the file-ids of children of missing
        parent directories.

        :param required_parents: A dict of {path: set of child file-ids}.
        :param missing_parents: A dict of {file_id: set of child file-ids}.
        :return: A dict of {path: file_id}, as built by _match_hits.
        """
        all_hits = []
        for file_id, file_id_children in viewitems(missing_parents):
            for path, path_children in viewitems(required_parents):
                hits = len(path_children.intersection(file_id_children))
                if hits > 0:
                    all_hits.append((hits, path, file_id))
        return self._match_hits(all_hits)

    def _find_missing_files(self, basis):
        """Collect the missing and candidate files relative to ``basis``.

        :param basis: The tree that self.tree is compared against.
        :return: A tuple (missing_files, missing_parents, candidate_files):
            missing_files is a set of file-ids of versioned files with no
            kind in self.tree; missing_parents maps the old parent id of
            each missing entry to the set of missing child file-ids, for
            parents whose path does not exist in self.tree; candidate_files
            is a set of paths of unversioned, non-ignored files.
        """
        missing_files = set()
        missing_parents = {}
        candidate_files = set()
        task = ui_factory.nested_progress_bar()
        try:
            # Called inside the try so that the progress bar is finished
            # even if iter_changes itself raises.
            iterator = self.tree.iter_changes(basis, want_unversioned=True,
                                              pb=task)
            for (file_id, paths, changed_content, versioned, parent, name,
                 kind, executable) in iterator:
                if kind[1] is None and versioned[1]:
                    if not self.tree.has_filename(
                            self.tree.id2path(parent[0])):
                        missing_parents.setdefault(
                            parent[0], set()).add(file_id)
                    if kind[0] == 'file':
                        missing_files.add(file_id)
                    else:
                        # other kinds are not handled
                        pass
                if versioned == (False, False):
                    if self.tree.is_ignored(paths[1]):
                        continue
                    if kind[1] == 'file':
                        candidate_files.add(paths[1])
                    if kind[1] == 'directory':
                        # Unversioned directory: gather the files below it.
                        # Presumably child[0] is the path and child[2] the
                        # kind of each walkdirs entry -- TODO confirm.
                        for _dir, children in self.tree.walkdirs(paths[1]):
                            for child in children:
                                if child[2] == 'file':
                                    candidate_files.add(child[0])
        finally:
            task.finished()
        return missing_files, missing_parents, candidate_files

    @classmethod
    def guess_renames(klass, tree, dry_run=False):
        """Guess which files to rename, and perform the rename.

        We assume that unversioned files and missing files indicate that
        versioned files have been renamed outside of Bazaar.

        Each guessed rename is reported through trace.note().

        :param tree: A write-locked working tree.
        :param dry_run: If True, report the renames but do not add the
            required parents or apply the inventory delta.
        """
        required_parents = {}
        task = ui_factory.nested_progress_bar()
        try:
            pp = progress.ProgressPhase('Guessing renames', 4, task)
            basis = tree.basis_tree()
            basis.lock_read()
            try:
                rn = klass(tree)
                pp.next_phase()
                missing_files, missing_parents, candidate_files = (
                    rn._find_missing_files(basis))
                pp.next_phase()
                rn.add_file_edge_hashes(basis, missing_files)
            finally:
                basis.unlock()
            pp.next_phase()
            matches = rn.file_match(candidate_files)
            parents_matches = matches
            # Keep matching parent directories of the previous round's
            # matches until a round produces no new matches.
            while len(parents_matches) > 0:
                required_parents = rn.get_required_parents(
                    parents_matches)
                parents_matches = rn.match_parents(required_parents,
                                                   missing_parents)
                matches.update(parents_matches)
            pp.next_phase()
            delta = rn._make_inventory_delta(matches)
            for old, new, file_id, entry in delta:
                trace.note(gettext("{0} => {1}").format(old, new))
            if not dry_run:
                tree.add(required_parents)
                tree.apply_inventory_delta(delta)
        finally:
            task.finished()

    def _make_inventory_delta(self, matches):
        """Build the inventory delta that applies the guessed renames.

        Entries whose name and parent would be unchanged are left out.

        NOTE(review): when a new parent directory is neither matched nor
        versioned, it is added with self.tree.smart_add() here, which
        happens regardless of the caller's dry_run setting -- confirm that
        this is intended.

        :param matches: A dict of {new_path: file_id}.
        :return: A list of (old_path, new_path, file_id, new_entry) tuples.
        """
        delta = []
        file_id_matches = {f: p for p, f in viewitems(matches)}
        for old_path, entry in self.tree.iter_entries_by_dir(file_id_matches):
            new_path = file_id_matches[entry.file_id]
            parent_path, new_name = osutils.split(new_path)
            parent_id = matches.get(parent_path)
            if parent_id is None:
                parent_id = self.tree.path2id(parent_path)
                if parent_id is None:
                    _added, ignored = self.tree.smart_add(
                        [parent_path], recurse=False)
                    if len(ignored) > 0 and ignored[0] == parent_path:
                        # The parent directory is ignored, so this entry
                        # cannot be moved into it.
                        continue
                    parent_id = self.tree.path2id(parent_path)
            if entry.name == new_name and entry.parent_id == parent_id:
                continue
            new_entry = entry.copy()
            new_entry.parent_id = parent_id
            new_entry.name = new_name
            delta.append((old_path, new_path, new_entry.file_id, new_entry))
        return delta