/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3193.8.13 by Aaron Bentley
Update texts
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3193.8.32 by Aaron Bentley
Update GPL preamble
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3193.8.13 by Aaron Bentley
Update texts
16
6379.6.3 by Jelmer Vernooij
Use absolute_import.
17
from __future__ import absolute_import
3193.8.13 by Aaron Bentley
Update texts
18
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
19
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
20
from . import (
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
21
    osutils,
22
    progress,
3193.8.33 by Aaron Bentley
Add output, emit minimal inventory delta.
23
    trace,
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
24
    )
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
25
from .i18n import gettext
26
from .sixish import (
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
27
    BytesIO,
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
28
    viewitems,
6885.5.9 by Jelmer Vernooij
Remove specific_file_ids support from Tree.iter_entries_by_dir.
29
    viewvalues,
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
30
    )
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
31
from .ui import ui_factory
3193.8.4 by Aaron Bentley
Get rename detection working for files.
32
33
class RenameMap(object):
3193.8.13 by Aaron Bentley
Update texts
34
    """Determine a mapping of renames."""
3193.8.4 by Aaron Bentley
Get rename detection working for files.
35
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
36
    def __init__(self, tree):
37
        self.tree = tree
3193.8.4 by Aaron Bentley
Get rename detection working for files.
38
        self.edge_hashes = {}
39
3193.8.11 by Aaron Bentley
Make hash iterator static
40
    @staticmethod
41
    def iter_edge_hashes(lines):
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
42
        """Iterate through the hashes of line pairs (which make up an edge).
43
44
        The hash is truncated using a modulus to avoid excessive memory
45
        consumption by the hitscount dict.  A modulus of 10Mi means that the
46
        maximum number of keys is 10Mi.  (Keys are normally 32 bits, e.g.
47
        4 Gi)
48
        """
3193.8.10 by Aaron Bentley
Update to weight hits and use 10M of keyspace
49
        modulus = 1024 * 1024 * 10
3193.8.4 by Aaron Bentley
Get rename detection working for files.
50
        for n in range(len(lines)):
3193.8.10 by Aaron Bentley
Update to weight hits and use 10M of keyspace
51
            yield hash(tuple(lines[n:n+2])) % modulus
3193.8.4 by Aaron Bentley
Get rename detection working for files.
52
53
    def add_edge_hashes(self, lines, tag):
3193.8.13 by Aaron Bentley
Update texts
54
        """Update edge_hashes to include the given lines.
55
56
        :param lines: The lines to update the hashes for.
57
        :param tag: A tag uniquely associated with these lines (i.e. file-id)
58
        """
3193.8.4 by Aaron Bentley
Get rename detection working for files.
59
        for my_hash in self.iter_edge_hashes(lines):
60
            self.edge_hashes.setdefault(my_hash, set()).add(tag)
61
62
    def add_file_edge_hashes(self, tree, file_ids):
3193.8.13 by Aaron Bentley
Update texts
63
        """Update to reflect the hashes for files in the tree.
64
65
        :param tree: The tree containing the files.
66
        :param file_ids: A list of file_ids to perform the updates for.
67
        """
6874.2.1 by Jelmer Vernooij
Make Tree.iter_files_bytes() take paths rather than file_ids.
68
        desired_files = [(tree.id2path(f), f) for f in file_ids]
6861.4.1 by Jelmer Vernooij
Make progress bars context managers.
69
        with ui_factory.nested_progress_bar() as task:
3193.8.14 by Aaron Bentley
Add progress reporting to guess-renames
70
            for num, (file_id, contents) in enumerate(
71
                tree.iter_files_bytes(desired_files)):
6138.4.1 by Jonathan Riddell
add gettext to progress bar strings
72
                task.update(gettext('Calculating hashes'), num, len(file_ids))
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
73
                s = BytesIO()
3193.8.14 by Aaron Bentley
Add progress reporting to guess-renames
74
                s.writelines(contents)
75
                s.seek(0)
76
                self.add_edge_hashes(s.readlines(), file_id)
3193.8.4 by Aaron Bentley
Get rename detection working for files.
77
78
    def hitcounts(self, lines):
3193.8.13 by Aaron Bentley
Update texts
79
        """Count the number of hash hits for each tag, for the given lines.
80
81
        Hits are weighted according to the number of tags the hash is
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
82
        associated with; more tags means that the hash is less rare and should
83
        tend to be ignored.
3193.8.13 by Aaron Bentley
Update texts
84
        :param lines: The lines to calculate hashes of.
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
85
        :return: a dict of {tag: hitcount}
3193.8.13 by Aaron Bentley
Update texts
86
        """
3193.8.4 by Aaron Bentley
Get rename detection working for files.
87
        hits = {}
88
        for my_hash in self.iter_edge_hashes(lines):
89
            tags = self.edge_hashes.get(my_hash)
90
            if tags is None:
91
                continue
3193.8.12 by Aaron Bentley
Reorganize slightly for the benefit of kcachegrind
92
            taglen = len(tags)
3193.8.4 by Aaron Bentley
Get rename detection working for files.
93
            for tag in tags:
94
                if tag not in hits:
95
                    hits[tag] = 0
3193.8.12 by Aaron Bentley
Reorganize slightly for the benefit of kcachegrind
96
                hits[tag] += 1.0 / taglen
3193.8.4 by Aaron Bentley
Get rename detection working for files.
97
        return hits
98
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
99
    def get_all_hits(self, paths):
100
        """Find all the hit counts for the listed paths in the tree.
3193.8.13 by Aaron Bentley
Update texts
101
102
        :return: A list of tuples of count, path, file_id.
103
        """
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
104
        all_hits = []
6861.4.1 by Jelmer Vernooij
Make progress bars context managers.
105
        with ui_factory.nested_progress_bar() as task:
3193.8.14 by Aaron Bentley
Add progress reporting to guess-renames
106
            for num, path in enumerate(paths):
6138.4.1 by Jonathan Riddell
add gettext to progress bar strings
107
                task.update(gettext('Determining hash hits'), num, len(paths))
6809.4.5 by Jelmer Vernooij
Swap arguments for get_file_*.
108
                hits = self.hitcounts(self.tree.get_file_lines(path))
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
109
                all_hits.extend((v, path, k) for k, v in viewitems(hits))
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
110
        return all_hits
3193.8.12 by Aaron Bentley
Reorganize slightly for the benefit of kcachegrind
111
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
112
    def file_match(self, paths):
3193.8.13 by Aaron Bentley
Update texts
113
        """Return a mapping from file_ids to the supplied paths."""
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
114
        return self._match_hits(self.get_all_hits(paths))
3193.8.17 by Aaron Bentley
Get directory rename handling working.
115
116
    @staticmethod
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
117
    def _match_hits(hit_list):
3193.8.26 by Aaron Bentley
Updates from review.
118
        """Using a hit list, determine a path-to-fileid map.
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
119
120
        The hit list is a list of (count, path, file_id), where count is a
121
        (possibly float) number, with higher numbers indicating stronger
122
        matches.
123
        """
3193.8.12 by Aaron Bentley
Reorganize slightly for the benefit of kcachegrind
124
        seen_file_ids = set()
125
        path_map = {}
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
126
        for count, path, file_id in sorted(hit_list, reverse=True):
3193.8.26 by Aaron Bentley
Updates from review.
127
            if path in path_map or file_id in seen_file_ids:
3193.8.7 by Aaron Bentley
Saner algorithm for picking optimal file.
128
                continue
129
            path_map[path] = file_id
130
            seen_file_ids.add(file_id)
3193.8.4 by Aaron Bentley
Get rename detection working for files.
131
        return path_map
3193.8.16 by Aaron Bentley
Get a dict of required parents.
132
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
133
    def get_required_parents(self, matches):
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
134
        """Return a dict of all file parents that must be versioned.
135
136
        The keys are the required parents and the values are sets of their
137
        children.
138
        """
3193.8.16 by Aaron Bentley
Get a dict of required parents.
139
        required_parents = {}
140
        for path in matches:
141
            while True:
142
                child = path
143
                path = osutils.dirname(path)
6852.3.1 by Jelmer Vernooij
add Tree.is_versioned.
144
                if self.tree.is_versioned(path):
3193.8.16 by Aaron Bentley
Get a dict of required parents.
145
                    break
146
                required_parents.setdefault(path, []).append(child)
3193.8.17 by Aaron Bentley
Get directory rename handling working.
147
        require_ids = {}
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
148
        for parent, children in viewitems(required_parents):
3193.8.17 by Aaron Bentley
Get directory rename handling working.
149
            child_file_ids = set()
150
            for child in children:
151
                file_id = matches.get(child)
152
                if file_id is not None:
153
                    child_file_ids.add(file_id)
154
            require_ids[parent] = child_file_ids
155
        return require_ids
156
157
    def match_parents(self, required_parents, missing_parents):
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
158
        """Map parent directories to file-ids.
159
160
        This is done by finding similarity between the file-ids of children of
161
        required parent directories and the file-ids of children of missing
162
        parent directories.
163
        """
164
        all_hits = []
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
165
        for file_id, file_id_children in viewitems(missing_parents):
166
            for path, path_children in viewitems(required_parents):
3193.8.17 by Aaron Bentley
Get directory rename handling working.
167
                hits = len(path_children.intersection(file_id_children))
168
                if hits > 0:
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
169
                    all_hits.append((hits, path, file_id))
170
        return self._match_hits(all_hits)
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
171
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
172
    def _find_missing_files(self, basis):
3193.8.22 by Aaron Bentley
Reduce unnecessary locking.
173
        missing_files = set()
174
        missing_parents = {}
175
        candidate_files = set()
6861.4.1 by Jelmer Vernooij
Make progress bars context managers.
176
        with ui_factory.nested_progress_bar() as task:
177
            iterator = self.tree.iter_changes(basis, want_unversioned=True,
178
                                              pb=task)
3193.8.25 by Aaron Bentley
Improve progress reporting.
179
            for (file_id, paths, changed_content, versioned, parent, name,
180
                 kind, executable) in iterator:
181
                if kind[1] is None and versioned[1]:
6547.1.5 by Jelmer Vernooij
Address review comments.
182
                    if not self.tree.has_filename(self.tree.id2path(parent[0])):
6547.1.1 by mnn
Fixed issue with RenameMap - also it supports renaming into new unversioned directory
183
                        missing_parents.setdefault(parent[0], set()).add(file_id)
3193.8.25 by Aaron Bentley
Improve progress reporting.
184
                    if kind[0] == 'file':
185
                        missing_files.add(file_id)
186
                    else:
187
                        #other kinds are not handled
188
                        pass
189
                if versioned == (False, False):
190
                    if self.tree.is_ignored(paths[1]):
191
                        continue
192
                    if kind[1] == 'file':
193
                        candidate_files.add(paths[1])
194
                    if kind[1] == 'directory':
195
                        for _dir, children in self.tree.walkdirs(paths[1]):
196
                            for child in children:
197
                                if child[2] == 'file':
198
                                    candidate_files.add(child[0])
3193.8.22 by Aaron Bentley
Reduce unnecessary locking.
199
        return missing_files, missing_parents, candidate_files
200
201
    @classmethod
6883.5.1 by Jelmer Vernooij
Add from_tree argument to RenameMap.guess_renames.
202
    def guess_renames(klass, from_tree, to_tree, dry_run=False):
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
203
        """Guess which files to rename, and perform the rename.
204
205
        We assume that unversioned files and missing files indicate that
206
        versioned files have been renamed outside of Bazaar.
3193.8.26 by Aaron Bentley
Updates from review.
207
6883.5.1 by Jelmer Vernooij
Add from_tree argument to RenameMap.guess_renames.
208
        :param from_tree: A tree to compare from
209
        :param to_tree: A write-locked working tree.
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
210
        """
3193.8.22 by Aaron Bentley
Reduce unnecessary locking.
211
        required_parents = {}
6861.4.1 by Jelmer Vernooij
Make progress bars context managers.
212
        with ui_factory.nested_progress_bar() as task:
3193.8.25 by Aaron Bentley
Improve progress reporting.
213
            pp = progress.ProgressPhase('Guessing renames', 4, task)
6883.5.1 by Jelmer Vernooij
Add from_tree argument to RenameMap.guess_renames.
214
            with from_tree.lock_read():
215
                rn = klass(to_tree)
3193.8.25 by Aaron Bentley
Improve progress reporting.
216
                pp.next_phase()
217
                missing_files, missing_parents, candidate_files = (
6883.5.1 by Jelmer Vernooij
Add from_tree argument to RenameMap.guess_renames.
218
                    rn._find_missing_files(from_tree))
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
219
                pp.next_phase()
6883.5.1 by Jelmer Vernooij
Add from_tree argument to RenameMap.guess_renames.
220
                rn.add_file_edge_hashes(from_tree, missing_files)
3193.8.25 by Aaron Bentley
Improve progress reporting.
221
            pp.next_phase()
222
            matches = rn.file_match(candidate_files)
223
            parents_matches = matches
224
            while len(parents_matches) > 0:
225
                required_parents = rn.get_required_parents(
226
                    parents_matches)
227
                parents_matches = rn.match_parents(required_parents,
228
                                                   missing_parents)
229
                matches.update(parents_matches)
230
            pp.next_phase()
3193.8.33 by Aaron Bentley
Add output, emit minimal inventory delta.
231
            delta = rn._make_inventory_delta(matches)
232
            for old, new, file_id, entry in delta:
6138.3.4 by Jonathan Riddell
add gettext() to uses of trace.note()
233
                trace.note( gettext("{0} => {1}").format(old, new) )
3193.8.33 by Aaron Bentley
Add output, emit minimal inventory delta.
234
            if not dry_run:
6883.5.1 by Jelmer Vernooij
Add from_tree argument to RenameMap.guess_renames.
235
                to_tree.add(required_parents)
236
                to_tree.apply_inventory_delta(delta)
3193.8.23 by Aaron Bentley
Split up guess_renames further.
237
3193.8.33 by Aaron Bentley
Add output, emit minimal inventory delta.
238
    def _make_inventory_delta(self, matches):
3193.8.27 by Aaron Bentley
Use apply_inventory_delta to rename files.
239
        delta = []
6656.1.1 by Martin
Apply 2to3 dict fixer and clean up resulting mess using view helpers
240
        file_id_matches = dict((f, p) for p, f in viewitems(matches))
6885.5.9 by Jelmer Vernooij
Remove specific_file_ids support from Tree.iter_entries_by_dir.
241
        file_id_query = []
242
        for f in viewvalues(matches):
243
            try:
244
                file_id_query.append(self.tree.id2path(f))
245
            except errors.NoSuchId:
246
                pass
6885.6.1 by Jelmer Vernooij
Support specific_files argument to Tree.iter_entries_by_dir.
247
        for old_path, entry in self.tree.iter_entries_by_dir(
6885.5.9 by Jelmer Vernooij
Remove specific_file_ids support from Tree.iter_entries_by_dir.
248
                specific_files=file_id_query):
3193.8.27 by Aaron Bentley
Use apply_inventory_delta to rename files.
249
            new_path = file_id_matches[entry.file_id]
3193.8.29 by Aaron Bentley
Use split instead of basename/dirname
250
            parent_path, new_name = osutils.split(new_path)
3193.8.27 by Aaron Bentley
Use apply_inventory_delta to rename files.
251
            parent_id = matches.get(parent_path)
252
            if parent_id is None:
253
                parent_id = self.tree.path2id(parent_path)
6547.1.1 by mnn
Fixed issue with RenameMap - also it supports renaming into new unversioned directory
254
                if parent_id is None:
255
                    added, ignored = self.tree.smart_add([parent_path], recurse=False)
256
                    if len(ignored) > 0 and ignored[0] == parent_path:
257
                        continue
258
                    else:
259
                        parent_id = self.tree.path2id(parent_path)
3193.8.33 by Aaron Bentley
Add output, emit minimal inventory delta.
260
            if entry.name == new_name and entry.parent_id == parent_id:
261
                continue
3193.8.27 by Aaron Bentley
Use apply_inventory_delta to rename files.
262
            new_entry = entry.copy()
263
            new_entry.parent_id = parent_id
3193.8.29 by Aaron Bentley
Use split instead of basename/dirname
264
            new_entry.name = new_name
3193.8.27 by Aaron Bentley
Use apply_inventory_delta to rename files.
265
            delta.append((old_path, new_path, new_entry.file_id, new_entry))
3193.8.33 by Aaron Bentley
Add output, emit minimal inventory delta.
266
        return delta