/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
3193.8.13 by Aaron Bentley
Update texts
1
# Copyright (C) 2009 Canonical Ltd
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3193.8.32 by Aaron Bentley
Update GPL preamble
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3193.8.13 by Aaron Bentley
Update texts
16
7479.2.1 by Jelmer Vernooij
Drop python2 support.
17
from io import BytesIO
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
18
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
19
from . import (
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
20
    osutils,
21
    progress,
3193.8.33 by Aaron Bentley
Add output, emit minimal inventory delta.
22
    trace,
6621.22.2 by Martin
Use BytesIO or StringIO from bzrlib.sixish
23
    )
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
24
from .i18n import gettext
25
from .ui import ui_factory
3193.8.4 by Aaron Bentley
Get rename detection working for files.
26
7143.15.2 by Jelmer Vernooij
Run autopep8.
27
3193.8.4 by Aaron Bentley
Get rename detection working for files.
28
class RenameMap(object):
3193.8.13 by Aaron Bentley
Update texts
29
    """Determine a mapping of renames."""
3193.8.4 by Aaron Bentley
Get rename detection working for files.
30
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
31
    def __init__(self, tree):
32
        self.tree = tree
3193.8.4 by Aaron Bentley
Get rename detection working for files.
33
        self.edge_hashes = {}
34
3193.8.11 by Aaron Bentley
Make hash iterator static
35
    @staticmethod
36
    def iter_edge_hashes(lines):
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
37
        """Iterate through the hashes of line pairs (which make up an edge).
38
39
        The hash is truncated using a modulus to avoid excessive memory
40
        consumption by the hitscount dict.  A modulus of 10Mi means that the
41
        maximum number of keys is 10Mi.  (Keys are normally 32 bits, e.g.
42
        4 Gi)
43
        """
3193.8.10 by Aaron Bentley
Update to weight hits and use 10M of keyspace
44
        modulus = 1024 * 1024 * 10
3193.8.4 by Aaron Bentley
Get rename detection working for files.
45
        for n in range(len(lines)):
7143.15.2 by Jelmer Vernooij
Run autopep8.
46
            yield hash(tuple(lines[n:n + 2])) % modulus
3193.8.4 by Aaron Bentley
Get rename detection working for files.
47
48
    def add_edge_hashes(self, lines, tag):
3193.8.13 by Aaron Bentley
Update texts
49
        """Update edge_hashes to include the given lines.
50
51
        :param lines: The lines to update the hashes for.
52
        :param tag: A tag uniquely associated with these lines (i.e. file-id)
53
        """
3193.8.4 by Aaron Bentley
Get rename detection working for files.
54
        for my_hash in self.iter_edge_hashes(lines):
55
            self.edge_hashes.setdefault(my_hash, set()).add(tag)
56
57
    def add_file_edge_hashes(self, tree, file_ids):
        """Record edge hashes for the text of each listed file.

        :param tree: The tree the file texts are read from.
        :param file_ids: A sized collection of file-ids to hash; each id is
            used as the tag for the hashes of its file.
        """
        wanted = [(tree.id2path(file_id), file_id) for file_id in file_ids]
        with ui_factory.nested_progress_bar() as task:
            texts = tree.iter_files_bytes(wanted)
            for index, (tag, chunks) in enumerate(texts):
                task.update(gettext('Calculating hashes'), index,
                            len(file_ids))
                # Round-trip the chunks through a buffer so that the text is
                # split on line boundaries before hashing.
                buf = BytesIO()
                buf.writelines(chunks)
                buf.seek(0)
                self.add_edge_hashes(buf.readlines(), tag)
3193.8.4 by Aaron Bentley
Get rename detection working for files.
72
73
    def hitcounts(self, lines):
3193.8.13 by Aaron Bentley
Update texts
74
        """Count the number of hash hits for each tag, for the given lines.
75
76
        Hits are weighted according to the number of tags the hash is
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
77
        associated with; more tags means that the hash is less rare and should
78
        tend to be ignored.
3193.8.13 by Aaron Bentley
Update texts
79
        :param lines: The lines to calculate hashes of.
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
80
        :return: a dict of {tag: hitcount}
3193.8.13 by Aaron Bentley
Update texts
81
        """
3193.8.4 by Aaron Bentley
Get rename detection working for files.
82
        hits = {}
83
        for my_hash in self.iter_edge_hashes(lines):
84
            tags = self.edge_hashes.get(my_hash)
85
            if tags is None:
86
                continue
3193.8.12 by Aaron Bentley
Reorganize slightly for the benefit of kcachegrind
87
            taglen = len(tags)
3193.8.4 by Aaron Bentley
Get rename detection working for files.
88
            for tag in tags:
89
                if tag not in hits:
90
                    hits[tag] = 0
3193.8.12 by Aaron Bentley
Reorganize slightly for the benefit of kcachegrind
91
                hits[tag] += 1.0 / taglen
3193.8.4 by Aaron Bentley
Get rename detection working for files.
92
        return hits
93
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
94
    def get_all_hits(self, paths):
        """Collect the hit counts of every listed path in the tree.

        :param paths: A sized collection of paths in ``self.tree``.
        :return: A list of tuples of count, path, file_id.
        """
        collected = []
        with ui_factory.nested_progress_bar() as task:
            for index, path in enumerate(paths):
                task.update(gettext('Determining hash hits'), index,
                            len(paths))
                per_tag = self.hitcounts(self.tree.get_file_lines(path))
                for tag, count in per_tag.items():
                    collected.append((count, path, tag))
        return collected
3193.8.12 by Aaron Bentley
Reorganize slightly for the benefit of kcachegrind
106
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
107
    def file_match(self, paths):
3193.8.13 by Aaron Bentley
Update texts
108
        """Return a mapping from file_ids to the supplied paths."""
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
109
        return self._match_hits(self.get_all_hits(paths))
3193.8.17 by Aaron Bentley
Get directory rename handling working.
110
111
    @staticmethod
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
112
    def _match_hits(hit_list):
3193.8.26 by Aaron Bentley
Updates from review.
113
        """Using a hit list, determine a path-to-fileid map.
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
114
115
        The hit list is a list of (count, path, file_id), where count is a
116
        (possibly float) number, with higher numbers indicating stronger
117
        matches.
118
        """
3193.8.12 by Aaron Bentley
Reorganize slightly for the benefit of kcachegrind
119
        seen_file_ids = set()
120
        path_map = {}
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
121
        for count, path, file_id in sorted(hit_list, reverse=True):
3193.8.26 by Aaron Bentley
Updates from review.
122
            if path in path_map or file_id in seen_file_ids:
3193.8.7 by Aaron Bentley
Saner algorithm for picking optimal file.
123
                continue
124
            path_map[path] = file_id
125
            seen_file_ids.add(file_id)
3193.8.4 by Aaron Bentley
Get rename detection working for files.
126
        return path_map
3193.8.16 by Aaron Bentley
Get a dict of required parents.
127
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
128
    def get_required_parents(self, matches):
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
129
        """Return a dict of all file parents that must be versioned.
130
131
        The keys are the required parents and the values are sets of their
132
        children.
133
        """
3193.8.16 by Aaron Bentley
Get a dict of required parents.
134
        required_parents = {}
135
        for path in matches:
136
            while True:
137
                child = path
138
                path = osutils.dirname(path)
6852.3.1 by Jelmer Vernooij
add Tree.is_versioned.
139
                if self.tree.is_versioned(path):
3193.8.16 by Aaron Bentley
Get a dict of required parents.
140
                    break
141
                required_parents.setdefault(path, []).append(child)
3193.8.17 by Aaron Bentley
Get directory rename handling working.
142
        require_ids = {}
7479.2.1 by Jelmer Vernooij
Drop python2 support.
143
        for parent, children in required_parents.items():
3193.8.17 by Aaron Bentley
Get directory rename handling working.
144
            child_file_ids = set()
145
            for child in children:
146
                file_id = matches.get(child)
147
                if file_id is not None:
148
                    child_file_ids.add(file_id)
149
            require_ids[parent] = child_file_ids
150
        return require_ids
151
152
    def match_parents(self, required_parents, missing_parents):
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
153
        """Map parent directories to file-ids.
154
155
        This is done by finding similarity between the file-ids of children of
156
        required parent directories and the file-ids of children of missing
157
        parent directories.
158
        """
159
        all_hits = []
7479.2.1 by Jelmer Vernooij
Drop python2 support.
160
        for file_id, file_id_children in missing_parents.items():
161
            for path, path_children in required_parents.items():
3193.8.17 by Aaron Bentley
Get directory rename handling working.
162
                hits = len(path_children.intersection(file_id_children))
163
                if hits > 0:
3193.8.20 by Aaron Bentley
Cleanup and enhance tests.
164
                    all_hits.append((hits, path, file_id))
165
        return self._match_hits(all_hits)
3193.8.18 by Aaron Bentley
Move all rename-guessing into RenameMap
166
3193.8.24 by Aaron Bentley
Use tree member instead of passing it in
167
    def _find_missing_files(self, basis):
        """Classify the changes between *basis* and ``self.tree``.

        :param basis: The tree passed to ``self.tree.iter_changes``.
        :return: A tuple ``(missing_files, missing_parents,
            candidate_files)``: the set of file-ids of versioned files that
            no longer have a kind in the tree; a dict mapping the file-id of
            each old parent directory that is no longer present to the set
            of file-ids of its missing children; and the set of paths of
            unversioned, non-ignored files.
        """
        candidate_files = set()
        missing_parents = {}
        missing_files = set()
        with ui_factory.nested_progress_bar() as task:
            changes = self.tree.iter_changes(
                basis, want_unversioned=True, pb=task)
            for change in changes:
                if change.kind[1] is None and change.versioned[1]:
                    # Versioned, but gone from the tree.
                    old_parent_path = self.tree.id2path(change.parent_id[0])
                    if not self.tree.has_filename(old_parent_path):
                        missing_parents.setdefault(
                            change.parent_id[0], set()).add(change.file_id)
                    # Only files are tracked; other kinds are not handled.
                    if change.kind[0] == 'file':
                        missing_files.add(change.file_id)
                if change.versioned != (False, False):
                    continue
                # Unversioned on both sides: a possible rename target.
                if self.tree.is_ignored(change.path[1]):
                    continue
                if change.kind[1] == 'file':
                    candidate_files.add(change.path[1])
                if change.kind[1] == 'directory':
                    for _dir, children in self.tree.walkdirs(change.path[1]):
                        candidate_files.update(
                            child[0] for child in children
                            if child[2] == 'file')
        return missing_files, missing_parents, candidate_files
196
197
    @classmethod
    def guess_renames(klass, from_tree, to_tree, dry_run=False):
        """Guess which files were renamed and, unless dry_run, rename them.

        We assume that unversioned files and missing files indicate that
        versioned files have been renamed outside of Bazaar.  Every guessed
        rename is reported through ``trace.note``.

        :param from_tree: A tree to compare from
        :param to_tree: A write-locked working tree.
        :param dry_run: If true, report the renames but do not add parents
            or apply the inventory delta to ``to_tree``.
        """
        required_parents = {}
        with ui_factory.nested_progress_bar() as task:
            pp = progress.ProgressPhase('Guessing renames', 4, task)
            with from_tree.lock_read():
                rn = klass(to_tree)
                pp.next_phase()
                found = rn._find_missing_files(from_tree)
                missing_files, missing_parents, candidate_files = found
                pp.next_phase()
                rn.add_file_edge_hashes(from_tree, missing_files)
            pp.next_phase()
            matches = rn.file_match(candidate_files)
            # Repeatedly match the parents of whatever was matched last,
            # until a round produces no new directory matches.
            pending = matches
            while pending:
                required_parents = rn.get_required_parents(pending)
                pending = rn.match_parents(required_parents, missing_parents)
                matches.update(pending)
            pp.next_phase()
            delta = rn._make_inventory_delta(matches)
            for old_path, new_path, _file_id, _entry in delta:
                trace.note(gettext("{0} => {1}").format(old_path, new_path))
            if dry_run:
                return
            to_tree.add(required_parents)
            to_tree.apply_inventory_delta(delta)
3193.8.23 by Aaron Bentley
Split up guess_renames further.
233
3193.8.33 by Aaron Bentley
Add output, emit minimal inventory delta.
234
    def _make_inventory_delta(self, matches):
        """Build the inventory delta that applies the guessed renames.

        :param matches: A dict mapping new path to the file-id guessed to
            have moved there.
        :return: A list of ``(old_path, new_path, file_id, new_entry)``
            tuples, containing only entries whose name or parent changes.
        """
        # The module-level imports do not provide ``errors``; without this
        # import the except clause below raises NameError instead of
        # skipping the unknown id.
        from . import errors

        delta = []
        new_path_by_id = {file_id: path for path, file_id in matches.items()}
        old_paths = []
        for file_id in matches.values():
            try:
                old_paths.append(self.tree.id2path(file_id))
            except errors.NoSuchId:
                # Deliberately best-effort: ids with no path in the tree are
                # left out of the query.
                continue
        for old_path, entry in self.tree.iter_entries_by_dir(
                specific_files=old_paths):
            new_path = new_path_by_id[entry.file_id]
            parent_path, new_name = osutils.split(new_path)
            # Prefer a parent that is itself being matched; otherwise fall
            # back to whatever id the tree has for the parent path.
            parent_id = matches.get(parent_path)
            if parent_id is None:
                parent_id = self.tree.path2id(parent_path)
                if parent_id is None:
                    # The new parent is not versioned: add it so the entry
                    # can be moved into it.
                    # NOTE(review): this modifies the tree even when the
                    # caller (guess_renames) was asked for a dry run.
                    # NOTE(review): assumes ``ignored`` can be indexed with
                    # 0 - confirm against smart_add's return type.
                    _added, ignored = self.tree.smart_add(
                        [parent_path], recurse=False)
                    if len(ignored) > 0 and ignored[0] == parent_path:
                        continue
                    parent_id = self.tree.path2id(parent_path)
            if entry.name == new_name and entry.parent_id == parent_id:
                # Nothing changes for this entry; keep the delta minimal.
                continue
            new_entry = entry.copy()
            new_entry.parent_id = parent_id
            new_entry.name = new_name
            delta.append((old_path, new_path, new_entry.file_id, new_entry))
        return delta