bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
3193.8.13
by Aaron Bentley
 Update texts  | 
1  | 
# Copyright (C) 2009 Canonical Ltd
 | 
2  | 
#
 | 
|
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
| 
3193.8.32
by Aaron Bentley
 Update GPL preamble  | 
15  | 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
| 
3193.8.13
by Aaron Bentley
 Update texts  | 
16  | 
|
17  | 
||
| 
3193.8.4
by Aaron Bentley
 Get rename detection working for files.  | 
18  | 
from cStringIO import StringIO  | 
| 
3193.8.18
by Aaron Bentley
 Move all rename-guessing into RenameMap  | 
19  | 
|
20  | 
from bzrlib import (  | 
|
21  | 
osutils,  | 
|
22  | 
progress,  | 
|
| 
3193.8.33
by Aaron Bentley
 Add output, emit minimal inventory delta.  | 
23  | 
trace,  | 
| 
3193.8.18
by Aaron Bentley
 Move all rename-guessing into RenameMap  | 
24  | 
)
 | 
| 
3193.8.16
by Aaron Bentley
 Get a dict of required parents.  | 
25  | 
from bzrlib.ui import ui_factory  | 
| 
3193.8.4
by Aaron Bentley
 Get rename detection working for files.  | 
26  | 
|
27  | 
||
28  | 
class RenameMap(object):
    """Determine a mapping of renames.

    Texts are fingerprinted by hashing each pair of adjacent lines (an
    "edge").  ``edge_hashes`` maps every such hash to the set of tags
    (file-ids) whose text produced it, which lets the text of an arbitrary
    path be scored against all of the recorded texts.
    """

    def __init__(self, tree):
        """Create a RenameMap.

        :param tree: The tree whose paths are examined by get_all_hits,
            get_required_parents, _find_missing_files and
            _make_inventory_delta.
        """
        self.tree = tree
        # {edge hash: set of tags whose lines produced that hash}
        self.edge_hashes = {}

    @staticmethod
    def iter_edge_hashes(lines):
        """Iterate through the hashes of line pairs (which make up an edge).

        One hash is produced per line.  The window starting at the final
        line holds only that line, so the last hash covers a single line
        rather than a pair.

        The hash is truncated using a modulus to avoid excessive memory
        consumption by the hitscount dict.  A modulus of 10Mi means that the
        maximum number of keys is 10Mi.  (Keys are normally 32 bits, e.g.
        4 Gi)

        :param lines: A sequence of hashable lines.
        """
        modulus = 1024 * 1024 * 10
        for n in range(len(lines)):
            yield hash(tuple(lines[n:n+2])) % modulus

    def add_edge_hashes(self, lines, tag):
        """Update edge_hashes to include the given lines.

        :param lines: The lines to update the hashes for.
        :param tag: A tag uniquely associated with these lines (i.e. file-id)
        """
        for my_hash in self.iter_edge_hashes(lines):
            self.edge_hashes.setdefault(my_hash, set()).add(tag)

    def add_file_edge_hashes(self, tree, file_ids):
        """Update to reflect the hashes for files in the tree.

        Note that the files are read from the ``tree`` argument, not from
        ``self.tree``.

        :param tree: The tree containing the files.
        :param file_ids: A list of file_ids to perform the updates for.
        """
        # Each file_id doubles as the identifier handed back by
        # iter_files_bytes, so results can be tagged directly.
        desired_files = [(f, f) for f in file_ids]
        task = ui_factory.nested_progress_bar()
        try:
            for num, (file_id, contents) in enumerate(
                tree.iter_files_bytes(desired_files)):
                task.update('Calculating hashes', num, len(file_ids))
                # Re-split the content chunks into lines by round-tripping
                # them through an in-memory file.
                s = StringIO()
                s.writelines(contents)
                s.seek(0)
                self.add_edge_hashes(s.readlines(), file_id)
        finally:
            task.finished()

    def hitcounts(self, lines):
        """Count the number of hash hits for each tag, for the given lines.

        Hits are weighted according to the number of tags the hash is
        associated with; more tags means that the hash is less rare and should
        tend to be ignored.

        :param lines: The lines to calculate hashes of.
        :return: a dict of {tag: hitcount}
        """
        hits = {}
        for my_hash in self.iter_edge_hashes(lines):
            tags = self.edge_hashes.get(my_hash)
            if tags is None:
                continue
            # A hash shared by N tags contributes 1/N to each of them.
            weight = 1.0 / len(tags)
            for tag in tags:
                hits[tag] = hits.get(tag, 0) + weight
        return hits

    def get_all_hits(self, paths):
        """Find all the hit counts for the listed paths in the tree.

        :param paths: A sized collection of paths in self.tree.
        :return: A list of tuples of count, path, file_id.
        """
        all_hits = []
        task = ui_factory.nested_progress_bar()
        try:
            for num, path in enumerate(paths):
                task.update('Determining hash hits', num, len(paths))
                hits = self.hitcounts(self.tree.get_file_lines(None,
                                                               path=path))
                all_hits.extend((v, path, k) for k, v in hits.items())
        finally:
            task.finished()
        return all_hits

    def file_match(self, paths):
        """Return a mapping from file_ids to the supplied paths.

        The returned dict is the one built by _match_hits, i.e. it is keyed
        by path with file_ids as values.
        """
        return self._match_hits(self.get_all_hits(paths))

    @staticmethod
    def _match_hits(hit_list):
        """Using a hit list, determine a path-to-fileid map.

        The hit list is a list of (count, path, file_id), where count is a
        (possibly float) number, with higher numbers indicating stronger
        matches.

        Hits are consumed strongest first; a hit is dropped when either its
        path or its file_id has already been claimed by a stronger hit.

        :return: A dict of {path: file_id}.
        """
        seen_file_ids = set()
        path_map = {}
        for count, path, file_id in sorted(hit_list, reverse=True):
            if path in path_map or file_id in seen_file_ids:
                continue
            path_map[path] = file_id
            seen_file_ids.add(file_id)
        return path_map

    def get_required_parents(self, matches):
        """Return a dict of all file parents that must be versioned.

        The keys are the required parents and the values are sets of their
        children.  More precisely, each value is the set of file_ids that
        ``matches`` assigns to the direct children of that parent; children
        without an entry in ``matches`` are not represented.

        :param matches: A dict of {path: file_id}.
        """
        required_parents = {}
        for path in matches:
            # Walk up the directory chain until an ancestor that the tree
            # already knows about is reached.
            while True:
                child = path
                path = osutils.dirname(path)
                if self.tree.path2id(path) is not None:
                    break
                if path == child:
                    # dirname made no progress, so there is no further
                    # ancestor to try; stop rather than loop forever.
                    break
                required_parents.setdefault(path, []).append(child)
        require_ids = {}
        for parent, children in required_parents.items():
            child_file_ids = set()
            for child in children:
                file_id = matches.get(child)
                if file_id is not None:
                    child_file_ids.add(file_id)
            require_ids[parent] = child_file_ids
        return require_ids

    def match_parents(self, required_parents, missing_parents):
        """Map parent directories to file-ids.

        This is done by finding similarity between the file-ids of children of
        required parent directories and the file-ids of children of missing
        parent directories.

        :param required_parents: A dict of {path: set of child file_ids}.
        :param missing_parents: A dict of {file_id: set of child file_ids}.
        :return: A dict of {path: file_id}, as produced by _match_hits.
        """
        all_hits = []
        for file_id, file_id_children in missing_parents.items():
            for path, path_children in required_parents.items():
                # The score is the number of children the two have in common.
                hits = len(path_children.intersection(file_id_children))
                if hits > 0:
                    all_hits.append((hits, path, file_id))
        return self._match_hits(all_hits)

    def _find_missing_files(self, basis):
        """Scan the changes between basis and self.tree for rename inputs.

        :param basis: The tree passed to self.tree.iter_changes.
        :return: A tuple (missing_files, missing_parents, candidate_files):
            missing_files is the set of file_ids of entries that have no
            kind on the second side of the change, are versioned there, and
            were files on the first side; missing_parents maps the
            first-side parent id of every such kind-less entry (of any
            former kind) to the set of those entries' file_ids;
            candidate_files is the set of paths of non-ignored files that
            are versioned on neither side, including files found beneath
            such directories.
        """
        missing_files = set()
        missing_parents = {}
        candidate_files = set()
        task = ui_factory.nested_progress_bar()
        try:
            # Created inside the try block so that the progress bar is
            # always finished, even if iter_changes itself raises.
            iterator = self.tree.iter_changes(basis, want_unversioned=True,
                                              pb=task)
            for (file_id, paths, changed_content, versioned, parent, name,
                 kind, executable) in iterator:
                if kind[1] is None and versioned[1]:
                    missing_parents.setdefault(parent[0], set()).add(file_id)
                    # Only files are recorded; other kinds are not handled.
                    if kind[0] == 'file':
                        missing_files.add(file_id)
                if versioned == (False, False):
                    if self.tree.is_ignored(paths[1]):
                        continue
                    if kind[1] == 'file':
                        candidate_files.add(paths[1])
                    if kind[1] == 'directory':
                        for _dir, children in self.tree.walkdirs(paths[1]):
                            for child in children:
                                if child[2] == 'file':
                                    candidate_files.add(child[0])
        finally:
            task.finished()
        return missing_files, missing_parents, candidate_files

    @classmethod
    def guess_renames(klass, tree, dry_run=False):
        """Guess which files to rename, and perform the rename.

        We assume that unversioned files and missing files indicate that
        versioned files have been renamed outside of Bazaar.

        Every guessed rename is reported through trace.note.

        :param tree: A write-locked working tree.
        :param dry_run: If True, report the guessed renames but leave the
            tree unmodified.
        """
        required_parents = {}
        task = ui_factory.nested_progress_bar()
        try:
            pp = progress.ProgressPhase('Guessing renames', 4, task)
            basis = tree.basis_tree()
            basis.lock_read()
            try:
                rn = klass(tree)
                pp.next_phase()
                missing_files, missing_parents, candidate_files = (
                    rn._find_missing_files(basis))
                pp.next_phase()
                rn.add_file_edge_hashes(basis, missing_files)
            finally:
                # The basis tree is only needed while collecting the
                # missing files and their hashes.
                basis.unlock()
            pp.next_phase()
            matches = rn.file_match(candidate_files)
            parents_matches = matches
            # Matching a round of parents may in turn require matching
            # their own parents; repeat until a round matches nothing.
            while len(parents_matches) > 0:
                required_parents = rn.get_required_parents(
                    parents_matches)
                parents_matches = rn.match_parents(required_parents,
                                                   missing_parents)
                matches.update(parents_matches)
            pp.next_phase()
            delta = rn._make_inventory_delta(matches)
            for old, new, file_id, entry in delta:
                trace.note("%s => %s", old, new)
            if not dry_run:
                tree.add(required_parents)
                tree.apply_inventory_delta(delta)
        finally:
            task.finished()

    def _make_inventory_delta(self, matches):
        """Build the inventory delta that applies the given matches.

        Entries whose name and parent already agree with their matched path
        are left out, so the delta only lists entries that actually change.

        :param matches: A dict of {new path: file_id}.
        :return: A list of (old_path, new_path, file_id, new_entry) tuples.
        """
        delta = []
        file_id_matches = dict((f, p) for p, f in matches.items())
        for old_path, entry in self.tree.iter_entries_by_dir(matches.values()):
            new_path = file_id_matches[entry.file_id]
            parent_path, new_name = osutils.split(new_path)
            # Prefer a parent that is itself being renamed into place; fall
            # back to whatever the tree reports at that path.
            parent_id = matches.get(parent_path)
            if parent_id is None:
                parent_id = self.tree.path2id(parent_path)
            if entry.name == new_name and entry.parent_id == parent_id:
                continue
            new_entry = entry.copy()
            new_entry.parent_id = parent_id
            new_entry.name = new_name
            delta.append((old_path, new_path, new_entry.file_id, new_entry))
        return delta