# Copyright (C) 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
from __future__ import absolute_import

from io import BytesIO

from . import (
    osutils,
    progress,
    trace,
    )
from .i18n import gettext
from .sixish import viewitems
from .ui import ui_factory

class RenameMap(object):
    """Determine a mapping of renames."""

    def __init__(self, tree):
        """Create a RenameMap operating on ``tree``.

        :param tree: The tree to search for renames in.
        """
        self.tree = tree
        # Maps a truncated edge hash -> set of tags (file-ids) whose
        # content produced that hash.  Populated by add_edge_hashes().
        self.edge_hashes = {}

def iter_edge_hashes(lines):
41
"""Iterate through the hashes of line pairs (which make up an edge).
43
The hash is truncated using a modulus to avoid excessive memory
44
consumption by the hitscount dict. A modulus of 10Mi means that the
45
maximum number of keys is 10Mi. (Keys are normally 32 bits, e.g.
48
modulus = 1024 * 1024 * 10
49
for n in range(len(lines)):
50
yield hash(tuple(lines[n:n+2])) % modulus
52
def add_edge_hashes(self, lines, tag):
53
"""Update edge_hashes to include the given lines.
55
:param lines: The lines to update the hashes for.
56
:param tag: A tag uniquely associated with these lines (i.e. file-id)
58
for my_hash in self.iter_edge_hashes(lines):
59
self.edge_hashes.setdefault(my_hash, set()).add(tag)
61
def add_file_edge_hashes(self, tree, file_ids):
62
"""Update to reflect the hashes for files in the tree.
64
:param tree: The tree containing the files.
65
:param file_ids: A list of file_ids to perform the updates for.
67
desired_files = [(f, f) for f in file_ids]
68
task = ui_factory.nested_progress_bar()
70
for num, (file_id, contents) in enumerate(
71
tree.iter_files_bytes(desired_files)):
72
task.update(gettext('Calculating hashes'), num, len(file_ids))
74
s.writelines(contents)
76
self.add_edge_hashes(s.readlines(), file_id)
80
def hitcounts(self, lines):
81
"""Count the number of hash hits for each tag, for the given lines.
83
Hits are weighted according to the number of tags the hash is
84
associated with; more tags means that the hash is less rare and should
86
:param lines: The lines to calculate hashes of.
87
:return: a dict of {tag: hitcount}
90
for my_hash in self.iter_edge_hashes(lines):
91
tags = self.edge_hashes.get(my_hash)
98
hits[tag] += 1.0 / taglen
101
def get_all_hits(self, paths):
102
"""Find all the hit counts for the listed paths in the tree.
104
:return: A list of tuples of count, path, file_id.
107
task = ui_factory.nested_progress_bar()
109
for num, path in enumerate(paths):
110
task.update(gettext('Determining hash hits'), num, len(paths))
111
hits = self.hitcounts(self.tree.get_file_lines(None,
113
all_hits.extend((v, path, k) for k, v in viewitems(hits))
118
def file_match(self, paths):
119
"""Return a mapping from file_ids to the supplied paths."""
120
return self._match_hits(self.get_all_hits(paths))
123
def _match_hits(hit_list):
124
"""Using a hit list, determine a path-to-fileid map.
126
The hit list is a list of (count, path, file_id), where count is a
127
(possibly float) number, with higher numbers indicating stronger
130
seen_file_ids = set()
132
for count, path, file_id in sorted(hit_list, reverse=True):
133
if path in path_map or file_id in seen_file_ids:
135
path_map[path] = file_id
136
seen_file_ids.add(file_id)
139
def get_required_parents(self, matches):
140
"""Return a dict of all file parents that must be versioned.
142
The keys are the required parents and the values are sets of their
145
required_parents = {}
149
path = osutils.dirname(path)
150
if self.tree.path2id(path) is not None:
152
required_parents.setdefault(path, []).append(child)
154
for parent, children in viewitems(required_parents):
155
child_file_ids = set()
156
for child in children:
157
file_id = matches.get(child)
158
if file_id is not None:
159
child_file_ids.add(file_id)
160
require_ids[parent] = child_file_ids
163
def match_parents(self, required_parents, missing_parents):
164
"""Map parent directories to file-ids.
166
This is done by finding similarity between the file-ids of children of
167
required parent directories and the file-ids of children of missing
171
for file_id, file_id_children in viewitems(missing_parents):
172
for path, path_children in viewitems(required_parents):
173
hits = len(path_children.intersection(file_id_children))
175
all_hits.append((hits, path, file_id))
176
return self._match_hits(all_hits)
178
def _find_missing_files(self, basis):
179
missing_files = set()
181
candidate_files = set()
182
task = ui_factory.nested_progress_bar()
183
iterator = self.tree.iter_changes(basis, want_unversioned=True,
186
for (file_id, paths, changed_content, versioned, parent, name,
187
kind, executable) in iterator:
188
if kind[1] is None and versioned[1]:
189
missing_parents.setdefault(parent[0], set()).add(file_id)
190
if kind[0] == 'file':
191
missing_files.add(file_id)
193
#other kinds are not handled
195
if versioned == (False, False):
196
if self.tree.is_ignored(paths[1]):
198
if kind[1] == 'file':
199
candidate_files.add(paths[1])
200
if kind[1] == 'directory':
201
for _dir, children in self.tree.walkdirs(paths[1]):
202
for child in children:
203
if child[2] == 'file':
204
candidate_files.add(child[0])
207
return missing_files, missing_parents, candidate_files
210
def guess_renames(klass, tree, dry_run=False):
211
"""Guess which files to rename, and perform the rename.
213
We assume that unversioned files and missing files indicate that
214
versioned files have been renamed outside of Bazaar.
216
:param tree: A write-locked working tree.
218
required_parents = {}
219
task = ui_factory.nested_progress_bar()
221
pp = progress.ProgressPhase('Guessing renames', 4, task)
222
basis = tree.basis_tree()
227
missing_files, missing_parents, candidate_files = (
228
rn._find_missing_files(basis))
230
rn.add_file_edge_hashes(basis, missing_files)
234
matches = rn.file_match(candidate_files)
235
parents_matches = matches
236
while len(parents_matches) > 0:
237
required_parents = rn.get_required_parents(
239
parents_matches = rn.match_parents(required_parents,
241
matches.update(parents_matches)
243
delta = rn._make_inventory_delta(matches)
244
for old, new, file_id, entry in delta:
245
trace.note( gettext("{0} => {1}").format(old, new) )
247
tree.add(required_parents)
248
tree.apply_inventory_delta(delta)
252
def _make_inventory_delta(self, matches):
254
file_id_matches = dict((f, p) for p, f in viewitems(matches))
255
for old_path, entry in self.tree.iter_entries_by_dir(file_id_matches):
256
new_path = file_id_matches[entry.file_id]
257
parent_path, new_name = osutils.split(new_path)
258
parent_id = matches.get(parent_path)
259
if parent_id is None:
260
parent_id = self.tree.path2id(parent_path)
261
if entry.name == new_name and entry.parent_id == parent_id:
263
new_entry = entry.copy()
264
new_entry.parent_id = parent_id
265
new_entry.name = new_name
266
delta.append((old_path, new_path, new_entry.file_id, new_entry))