bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
1 |
# Copyright (C) 2005-2011 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
17 |
"""Walk multiple trees simultaneously.
|
|
18 |
"""
|
|
19 |
||
20 |
from . import ( |
|
21 |
errors, |
|
22 |
osutils, |
|
23 |
)
|
|
24 |
||
25 |
||
26 |
class MultiWalker(object): |
|
27 |
"""Walk multiple trees simultaneously, getting combined results.""" |
|
28 |
||
29 |
# Note: This could be written to not assume you can do out-of-order
|
|
30 |
# lookups. Instead any nodes that don't match in all trees could be
|
|
31 |
# marked as 'deferred', and then returned in the final cleanup loop.
|
|
32 |
# For now, I think it is "nicer" to return things as close to the
|
|
33 |
# "master_tree" order as we can.
|
|
34 |
||
35 |
def __init__(self, master_tree, other_trees): |
|
36 |
"""Create a new MultiWalker. |
|
37 |
||
38 |
All trees being walked must implement "iter_entries_by_dir()", such
|
|
39 |
that they yield (path, object) tuples, where that object will have a
|
|
40 |
'.file_id' member, that can be used to check equality.
|
|
41 |
||
42 |
:param master_tree: All trees will be 'slaved' to the master_tree such
|
|
43 |
that nodes in master_tree will be used as 'first-pass' sync points.
|
|
44 |
Any nodes that aren't in master_tree will be merged in a second
|
|
45 |
pass.
|
|
46 |
:param other_trees: A list of other trees to walk simultaneously.
|
|
47 |
"""
|
|
48 |
self._master_tree = master_tree |
|
49 |
self._other_trees = other_trees |
|
50 |
||
51 |
# Keep track of any nodes that were properly processed just out of
|
|
52 |
# order, that way we don't return them at the end, we don't have to
|
|
53 |
# track *all* processed file_ids, just the out-of-order ones
|
|
54 |
self._out_of_order_processed = set() |
|
55 |
||
56 |
@staticmethod
|
|
57 |
def _step_one(iterator): |
|
58 |
"""Step an iter_entries_by_dir iterator. |
|
59 |
||
60 |
:return: (has_more, path, ie)
|
|
61 |
If has_more is False, path and ie will be None.
|
|
62 |
"""
|
|
63 |
try: |
|
64 |
path, ie = next(iterator) |
|
65 |
except StopIteration: |
|
66 |
return False, None, None |
|
67 |
else: |
|
68 |
return True, path, ie |
|
69 |
||
70 |
@staticmethod
|
|
71 |
def _lt_path_by_dirblock(path1, path2): |
|
72 |
"""Compare two paths based on what directory they are in. |
|
73 |
||
74 |
This generates a sort order, such that all children of a directory are
|
|
75 |
sorted together, and grandchildren are in the same order as the
|
|
76 |
children appear. But all grandchildren come after all children.
|
|
77 |
||
78 |
:param path1: first path
|
|
79 |
:param path2: the second path
|
|
80 |
:return: negative number if ``path1`` comes first,
|
|
81 |
0 if paths are equal
|
|
82 |
and a positive number if ``path2`` sorts first
|
|
83 |
"""
|
|
84 |
# Shortcut this special case
|
|
85 |
if path1 == path2: |
|
86 |
return False |
|
87 |
# This is stolen from _dirstate_helpers_py.py, only switching it to
|
|
88 |
# Unicode objects. Consider using encode_utf8() and then using the
|
|
89 |
# optimized versions, or maybe writing optimized unicode versions.
|
|
7479.2.1
by Jelmer Vernooij
Drop python2 support. |
90 |
if not isinstance(path1, str): |
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
91 |
raise TypeError("'path1' must be a unicode string, not %s: %r" |
92 |
% (type(path1), path1)) |
|
7479.2.1
by Jelmer Vernooij
Drop python2 support. |
93 |
if not isinstance(path2, str): |
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
94 |
raise TypeError("'path2' must be a unicode string, not %s: %r" |
95 |
% (type(path2), path2)) |
|
96 |
return (MultiWalker._path_to_key(path1) < |
|
97 |
MultiWalker._path_to_key(path2)) |
|
98 |
||
99 |
@staticmethod
|
|
100 |
def _path_to_key(path): |
|
101 |
dirname, basename = osutils.split(path) |
|
102 |
return (dirname.split(u'/'), basename) |
|
103 |
||
7296.7.4
by Jelmer Vernooij
Refactor multiwalker. |
104 |
def _lookup_by_master_path(self, extra_entries, other_tree, master_path): |
105 |
return self._lookup_by_file_id( |
|
106 |
extra_entries, other_tree, |
|
107 |
self._master_tree.path2id(master_path)) |
|
108 |
||
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
109 |
def _lookup_by_file_id(self, extra_entries, other_tree, file_id): |
110 |
"""Lookup an inventory entry by file_id. |
|
111 |
||
112 |
This is called when an entry is missing in the normal order.
|
|
113 |
Generally this is because a file was either renamed, or it was
|
|
114 |
deleted/added. If the entry was found in the inventory and not in
|
|
115 |
extra_entries, it will be added to self._out_of_order_processed
|
|
116 |
||
117 |
:param extra_entries: A dictionary of {file_id: (path, ie)}. This
|
|
118 |
should be filled with entries that were found before they were
|
|
119 |
used. If file_id is present, it will be removed from the
|
|
120 |
dictionary.
|
|
121 |
:param other_tree: The Tree to search, in case we didn't find the entry
|
|
122 |
yet.
|
|
123 |
:param file_id: The file_id to look for
|
|
124 |
:return: (path, ie) if found or (None, None) if not present.
|
|
125 |
"""
|
|
126 |
if file_id in extra_entries: |
|
127 |
return extra_entries.pop(file_id) |
|
128 |
try: |
|
129 |
cur_path = other_tree.id2path(file_id) |
|
130 |
except errors.NoSuchId: |
|
131 |
cur_path = None |
|
132 |
if cur_path is None: |
|
133 |
return (None, None) |
|
134 |
else: |
|
135 |
self._out_of_order_processed.add(file_id) |
|
7296.7.5
by Jelmer Vernooij
Avoid use of inventory. |
136 |
cur_ie = next(other_tree.iter_entries_by_dir( |
137 |
specific_files=[cur_path]))[1] |
|
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
138 |
return (cur_path, cur_ie) |
139 |
||
140 |
def iter_all(self): |
|
141 |
"""Match up the values in the different trees.""" |
|
142 |
for result in self._walk_master_tree(): |
|
143 |
yield result |
|
144 |
self._finish_others() |
|
145 |
for result in self._walk_others(): |
|
146 |
yield result |
|
147 |
||
148 |
def _walk_master_tree(self): |
|
149 |
"""First pass, walk all trees in lock-step. |
|
150 |
||
151 |
When we are done, all nodes in the master_tree will have been
|
|
152 |
processed. _other_walkers, _other_entries, and _others_extra will be
|
|
153 |
set on 'self' for future processing.
|
|
154 |
"""
|
|
155 |
# This iterator has the most "inlining" done, because it tends to touch
|
|
156 |
# every file in the tree, while the others only hit nodes that don't
|
|
157 |
# match.
|
|
158 |
master_iterator = self._master_tree.iter_entries_by_dir() |
|
159 |
||
160 |
other_walkers = [other.iter_entries_by_dir() |
|
161 |
for other in self._other_trees] |
|
162 |
other_entries = [self._step_one(walker) for walker in other_walkers] |
|
163 |
# Track extra nodes in the other trees
|
|
164 |
others_extra = [{} for _ in range(len(self._other_trees))] |
|
165 |
||
166 |
master_has_more = True |
|
167 |
step_one = self._step_one |
|
168 |
lookup_by_file_id = self._lookup_by_file_id |
|
169 |
out_of_order_processed = self._out_of_order_processed |
|
170 |
||
171 |
while master_has_more: |
|
172 |
(master_has_more, path, master_ie) = step_one(master_iterator) |
|
173 |
if not master_has_more: |
|
174 |
break
|
|
175 |
||
176 |
other_values = [] |
|
177 |
other_values_append = other_values.append |
|
178 |
next_other_entries = [] |
|
179 |
next_other_entries_append = next_other_entries.append |
|
180 |
for idx, (other_has_more, other_path, other_ie) in enumerate(other_entries): |
|
181 |
if not other_has_more: |
|
7296.7.4
by Jelmer Vernooij
Refactor multiwalker. |
182 |
other_values_append(self._lookup_by_master_path( |
183 |
others_extra[idx], self._other_trees[idx], path)) |
|
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
184 |
next_other_entries_append((False, None, None)) |
7296.7.4
by Jelmer Vernooij
Refactor multiwalker. |
185 |
elif master_ie.file_id == other_ie.file_id: |
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
186 |
# This is the critical code path, as most of the entries
|
187 |
# should match between most trees.
|
|
188 |
other_values_append((other_path, other_ie)) |
|
189 |
next_other_entries_append(step_one(other_walkers[idx])) |
|
190 |
else: |
|
191 |
# This walker did not match, step it until it either
|
|
192 |
# matches, or we know we are past the current walker.
|
|
193 |
other_walker = other_walkers[idx] |
|
194 |
other_extra = others_extra[idx] |
|
195 |
while (other_has_more and |
|
196 |
self._lt_path_by_dirblock(other_path, path)): |
|
197 |
other_file_id = other_ie.file_id |
|
198 |
if other_file_id not in out_of_order_processed: |
|
199 |
other_extra[other_file_id] = (other_path, other_ie) |
|
200 |
other_has_more, other_path, other_ie = \ |
|
201 |
step_one(other_walker) |
|
7296.7.4
by Jelmer Vernooij
Refactor multiwalker. |
202 |
if other_has_more and other_ie.file_id == master_ie.file_id: |
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
203 |
# We ended up walking to this point, match and step
|
204 |
# again
|
|
205 |
other_values_append((other_path, other_ie)) |
|
206 |
other_has_more, other_path, other_ie = \ |
|
207 |
step_one(other_walker) |
|
208 |
else: |
|
209 |
# This record isn't in the normal order, see if it
|
|
210 |
# exists at all.
|
|
7296.7.4
by Jelmer Vernooij
Refactor multiwalker. |
211 |
other_values_append(self._lookup_by_master_path( |
212 |
other_extra, self._other_trees[idx], path)) |
|
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
213 |
next_other_entries_append((other_has_more, other_path, |
214 |
other_ie)) |
|
215 |
other_entries = next_other_entries |
|
216 |
||
217 |
# We've matched all the walkers, yield this datapoint
|
|
7296.7.4
by Jelmer Vernooij
Refactor multiwalker. |
218 |
yield path, master_ie.file_id, master_ie, other_values |
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
219 |
self._other_walkers = other_walkers |
220 |
self._other_entries = other_entries |
|
221 |
self._others_extra = others_extra |
|
222 |
||
223 |
def _finish_others(self): |
|
224 |
"""Finish walking the other iterators, so we get all entries.""" |
|
225 |
for idx, info in enumerate(self._other_entries): |
|
226 |
other_extra = self._others_extra[idx] |
|
227 |
(other_has_more, other_path, other_ie) = info |
|
228 |
while other_has_more: |
|
229 |
other_file_id = other_ie.file_id |
|
230 |
if other_file_id not in self._out_of_order_processed: |
|
231 |
other_extra[other_file_id] = (other_path, other_ie) |
|
232 |
other_has_more, other_path, other_ie = \ |
|
233 |
self._step_one(self._other_walkers[idx]) |
|
234 |
del self._other_entries |
|
235 |
||
236 |
def _walk_others(self): |
|
237 |
"""Finish up by walking all the 'deferred' nodes.""" |
|
238 |
# TODO: One alternative would be to grab all possible unprocessed
|
|
239 |
# file_ids, and then sort by path, and then yield them. That
|
|
240 |
# might ensure better ordering, in case a caller strictly
|
|
241 |
# requires parents before children.
|
|
242 |
for idx, other_extra in enumerate(self._others_extra): |
|
7479.2.1
by Jelmer Vernooij
Drop python2 support. |
243 |
others = sorted(other_extra.values(), |
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
244 |
key=lambda x: self._path_to_key(x[0])) |
245 |
for other_path, other_ie in others: |
|
246 |
file_id = other_ie.file_id |
|
247 |
# We don't need to check out_of_order_processed here, because
|
|
248 |
# the lookup_by_file_id will be removing anything processed
|
|
249 |
# from the extras cache
|
|
250 |
other_extra.pop(file_id) |
|
251 |
other_values = [(None, None)] * idx |
|
252 |
other_values.append((other_path, other_ie)) |
|
253 |
for alt_idx, alt_extra in enumerate(self._others_extra[idx + 1:]): |
|
254 |
alt_idx = alt_idx + idx + 1 |
|
255 |
alt_extra = self._others_extra[alt_idx] |
|
256 |
alt_tree = self._other_trees[alt_idx] |
|
257 |
other_values.append(self._lookup_by_file_id( |
|
258 |
alt_extra, alt_tree, file_id)) |
|
259 |
yield other_path, file_id, None, other_values |