bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
1 |
# Copyright (C) 2005-2011 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
17 |
"""Walk multiple trees simultaneously.
|
|
18 |
"""
|
|
19 |
||
20 |
from __future__ import absolute_import |
|
21 |
||
22 |
from . import ( |
|
23 |
errors, |
|
24 |
osutils, |
|
25 |
)
|
|
26 |
||
27 |
from .sixish import ( |
|
28 |
text_type, |
|
29 |
viewvalues, |
|
30 |
)
|
|
31 |
||
32 |
||
33 |
class MultiWalker(object): |
|
34 |
"""Walk multiple trees simultaneously, getting combined results.""" |
|
35 |
||
36 |
# Note: This could be written to not assume you can do out-of-order
|
|
37 |
# lookups. Instead any nodes that don't match in all trees could be
|
|
38 |
# marked as 'deferred', and then returned in the final cleanup loop.
|
|
39 |
# For now, I think it is "nicer" to return things as close to the
|
|
40 |
# "master_tree" order as we can.
|
|
41 |
||
42 |
def __init__(self, master_tree, other_trees): |
|
43 |
"""Create a new MultiWalker. |
|
44 |
||
45 |
All trees being walked must implement "iter_entries_by_dir()", such
|
|
46 |
that they yield (path, object) tuples, where that object will have a
|
|
47 |
'.file_id' member, that can be used to check equality.
|
|
48 |
||
49 |
:param master_tree: All trees will be 'slaved' to the master_tree such
|
|
50 |
that nodes in master_tree will be used as 'first-pass' sync points.
|
|
51 |
Any nodes that aren't in master_tree will be merged in a second
|
|
52 |
pass.
|
|
53 |
:param other_trees: A list of other trees to walk simultaneously.
|
|
54 |
"""
|
|
55 |
self._master_tree = master_tree |
|
56 |
self._other_trees = other_trees |
|
57 |
||
58 |
# Keep track of any nodes that were properly processed just out of
|
|
59 |
# order, that way we don't return them at the end, we don't have to
|
|
60 |
# track *all* processed file_ids, just the out-of-order ones
|
|
61 |
self._out_of_order_processed = set() |
|
62 |
||
63 |
@staticmethod
|
|
64 |
def _step_one(iterator): |
|
65 |
"""Step an iter_entries_by_dir iterator. |
|
66 |
||
67 |
:return: (has_more, path, ie)
|
|
68 |
If has_more is False, path and ie will be None.
|
|
69 |
"""
|
|
70 |
try: |
|
71 |
path, ie = next(iterator) |
|
72 |
except StopIteration: |
|
73 |
return False, None, None |
|
74 |
else: |
|
75 |
return True, path, ie |
|
76 |
||
77 |
@staticmethod
|
|
78 |
def _lt_path_by_dirblock(path1, path2): |
|
79 |
"""Compare two paths based on what directory they are in. |
|
80 |
||
81 |
This generates a sort order, such that all children of a directory are
|
|
82 |
sorted together, and grandchildren are in the same order as the
|
|
83 |
children appear. But all grandchildren come after all children.
|
|
84 |
||
85 |
:param path1: first path
|
|
86 |
:param path2: the second path
|
|
87 |
:return: negative number if ``path1`` comes first,
|
|
88 |
0 if paths are equal
|
|
89 |
and a positive number if ``path2`` sorts first
|
|
90 |
"""
|
|
91 |
# Shortcut this special case
|
|
92 |
if path1 == path2: |
|
93 |
return False |
|
94 |
# This is stolen from _dirstate_helpers_py.py, only switching it to
|
|
95 |
# Unicode objects. Consider using encode_utf8() and then using the
|
|
96 |
# optimized versions, or maybe writing optimized unicode versions.
|
|
97 |
if not isinstance(path1, text_type): |
|
98 |
raise TypeError("'path1' must be a unicode string, not %s: %r" |
|
99 |
% (type(path1), path1)) |
|
100 |
if not isinstance(path2, text_type): |
|
101 |
raise TypeError("'path2' must be a unicode string, not %s: %r" |
|
102 |
% (type(path2), path2)) |
|
103 |
return (MultiWalker._path_to_key(path1) < |
|
104 |
MultiWalker._path_to_key(path2)) |
|
105 |
||
106 |
@staticmethod
|
|
107 |
def _path_to_key(path): |
|
108 |
dirname, basename = osutils.split(path) |
|
109 |
return (dirname.split(u'/'), basename) |
|
110 |
||
7296.7.4
by Jelmer Vernooij
Refactor multiwalker. |
111 |
def _lookup_by_master_path(self, extra_entries, other_tree, master_path): |
112 |
return self._lookup_by_file_id( |
|
113 |
extra_entries, other_tree, |
|
114 |
self._master_tree.path2id(master_path)) |
|
115 |
||
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
116 |
def _lookup_by_file_id(self, extra_entries, other_tree, file_id): |
117 |
"""Lookup an inventory entry by file_id. |
|
118 |
||
119 |
This is called when an entry is missing in the normal order.
|
|
120 |
Generally this is because a file was either renamed, or it was
|
|
121 |
deleted/added. If the entry was found in the inventory and not in
|
|
122 |
extra_entries, it will be added to self._out_of_order_processed
|
|
123 |
||
124 |
:param extra_entries: A dictionary of {file_id: (path, ie)}. This
|
|
125 |
should be filled with entries that were found before they were
|
|
126 |
used. If file_id is present, it will be removed from the
|
|
127 |
dictionary.
|
|
128 |
:param other_tree: The Tree to search, in case we didn't find the entry
|
|
129 |
yet.
|
|
130 |
:param file_id: The file_id to look for
|
|
131 |
:return: (path, ie) if found or (None, None) if not present.
|
|
132 |
"""
|
|
133 |
if file_id in extra_entries: |
|
134 |
return extra_entries.pop(file_id) |
|
135 |
try: |
|
136 |
cur_path = other_tree.id2path(file_id) |
|
137 |
except errors.NoSuchId: |
|
138 |
cur_path = None |
|
139 |
if cur_path is None: |
|
140 |
return (None, None) |
|
141 |
else: |
|
142 |
self._out_of_order_processed.add(file_id) |
|
7296.7.5
by Jelmer Vernooij
Avoid use of inventory. |
143 |
cur_ie = next(other_tree.iter_entries_by_dir( |
144 |
specific_files=[cur_path]))[1] |
|
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
145 |
return (cur_path, cur_ie) |
146 |
||
147 |
def iter_all(self): |
|
148 |
"""Match up the values in the different trees.""" |
|
149 |
for result in self._walk_master_tree(): |
|
150 |
yield result |
|
151 |
self._finish_others() |
|
152 |
for result in self._walk_others(): |
|
153 |
yield result |
|
154 |
||
155 |
def _walk_master_tree(self): |
|
156 |
"""First pass, walk all trees in lock-step. |
|
157 |
||
158 |
When we are done, all nodes in the master_tree will have been
|
|
159 |
processed. _other_walkers, _other_entries, and _others_extra will be
|
|
160 |
set on 'self' for future processing.
|
|
161 |
"""
|
|
162 |
# This iterator has the most "inlining" done, because it tends to touch
|
|
163 |
# every file in the tree, while the others only hit nodes that don't
|
|
164 |
# match.
|
|
165 |
master_iterator = self._master_tree.iter_entries_by_dir() |
|
166 |
||
167 |
other_walkers = [other.iter_entries_by_dir() |
|
168 |
for other in self._other_trees] |
|
169 |
other_entries = [self._step_one(walker) for walker in other_walkers] |
|
170 |
# Track extra nodes in the other trees
|
|
171 |
others_extra = [{} for _ in range(len(self._other_trees))] |
|
172 |
||
173 |
master_has_more = True |
|
174 |
step_one = self._step_one |
|
175 |
lookup_by_file_id = self._lookup_by_file_id |
|
176 |
out_of_order_processed = self._out_of_order_processed |
|
177 |
||
178 |
while master_has_more: |
|
179 |
(master_has_more, path, master_ie) = step_one(master_iterator) |
|
180 |
if not master_has_more: |
|
181 |
break
|
|
182 |
||
183 |
other_values = [] |
|
184 |
other_values_append = other_values.append |
|
185 |
next_other_entries = [] |
|
186 |
next_other_entries_append = next_other_entries.append |
|
187 |
for idx, (other_has_more, other_path, other_ie) in enumerate(other_entries): |
|
188 |
if not other_has_more: |
|
7296.7.4
by Jelmer Vernooij
Refactor multiwalker. |
189 |
other_values_append(self._lookup_by_master_path( |
190 |
others_extra[idx], self._other_trees[idx], path)) |
|
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
191 |
next_other_entries_append((False, None, None)) |
7296.7.4
by Jelmer Vernooij
Refactor multiwalker. |
192 |
elif master_ie.file_id == other_ie.file_id: |
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
193 |
# This is the critical code path, as most of the entries
|
194 |
# should match between most trees.
|
|
195 |
other_values_append((other_path, other_ie)) |
|
196 |
next_other_entries_append(step_one(other_walkers[idx])) |
|
197 |
else: |
|
198 |
# This walker did not match, step it until it either
|
|
199 |
# matches, or we know we are past the current walker.
|
|
200 |
other_walker = other_walkers[idx] |
|
201 |
other_extra = others_extra[idx] |
|
202 |
while (other_has_more and |
|
203 |
self._lt_path_by_dirblock(other_path, path)): |
|
204 |
other_file_id = other_ie.file_id |
|
205 |
if other_file_id not in out_of_order_processed: |
|
206 |
other_extra[other_file_id] = (other_path, other_ie) |
|
207 |
other_has_more, other_path, other_ie = \ |
|
208 |
step_one(other_walker) |
|
7296.7.4
by Jelmer Vernooij
Refactor multiwalker. |
209 |
if other_has_more and other_ie.file_id == master_ie.file_id: |
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
210 |
# We ended up walking to this point, match and step
|
211 |
# again
|
|
212 |
other_values_append((other_path, other_ie)) |
|
213 |
other_has_more, other_path, other_ie = \ |
|
214 |
step_one(other_walker) |
|
215 |
else: |
|
216 |
# This record isn't in the normal order, see if it
|
|
217 |
# exists at all.
|
|
7296.7.4
by Jelmer Vernooij
Refactor multiwalker. |
218 |
other_values_append(self._lookup_by_master_path( |
219 |
other_extra, self._other_trees[idx], path)) |
|
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
220 |
next_other_entries_append((other_has_more, other_path, |
221 |
other_ie)) |
|
222 |
other_entries = next_other_entries |
|
223 |
||
224 |
# We've matched all the walkers, yield this datapoint
|
|
7296.7.4
by Jelmer Vernooij
Refactor multiwalker. |
225 |
yield path, master_ie.file_id, master_ie, other_values |
7296.7.1
by Jelmer Vernooij
Split out MultiWalker. |
226 |
self._other_walkers = other_walkers |
227 |
self._other_entries = other_entries |
|
228 |
self._others_extra = others_extra |
|
229 |
||
230 |
def _finish_others(self): |
|
231 |
"""Finish walking the other iterators, so we get all entries.""" |
|
232 |
for idx, info in enumerate(self._other_entries): |
|
233 |
other_extra = self._others_extra[idx] |
|
234 |
(other_has_more, other_path, other_ie) = info |
|
235 |
while other_has_more: |
|
236 |
other_file_id = other_ie.file_id |
|
237 |
if other_file_id not in self._out_of_order_processed: |
|
238 |
other_extra[other_file_id] = (other_path, other_ie) |
|
239 |
other_has_more, other_path, other_ie = \ |
|
240 |
self._step_one(self._other_walkers[idx]) |
|
241 |
del self._other_entries |
|
242 |
||
243 |
def _walk_others(self): |
|
244 |
"""Finish up by walking all the 'deferred' nodes.""" |
|
245 |
# TODO: One alternative would be to grab all possible unprocessed
|
|
246 |
# file_ids, and then sort by path, and then yield them. That
|
|
247 |
# might ensure better ordering, in case a caller strictly
|
|
248 |
# requires parents before children.
|
|
249 |
for idx, other_extra in enumerate(self._others_extra): |
|
250 |
others = sorted(viewvalues(other_extra), |
|
251 |
key=lambda x: self._path_to_key(x[0])) |
|
252 |
for other_path, other_ie in others: |
|
253 |
file_id = other_ie.file_id |
|
254 |
# We don't need to check out_of_order_processed here, because
|
|
255 |
# the lookup_by_file_id will be removing anything processed
|
|
256 |
# from the extras cache
|
|
257 |
other_extra.pop(file_id) |
|
258 |
other_values = [(None, None)] * idx |
|
259 |
other_values.append((other_path, other_ie)) |
|
260 |
for alt_idx, alt_extra in enumerate(self._others_extra[idx + 1:]): |
|
261 |
alt_idx = alt_idx + idx + 1 |
|
262 |
alt_extra = self._others_extra[alt_idx] |
|
263 |
alt_tree = self._other_trees[alt_idx] |
|
264 |
other_values.append(self._lookup_by_file_id( |
|
265 |
alt_extra, alt_tree, file_id)) |
|
266 |
yield other_path, file_id, None, other_values |