# Copyright (C) 2005-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Walk multiple trees simultaneously."""

from __future__ import absolute_import
28
class MultiWalker(object):
    """Walk multiple trees simultaneously, getting combined results.

    The walk happens in three phases (see ``iter_all``):

    1. every entry of the master tree is yielded, paired with the matching
       entry (by ``file_id``) of each other tree;
    2. the remaining entries of the other trees are drained into per-tree
       "extra" dictionaries;
    3. those extras, which have no counterpart in the master tree, are
       yielded.
    """

    # Note: This could be written to not assume you can do out-of-order
    #       lookups. Instead any nodes that don't match in all trees could be
    #       marked as 'deferred', and then returned in the final cleanup loop.
    #       For now, I think it is "nicer" to return things as close to the
    #       "master_tree" order as we can.

    def __init__(self, master_tree, other_trees):
        """Create a new MultiWalker.

        All trees being walked must implement "iter_entries_by_dir()", such
        that they yield (path, object) tuples, where that object will have a
        '.file_id' member, that can be used to check equality.

        :param master_tree: All trees will be 'slaved' to the master_tree such
            that nodes in master_tree will be used as 'first-pass' sync points.
            Any nodes that aren't in master_tree will be merged in a second
            pass.
        :param other_trees: A list of other trees to walk simultaneously.
        """
        self._master_tree = master_tree
        self._other_trees = other_trees

        # Keep track of any nodes that were properly processed just out of
        # order, that way we don't return them at the end, we don't have to
        # track *all* processed file_ids, just the out-of-order ones
        self._out_of_order_processed = set()

    @staticmethod
    def _step_one(iterator):
        """Step an iter_entries_by_dir iterator.

        :param iterator: An iterator yielding (path, ie) pairs.
        :return: (has_more, path, ie)
            If has_more is False, path and ie will be None.
        """
        try:
            path, ie = next(iterator)
        except StopIteration:
            # Exhaustion is reported through the flag rather than by letting
            # StopIteration escape into the generators that call this.
            return False, None, None
        else:
            return True, path, ie

    @staticmethod
    def _lt_path_by_dirblock(path1, path2):
        """Tell whether path1 sorts strictly before path2 in dirblock order.

        This uses a sort order such that all children of a directory are
        sorted together, and grandchildren are in the same order as the
        children appear. But all grandchildren come after all children.

        :param path1: first path (must be a unicode string)
        :param path2: the second path (must be a unicode string)
        :return: True if ``path1`` comes strictly before ``path2``, False
            otherwise (including when the two paths are equal).
        :raises TypeError: if either path is not a unicode string.
        """
        # Shortcut this special case
        if path1 == path2:
            return False
        # This is stolen from _dirstate_helpers_py.py, only switching it to
        # Unicode objects. Consider using encode_utf8() and then using the
        # optimized versions, or maybe writing optimized unicode versions.
        if not isinstance(path1, str):
            raise TypeError("'path1' must be a unicode string, not %s: %r"
                            % (type(path1), path1))
        if not isinstance(path2, str):
            raise TypeError("'path2' must be a unicode string, not %s: %r"
                            % (type(path2), path2))
        return (MultiWalker._path_to_key(path1) <
                MultiWalker._path_to_key(path2))

    @staticmethod
    def _path_to_key(path):
        """Build the dirblock sort key for a path.

        :param path: The path to convert.
        :return: A ([directory components], basename) tuple, so that entries
            are ordered by their containing directory first and by their own
            name second.
        """
        dirname, basename = osutils.split(path)
        return (dirname.split(u'/'), basename)

    def _lookup_by_master_path(self, extra_entries, other_tree, master_path):
        """Lookup the entry of other_tree matching a path of the master tree.

        The path is translated to a file_id using the master tree, and that
        file_id is then resolved with ``_lookup_by_file_id``.

        :param extra_entries: A dictionary of {file_id: (path, ie)} holding
            the entries of other_tree seen so far but not yet returned.
        :param other_tree: The Tree to search.
        :param master_path: A path in the master tree.
        :return: (path, ie) if found or (None, None) if not present.
        """
        return self._lookup_by_file_id(
            extra_entries, other_tree,
            self._master_tree.path2id(master_path))

    def _lookup_by_file_id(self, extra_entries, other_tree, file_id):
        """Lookup an inventory entry by file_id.

        This is called when an entry is missing in the normal order.
        Generally this is because a file was either renamed, or it was
        deleted/added. If the entry was found in the inventory and not in
        extra_entries, it will be added to self._out_of_order_processed

        :param extra_entries: A dictionary of {file_id: (path, ie)}. This
            should be filled with entries that were found before they were
            used. If file_id is present, it will be removed from the
            dictionary.
        :param other_tree: The Tree to search, in case we didn't find the entry
            yet.
        :param file_id: The file_id to look for
        :return: (path, ie) if found or (None, None) if not present.
        """
        if file_id in extra_entries:
            return extra_entries.pop(file_id)
        try:
            cur_path = other_tree.id2path(file_id)
        except errors.NoSuchId:
            cur_path = None
        if cur_path is None:
            return (None, None)
        # The walker of other_tree has not reached this entry yet; remember
        # it so that it is not reported a second time when it is reached.
        self._out_of_order_processed.add(file_id)
        cur_ie = next(other_tree.iter_entries_by_dir(
            specific_files=[cur_path]))[1]
        return (cur_path, cur_ie)

    def iter_all(self):
        """Match up the values in the different trees.

        :return: A generator of (path, file_id, master_ie, other_values)
            tuples. other_values is a list with one (path, ie) pair per other
            tree, (None, None) marking a tree without that file_id. For
            entries absent from the master tree, master_ie is None and path
            is the path in the first other tree that has the entry.
        """
        for result in self._walk_master_tree():
            yield result
        self._finish_others()
        for result in self._walk_others():
            yield result

    def _walk_master_tree(self):
        """First pass, walk all trees in lock-step.

        When we are done, all nodes in the master_tree will have been
        processed. _other_walkers, _other_entries, and _others_extra will be
        set on 'self' for future processing.
        """
        # This iterator has the most "inlining" done, because it tends to touch
        # every file in the tree, while the others only hit nodes that don't
        # match.
        master_iterator = self._master_tree.iter_entries_by_dir()

        other_walkers = [other.iter_entries_by_dir()
                         for other in self._other_trees]
        other_entries = [self._step_one(walker) for walker in other_walkers]
        # Track extra nodes in the other trees
        others_extra = [{} for _ in self._other_trees]

        master_has_more = True
        step_one = self._step_one
        out_of_order_processed = self._out_of_order_processed

        while master_has_more:
            (master_has_more, path, master_ie) = step_one(master_iterator)
            if not master_has_more:
                break

            other_values = []
            other_values_append = other_values.append
            next_other_entries = []
            next_other_entries_append = next_other_entries.append
            for idx, (other_has_more, other_path, other_ie) in enumerate(
                    other_entries):
                if not other_has_more:
                    # This walker is exhausted, the entry can only be found
                    # among the extras or by a direct lookup.
                    other_values_append(self._lookup_by_master_path(
                        others_extra[idx], self._other_trees[idx], path))
                    next_other_entries_append((False, None, None))
                elif master_ie.file_id == other_ie.file_id:
                    # This is the critical code path, as most of the entries
                    # should match between most trees.
                    other_values_append((other_path, other_ie))
                    next_other_entries_append(step_one(other_walkers[idx]))
                else:
                    # This walker did not match, step it until it either
                    # matches, or we know we are past the current walker.
                    other_walker = other_walkers[idx]
                    other_extra = others_extra[idx]
                    while (other_has_more and
                           self._lt_path_by_dirblock(other_path, path)):
                        other_file_id = other_ie.file_id
                        if other_file_id not in out_of_order_processed:
                            other_extra[other_file_id] = (other_path, other_ie)
                        other_has_more, other_path, other_ie = \
                            step_one(other_walker)
                    if other_has_more and other_ie.file_id == master_ie.file_id:
                        # We ended up walking to this point, match and step
                        # again
                        other_values_append((other_path, other_ie))
                        other_has_more, other_path, other_ie = \
                            step_one(other_walker)
                    else:
                        # This record isn't in the normal order, see if it
                        # exists at all.
                        other_values_append(self._lookup_by_master_path(
                            other_extra, self._other_trees[idx], path))
                    next_other_entries_append((other_has_more, other_path,
                                               other_ie))
            other_entries = next_other_entries

            # We've matched all the walkers, yield this datapoint
            yield path, master_ie.file_id, master_ie, other_values
        self._other_walkers = other_walkers
        self._other_entries = other_entries
        self._others_extra = others_extra

    def _finish_others(self):
        """Finish walking the other iterators, so we get all entries.

        Every entry still pending in an other tree is stored in the matching
        dictionary of self._others_extra, unless it was already returned out
        of order. self._other_entries is deleted afterwards.
        """
        for idx, info in enumerate(self._other_entries):
            other_extra = self._others_extra[idx]
            (other_has_more, other_path, other_ie) = info
            while other_has_more:
                other_file_id = other_ie.file_id
                if other_file_id not in self._out_of_order_processed:
                    other_extra[other_file_id] = (other_path, other_ie)
                other_has_more, other_path, other_ie = \
                    self._step_one(self._other_walkers[idx])
        del self._other_entries

    def _walk_others(self):
        """Finish up by walking all the 'deferred' nodes.

        Yields (path, file_id, None, other_values) for each entry that is in
        at least one other tree but not in the master tree. Trees located
        before the one providing the entry get (None, None) in other_values.
        """
        # TODO: One alternative would be to grab all possible unprocessed
        #       file_ids, and then sort by path, and then yield them. That
        #       might ensure better ordering, in case a caller strictly
        #       requires parents before children.
        tree_count = len(self._others_extra)
        for idx, other_extra in enumerate(self._others_extra):
            # sorted() returns a new list, so popping from other_extra while
            # looping over it is safe.
            others = sorted(other_extra.values(),
                            key=lambda x: self._path_to_key(x[0]))
            for other_path, other_ie in others:
                file_id = other_ie.file_id
                # We don't need to check out_of_order_processed here, because
                # the lookup_by_file_id will be removing anything processed
                # from the extras cache
                other_extra.pop(file_id)
                other_values = [(None, None)] * idx
                other_values.append((other_path, other_ie))
                for alt_idx in range(idx + 1, tree_count):
                    other_values.append(self._lookup_by_file_id(
                        self._others_extra[alt_idx],
                        self._other_trees[alt_idx], file_id))
                yield other_path, file_id, None, other_values