771
688
# it works for all trees.
691
def _changes_from_entries(self, source_entry, target_entry, source_path,
693
"""Generate an iter_changes tuple between source_entry and target_entry.
695
:param source_entry: An inventory entry from self.source, or None.
696
:param target_entry: An inventory entry from self.target, or None.
697
:param source_path: The path of source_entry.
698
:param target_path: The path of target_entry.
699
:return: A tuple, item 0 of which is an iter_changes result tuple, and
700
item 1 is True if there are any changes in the result tuple.
702
if source_entry is None:
703
if target_entry is None:
705
file_id = target_entry.file_id
707
file_id = source_entry.file_id
708
if source_entry is not None:
709
source_versioned = True
710
source_name = source_entry.name
711
source_parent = source_entry.parent_id
712
source_kind, source_executable, source_stat = \
713
self.source._comparison_data(source_entry, source_path)
715
source_versioned = False
719
source_executable = None
720
if target_entry is not None:
721
target_versioned = True
722
target_name = target_entry.name
723
target_parent = target_entry.parent_id
724
target_kind, target_executable, target_stat = \
725
self.target._comparison_data(target_entry, target_path)
727
target_versioned = False
731
target_executable = None
732
versioned = (source_versioned, target_versioned)
733
kind = (source_kind, target_kind)
734
changed_content = False
735
if source_kind != target_kind:
736
changed_content = True
737
elif source_kind == 'file':
738
if not self.file_content_matches(
739
source_path, target_path,
740
source_stat, target_stat):
741
changed_content = True
742
elif source_kind == 'symlink':
743
if (self.source.get_symlink_target(source_path) !=
744
self.target.get_symlink_target(target_path)):
745
changed_content = True
746
elif source_kind == 'tree-reference':
747
if (self.source.get_reference_revision(source_path)
748
!= self.target.get_reference_revision(target_path)):
749
changed_content = True
750
parent = (source_parent, target_parent)
751
name = (source_name, target_name)
752
executable = (source_executable, target_executable)
753
if (changed_content is not False or versioned[0] != versioned[1] or
754
parent[0] != parent[1] or name[0] != name[1] or
755
executable[0] != executable[1]):
759
return (file_id, (source_path, target_path), changed_content,
760
versioned, parent, name, kind, executable), changes
774
762
def compare(self, want_unchanged=False, specific_files=None,
775
763
extra_trees=None, require_versioned=False, include_root=False,
776
764
want_unversioned=False):
836
823
output. An unversioned file is defined as one with (False, False)
837
824
for the versioned pair.
839
raise NotImplementedError(self.iter_changes)
829
extra_trees = list(extra_trees)
830
# The ids of items we need to examine to ensure delta consistency.
831
precise_file_ids = set()
832
changed_file_ids = []
833
if specific_files == []:
834
target_specific_files = []
835
source_specific_files = []
837
target_specific_files = self.target.find_related_paths_across_trees(
838
specific_files, [self.source] + extra_trees,
839
require_versioned=require_versioned)
840
source_specific_files = self.source.find_related_paths_across_trees(
841
specific_files, [self.target] + extra_trees,
842
require_versioned=require_versioned)
843
if specific_files is not None:
844
# reparented or added entries must have their parents included
845
# so that valid deltas can be created. The seen_parents set
846
# tracks the parents that we need to have.
847
# The seen_dirs set tracks directory entries we've yielded.
848
# After outputting version object in to_entries we set difference
849
# the two seen sets and start checking parents.
853
all_unversioned = sorted([(p.split('/'), p) for p in
855
if specific_files is None or
856
osutils.is_inside_any(specific_files, p)])
857
all_unversioned = deque(all_unversioned)
859
all_unversioned = deque()
861
from_entries_by_dir = list(self.source.iter_entries_by_dir(
862
specific_files=source_specific_files))
863
from_data = dict((e.file_id, (p, e)) for p, e in from_entries_by_dir)
864
to_entries_by_dir = list(self.target.iter_entries_by_dir(
865
specific_files=target_specific_files))
866
num_entries = len(from_entries_by_dir) + len(to_entries_by_dir)
868
# the unversioned path lookup only occurs on real trees - where there
869
# can be extras. So the fake_entry is solely used to look up
870
# executable it values when execute is not supported.
871
fake_entry = TreeFile()
872
for target_path, target_entry in to_entries_by_dir:
873
while (all_unversioned and
874
all_unversioned[0][0] < target_path.split('/')):
875
unversioned_path = all_unversioned.popleft()
876
target_kind, target_executable, target_stat = \
877
self.target._comparison_data(
878
fake_entry, unversioned_path[1])
879
yield (None, (None, unversioned_path[1]), True, (False, False),
881
(None, unversioned_path[0][-1]),
883
(None, target_executable))
884
source_path, source_entry = from_data.get(target_entry.file_id,
886
result, changes = self._changes_from_entries(source_entry,
887
target_entry, source_path=source_path, target_path=target_path)
888
to_paths[result[0]] = result[1][1]
893
pb.update('comparing files', entry_count, num_entries)
894
if changes or include_unchanged:
895
if specific_files is not None:
896
new_parent_id = result[4][1]
897
precise_file_ids.add(new_parent_id)
898
changed_file_ids.append(result[0])
900
# Ensure correct behaviour for reparented/added specific files.
901
if specific_files is not None:
903
if result[6][1] == 'directory':
904
seen_dirs.add(result[0])
905
# Record parents of reparented/added entries.
906
versioned = result[3]
908
if not versioned[0] or parents[0] != parents[1]:
909
seen_parents.add(parents[1])
910
while all_unversioned:
911
# yield any trailing unversioned paths
912
unversioned_path = all_unversioned.popleft()
913
to_kind, to_executable, to_stat = \
914
self.target._comparison_data(fake_entry, unversioned_path[1])
915
yield (None, (None, unversioned_path[1]), True, (False, False),
917
(None, unversioned_path[0][-1]),
919
(None, to_executable))
920
# Yield all remaining source paths
921
for path, from_entry in from_entries_by_dir:
922
file_id = from_entry.file_id
923
if file_id in to_paths:
926
to_path = find_previous_path(self.source, self.target, path)
929
pb.update('comparing files', entry_count, num_entries)
930
versioned = (True, False)
931
parent = (from_entry.parent_id, None)
932
name = (from_entry.name, None)
933
from_kind, from_executable, stat_value = \
934
self.source._comparison_data(from_entry, path)
935
kind = (from_kind, None)
936
executable = (from_executable, None)
937
changed_content = from_kind is not None
938
# the parent's path is necessarily known at this point.
939
changed_file_ids.append(file_id)
940
yield(file_id, (path, to_path), changed_content, versioned, parent,
941
name, kind, executable)
942
changed_file_ids = set(changed_file_ids)
943
if specific_files is not None:
944
for result in self._handle_precise_ids(precise_file_ids,
948
def _get_entry(self, tree, path):
949
"""Get an inventory entry from a tree, with missing entries as None.
951
If the tree raises NotImplementedError on accessing .inventory, then
952
this is worked around using iter_entries_by_dir on just the file id
955
:param tree: The tree to lookup the entry in.
956
:param path: The path to look up
958
# No inventory available.
960
iterator = tree.iter_entries_by_dir(specific_files=[path])
961
return next(iterator)[1]
962
except StopIteration:
965
def _handle_precise_ids(self, precise_file_ids, changed_file_ids,
966
discarded_changes=None):
967
"""Fill out a partial iter_changes to be consistent.
969
:param precise_file_ids: The file ids of parents that were seen during
971
:param changed_file_ids: The file ids of already emitted items.
972
:param discarded_changes: An optional dict of precalculated
973
iter_changes items which the partial iter_changes had not output
975
:return: A generator of iter_changes items to output.
977
# process parents of things that had changed under the users
978
# requested paths to prevent incorrect paths or parent ids which
979
# aren't in the tree.
980
while precise_file_ids:
981
precise_file_ids.discard(None)
982
# Don't emit file_ids twice
983
precise_file_ids.difference_update(changed_file_ids)
984
if not precise_file_ids:
986
# If there was something at a given output path in source, we
987
# have to include the entry from source in the delta, or we would
988
# be putting this entry into a used path.
990
for parent_id in precise_file_ids:
992
paths.append(self.target.id2path(parent_id))
993
except errors.NoSuchId:
994
# This id has been dragged in from the source by delta
995
# expansion and isn't present in target at all: we don't
996
# need to check for path collisions on it.
999
old_id = self.source.path2id(path)
1000
precise_file_ids.add(old_id)
1001
precise_file_ids.discard(None)
1002
current_ids = precise_file_ids
1003
precise_file_ids = set()
1004
# We have to emit all of precise_file_ids that have been altered.
1005
# We may have to output the children of some of those ids if any
1006
# directories have stopped being directories.
1007
for file_id in current_ids:
1009
if discarded_changes:
1010
result = discarded_changes.get(file_id)
1016
source_path = self.source.id2path(file_id)
1017
except errors.NoSuchId:
1021
source_entry = self._get_entry(
1022
self.source, source_path)
1024
target_path = self.target.id2path(file_id)
1025
except errors.NoSuchId:
1029
target_entry = self._get_entry(
1030
self.target, target_path)
1031
result, changes = self._changes_from_entries(
1032
source_entry, target_entry, source_path, target_path)
1035
# Get this parents parent to examine.
1036
new_parent_id = result[4][1]
1037
precise_file_ids.add(new_parent_id)
1039
if (result[6][0] == 'directory' and
1040
result[6][1] != 'directory'):
1041
# This stopped being a directory, the old children have
1043
if source_entry is None:
1044
# Reusing a discarded change.
1045
source_entry = self._get_entry(
1046
self.source, result[1][0])
1047
precise_file_ids.update(
1049
for child in self.source.iter_child_entries(result[1][0]))
1050
changed_file_ids.add(result[0])
841
1053
def file_content_matches(
842
1054
self, source_path, target_path,
863
1077
# Fall back to SHA1 for now
864
1078
if source_verifier_kind != "SHA1":
865
1079
source_sha1 = self.source.get_file_sha1(
866
source_path, source_stat)
1080
source_path, source_file_id, source_stat)
868
1082
source_sha1 = source_verifier_data
869
1083
if target_verifier_kind != "SHA1":
870
1084
target_sha1 = self.target.get_file_sha1(
871
target_path, target_stat)
1085
target_path, target_file_id, target_stat)
873
1087
target_sha1 = target_verifier_data
874
1088
return (source_sha1 == target_sha1)
876
def find_target_path(self, path, recurse='none'):
    """Find target tree path.

    This implementation performs no lookup: it unconditionally raises
    NotImplementedError.

    :param path: Path to search for (exists in source)
    :param recurse: Accepted as part of the signature; not inspected here.
    :return: path in target, or None if there is no equivalent path.
    :raise NoSuchFile: If the path doesn't exist in source
    :raise NotImplementedError: Always, from this implementation.
    """
    # The bound method is passed as the exception argument so the error
    # identifies which operation is missing.
    raise NotImplementedError(self.find_target_path)
885
def find_source_path(self, path, recurse='none'):
    """Find the source tree path.

    This implementation performs no lookup: it unconditionally raises
    NotImplementedError.

    :param path: Path to search for (exists in target)
    :param recurse: Accepted as part of the signature; not inspected here.
    :return: path in source, or None if there is no equivalent path.
    :raise NoSuchFile: if the path doesn't exist in target
    :raise NotImplementedError: Always, from this implementation.
    """
    # The bound method is passed as the exception argument so the error
    # identifies which operation is missing.
    raise NotImplementedError(self.find_source_path)
894
def find_target_paths(self, paths, recurse='none'):
895
"""Find target tree paths.
897
:param paths: Iterable over paths in source to search for
898
:return: Dictionary mapping from source paths to paths in target , or
899
None if there is no equivalent path.
903
ret[path] = self.find_target_path(path, recurse=recurse)
906
def find_source_paths(self, paths, recurse='none'):
907
"""Find source tree paths.
909
:param paths: Iterable over paths in target to search for
910
:return: Dictionary mapping from target paths to paths in source, or
911
None if there is no equivalent path.
915
ret[path] = self.find_source_path(path, recurse=recurse)
919
def find_previous_paths(from_tree, to_tree, paths, recurse='none'):
1091
InterTree.register_optimiser(InterTree)
1094
class MultiWalker(object):
1095
"""Walk multiple trees simultaneously, getting combined results."""
1097
# Note: This could be written to not assume you can do out-of-order
1098
# lookups. Instead any nodes that don't match in all trees could be
1099
# marked as 'deferred', and then returned in the final cleanup loop.
1100
# For now, I think it is "nicer" to return things as close to the
1101
# "master_tree" order as we can.
1103
def __init__(self, master_tree, other_trees):
1104
"""Create a new MultiWalker.
1106
All trees being walked must implement "iter_entries_by_dir()", such
1107
that they yield (path, object) tuples, where that object will have a
1108
'.file_id' member, that can be used to check equality.
1110
:param master_tree: All trees will be 'slaved' to the master_tree such
1111
that nodes in master_tree will be used as 'first-pass' sync points.
1112
Any nodes that aren't in master_tree will be merged in a second
1114
:param other_trees: A list of other trees to walk simultaneously.
1116
self._master_tree = master_tree
1117
self._other_trees = other_trees
1119
# Keep track of any nodes that were properly processed just out of
1120
# order, that way we don't return them at the end, we don't have to
1121
# track *all* processed file_ids, just the out-of-order ones
1122
self._out_of_order_processed = set()
1125
def _step_one(iterator):
1126
"""Step an iter_entries_by_dir iterator.
1128
:return: (has_more, path, ie)
1129
If has_more is False, path and ie will be None.
1132
path, ie = next(iterator)
1133
except StopIteration:
1134
return False, None, None
1136
return True, path, ie
1139
def _lt_path_by_dirblock(path1, path2):
1140
"""Compare two paths based on what directory they are in.
1142
This generates a sort order, such that all children of a directory are
1143
sorted together, and grandchildren are in the same order as the
1144
children appear. But all grandchildren come after all children.
1146
:param path1: first path
1147
:param path2: the second path
1148
:return: True if ``path1`` sorts before ``path2``, False otherwise
1152
# Shortcut this special case
1155
# This is stolen from _dirstate_helpers_py.py, only switching it to
1156
# Unicode objects. Consider using encode_utf8() and then using the
1157
# optimized versions, or maybe writing optimized unicode versions.
1158
if not isinstance(path1, text_type):
1159
raise TypeError("'path1' must be a unicode string, not %s: %r"
1160
% (type(path1), path1))
1161
if not isinstance(path2, text_type):
1162
raise TypeError("'path2' must be a unicode string, not %s: %r"
1163
% (type(path2), path2))
1164
return (MultiWalker._path_to_key(path1) <
1165
MultiWalker._path_to_key(path2))
1168
def _path_to_key(path):
    """Turn ``path`` into the key used for dirblock ordering.

    :param path: The path to convert.
    :return: A ``(directory_components, basename)`` tuple: the directory
        part produced by ``osutils.split`` is further split on ``u'/'``,
        and the basename is kept as-is.
    """
    parent_dir, leaf = osutils.split(path)
    return (parent_dir.split(u'/'), leaf)
1172
def _lookup_by_file_id(self, extra_entries, other_tree, file_id):
1173
"""Lookup an inventory entry by file_id.
1175
This is called when an entry is missing in the normal order.
1176
Generally this is because a file was either renamed, or it was
1177
deleted/added. If the entry was found in the inventory and not in
1178
extra_entries, it will be added to self._out_of_order_processed
1180
:param extra_entries: A dictionary of {file_id: (path, ie)}. This
1181
should be filled with entries that were found before they were
1182
used. If file_id is present, it will be removed from the
1184
:param other_tree: The Tree to search, in case we didn't find the entry
1186
:param file_id: The file_id to look for
1187
:return: (path, ie) if found or (None, None) if not present.
1189
if file_id in extra_entries:
1190
return extra_entries.pop(file_id)
1191
# TODO: Is id2path better as the first call, or is
1192
# inventory[file_id] better as a first check?
1194
cur_path = other_tree.id2path(file_id)
1195
except errors.NoSuchId:
1197
if cur_path is None:
1200
self._out_of_order_processed.add(file_id)
1201
cur_ie = other_tree.root_inventory.get_entry(file_id)
1202
return (cur_path, cur_ie)
1205
"""Match up the values in the different trees."""
1206
for result in self._walk_master_tree():
1208
self._finish_others()
1209
for result in self._walk_others():
1212
def _walk_master_tree(self):
1213
"""First pass, walk all trees in lock-step.
1215
When we are done, all nodes in the master_tree will have been
1216
processed. _other_walkers, _other_entries, and _others_extra will be
1217
set on 'self' for future processing.
1219
# This iterator has the most "inlining" done, because it tends to touch
1220
# every file in the tree, while the others only hit nodes that don't
1222
master_iterator = self._master_tree.iter_entries_by_dir()
1224
other_walkers = [other.iter_entries_by_dir()
1225
for other in self._other_trees]
1226
other_entries = [self._step_one(walker) for walker in other_walkers]
1227
# Track extra nodes in the other trees
1228
others_extra = [{} for _ in range(len(self._other_trees))]
1230
master_has_more = True
1231
step_one = self._step_one
1232
lookup_by_file_id = self._lookup_by_file_id
1233
out_of_order_processed = self._out_of_order_processed
1235
while master_has_more:
1236
(master_has_more, path, master_ie) = step_one(master_iterator)
1237
if not master_has_more:
1240
file_id = master_ie.file_id
1242
other_values_append = other_values.append
1243
next_other_entries = []
1244
next_other_entries_append = next_other_entries.append
1245
for idx, (other_has_more, other_path, other_ie) in enumerate(other_entries):
1246
if not other_has_more:
1247
other_values_append(lookup_by_file_id(
1248
others_extra[idx], self._other_trees[idx], file_id))
1249
next_other_entries_append((False, None, None))
1250
elif file_id == other_ie.file_id:
1251
# This is the critical code path, as most of the entries
1252
# should match between most trees.
1253
other_values_append((other_path, other_ie))
1254
next_other_entries_append(step_one(other_walkers[idx]))
1256
# This walker did not match, step it until it either
1257
# matches, or we know we are past the current walker.
1258
other_walker = other_walkers[idx]
1259
other_extra = others_extra[idx]
1260
while (other_has_more and
1261
self._lt_path_by_dirblock(other_path, path)):
1262
other_file_id = other_ie.file_id
1263
if other_file_id not in out_of_order_processed:
1264
other_extra[other_file_id] = (other_path, other_ie)
1265
other_has_more, other_path, other_ie = \
1266
step_one(other_walker)
1267
if other_has_more and other_ie.file_id == file_id:
1268
# We ended up walking to this point, match and step
1270
other_values_append((other_path, other_ie))
1271
other_has_more, other_path, other_ie = \
1272
step_one(other_walker)
1274
# This record isn't in the normal order, see if it
1276
other_values_append(lookup_by_file_id(
1277
other_extra, self._other_trees[idx], file_id))
1278
next_other_entries_append((other_has_more, other_path,
1280
other_entries = next_other_entries
1282
# We've matched all the walkers, yield this datapoint
1283
yield path, file_id, master_ie, other_values
1284
self._other_walkers = other_walkers
1285
self._other_entries = other_entries
1286
self._others_extra = others_extra
1288
def _finish_others(self):
1289
"""Finish walking the other iterators, so we get all entries."""
1290
for idx, info in enumerate(self._other_entries):
1291
other_extra = self._others_extra[idx]
1292
(other_has_more, other_path, other_ie) = info
1293
while other_has_more:
1294
other_file_id = other_ie.file_id
1295
if other_file_id not in self._out_of_order_processed:
1296
other_extra[other_file_id] = (other_path, other_ie)
1297
other_has_more, other_path, other_ie = \
1298
self._step_one(self._other_walkers[idx])
1299
del self._other_entries
1301
def _walk_others(self):
    """Yield the 'deferred' entries still held in the extras caches.

    Each cache in ``self._others_extra`` is visited in turn, and its
    entries are emitted sorted by ``_path_to_key`` of their path.

    :return: A generator of ``(path, file_id, None, other_values)`` tuples.
        The third item is always None.  ``other_values`` holds one
        ``(path, ie)`` pair per other tree.
    """
    # TODO: One alternative would be to grab all possible unprocessed
    #       file_ids, and then sort by path, and then yield them. That
    #       might ensure better ordering, in case a caller strictly
    #       requires parents before children.
    for idx, pending in enumerate(self._others_extra):
        ordered = sorted(viewvalues(pending),
                         key=lambda item: self._path_to_key(item[0]))
        for entry_path, entry in ordered:
            file_id = entry.file_id
            # We don't need to check out_of_order_processed here, because
            # the lookup_by_file_id will be removing anything processed
            # from the extras cache
            pending.pop(file_id)
            # Trees ahead of this one get (None, None) placeholders; trees
            # after it are looked up by file_id.
            row = [(None, None)] * idx
            row.append((entry_path, entry))
            for later_idx in range(idx + 1, len(self._others_extra)):
                row.append(self._lookup_by_file_id(
                    self._others_extra[later_idx],
                    self._other_trees[later_idx], file_id))
            yield entry_path, file_id, None, row
1327
def find_previous_paths(from_tree, to_tree, paths):
920
1328
"""Find previous tree paths.
922
1330
:param from_tree: From tree
923
1331
:param to_tree: To tree
924
:param paths: Iterable over paths in from_tree to search for
1332
:param paths: Iterable over paths to search for
925
1333
:return: Dictionary mapping from from_tree paths to paths in to_tree, or
926
1334
None if there is no equivalent path.
928
return InterTree.get(to_tree, from_tree).find_source_paths(paths, recurse=recurse)
931
def find_previous_path(from_tree, to_tree, path, recurse='none'):
1338
ret[path] = find_previous_path(from_tree, to_tree, path)
1342
def find_previous_path(from_tree, to_tree, path, file_id=None):
932
1343
"""Find previous tree path.
934
1345
:param from_tree: From tree
935
1346
:param to_tree: To tree
936
:param path: Path to search for (exists in from_tree)
1347
:param path: Path to search for
937
1348
:return: path in to_tree, or None if there is no equivalent path.
938
:raise NoSuchFile: If the path doesn't exist in from_tree
940
return InterTree.get(to_tree, from_tree).find_source_path(
941
path, recurse=recurse)
1351
file_id = from_tree.path2id(path)
1353
raise errors.NoSuchFile(path)
1355
return to_tree.id2path(file_id)
1356
except errors.NoSuchId:
944
1360
def get_canonical_path(tree, path, normalize):