427
310
raise NotImplementedError(self._comparison_data)
429
def get_file(self, path):
430
"""Return a file object for the file path in the tree.
312
def _file_size(self, entry, stat_value):
313
raise NotImplementedError(self._file_size)
315
def get_file(self, path, file_id=None):
316
"""Return a file object for the file file_id in the tree.
318
If both file_id and path are defined, it is implementation defined as
319
to which one is used.
432
321
raise NotImplementedError(self.get_file)
434
def get_file_with_stat(self, path):
435
"""Get a file handle and stat object for path.
323
def get_file_with_stat(self, path, file_id=None):
324
"""Get a file handle and stat object for file_id.
437
326
The default implementation returns (self.get_file, None) for backwards
440
329
:param path: The path of the file.
330
:param file_id: The file id to read, if it is known.
441
331
:return: A tuple (file_handle, stat_value_or_None). If the tree has
442
332
no stat facility, or need for a stat cache feedback during commit,
443
333
it may return None for the second element of the tuple.
445
return (self.get_file(path), None)
335
return (self.get_file(path, file_id), None)
447
def get_file_text(self, path):
337
def get_file_text(self, path, file_id=None):
448
338
"""Return the byte content of a file.
450
340
:param path: The path of the file.
341
:param file_id: The file_id of the file.
343
If both file_id and path are supplied, an implementation may use
452
346
:returns: A single byte string for the whole file.
454
with self.get_file(path) as my_file:
348
my_file = self.get_file(path, file_id)
455
350
return my_file.read()
457
def get_file_lines(self, path):
354
def get_file_lines(self, path, file_id=None):
458
355
"""Return the content of a file, as lines.
460
357
:param path: The path of the file.
358
:param file_id: The file_id of the file.
360
If both file_id and path are supplied, an implementation may use
462
return osutils.split_lines(self.get_file_text(path))
363
return osutils.split_lines(self.get_file_text(path, file_id))
464
def get_file_verifier(self, path, stat_value=None):
365
def get_file_verifier(self, path, file_id=None, stat_value=None):
465
366
"""Return a verifier for a file.
467
368
The default implementation returns a sha1.
370
:param file_id: The handle for this file.
469
371
:param path: The path that this file can be found at.
470
372
These must point to the same object.
471
373
:param stat_value: Optional stat value for the object
472
374
:return: Tuple with verifier name and verifier data
474
return ("SHA1", self.get_file_sha1(path, stat_value=stat_value))
376
return ("SHA1", self.get_file_sha1(path, file_id,
377
stat_value=stat_value))
476
def get_file_sha1(self, path, stat_value=None):
379
def get_file_sha1(self, path, file_id=None, stat_value=None):
477
380
"""Return the SHA1 file for a file.
479
382
:note: callers should use get_file_verifier instead
523
433
this implementation, it is a tuple containing a single bytestring with
524
434
the complete text of the file.
526
:param desired_files: a list of (path, identifier) pairs
436
:param desired_files: a list of (file_id, identifier) pairs
528
for path, identifier in desired_files:
438
for file_id, identifier in desired_files:
529
439
# We wrap the string in a tuple so that we can return an iterable
530
440
# of bytestrings. (Technically, a bytestring is also an iterable
531
441
# of bytestrings, but iterating through each character is not
533
cur_file = (self.get_file_text(path),)
443
# TODO(jelmer): Pass paths into iter_files_bytes
444
path = self.id2path(file_id)
445
cur_file = (self.get_file_text(path, file_id),)
534
446
yield identifier, cur_file
536
def get_symlink_target(self, path):
537
"""Get the target for a given path.
448
def get_symlink_target(self, path, file_id=None):
449
"""Get the target for a given file_id.
539
It is assumed that the caller already knows that path is referencing
451
It is assumed that the caller already knows that file_id is referencing
453
:param file_id: Handle for the symlink entry.
541
454
:param path: The path of the file.
455
If both file_id and path are supplied, an implementation may use
542
457
:return: The path the symlink points to.
544
459
raise NotImplementedError(self.get_symlink_target)
546
def annotate_iter(self, path,
461
def get_root_id(self):
462
"""Return the file_id for the root of this tree."""
463
raise NotImplementedError(self.get_root_id)
465
def annotate_iter(self, path, file_id=None,
547
466
default_revision=_mod_revision.CURRENT_REVISION):
548
467
"""Return an iterator of revision_id, line tuples.
550
469
For working trees (and mutable trees in general), the special
551
470
revision_id 'current:' will be used for lines that are new in this
552
471
tree, e.g. uncommitted changes.
553
:param path: The file to produce an annotated version from
472
:param file_id: The file to produce an annotated version from
554
473
:param default_revision: For lines that don't match a basis, mark them
555
474
with this revision id. Not all implementations will make use of
558
477
raise NotImplementedError(self.annotate_iter)
479
def _get_plan_merge_data(self, file_id, other, base):
480
from .bzr import versionedfile
481
vf = versionedfile._PlanMergeVersionedFile(file_id)
482
last_revision_a = self._get_file_revision(
483
self.id2path(file_id), file_id, vf, 'this:')
484
last_revision_b = other._get_file_revision(
485
other.id2path(file_id), file_id, vf, 'other:')
487
last_revision_base = None
489
last_revision_base = base._get_file_revision(
490
base.id2path(file_id), file_id, vf, 'base:')
491
return vf, last_revision_a, last_revision_b, last_revision_base
493
def plan_file_merge(self, file_id, other, base=None):
494
"""Generate a merge plan based on annotations.
496
If the file contains uncommitted changes in this tree, they will be
497
attributed to the 'current:' pseudo-revision. If the file contains
498
uncommitted changes in the other tree, they will be assigned to the
499
'other:' pseudo-revision.
501
data = self._get_plan_merge_data(file_id, other, base)
502
vf, last_revision_a, last_revision_b, last_revision_base = data
503
return vf.plan_merge(last_revision_a, last_revision_b,
506
def plan_file_lca_merge(self, file_id, other, base=None):
507
"""Generate a merge plan based lca-newness.
509
If the file contains uncommitted changes in this tree, they will be
510
attributed to the 'current:' pseudo-revision. If the file contains
511
uncommitted changes in the other tree, they will be assigned to the
512
'other:' pseudo-revision.
514
data = self._get_plan_merge_data(file_id, other, base)
515
vf, last_revision_a, last_revision_b, last_revision_base = data
516
return vf.plan_lca_merge(last_revision_a, last_revision_b,
519
def _iter_parent_trees(self):
520
"""Iterate through parent trees, defaulting to Tree.revision_tree."""
521
for revision_id in self.get_parent_ids():
523
yield self.revision_tree(revision_id)
524
except errors.NoSuchRevisionInTree:
525
yield self.repository.revision_tree(revision_id)
527
def _get_file_revision(self, path, file_id, vf, tree_revision):
528
"""Ensure that file_id, tree_revision is in vf to plan the merge."""
529
if getattr(self, '_repository', None) is None:
530
last_revision = tree_revision
531
parent_keys = [(file_id, t.get_file_revision(path, file_id)) for t in
532
self._iter_parent_trees()]
533
vf.add_lines((file_id, last_revision), parent_keys,
534
self.get_file_lines(path, file_id))
535
repo = self.branch.repository
538
last_revision = self.get_file_revision(path, file_id)
539
base_vf = self._repository.texts
540
if base_vf not in vf.fallback_versionedfiles:
541
vf.fallback_versionedfiles.append(base_vf)
544
def _check_retrieved(self, ie, f):
547
fp = osutils.fingerprint_file(f)
550
if ie.text_size is not None:
551
if ie.text_size != fp['size']:
552
raise errors.BzrError(
553
"mismatched size for file %r in %r" %
554
(ie.file_id, self._store),
555
["inventory expects %d bytes" % ie.text_size,
556
"file is actually %d bytes" % fp['size'],
557
"store is probably damaged/corrupt"])
559
if ie.text_sha1 != fp['sha1']:
560
raise errors.BzrError("wrong SHA-1 for file %r in %r" %
561
(ie.file_id, self._store),
562
["inventory expects %s" % ie.text_sha1,
563
"file is actually %s" % fp['sha1'],
564
"store is probably damaged/corrupt"])
560
566
def path2id(self, path):
561
567
"""Return the id for path in this tree."""
562
568
raise NotImplementedError(self.path2id)
728
741
searcher = default_searcher
731
def archive(self, format, name, root='', subdir=None,
733
"""Create an archive of this tree.
735
:param format: Format name (e.g. 'tar')
736
:param name: target file name
737
:param root: Root directory name (or None)
738
:param subdir: Subdirectory to export (or None)
739
:return: Iterator over archive chunks
741
from .archive import create_archive
742
with self.lock_read():
743
return create_archive(format, self, name, root,
744
subdir, force_mtime=force_mtime)
747
def versionable_kind(cls, kind):
748
"""Check if this tree support versioning a specific file kind."""
749
return (kind in ('file', 'directory', 'symlink', 'tree-reference'))
751
def preview_transform(self, pb=None):
752
"""Obtain a transform object."""
753
raise NotImplementedError(self.preview_transform)
745
def find_ids_across_trees(filenames, trees, require_versioned=True):
746
"""Find the ids corresponding to specified filenames.
748
All matches in all trees will be used, and all children of matched
749
directories will be used.
751
:param filenames: The filenames to find file_ids for (if None, returns
753
:param trees: The trees to find file_ids within
754
:param require_versioned: if true, all specified filenames must occur in
756
:return: a set of file ids for the specified filenames and their children.
760
specified_path_ids = _find_ids_across_trees(filenames, trees,
762
return _find_children_across_trees(specified_path_ids, trees)
765
def _find_ids_across_trees(filenames, trees, require_versioned):
766
"""Find the ids corresponding to specified filenames.
768
All matches in all trees will be used, but subdirectories are not scanned.
770
:param filenames: The filenames to find file_ids for
771
:param trees: The trees to find file_ids within
772
:param require_versioned: if true, all specified filenames must occur in
774
:return: a set of file ids for the specified filenames
777
interesting_ids = set()
778
for tree_path in filenames:
781
file_id = tree.path2id(tree_path)
782
if file_id is not None:
783
interesting_ids.add(file_id)
786
not_versioned.append(tree_path)
787
if len(not_versioned) > 0 and require_versioned:
788
raise errors.PathsNotVersionedError(not_versioned)
789
return interesting_ids
792
def _find_children_across_trees(specified_ids, trees):
793
"""Return a set including specified ids and their children.
795
All matches in all trees will be used.
797
:param trees: The trees to find file_ids within
798
:return: a set containing all specified ids and their children
800
interesting_ids = set(specified_ids)
801
pending = interesting_ids
802
# now handle children of interesting ids
803
# we loop so that we handle all children of each id in both trees
804
while len(pending) > 0:
806
for file_id in pending:
808
if not tree.has_or_had_id(file_id):
810
for child_id in tree.iter_children(file_id):
811
if child_id not in interesting_ids:
812
new_pending.add(child_id)
813
interesting_ids.update(new_pending)
814
pending = new_pending
815
return interesting_ids
756
818
class InterTree(InterObject):
778
840
# it works for all trees.
843
def _changes_from_entries(self, source_entry, target_entry,
844
source_path=None, target_path=None):
845
"""Generate a iter_changes tuple between source_entry and target_entry.
847
:param source_entry: An inventory entry from self.source, or None.
848
:param target_entry: An inventory entry from self.target, or None.
849
:param source_path: The path of source_entry, if known. If not known
850
it will be looked up.
851
:param target_path: The path of target_entry, if known. If not known
852
it will be looked up.
853
:return: A tuple, item 0 of which is an iter_changes result tuple, and
854
item 1 is True if there are any changes in the result tuple.
856
if source_entry is None:
857
if target_entry is None:
859
file_id = target_entry.file_id
861
file_id = source_entry.file_id
862
if source_entry is not None:
863
source_versioned = True
864
source_name = source_entry.name
865
source_parent = source_entry.parent_id
866
if source_path is None:
867
source_path = self.source.id2path(file_id)
868
source_kind, source_executable, source_stat = \
869
self.source._comparison_data(source_entry, source_path)
871
source_versioned = False
875
source_executable = None
876
if target_entry is not None:
877
target_versioned = True
878
target_name = target_entry.name
879
target_parent = target_entry.parent_id
880
if target_path is None:
881
target_path = self.target.id2path(file_id)
882
target_kind, target_executable, target_stat = \
883
self.target._comparison_data(target_entry, target_path)
885
target_versioned = False
889
target_executable = None
890
versioned = (source_versioned, target_versioned)
891
kind = (source_kind, target_kind)
892
changed_content = False
893
if source_kind != target_kind:
894
changed_content = True
895
elif source_kind == 'file':
896
if not self.file_content_matches(file_id, file_id, source_path,
897
target_path, source_stat, target_stat):
898
changed_content = True
899
elif source_kind == 'symlink':
900
if (self.source.get_symlink_target(source_path, file_id) !=
901
self.target.get_symlink_target(target_path, file_id)):
902
changed_content = True
903
elif source_kind == 'tree-reference':
904
if (self.source.get_reference_revision(source_path, file_id)
905
!= self.target.get_reference_revision(target_path, file_id)):
906
changed_content = True
907
parent = (source_parent, target_parent)
908
name = (source_name, target_name)
909
executable = (source_executable, target_executable)
910
if (changed_content is not False or versioned[0] != versioned[1]
911
or parent[0] != parent[1] or name[0] != name[1] or
912
executable[0] != executable[1]):
916
return (file_id, (source_path, target_path), changed_content,
917
versioned, parent, name, kind, executable), changes
781
919
def compare(self, want_unchanged=False, specific_files=None,
782
extra_trees=None, require_versioned=False, include_root=False,
783
want_unversioned=False):
920
extra_trees=None, require_versioned=False, include_root=False,
921
want_unversioned=False):
784
922
"""Return the changes from source to target.
786
924
:return: A TreeDelta.
843
992
output. An unversioned file is defined as one with (False, False)
844
993
for the versioned pair.
846
raise NotImplementedError(self.iter_changes)
995
lookup_trees = [self.source]
997
lookup_trees.extend(extra_trees)
998
# The ids of items we need to examine to insure delta consistency.
999
precise_file_ids = set()
1000
changed_file_ids = []
1001
if specific_files == []:
1002
specific_file_ids = []
1004
specific_file_ids = self.target.paths2ids(specific_files,
1005
lookup_trees, require_versioned=require_versioned)
1006
if specific_files is not None:
1007
# reparented or added entries must have their parents included
1008
# so that valid deltas can be created. The seen_parents set
1009
# tracks the parents that we need to have.
1010
# The seen_dirs set tracks directory entries we've yielded.
1011
# After outputting version object in to_entries we set difference
1012
# the two seen sets and start checking parents.
1013
seen_parents = set()
1015
if want_unversioned:
1016
all_unversioned = sorted([(p.split('/'), p) for p in
1017
self.target.extras()
1018
if specific_files is None or
1019
osutils.is_inside_any(specific_files, p)])
1020
all_unversioned = collections.deque(all_unversioned)
1022
all_unversioned = collections.deque()
1024
from_entries_by_dir = list(self.source.iter_entries_by_dir(
1025
specific_file_ids=specific_file_ids))
1026
from_data = dict((e.file_id, (p, e)) for p, e in from_entries_by_dir)
1027
to_entries_by_dir = list(self.target.iter_entries_by_dir(
1028
specific_file_ids=specific_file_ids))
1029
num_entries = len(from_entries_by_dir) + len(to_entries_by_dir)
1031
# the unversioned path lookup only occurs on real trees - where there
1032
# can be extras. So the fake_entry is solely used to look up
1033
# executable it values when execute is not supported.
1034
fake_entry = inventory.InventoryFile('unused', 'unused', 'unused')
1035
for target_path, target_entry in to_entries_by_dir:
1036
while (all_unversioned and
1037
all_unversioned[0][0] < target_path.split('/')):
1038
unversioned_path = all_unversioned.popleft()
1039
target_kind, target_executable, target_stat = \
1040
self.target._comparison_data(fake_entry, unversioned_path[1])
1041
yield (None, (None, unversioned_path[1]), True, (False, False),
1043
(None, unversioned_path[0][-1]),
1044
(None, target_kind),
1045
(None, target_executable))
1046
source_path, source_entry = from_data.get(target_entry.file_id,
1048
result, changes = self._changes_from_entries(source_entry,
1049
target_entry, source_path=source_path, target_path=target_path)
1050
to_paths[result[0]] = result[1][1]
1055
pb.update('comparing files', entry_count, num_entries)
1056
if changes or include_unchanged:
1057
if specific_file_ids is not None:
1058
new_parent_id = result[4][1]
1059
precise_file_ids.add(new_parent_id)
1060
changed_file_ids.append(result[0])
1062
# Ensure correct behaviour for reparented/added specific files.
1063
if specific_files is not None:
1064
# Record output dirs
1065
if result[6][1] == 'directory':
1066
seen_dirs.add(result[0])
1067
# Record parents of reparented/added entries.
1068
versioned = result[3]
1070
if not versioned[0] or parents[0] != parents[1]:
1071
seen_parents.add(parents[1])
1072
while all_unversioned:
1073
# yield any trailing unversioned paths
1074
unversioned_path = all_unversioned.popleft()
1075
to_kind, to_executable, to_stat = \
1076
self.target._comparison_data(fake_entry, unversioned_path[1])
1077
yield (None, (None, unversioned_path[1]), True, (False, False),
1079
(None, unversioned_path[0][-1]),
1081
(None, to_executable))
1082
# Yield all remaining source paths
1083
for path, from_entry in from_entries_by_dir:
1084
file_id = from_entry.file_id
1085
if file_id in to_paths:
1088
if not self.target.has_id(file_id):
1089
# common case - paths we have not emitted are not present in
1093
to_path = self.target.id2path(file_id)
1096
pb.update('comparing files', entry_count, num_entries)
1097
versioned = (True, False)
1098
parent = (from_entry.parent_id, None)
1099
name = (from_entry.name, None)
1100
from_kind, from_executable, stat_value = \
1101
self.source._comparison_data(from_entry, path)
1102
kind = (from_kind, None)
1103
executable = (from_executable, None)
1104
changed_content = from_kind is not None
1105
# the parent's path is necessarily known at this point.
1106
changed_file_ids.append(file_id)
1107
yield(file_id, (path, to_path), changed_content, versioned, parent,
1108
name, kind, executable)
1109
changed_file_ids = set(changed_file_ids)
1110
if specific_file_ids is not None:
1111
for result in self._handle_precise_ids(precise_file_ids,
1115
def _get_entry(self, tree, file_id):
1116
"""Get an inventory entry from a tree, with missing entries as None.
1118
If the tree raises NotImplementedError on accessing .inventory, then
1119
this is worked around using iter_entries_by_dir on just the file id
1122
:param tree: The tree to lookup the entry in.
1123
:param file_id: The file_id to lookup.
1126
inventory = tree.root_inventory
1127
except (AttributeError, NotImplementedError):
1128
# No inventory available.
1130
iterator = tree.iter_entries_by_dir(specific_file_ids=[file_id])
1131
return iterator.next()[1]
1132
except StopIteration:
1136
return inventory[file_id]
1137
except errors.NoSuchId:
1140
def _handle_precise_ids(self, precise_file_ids, changed_file_ids,
1141
discarded_changes=None):
1142
"""Fill out a partial iter_changes to be consistent.
1144
:param precise_file_ids: The file ids of parents that were seen during
1146
:param changed_file_ids: The file ids of already emitted items.
1147
:param discarded_changes: An optional dict of precalculated
1148
iter_changes items which the partial iter_changes had not output
1150
:return: A generator of iter_changes items to output.
1152
# process parents of things that had changed under the users
1153
# requested paths to prevent incorrect paths or parent ids which
1154
# aren't in the tree.
1155
while precise_file_ids:
1156
precise_file_ids.discard(None)
1157
# Don't emit file_ids twice
1158
precise_file_ids.difference_update(changed_file_ids)
1159
if not precise_file_ids:
1161
# If the there was something at a given output path in source, we
1162
# have to include the entry from source in the delta, or we would
1163
# be putting this entry into a used path.
1165
for parent_id in precise_file_ids:
1167
paths.append(self.target.id2path(parent_id))
1168
except errors.NoSuchId:
1169
# This id has been dragged in from the source by delta
1170
# expansion and isn't present in target at all: we don't
1171
# need to check for path collisions on it.
1174
old_id = self.source.path2id(path)
1175
precise_file_ids.add(old_id)
1176
precise_file_ids.discard(None)
1177
current_ids = precise_file_ids
1178
precise_file_ids = set()
1179
# We have to emit all of precise_file_ids that have been altered.
1180
# We may have to output the children of some of those ids if any
1181
# directories have stopped being directories.
1182
for file_id in current_ids:
1184
if discarded_changes:
1185
result = discarded_changes.get(file_id)
1190
old_entry = self._get_entry(self.source, file_id)
1191
new_entry = self._get_entry(self.target, file_id)
1192
result, changes = self._changes_from_entries(
1193
old_entry, new_entry)
1196
# Get this parents parent to examine.
1197
new_parent_id = result[4][1]
1198
precise_file_ids.add(new_parent_id)
1200
if (result[6][0] == 'directory' and
1201
result[6][1] != 'directory'):
1202
# This stopped being a directory, the old children have
1204
if old_entry is None:
1205
# Reusing a discarded change.
1206
old_entry = self._get_entry(self.source, file_id)
1207
precise_file_ids.update(
1208
self.source.iter_children(file_id))
1209
changed_file_ids.add(result[0])
848
1212
def file_content_matches(
849
self, source_path, target_path,
850
source_stat=None, target_stat=None):
1213
self, source_file_id, target_file_id, source_path=None,
1214
target_path=None, source_stat=None, target_stat=None):
851
1215
"""Check if two files are the same in the source and target trees.
853
1217
This only checks that the contents of the files are the same,
854
1218
it does not touch anything else.
1220
:param source_file_id: File id of the file in the source tree
1221
:param target_file_id: File id of the file in the target tree
856
1222
:param source_path: Path of the file in the source tree
857
1223
:param target_path: Path of the file in the target tree
858
1224
:param source_stat: Optional stat value of the file in the source tree
860
1226
:return: Boolean indicating whether the files have the same contents
862
1228
with self.lock_read():
1229
if source_path is None:
1230
source_path = self.source.id2path(source_file_id)
1231
if target_path is None:
1232
target_path = self.target.id2path(target_file_id)
863
1233
source_verifier_kind, source_verifier_data = (
864
self.source.get_file_verifier(source_path, source_stat))
1234
self.source.get_file_verifier(
1235
source_path, source_file_id, source_stat))
865
1236
target_verifier_kind, target_verifier_data = (
866
1237
self.target.get_file_verifier(
867
target_path, target_stat))
1238
target_path, target_file_id, target_stat))
868
1239
if source_verifier_kind == target_verifier_kind:
869
1240
return (source_verifier_data == target_verifier_data)
870
1241
# Fall back to SHA1 for now
871
1242
if source_verifier_kind != "SHA1":
872
1243
source_sha1 = self.source.get_file_sha1(
873
source_path, source_stat)
1244
source_path, source_file_id, source_stat)
875
1246
source_sha1 = source_verifier_data
876
1247
if target_verifier_kind != "SHA1":
877
1248
target_sha1 = self.target.get_file_sha1(
878
target_path, target_stat)
1249
target_path, target_file_id, target_stat)
880
1251
target_sha1 = target_verifier_data
881
1252
return (source_sha1 == target_sha1)
883
def find_target_path(self, path, recurse='none'):
884
"""Find target tree path.
886
:param path: Path to search for (exists in source)
887
:return: path in target, or None if there is no equivalent path.
888
:raise NoSuchFile: If the path doesn't exist in source
890
raise NotImplementedError(self.find_target_path)
892
def find_source_path(self, path, recurse='none'):
893
"""Find the source tree path.
895
:param path: Path to search for (exists in target)
896
:return: path in source, or None if there is no equivalent path.
897
:raise NoSuchFile: if the path doesn't exist in target
899
raise NotImplementedError(self.find_source_path)
901
def find_target_paths(self, paths, recurse='none'):
902
"""Find target tree paths.
904
:param paths: Iterable over paths in target to search for
905
:return: Dictionary mapping from source paths to paths in target , or
906
None if there is no equivalent path.
910
ret[path] = self.find_target_path(path, recurse=recurse)
913
def find_source_paths(self, paths, recurse='none'):
914
"""Find source tree paths.
916
:param paths: Iterable over paths in target to search for
917
:return: Dictionary mapping from target paths to paths in source, or
918
None if there is no equivalent path.
922
ret[path] = self.find_source_path(path, recurse=recurse)
926
def find_previous_paths(from_tree, to_tree, paths, recurse='none'):
927
"""Find previous tree paths.
929
:param from_tree: From tree
930
:param to_tree: To tree
931
:param paths: Iterable over paths in from_tree to search for
932
:return: Dictionary mapping from from_tree paths to paths in to_tree, or
933
None if there is no equivalent path.
935
return InterTree.get(to_tree, from_tree).find_source_paths(paths, recurse=recurse)
938
def find_previous_path(from_tree, to_tree, path, recurse='none'):
939
"""Find previous tree path.
941
:param from_tree: From tree
942
:param to_tree: To tree
943
:param path: Path to search for (exists in from_tree)
944
:return: path in to_tree, or None if there is no equivalent path.
945
:raise NoSuchFile: If the path doesn't exist in from_tree
947
return InterTree.get(to_tree, from_tree).find_source_path(
948
path, recurse=recurse)
951
def get_canonical_path(tree, path, normalize):
952
"""Find the canonical path of an item, ignoring case.
954
:param tree: Tree to traverse
955
:param path: Case-insensitive path to look up
956
:param normalize: Function to normalize a filename for comparison
957
:return: The canonical path
961
bit_iter = iter(path.split("/"))
963
lelt = normalize(elt)
966
for child in tree.iter_child_entries(cur_path):
968
if child.name == elt:
969
# if we found an exact match, we can stop now; if
970
# we found an approximate match we need to keep
971
# searching because there might be an exact match
973
new_path = osutils.pathjoin(cur_path, child.name)
975
elif normalize(child.name) == lelt:
976
new_path = osutils.pathjoin(cur_path, child.name)
977
except errors.NoSuchId:
978
# before a change is committed we can see this error...
980
except errors.NotADirectory:
985
# got to the end of this directory and no entries matched.
986
# Return what matched so far, plus the rest as specified.
987
cur_path = osutils.pathjoin(cur_path, elt, *list(bit_iter))
1254
InterTree.register_optimiser(InterTree)
1257
class MultiWalker(object):
1258
"""Walk multiple trees simultaneously, getting combined results."""
1260
# Note: This could be written to not assume you can do out-of-order
1261
# lookups. Instead any nodes that don't match in all trees could be
1262
# marked as 'deferred', and then returned in the final cleanup loop.
1263
# For now, I think it is "nicer" to return things as close to the
1264
# "master_tree" order as we can.
1266
def __init__(self, master_tree, other_trees):
1267
"""Create a new MultiWalker.
1269
All trees being walked must implement "iter_entries_by_dir()", such
1270
that they yield (path, object) tuples, where that object will have a
1271
'.file_id' member, that can be used to check equality.
1273
:param master_tree: All trees will be 'slaved' to the master_tree such
1274
that nodes in master_tree will be used as 'first-pass' sync points.
1275
Any nodes that aren't in master_tree will be merged in a second
1277
:param other_trees: A list of other trees to walk simultaneously.
1279
self._master_tree = master_tree
1280
self._other_trees = other_trees
1282
# Keep track of any nodes that were properly processed just out of
1283
# order, that way we don't return them at the end, we don't have to
1284
# track *all* processed file_ids, just the out-of-order ones
1285
self._out_of_order_processed = set()
1288
def _step_one(iterator):
1289
"""Step an iter_entries_by_dir iterator.
1291
:return: (has_more, path, ie)
1292
If has_more is False, path and ie will be None.
1295
path, ie = next(iterator)
1296
except StopIteration:
1297
return False, None, None
1299
return True, path, ie
1302
def _cmp_path_by_dirblock(path1, path2):
1303
"""Compare two paths based on what directory they are in.
1305
This generates a sort order, such that all children of a directory are
1306
sorted together, and grandchildren are in the same order as the
1307
children appear. But all grandchildren come after all children.
1309
:param path1: first path
1310
:param path2: the second path
1311
:return: negative number if ``path1`` comes first,
1312
0 if paths are equal
1313
and a positive number if ``path2`` sorts first
1315
# Shortcut this special case
1318
# This is stolen from _dirstate_helpers_py.py, only switching it to
1319
# Unicode objects. Consider using encode_utf8() and then using the
1320
# optimized versions, or maybe writing optimized unicode versions.
1321
if not isinstance(path1, unicode):
1322
raise TypeError("'path1' must be a unicode string, not %s: %r"
1323
% (type(path1), path1))
1324
if not isinstance(path2, unicode):
1325
raise TypeError("'path2' must be a unicode string, not %s: %r"
1326
% (type(path2), path2))
1327
return cmp(MultiWalker._path_to_key(path1),
1328
MultiWalker._path_to_key(path2))
1331
def _path_to_key(path):
1332
dirname, basename = osutils.split(path)
1333
return (dirname.split(u'/'), basename)
1335
def _lookup_by_file_id(self, extra_entries, other_tree, file_id):
1336
"""Lookup an inventory entry by file_id.
1338
This is called when an entry is missing in the normal order.
1339
Generally this is because a file was either renamed, or it was
1340
deleted/added. If the entry was found in the inventory and not in
1341
extra_entries, it will be added to self._out_of_order_processed
1343
:param extra_entries: A dictionary of {file_id: (path, ie)}. This
1344
should be filled with entries that were found before they were
1345
used. If file_id is present, it will be removed from the
1347
:param other_tree: The Tree to search, in case we didn't find the entry
1349
:param file_id: The file_id to look for
1350
:return: (path, ie) if found or (None, None) if not present.
1352
if file_id in extra_entries:
1353
return extra_entries.pop(file_id)
1354
# TODO: Is id2path better as the first call, or is
1355
# inventory[file_id] better as a first check?
1357
cur_path = other_tree.id2path(file_id)
1358
except errors.NoSuchId:
1360
if cur_path is None:
1363
self._out_of_order_processed.add(file_id)
1364
cur_ie = other_tree.root_inventory[file_id]
1365
return (cur_path, cur_ie)
1368
"""Match up the values in the different trees."""
1369
for result in self._walk_master_tree():
1371
self._finish_others()
1372
for result in self._walk_others():
1375
def _walk_master_tree(self):
1376
"""First pass, walk all trees in lock-step.
1378
When we are done, all nodes in the master_tree will have been
1379
processed. _other_walkers, _other_entries, and _others_extra will be
1380
set on 'self' for future processing.
1382
# This iterator has the most "inlining" done, because it tends to touch
1383
# every file in the tree, while the others only hit nodes that don't
1385
master_iterator = self._master_tree.iter_entries_by_dir()
1387
other_walkers = [other.iter_entries_by_dir()
1388
for other in self._other_trees]
1389
other_entries = [self._step_one(walker) for walker in other_walkers]
1390
# Track extra nodes in the other trees
1391
others_extra = [{} for _ in range(len(self._other_trees))]
1393
master_has_more = True
1394
step_one = self._step_one
1395
lookup_by_file_id = self._lookup_by_file_id
1396
out_of_order_processed = self._out_of_order_processed
1398
while master_has_more:
1399
(master_has_more, path, master_ie) = step_one(master_iterator)
1400
if not master_has_more:
1403
file_id = master_ie.file_id
1405
other_values_append = other_values.append
1406
next_other_entries = []
1407
next_other_entries_append = next_other_entries.append
1408
for idx, (other_has_more, other_path, other_ie) in enumerate(other_entries):
1409
if not other_has_more:
1410
other_values_append(lookup_by_file_id(
1411
others_extra[idx], self._other_trees[idx], file_id))
1412
next_other_entries_append((False, None, None))
1413
elif file_id == other_ie.file_id:
1414
# This is the critical code path, as most of the entries
1415
# should match between most trees.
1416
other_values_append((other_path, other_ie))
1417
next_other_entries_append(step_one(other_walkers[idx]))
1419
# This walker did not match, step it until it either
1420
# matches, or we know we are past the current walker.
1421
other_walker = other_walkers[idx]
1422
other_extra = others_extra[idx]
1423
while (other_has_more and
1424
self._cmp_path_by_dirblock(other_path, path) < 0):
1425
other_file_id = other_ie.file_id
1426
if other_file_id not in out_of_order_processed:
1427
other_extra[other_file_id] = (other_path, other_ie)
1428
other_has_more, other_path, other_ie = \
1429
step_one(other_walker)
1430
if other_has_more and other_ie.file_id == file_id:
1431
# We ended up walking to this point, match and step
1433
other_values_append((other_path, other_ie))
1434
other_has_more, other_path, other_ie = \
1435
step_one(other_walker)
1437
# This record isn't in the normal order, see if it
1439
other_values_append(lookup_by_file_id(
1440
other_extra, self._other_trees[idx], file_id))
1441
next_other_entries_append((other_has_more, other_path,
1443
other_entries = next_other_entries
1445
# We've matched all the walkers, yield this datapoint
1446
yield path, file_id, master_ie, other_values
1447
self._other_walkers = other_walkers
1448
self._other_entries = other_entries
1449
self._others_extra = others_extra
1451
def _finish_others(self):
1452
"""Finish walking the other iterators, so we get all entries."""
1453
for idx, info in enumerate(self._other_entries):
1454
other_extra = self._others_extra[idx]
1455
(other_has_more, other_path, other_ie) = info
1456
while other_has_more:
1457
other_file_id = other_ie.file_id
1458
if other_file_id not in self._out_of_order_processed:
1459
other_extra[other_file_id] = (other_path, other_ie)
1460
other_has_more, other_path, other_ie = \
1461
self._step_one(self._other_walkers[idx])
1462
del self._other_entries
1464
def _walk_others(self):
    """Finish up by walking all the 'deferred' nodes."""
    # TODO: One alternative would be to grab all possible unprocessed
    #       file_ids, and then sort by path, and then yield them. That
    #       might ensure better ordering, in case a caller strictly
    #       requires parents before children.
    for idx, other_extra in enumerate(self._others_extra):
        # Yield this tree's leftovers in path order.
        others = sorted(viewvalues(other_extra),
                        key=lambda x: self._path_to_key(x[0]))
        for other_path, other_ie in others:
            file_id = other_ie.file_id
            # We don't need to check out_of_order_processed here, because
            # the lookup_by_file_id will be removing anything processed
            # from the extras cache
            other_extra.pop(file_id)
            # Trees before this one have already been drained, so the
            # entry is known to be absent from them.
            other_values = [(None, None)] * idx
            other_values.append((other_path, other_ie))
            # Only the later trees still need a by-file_id lookup.
            for alt_idx in range(idx + 1, len(self._others_extra)):
                alt_extra = self._others_extra[alt_idx]
                alt_tree = self._other_trees[alt_idx]
                other_values.append(self._lookup_by_file_id(
                    alt_extra, alt_tree, file_id))
            yield other_path, file_id, None, other_values