1
# Copyright (C) 2005-2011 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Tree classes, representing directory at point in time.
20
from __future__ import absolute_import
22
from .lazy_import import lazy_import
23
lazy_import(globals(), """
27
conflicts as _mod_conflicts,
31
revision as _mod_revision,
35
from breezy.i18n import gettext
43
from .inter import InterObject
50
class FileTimestampUnavailable(errors.BzrError):
52
_fmt = "The filestamp for %(path)s is not available."
56
def __init__(self, path):
60
class TreeEntry(object):
61
"""An entry that implements the minimum interface used by commands.
64
def __eq__(self, other):
65
# yes, this is ugly, TODO: best practice __eq__ style.
66
return (isinstance(other, TreeEntry)
67
and other.__class__ == self.__class__)
69
def kind_character(self):
73
class TreeDirectory(TreeEntry):
74
"""See TreeEntry. This is a directory in a working tree."""
76
def kind_character(self):
80
class TreeFile(TreeEntry):
81
"""See TreeEntry. This is a regular file in a working tree."""
83
def kind_character(self):
87
class TreeLink(TreeEntry):
88
"""See TreeEntry. This is a symlink in a working tree."""
90
def kind_character(self):
94
class TreeReference(TreeEntry):
95
"""See TreeEntry. This is a reference to a nested tree in a working tree."""
97
def kind_character(self):
102
"""Abstract file tree.
104
There are several subclasses:
106
* `WorkingTree` exists as files on disk editable by the user.
108
* `RevisionTree` is a tree as recorded at some point in the past.
110
Trees can be compared, etc, regardless of whether they are working
111
trees or versioned trees.
114
def supports_rename_tracking(self):
115
"""Whether this tree supports rename tracking.
117
This defaults to True, but some implementations may want to override
122
def has_versioned_directories(self):
123
"""Whether this tree can contain explicitly versioned directories.
125
This defaults to True, but some implementations may want to override
130
def changes_from(self, other, want_unchanged=False, specific_files=None,
131
extra_trees=None, require_versioned=False, include_root=False,
132
want_unversioned=False):
133
"""Return a TreeDelta of the changes from other to this tree.
135
:param other: A tree to compare with.
136
:param specific_files: An optional list of file paths to restrict the
137
comparison to. When mapping filenames to ids, all matches in all
138
trees (including optional extra_trees) are used, and all children of
139
matched directories are included.
140
:param want_unchanged: An optional boolean requesting the inclusion of
141
unchanged entries in the result.
142
:param extra_trees: An optional list of additional trees to use when
143
mapping the contents of specific_files (paths) to file_ids.
144
:param require_versioned: An optional boolean (defaults to False). When
145
supplied and True all the 'specific_files' must be versioned, or
146
a PathsNotVersionedError will be thrown.
147
:param want_unversioned: Scan for unversioned paths.
149
The comparison will be performed by an InterTree object looked up on
152
# Martin observes that Tree.changes_from returns a TreeDelta and this
153
# may confuse people, because the class name of the returned object is
154
# a synonym of the object referenced in the method name.
155
return InterTree.get(other, self).compare(
156
want_unchanged=want_unchanged,
157
specific_files=specific_files,
158
extra_trees=extra_trees,
159
require_versioned=require_versioned,
160
include_root=include_root,
161
want_unversioned=want_unversioned,
164
def iter_changes(self, from_tree, include_unchanged=False,
165
specific_files=None, pb=None, extra_trees=None,
166
require_versioned=True, want_unversioned=False):
167
"""See InterTree.iter_changes"""
168
intertree = InterTree.get(from_tree, self)
169
return intertree.iter_changes(include_unchanged, specific_files, pb,
170
extra_trees, require_versioned, want_unversioned=want_unversioned)
173
"""Get a list of the conflicts in the tree.
175
Each conflict is an instance of breezy.conflicts.Conflict.
177
return _mod_conflicts.ConflictList()
180
"""For trees that can have unversioned files, return all such paths."""
183
def get_parent_ids(self):
184
"""Get the parent ids for this tree.
186
:return: a list of parent ids. [] is returned to indicate
187
a tree with no parents.
188
:raises: BzrError if the parents are not known.
190
raise NotImplementedError(self.get_parent_ids)
192
def has_filename(self, filename):
193
"""True if the tree has given filename."""
194
raise NotImplementedError(self.has_filename)
196
def is_ignored(self, filename):
197
"""Check whether the filename is ignored by this tree.
199
:param filename: The relative filename within the tree.
200
:return: True if the filename is ignored.
204
def all_file_ids(self):
205
"""Iterate through all file ids, including ids for missing files."""
206
raise NotImplementedError(self.all_file_ids)
208
def all_versioned_paths(self):
209
"""Iterate through all paths, including paths for missing files."""
210
raise NotImplementedError(self.all_versioned_paths)
212
def id2path(self, file_id):
213
"""Return the path for a file id.
217
raise NotImplementedError(self.id2path)
219
def iter_entries_by_dir(self, specific_files=None):
220
"""Walk the tree in 'by_dir' order.
222
This will yield each entry in the tree as a (path, entry) tuple.
223
The order that they are yielded is:
225
Directories are walked in a depth-first lexicographical order,
226
however, whenever a directory is reached, all of its direct child
227
nodes are yielded in lexicographical order before yielding the
230
For example, in the tree::
240
The yield order (ignoring root) would be::
242
a, f, a/b, a/d, a/b/c, a/d/e, f/g
244
raise NotImplementedError(self.iter_entries_by_dir)
246
def iter_child_entries(self, path, file_id=None):
247
"""Iterate over the children of a directory or tree reference.
249
:param path: Path of the directory
250
:param file_id: Optional file id of the directory/tree-reference
251
:raise NoSuchId: When the file_id does not exist
252
:return: Iterator over entries in the directory
254
raise NotImplementedError(self.iter_child_entries)
256
def list_files(self, include_root=False, from_dir=None, recursive=True):
257
"""List all files in this tree.
259
:param include_root: Whether to include the entry for the tree root
260
:param from_dir: Directory under which to list files
261
:param recursive: Whether to list files recursively
262
:return: iterator over tuples of (path, versioned, kind, file_id,
265
raise NotImplementedError(self.list_files)
267
def iter_references(self):
268
if self.supports_tree_reference():
269
for path, entry in self.iter_entries_by_dir():
270
if entry.kind == 'tree-reference':
271
yield path, entry.file_id
273
def kind(self, path, file_id=None):
274
raise NotImplementedError("Tree subclass %s must implement kind"
275
% self.__class__.__name__)
277
def stored_kind(self, path, file_id=None):
278
"""File kind stored for this file_id.
280
May not match kind on disk for working trees. Always available
281
for versioned files, even when the file itself is missing.
283
return self.kind(path, file_id)
285
def path_content_summary(self, path):
286
"""Get a summary of the information about path.
288
All the attributes returned are for the canonical form, not the
289
convenient form (if content filters are in use.)
291
:param path: A relative path within the tree.
292
:return: A tuple containing kind, size, exec, sha1-or-link.
293
Kind is always present (see tree.kind()).
294
size is present if kind is file and the size of the
295
canonical form can be cheaply determined, None otherwise.
296
exec is None unless kind is file and the platform supports the 'x'
298
sha1-or-link is the link target if kind is symlink, or the sha1 if
299
it can be obtained without reading the file.
301
raise NotImplementedError(self.path_content_summary)
303
def get_reference_revision(self, path, file_id=None):
304
raise NotImplementedError("Tree subclass %s must implement "
305
"get_reference_revision"
306
% self.__class__.__name__)
308
def _comparison_data(self, entry, path):
309
"""Return a tuple of kind, executable, stat_value for a file.
311
entry may be None if there is no inventory entry for the file, but
312
path must always be supplied.
314
kind is None if there is no file present (even if an inventory id is
315
present). executable is False for non-file entries.
317
raise NotImplementedError(self._comparison_data)
319
def get_file(self, path, file_id=None):
320
"""Return a file object for the file file_id in the tree.
322
If both file_id and path are defined, it is implementation defined as
323
to which one is used.
325
raise NotImplementedError(self.get_file)
327
def get_file_with_stat(self, path, file_id=None):
328
"""Get a file handle and stat object for file_id.
330
The default implementation returns (self.get_file, None) for backwards
333
:param path: The path of the file.
334
:param file_id: The file id to read, if it is known.
335
:return: A tuple (file_handle, stat_value_or_None). If the tree has
336
no stat facility, or need for a stat cache feedback during commit,
337
it may return None for the second element of the tuple.
339
return (self.get_file(path, file_id), None)
341
def get_file_text(self, path, file_id=None):
342
"""Return the byte content of a file.
344
:param path: The path of the file.
345
:param file_id: The file_id of the file.
347
If both file_id and path are supplied, an implementation may use
350
:returns: A single byte string for the whole file.
352
with self.get_file(path, file_id) as my_file:
353
return my_file.read()
355
def get_file_lines(self, path, file_id=None):
356
"""Return the content of a file, as lines.
358
:param path: The path of the file.
359
:param file_id: The file_id of the file.
361
If both file_id and path are supplied, an implementation may use
364
return osutils.split_lines(self.get_file_text(path, file_id))
366
def get_file_verifier(self, path, file_id=None, stat_value=None):
367
"""Return a verifier for a file.
369
The default implementation returns a sha1.
371
:param file_id: The handle for this file.
372
:param path: The path that this file can be found at.
373
These must point to the same object.
374
:param stat_value: Optional stat value for the object
375
:return: Tuple with verifier name and verifier data
377
return ("SHA1", self.get_file_sha1(path, file_id,
378
stat_value=stat_value))
380
def get_file_sha1(self, path, file_id=None, stat_value=None):
381
"""Return the SHA1 file for a file.
383
:note: callers should use get_file_verifier instead
384
where possible, as the underlying repository implementation may
385
have quicker access to a non-sha1 verifier.
387
:param path: The path that this file can be found at.
388
:param file_id: The handle for this file.
389
These must point to the same object.
390
:param stat_value: Optional stat value for the object
392
raise NotImplementedError(self.get_file_sha1)
394
def get_file_mtime(self, path, file_id=None):
395
"""Return the modification time for a file.
397
:param path: The path that this file can be found at.
398
:param file_id: The handle for this file.
399
These must point to the same object.
401
raise NotImplementedError(self.get_file_mtime)
403
def get_file_size(self, path, file_id=None):
404
"""Return the size of a file in bytes.
406
This applies only to regular files. If invoked on directories or
407
symlinks, it will return None.
408
:param file_id: The file-id of the file
410
raise NotImplementedError(self.get_file_size)
412
def is_executable(self, path, file_id=None):
413
"""Check if a file is executable.
415
:param path: The path that this file can be found at.
416
:param file_id: The handle for this file.
417
These must point to the same object.
419
raise NotImplementedError(self.is_executable)
421
def iter_files_bytes(self, desired_files):
422
"""Iterate through file contents.
424
Files will not necessarily be returned in the order they occur in
425
desired_files. No specific order is guaranteed.
427
Yields pairs of identifier, bytes_iterator. identifier is an opaque
428
value supplied by the caller as part of desired_files. It should
429
uniquely identify the file version in the caller's context. (Examples:
430
an index number or a TreeTransform trans_id.)
432
bytes_iterator is an iterable of bytestrings for the file. The
433
kind of iterable and length of the bytestrings are unspecified, but for
434
this implementation, it is a tuple containing a single bytestring with
435
the complete text of the file.
437
:param desired_files: a list of (path, identifier) pairs
439
for path, identifier in desired_files:
440
# We wrap the string in a tuple so that we can return an iterable
441
# of bytestrings. (Technically, a bytestring is also an iterable
442
# of bytestrings, but iterating through each character is not
444
cur_file = (self.get_file_text(path),)
445
yield identifier, cur_file
447
def get_symlink_target(self, path, file_id=None):
448
"""Get the target for a given file_id.
450
It is assumed that the caller already knows that file_id is referencing
452
:param file_id: Handle for the symlink entry.
453
:param path: The path of the file.
454
If both file_id and path are supplied, an implementation may use
456
:return: The path the symlink points to.
458
raise NotImplementedError(self.get_symlink_target)
460
def get_root_id(self):
461
"""Return the file_id for the root of this tree."""
462
raise NotImplementedError(self.get_root_id)
464
def annotate_iter(self, path, file_id=None,
465
default_revision=_mod_revision.CURRENT_REVISION):
466
"""Return an iterator of revision_id, line tuples.
468
For working trees (and mutable trees in general), the special
469
revision_id 'current:' will be used for lines that are new in this
470
tree, e.g. uncommitted changes.
471
:param file_id: The file to produce an annotated version from
472
:param default_revision: For lines that don't match a basis, mark them
473
with this revision id. Not all implementations will make use of
476
raise NotImplementedError(self.annotate_iter)
478
def _iter_parent_trees(self):
479
"""Iterate through parent trees, defaulting to Tree.revision_tree."""
480
for revision_id in self.get_parent_ids():
482
yield self.revision_tree(revision_id)
483
except errors.NoSuchRevisionInTree:
484
yield self.repository.revision_tree(revision_id)
486
def path2id(self, path):
487
"""Return the id for path in this tree."""
488
raise NotImplementedError(self.path2id)
490
def is_versioned(self, path):
491
"""Check whether path is versioned.
493
:param path: Path to check
496
return self.path2id(path) is not None
498
def find_related_paths_across_trees(self, paths, trees=[],
499
require_versioned=True):
500
"""Find related paths in tree corresponding to specified filenames in any
503
All matches in all trees will be used, and all children of matched
504
directories will be used.
506
:param paths: The filenames to find related paths for (if None, returns
508
:param trees: The trees to find file_ids within
509
:param require_versioned: if true, all specified filenames must occur in
511
:return: a set of paths for the specified filenames and their children
514
raise NotImplementedError(self.find_related_paths_across_trees)
517
"""Lock this tree for multiple read only operations.
519
:return: A breezy.lock.LogicalLockResult.
521
return lock.LogicalLockResult(self.unlock)
523
def revision_tree(self, revision_id):
524
"""Obtain a revision tree for the revision revision_id.
526
The intention of this method is to allow access to possibly cached
527
tree data. Implementors of this method should raise NoSuchRevision if
528
the tree is not locally available, even if they could obtain the
529
tree via a repository or some other means. Callers are responsible
530
for finding the ultimate source for a revision tree.
532
:param revision_id: The revision_id of the requested tree.
534
:raises: NoSuchRevision if the tree cannot be obtained.
536
raise errors.NoSuchRevisionInTree(self, revision_id)
539
"""What files are present in this tree and unknown.
541
:return: an iterator over the unknown files.
548
def filter_unversioned_files(self, paths):
549
"""Filter out paths that are versioned.
551
:return: set of paths.
553
# NB: we specifically *don't* call self.has_filename, because for
554
# WorkingTrees that can indicate files that exist on disk but that
556
return set(p for p in paths if not self.is_versioned(p))
558
def walkdirs(self, prefix=""):
559
"""Walk the contents of this tree from path down.
561
This yields all the data about the contents of a directory at a time.
562
After each directory has been yielded, if the caller has mutated the
563
list to exclude some directories, they are then not descended into.
565
The data yielded is of the form:
566
((directory-relpath, directory-path-from-root, directory-fileid),
567
[(relpath, basename, kind, lstat, path_from_tree_root, file_id,
568
versioned_kind), ...]),
569
- directory-relpath is the containing dirs relpath from prefix
570
- directory-path-from-root is the containing dirs path from /
571
- directory-fileid is the id of the directory if it is versioned.
572
- relpath is the relative path within the subtree being walked.
573
- basename is the basename
574
- kind is the kind of the file now. If unknonwn then the file is not
575
present within the tree - but it may be recorded as versioned. See
577
- lstat is the stat data *if* the file was statted.
578
- path_from_tree_root is the path from the root of the tree.
579
- file_id is the file_id if the entry is versioned.
580
- versioned_kind is the kind of the file as last recorded in the
581
versioning system. If 'unknown' the file is not versioned.
582
One of 'kind' and 'versioned_kind' must not be 'unknown'.
584
:param prefix: Start walking from prefix within the tree rather than
585
at the root. This allows one to walk a subtree but get paths that are
586
relative to a tree rooted higher up.
587
:return: an iterator over the directory data.
589
raise NotImplementedError(self.walkdirs)
591
def supports_content_filtering(self):
594
def _content_filter_stack(self, path=None):
595
"""The stack of content filters for a path if filtering is supported.
597
Readers will be applied in first-to-last order.
598
Writers will be applied in last-to-first order.
599
Either the path or the file-id needs to be provided.
601
:param path: path relative to the root of the tree
603
:return: the list of filters - [] if there are none
605
filter_pref_names = filters._get_registered_names()
606
if len(filter_pref_names) == 0:
608
prefs = next(self.iter_search_rules([path], filter_pref_names))
609
stk = filters._get_filter_stack_for(prefs)
610
if 'filters' in debug.debug_flags:
612
gettext("*** {0} content-filter: {1} => {2!r}").format(path, prefs, stk))
615
def _content_filter_stack_provider(self):
616
"""A function that returns a stack of ContentFilters.
618
The function takes a path (relative to the top of the tree) and a
619
file-id as parameters.
621
:return: None if content filtering is not supported by this tree.
623
if self.supports_content_filtering():
624
return lambda path, file_id: \
625
self._content_filter_stack(path)
629
def iter_search_rules(self, path_names, pref_names=None,
630
_default_searcher=None):
631
"""Find the preferences for filenames in a tree.
633
:param path_names: an iterable of paths to find attributes for.
634
Paths are given relative to the root of the tree.
635
:param pref_names: the list of preferences to lookup - None for all
636
:param _default_searcher: private parameter to assist testing - don't use
637
:return: an iterator of tuple sequences, one per path-name.
638
See _RulesSearcher.get_items for details on the tuple sequence.
640
if _default_searcher is None:
641
_default_searcher = rules._per_user_searcher
642
searcher = self._get_rules_searcher(_default_searcher)
643
if searcher is not None:
644
if pref_names is not None:
645
for path in path_names:
646
yield searcher.get_selected_items(path, pref_names)
648
for path in path_names:
649
yield searcher.get_items(path)
651
def _get_rules_searcher(self, default_searcher):
652
"""Get the RulesSearcher for this tree given the default one."""
653
searcher = default_searcher
656
def archive(self, format, name, root='', subdir=None,
658
"""Create an archive of this tree.
660
:param format: Format name (e.g. 'tar')
661
:param name: target file name
662
:param root: Root directory name (or None)
663
:param subdir: Subdirectory to export (or None)
664
:return: Iterator over archive chunks
666
from .archive import create_archive
667
with self.lock_read():
668
return create_archive(format, self, name, root,
669
subdir, force_mtime=force_mtime)
672
def versionable_kind(cls, kind):
673
"""Check if this tree support versioning a specific file kind."""
674
return (kind in ('file', 'directory', 'symlink', 'tree-reference'))
677
class InterTree(InterObject):
678
"""This class represents operations taking place between two Trees.
680
Its instances have methods like 'compare' and contain references to the
681
source and target trees these operations are to be carried out on.
683
Clients of breezy should not need to use InterTree directly, rather they
684
should use the convenience methods on Tree such as 'Tree.compare()' which
685
will pass through to InterTree as appropriate.
688
# Formats that will be used to test this InterTree. If both are
689
# None, this InterTree will not be tested (e.g. because a complex
691
_matching_from_tree_format = None
692
_matching_to_tree_format = None
697
def is_compatible(kls, source, target):
698
# The default implementation is naive and uses the public API, so
699
# it works for all trees.
702
def _changes_from_entries(self, source_entry, target_entry, source_path,
704
"""Generate a iter_changes tuple between source_entry and target_entry.
706
:param source_entry: An inventory entry from self.source, or None.
707
:param target_entry: An inventory entry from self.target, or None.
708
:param source_path: The path of source_entry.
709
:param target_path: The path of target_entry.
710
:return: A tuple, item 0 of which is an iter_changes result tuple, and
711
item 1 is True if there are any changes in the result tuple.
713
if source_entry is None:
714
if target_entry is None:
716
file_id = target_entry.file_id
718
file_id = source_entry.file_id
719
if source_entry is not None:
720
source_versioned = True
721
source_name = source_entry.name
722
source_parent = source_entry.parent_id
723
source_kind, source_executable, source_stat = \
724
self.source._comparison_data(source_entry, source_path)
726
source_versioned = False
730
source_executable = None
731
if target_entry is not None:
732
target_versioned = True
733
target_name = target_entry.name
734
target_parent = target_entry.parent_id
735
target_kind, target_executable, target_stat = \
736
self.target._comparison_data(target_entry, target_path)
738
target_versioned = False
742
target_executable = None
743
versioned = (source_versioned, target_versioned)
744
kind = (source_kind, target_kind)
745
changed_content = False
746
if source_kind != target_kind:
747
changed_content = True
748
elif source_kind == 'file':
749
if not self.file_content_matches(
750
source_path, target_path,
751
file_id, file_id, source_stat, target_stat):
752
changed_content = True
753
elif source_kind == 'symlink':
754
if (self.source.get_symlink_target(source_path, file_id) !=
755
self.target.get_symlink_target(target_path, file_id)):
756
changed_content = True
757
elif source_kind == 'tree-reference':
758
if (self.source.get_reference_revision(source_path, file_id)
759
!= self.target.get_reference_revision(target_path, file_id)):
760
changed_content = True
761
parent = (source_parent, target_parent)
762
name = (source_name, target_name)
763
executable = (source_executable, target_executable)
764
if (changed_content is not False or versioned[0] != versioned[1] or
765
parent[0] != parent[1] or name[0] != name[1] or
766
executable[0] != executable[1]):
770
return (file_id, (source_path, target_path), changed_content,
771
versioned, parent, name, kind, executable), changes
773
def compare(self, want_unchanged=False, specific_files=None,
774
extra_trees=None, require_versioned=False, include_root=False,
775
want_unversioned=False):
776
"""Return the changes from source to target.
778
:return: A TreeDelta.
779
:param specific_files: An optional list of file paths to restrict the
780
comparison to. When mapping filenames to ids, all matches in all
781
trees (including optional extra_trees) are used, and all children of
782
matched directories are included.
783
:param want_unchanged: An optional boolean requesting the inclusion of
784
unchanged entries in the result.
785
:param extra_trees: An optional list of additional trees to use when
786
mapping the contents of specific_files (paths) to file_ids.
787
:param require_versioned: An optional boolean (defaults to False). When
788
supplied and True all the 'specific_files' must be versioned, or
789
a PathsNotVersionedError will be thrown.
790
:param want_unversioned: Scan for unversioned paths.
792
trees = (self.source,)
793
if extra_trees is not None:
794
trees = trees + tuple(extra_trees)
795
with self.lock_read():
796
return delta._compare_trees(self.source, self.target, want_unchanged,
797
specific_files, include_root, extra_trees=extra_trees,
798
require_versioned=require_versioned,
799
want_unversioned=want_unversioned)
801
def iter_changes(self, include_unchanged=False,
802
specific_files=None, pb=None, extra_trees=[],
803
require_versioned=True, want_unversioned=False):
804
"""Generate an iterator of changes between trees.
807
(file_id, (path_in_source, path_in_target),
808
changed_content, versioned, parent, name, kind,
811
Changed_content is True if the file's content has changed. This
812
includes changes to its kind, and to a symlink's target.
814
versioned, parent, name, kind, executable are tuples of (from, to).
815
If a file is missing in a tree, its kind is None.
817
Iteration is done in parent-to-child order, relative to the target
820
There is no guarantee that all paths are in sorted order: the
821
requirement to expand the search due to renames may result in children
822
that should be found early being found late in the search, after
823
lexically later results have been returned.
824
:param require_versioned: Raise errors.PathsNotVersionedError if a
825
path in the specific_files list is not versioned in one of
826
source, target or extra_trees.
827
:param specific_files: An optional list of file paths to restrict the
828
comparison to. When mapping filenames to ids, all matches in all
829
trees (including optional extra_trees) are used, and all children
830
of matched directories are included. The parents in the target tree
831
of the specific files up to and including the root of the tree are
832
always evaluated for changes too.
833
:param want_unversioned: Should unversioned files be returned in the
834
output. An unversioned file is defined as one with (False, False)
835
for the versioned pair.
840
extra_trees = list(extra_trees)
841
# The ids of items we need to examine to insure delta consistency.
842
precise_file_ids = set()
843
changed_file_ids = []
844
if specific_files == []:
845
target_specific_files = []
846
source_specific_files = []
848
target_specific_files = self.target.find_related_paths_across_trees(
849
specific_files, [self.source] + extra_trees,
850
require_versioned=require_versioned)
851
source_specific_files = self.source.find_related_paths_across_trees(
852
specific_files, [self.target] + extra_trees,
853
require_versioned=require_versioned)
854
if specific_files is not None:
855
# reparented or added entries must have their parents included
856
# so that valid deltas can be created. The seen_parents set
857
# tracks the parents that we need to have.
858
# The seen_dirs set tracks directory entries we've yielded.
859
# After outputting version object in to_entries we set difference
860
# the two seen sets and start checking parents.
864
all_unversioned = sorted([(p.split('/'), p) for p in
866
if specific_files is None or
867
osutils.is_inside_any(specific_files, p)])
868
all_unversioned = collections.deque(all_unversioned)
870
all_unversioned = collections.deque()
872
from_entries_by_dir = list(self.source.iter_entries_by_dir(
873
specific_files=source_specific_files))
874
from_data = dict((e.file_id, (p, e)) for p, e in from_entries_by_dir)
875
to_entries_by_dir = list(self.target.iter_entries_by_dir(
876
specific_files=target_specific_files))
877
num_entries = len(from_entries_by_dir) + len(to_entries_by_dir)
879
# the unversioned path lookup only occurs on real trees - where there
880
# can be extras. So the fake_entry is solely used to look up
881
# executable it values when execute is not supported.
882
fake_entry = TreeFile()
883
for target_path, target_entry in to_entries_by_dir:
884
while (all_unversioned and
885
all_unversioned[0][0] < target_path.split('/')):
886
unversioned_path = all_unversioned.popleft()
887
target_kind, target_executable, target_stat = \
888
self.target._comparison_data(
889
fake_entry, unversioned_path[1])
890
yield (None, (None, unversioned_path[1]), True, (False, False),
892
(None, unversioned_path[0][-1]),
894
(None, target_executable))
895
source_path, source_entry = from_data.get(target_entry.file_id,
897
result, changes = self._changes_from_entries(source_entry,
898
target_entry, source_path=source_path, target_path=target_path)
899
to_paths[result[0]] = result[1][1]
904
pb.update('comparing files', entry_count, num_entries)
905
if changes or include_unchanged:
906
if specific_files is not None:
907
new_parent_id = result[4][1]
908
precise_file_ids.add(new_parent_id)
909
changed_file_ids.append(result[0])
911
# Ensure correct behaviour for reparented/added specific files.
912
if specific_files is not None:
914
if result[6][1] == 'directory':
915
seen_dirs.add(result[0])
916
# Record parents of reparented/added entries.
917
versioned = result[3]
919
if not versioned[0] or parents[0] != parents[1]:
920
seen_parents.add(parents[1])
921
while all_unversioned:
922
# yield any trailing unversioned paths
923
unversioned_path = all_unversioned.popleft()
924
to_kind, to_executable, to_stat = \
925
self.target._comparison_data(fake_entry, unversioned_path[1])
926
yield (None, (None, unversioned_path[1]), True, (False, False),
928
(None, unversioned_path[0][-1]),
930
(None, to_executable))
931
# Yield all remaining source paths
932
for path, from_entry in from_entries_by_dir:
933
file_id = from_entry.file_id
934
if file_id in to_paths:
937
to_path = find_previous_path(self.source, self.target, path)
940
pb.update('comparing files', entry_count, num_entries)
941
versioned = (True, False)
942
parent = (from_entry.parent_id, None)
943
name = (from_entry.name, None)
944
from_kind, from_executable, stat_value = \
945
self.source._comparison_data(from_entry, path)
946
kind = (from_kind, None)
947
executable = (from_executable, None)
948
changed_content = from_kind is not None
949
# the parent's path is necessarily known at this point.
950
changed_file_ids.append(file_id)
951
yield(file_id, (path, to_path), changed_content, versioned, parent,
952
name, kind, executable)
953
changed_file_ids = set(changed_file_ids)
954
if specific_files is not None:
955
for result in self._handle_precise_ids(precise_file_ids,
959
def _get_entry(self, tree, path):
960
"""Get an inventory entry from a tree, with missing entries as None.
962
If the tree raises NotImplementedError on accessing .inventory, then
963
this is worked around using iter_entries_by_dir on just the file id
966
:param tree: The tree to lookup the entry in.
967
:param file_id: The file_id to lookup.
969
# No inventory available.
971
iterator = tree.iter_entries_by_dir(specific_files=[path])
972
return next(iterator)[1]
973
except StopIteration:
976
def _handle_precise_ids(self, precise_file_ids, changed_file_ids,
977
discarded_changes=None):
978
"""Fill out a partial iter_changes to be consistent.
980
:param precise_file_ids: The file ids of parents that were seen during
982
:param changed_file_ids: The file ids of already emitted items.
983
:param discarded_changes: An optional dict of precalculated
984
iter_changes items which the partial iter_changes had not output
986
:return: A generator of iter_changes items to output.
988
# process parents of things that had changed under the users
989
# requested paths to prevent incorrect paths or parent ids which
990
# aren't in the tree.
991
while precise_file_ids:
992
precise_file_ids.discard(None)
993
# Don't emit file_ids twice
994
precise_file_ids.difference_update(changed_file_ids)
995
if not precise_file_ids:
997
# If the there was something at a given output path in source, we
998
# have to include the entry from source in the delta, or we would
999
# be putting this entry into a used path.
1001
for parent_id in precise_file_ids:
1003
paths.append(self.target.id2path(parent_id))
1004
except errors.NoSuchId:
1005
# This id has been dragged in from the source by delta
1006
# expansion and isn't present in target at all: we don't
1007
# need to check for path collisions on it.
1010
old_id = self.source.path2id(path)
1011
precise_file_ids.add(old_id)
1012
precise_file_ids.discard(None)
1013
current_ids = precise_file_ids
1014
precise_file_ids = set()
1015
# We have to emit all of precise_file_ids that have been altered.
1016
# We may have to output the children of some of those ids if any
1017
# directories have stopped being directories.
1018
for file_id in current_ids:
1020
if discarded_changes:
1021
result = discarded_changes.get(file_id)
1027
source_path = self.source.id2path(file_id)
1028
except errors.NoSuchId:
1032
source_entry = self._get_entry(
1033
self.source, source_path)
1035
target_path = self.target.id2path(file_id)
1036
except errors.NoSuchId:
1040
target_entry = self._get_entry(
1041
self.target, target_path)
1042
result, changes = self._changes_from_entries(
1043
source_entry, target_entry, source_path, target_path)
1046
# Get this parents parent to examine.
1047
new_parent_id = result[4][1]
1048
precise_file_ids.add(new_parent_id)
1050
if (result[6][0] == 'directory' and
1051
result[6][1] != 'directory'):
1052
# This stopped being a directory, the old children have
1054
if source_entry is None:
1055
# Reusing a discarded change.
1056
source_entry = self._get_entry(
1057
self.source, result[1][0])
1058
precise_file_ids.update(
1060
for child in self.source.iter_child_entries(result[1][0]))
1061
changed_file_ids.add(result[0])
1064
def file_content_matches(
1065
self, source_path, target_path,
1066
source_file_id=None, target_file_id=None,
1067
source_stat=None, target_stat=None):
1068
"""Check if two files are the same in the source and target trees.
1070
This only checks that the contents of the files are the same,
1071
it does not touch anything else.
1073
:param source_path: Path of the file in the source tree
1074
:param target_path: Path of the file in the target tree
1075
:param source_file_id: Optional file id of the file in the source tree
1076
:param target_file_id: Optional file id of the file in the target tree
1077
:param source_stat: Optional stat value of the file in the source tree
1078
:param target_stat: Optional stat value of the file in the target tree
1079
:return: Boolean indicating whether the files have the same contents
1081
with self.lock_read():
1082
source_verifier_kind, source_verifier_data = (
1083
self.source.get_file_verifier(
1084
source_path, source_file_id, source_stat))
1085
target_verifier_kind, target_verifier_data = (
1086
self.target.get_file_verifier(
1087
target_path, target_file_id, target_stat))
1088
if source_verifier_kind == target_verifier_kind:
1089
return (source_verifier_data == target_verifier_data)
1090
# Fall back to SHA1 for now
1091
if source_verifier_kind != "SHA1":
1092
source_sha1 = self.source.get_file_sha1(
1093
source_path, source_file_id, source_stat)
1095
source_sha1 = source_verifier_data
1096
if target_verifier_kind != "SHA1":
1097
target_sha1 = self.target.get_file_sha1(
1098
target_path, target_file_id, target_stat)
1100
target_sha1 = target_verifier_data
1101
return (source_sha1 == target_sha1)
1104
InterTree.register_optimiser(InterTree)
1107
class MultiWalker(object):
1108
"""Walk multiple trees simultaneously, getting combined results."""
1110
# Note: This could be written to not assume you can do out-of-order
1111
# lookups. Instead any nodes that don't match in all trees could be
1112
# marked as 'deferred', and then returned in the final cleanup loop.
1113
# For now, I think it is "nicer" to return things as close to the
1114
# "master_tree" order as we can.
1116
def __init__(self, master_tree, other_trees):
1117
"""Create a new MultiWalker.
1119
All trees being walked must implement "iter_entries_by_dir()", such
1120
that they yield (path, object) tuples, where that object will have a
1121
'.file_id' member, that can be used to check equality.
1123
:param master_tree: All trees will be 'slaved' to the master_tree such
1124
that nodes in master_tree will be used as 'first-pass' sync points.
1125
Any nodes that aren't in master_tree will be merged in a second
1127
:param other_trees: A list of other trees to walk simultaneously.
1129
self._master_tree = master_tree
1130
self._other_trees = other_trees
1132
# Keep track of any nodes that were properly processed just out of
1133
# order, that way we don't return them at the end, we don't have to
1134
# track *all* processed file_ids, just the out-of-order ones
1135
self._out_of_order_processed = set()
1138
def _step_one(iterator):
1139
"""Step an iter_entries_by_dir iterator.
1141
:return: (has_more, path, ie)
1142
If has_more is False, path and ie will be None.
1145
path, ie = next(iterator)
1146
except StopIteration:
1147
return False, None, None
1149
return True, path, ie
1152
def _lt_path_by_dirblock(path1, path2):
1153
"""Compare two paths based on what directory they are in.
1155
This generates a sort order, such that all children of a directory are
1156
sorted together, and grandchildren are in the same order as the
1157
children appear. But all grandchildren come after all children.
1159
:param path1: first path
1160
:param path2: the second path
1161
:return: negative number if ``path1`` comes first,
1162
0 if paths are equal
1163
and a positive number if ``path2`` sorts first
1165
# Shortcut this special case
1168
# This is stolen from _dirstate_helpers_py.py, only switching it to
1169
# Unicode objects. Consider using encode_utf8() and then using the
1170
# optimized versions, or maybe writing optimized unicode versions.
1171
if not isinstance(path1, text_type):
1172
raise TypeError("'path1' must be a unicode string, not %s: %r"
1173
% (type(path1), path1))
1174
if not isinstance(path2, text_type):
1175
raise TypeError("'path2' must be a unicode string, not %s: %r"
1176
% (type(path2), path2))
1177
return (MultiWalker._path_to_key(path1) <
1178
MultiWalker._path_to_key(path2))
1181
def _path_to_key(path):
1182
dirname, basename = osutils.split(path)
1183
return (dirname.split(u'/'), basename)
1185
def _lookup_by_file_id(self, extra_entries, other_tree, file_id):
1186
"""Lookup an inventory entry by file_id.
1188
This is called when an entry is missing in the normal order.
1189
Generally this is because a file was either renamed, or it was
1190
deleted/added. If the entry was found in the inventory and not in
1191
extra_entries, it will be added to self._out_of_order_processed
1193
:param extra_entries: A dictionary of {file_id: (path, ie)}. This
1194
should be filled with entries that were found before they were
1195
used. If file_id is present, it will be removed from the
1197
:param other_tree: The Tree to search, in case we didn't find the entry
1199
:param file_id: The file_id to look for
1200
:return: (path, ie) if found or (None, None) if not present.
1202
if file_id in extra_entries:
1203
return extra_entries.pop(file_id)
1204
# TODO: Is id2path better as the first call, or is
1205
# inventory[file_id] better as a first check?
1207
cur_path = other_tree.id2path(file_id)
1208
except errors.NoSuchId:
1210
if cur_path is None:
1213
self._out_of_order_processed.add(file_id)
1214
cur_ie = other_tree.root_inventory.get_entry(file_id)
1215
return (cur_path, cur_ie)
1218
"""Match up the values in the different trees."""
1219
for result in self._walk_master_tree():
1221
self._finish_others()
1222
for result in self._walk_others():
1225
def _walk_master_tree(self):
1226
"""First pass, walk all trees in lock-step.
1228
When we are done, all nodes in the master_tree will have been
1229
processed. _other_walkers, _other_entries, and _others_extra will be
1230
set on 'self' for future processing.
1232
# This iterator has the most "inlining" done, because it tends to touch
1233
# every file in the tree, while the others only hit nodes that don't
1235
master_iterator = self._master_tree.iter_entries_by_dir()
1237
other_walkers = [other.iter_entries_by_dir()
1238
for other in self._other_trees]
1239
other_entries = [self._step_one(walker) for walker in other_walkers]
1240
# Track extra nodes in the other trees
1241
others_extra = [{} for _ in range(len(self._other_trees))]
1243
master_has_more = True
1244
step_one = self._step_one
1245
lookup_by_file_id = self._lookup_by_file_id
1246
out_of_order_processed = self._out_of_order_processed
1248
while master_has_more:
1249
(master_has_more, path, master_ie) = step_one(master_iterator)
1250
if not master_has_more:
1253
file_id = master_ie.file_id
1255
other_values_append = other_values.append
1256
next_other_entries = []
1257
next_other_entries_append = next_other_entries.append
1258
for idx, (other_has_more, other_path, other_ie) in enumerate(other_entries):
1259
if not other_has_more:
1260
other_values_append(lookup_by_file_id(
1261
others_extra[idx], self._other_trees[idx], file_id))
1262
next_other_entries_append((False, None, None))
1263
elif file_id == other_ie.file_id:
1264
# This is the critical code path, as most of the entries
1265
# should match between most trees.
1266
other_values_append((other_path, other_ie))
1267
next_other_entries_append(step_one(other_walkers[idx]))
1269
# This walker did not match, step it until it either
1270
# matches, or we know we are past the current walker.
1271
other_walker = other_walkers[idx]
1272
other_extra = others_extra[idx]
1273
while (other_has_more and
1274
self._lt_path_by_dirblock(other_path, path)):
1275
other_file_id = other_ie.file_id
1276
if other_file_id not in out_of_order_processed:
1277
other_extra[other_file_id] = (other_path, other_ie)
1278
other_has_more, other_path, other_ie = \
1279
step_one(other_walker)
1280
if other_has_more and other_ie.file_id == file_id:
1281
# We ended up walking to this point, match and step
1283
other_values_append((other_path, other_ie))
1284
other_has_more, other_path, other_ie = \
1285
step_one(other_walker)
1287
# This record isn't in the normal order, see if it
1289
other_values_append(lookup_by_file_id(
1290
other_extra, self._other_trees[idx], file_id))
1291
next_other_entries_append((other_has_more, other_path,
1293
other_entries = next_other_entries
1295
# We've matched all the walkers, yield this datapoint
1296
yield path, file_id, master_ie, other_values
1297
self._other_walkers = other_walkers
1298
self._other_entries = other_entries
1299
self._others_extra = others_extra
1301
def _finish_others(self):
1302
"""Finish walking the other iterators, so we get all entries."""
1303
for idx, info in enumerate(self._other_entries):
1304
other_extra = self._others_extra[idx]
1305
(other_has_more, other_path, other_ie) = info
1306
while other_has_more:
1307
other_file_id = other_ie.file_id
1308
if other_file_id not in self._out_of_order_processed:
1309
other_extra[other_file_id] = (other_path, other_ie)
1310
other_has_more, other_path, other_ie = \
1311
self._step_one(self._other_walkers[idx])
1312
del self._other_entries
1314
def _walk_others(self):
1315
"""Finish up by walking all the 'deferred' nodes."""
1316
# TODO: One alternative would be to grab all possible unprocessed
1317
# file_ids, and then sort by path, and then yield them. That
1318
# might ensure better ordering, in case a caller strictly
1319
# requires parents before children.
1320
for idx, other_extra in enumerate(self._others_extra):
1321
others = sorted(viewvalues(other_extra),
1322
key=lambda x: self._path_to_key(x[0]))
1323
for other_path, other_ie in others:
1324
file_id = other_ie.file_id
1325
# We don't need to check out_of_order_processed here, because
1326
# the lookup_by_file_id will be removing anything processed
1327
# from the extras cache
1328
other_extra.pop(file_id)
1329
other_values = [(None, None)] * idx
1330
other_values.append((other_path, other_ie))
1331
for alt_idx, alt_extra in enumerate(self._others_extra[idx + 1:]):
1332
alt_idx = alt_idx + idx + 1
1333
alt_extra = self._others_extra[alt_idx]
1334
alt_tree = self._other_trees[alt_idx]
1335
other_values.append(self._lookup_by_file_id(
1336
alt_extra, alt_tree, file_id))
1337
yield other_path, file_id, None, other_values
1340
def find_previous_paths(from_tree, to_tree, paths):
1341
"""Find previous tree paths.
1343
:param from_tree: From tree
1344
:param to_tree: To tree
1345
:param paths: Iterable over paths to search for
1346
:return: Dictionary mapping from from_tree paths to paths in to_tree, or
1347
None if there is no equivalent path.
1351
ret[path] = find_previous_path(from_tree, to_tree, path)
1355
def find_previous_path(from_tree, to_tree, path, file_id=None):
1356
"""Find previous tree path.
1358
:param from_tree: From tree
1359
:param to_tree: To tree
1360
:param path: Path to search for
1361
:return: path in to_tree, or None if there is no equivalent path.
1364
file_id = from_tree.path2id(path)
1366
raise errors.NoSuchFile(path)
1368
return to_tree.id2path(file_id)
1369
except errors.NoSuchId:
1373
def get_canonical_path(tree, path, normalize):
1374
"""Find the canonical path of an item, ignoring case.
1376
:param tree: Tree to traverse
1377
:param path: Case-insensitive path to look up
1378
:param normalize: Function to normalize a filename for comparison
1379
:return: The canonical path
1382
cur_id = tree.get_root_id()
1384
bit_iter = iter(path.split("/"))
1385
for elt in bit_iter:
1386
lelt = normalize(elt)
1389
for child in tree.iter_child_entries(cur_path, cur_id):
1391
if child.name == elt:
1392
# if we found an exact match, we can stop now; if
1393
# we found an approximate match we need to keep
1394
# searching because there might be an exact match
1396
cur_id = child.file_id
1397
new_path = osutils.pathjoin(cur_path, child.name)
1399
elif normalize(child.name) == lelt:
1400
cur_id = child.file_id
1401
new_path = osutils.pathjoin(cur_path, child.name)
1402
except errors.NoSuchId:
1403
# before a change is committed we can see this error...
1405
except errors.NotADirectory:
1410
# got to the end of this directory and no entries matched.
1411
# Return what matched so far, plus the rest as specified.
1412
cur_path = osutils.pathjoin(cur_path, elt, *list(bit_iter))