1
# Copyright (C) 2005-2011 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Tree classes, representing directory at point in time.
20
from __future__ import absolute_import
24
from .lazy_import import lazy_import
25
lazy_import(globals(), """
29
conflicts as _mod_conflicts,
35
revision as _mod_revision,
39
from breezy.bzr import (
42
from breezy.i18n import gettext
45
from .decorators import needs_read_lock
46
from .inter import InterObject
53
"""Abstract file tree.
55
There are several subclasses:
57
* `WorkingTree` exists as files on disk editable by the user.
59
* `RevisionTree` is a tree as recorded at some point in the past.
61
Trees can be compared, etc, regardless of whether they are working
62
trees or versioned trees.
65
def has_versioned_directories(self):
66
"""Whether this tree can contain explicitly versioned directories.
68
This defaults to True, but some implementations may want to override
73
def changes_from(self, other, want_unchanged=False, specific_files=None,
74
extra_trees=None, require_versioned=False, include_root=False,
75
want_unversioned=False):
76
"""Return a TreeDelta of the changes from other to this tree.
78
:param other: A tree to compare with.
79
:param specific_files: An optional list of file paths to restrict the
80
comparison to. When mapping filenames to ids, all matches in all
81
trees (including optional extra_trees) are used, and all children of
82
matched directories are included.
83
:param want_unchanged: An optional boolean requesting the inclusion of
84
unchanged entries in the result.
85
:param extra_trees: An optional list of additional trees to use when
86
mapping the contents of specific_files (paths) to file_ids.
87
:param require_versioned: An optional boolean (defaults to False). When
88
supplied and True all the 'specific_files' must be versioned, or
89
a PathsNotVersionedError will be thrown.
90
:param want_unversioned: Scan for unversioned paths.
92
The comparison will be performed by an InterTree object looked up on
95
# Martin observes that Tree.changes_from returns a TreeDelta and this
96
# may confuse people, because the class name of the returned object is
97
# a synonym of the object referenced in the method name.
98
return InterTree.get(other, self).compare(
99
want_unchanged=want_unchanged,
100
specific_files=specific_files,
101
extra_trees=extra_trees,
102
require_versioned=require_versioned,
103
include_root=include_root,
104
want_unversioned=want_unversioned,
107
def iter_changes(self, from_tree, include_unchanged=False,
108
specific_files=None, pb=None, extra_trees=None,
109
require_versioned=True, want_unversioned=False):
110
"""See InterTree.iter_changes"""
111
intertree = InterTree.get(from_tree, self)
112
return intertree.iter_changes(include_unchanged, specific_files, pb,
113
extra_trees, require_versioned, want_unversioned=want_unversioned)
116
"""Get a list of the conflicts in the tree.
118
Each conflict is an instance of breezy.conflicts.Conflict.
120
return _mod_conflicts.ConflictList()
123
"""For trees that can have unversioned files, return all such paths."""
126
def get_parent_ids(self):
127
"""Get the parent ids for this tree.
129
:return: a list of parent ids. [] is returned to indicate
130
a tree with no parents.
131
:raises: BzrError if the parents are not known.
133
raise NotImplementedError(self.get_parent_ids)
135
def has_filename(self, filename):
136
"""True if the tree has given filename."""
137
raise NotImplementedError(self.has_filename)
139
def has_id(self, file_id):
140
raise NotImplementedError(self.has_id)
142
def has_or_had_id(self, file_id):
143
raise NotImplementedError(self.has_or_had_id)
145
def is_ignored(self, filename):
146
"""Check whether the filename is ignored by this tree.
148
:param filename: The relative filename within the tree.
149
:return: True if the filename is ignored.
153
def all_file_ids(self):
154
"""Iterate through all file ids, including ids for missing files."""
155
raise NotImplementedError(self.all_file_ids)
157
def id2path(self, file_id):
158
"""Return the path for a file id.
162
raise NotImplementedError(self.id2path)
164
def iter_entries_by_dir(self, specific_file_ids=None, yield_parents=False):
165
"""Walk the tree in 'by_dir' order.
167
This will yield each entry in the tree as a (path, entry) tuple.
168
The order that they are yielded is:
170
Directories are walked in a depth-first lexicographical order,
171
however, whenever a directory is reached, all of its direct child
172
nodes are yielded in lexicographical order before yielding the
175
For example, in the tree::
185
The yield order (ignoring root) would be::
187
a, f, a/b, a/d, a/b/c, a/d/e, f/g
189
:param yield_parents: If True, yield the parents from the root leading
190
down to specific_file_ids that have been requested. This has no
191
impact if specific_file_ids is None.
193
raise NotImplementedError(self.iter_entries_by_dir)
195
def iter_child_entries(self, file_id, path=None):
196
"""Iterate over the children of a directory or tree reference.
198
:param file_id: File id of the directory/tree-reference
199
:param path: Optional path of the directory
200
:raise NoSuchId: When the file_id does not exist
201
:return: Iterator over entries in the directory
203
raise NotImplementedError(self.iter_child_entries)
205
def list_files(self, include_root=False, from_dir=None, recursive=True):
206
"""List all files in this tree.
208
:param include_root: Whether to include the entry for the tree root
209
:param from_dir: Directory under which to list files
210
:param recursive: Whether to list files recursively
211
:return: iterator over tuples of (path, versioned, kind, file_id,
214
raise NotImplementedError(self.list_files)
216
def iter_references(self):
217
if self.supports_tree_reference():
218
for path, entry in self.iter_entries_by_dir():
219
if entry.kind == 'tree-reference':
220
yield path, entry.file_id
222
def kind(self, file_id):
223
raise NotImplementedError("Tree subclass %s must implement kind"
224
% self.__class__.__name__)
226
def stored_kind(self, file_id):
227
"""File kind stored for this file_id.
229
May not match kind on disk for working trees. Always available
230
for versioned files, even when the file itself is missing.
232
return self.kind(file_id)
234
def path_content_summary(self, path):
235
"""Get a summary of the information about path.
237
All the attributes returned are for the canonical form, not the
238
convenient form (if content filters are in use.)
240
:param path: A relative path within the tree.
241
:return: A tuple containing kind, size, exec, sha1-or-link.
242
Kind is always present (see tree.kind()).
243
size is present if kind is file and the size of the
244
canonical form can be cheaply determined, None otherwise.
245
exec is None unless kind is file and the platform supports the 'x'
247
sha1-or-link is the link target if kind is symlink, or the sha1 if
248
it can be obtained without reading the file.
250
raise NotImplementedError(self.path_content_summary)
252
def get_reference_revision(self, file_id, path=None):
253
raise NotImplementedError("Tree subclass %s must implement "
254
"get_reference_revision"
255
% self.__class__.__name__)
257
def _comparison_data(self, entry, path):
258
"""Return a tuple of kind, executable, stat_value for a file.
260
entry may be None if there is no inventory entry for the file, but
261
path must always be supplied.
263
kind is None if there is no file present (even if an inventory id is
264
present). executable is False for non-file entries.
266
raise NotImplementedError(self._comparison_data)
268
def _file_size(self, entry, stat_value):
269
raise NotImplementedError(self._file_size)
271
def get_file(self, file_id, path=None):
272
"""Return a file object for the file file_id in the tree.
274
If both file_id and path are defined, it is implementation defined as
275
to which one is used.
277
raise NotImplementedError(self.get_file)
279
def get_file_with_stat(self, file_id, path=None):
280
"""Get a file handle and stat object for file_id.
282
The default implementation returns (self.get_file, None) for backwards
285
:param file_id: The file id to read.
286
:param path: The path of the file, if it is known.
287
:return: A tuple (file_handle, stat_value_or_None). If the tree has
288
no stat facility, or need for a stat cache feedback during commit,
289
it may return None for the second element of the tuple.
291
return (self.get_file(file_id, path), None)
293
def get_file_text(self, file_id, path=None):
294
"""Return the byte content of a file.
296
:param file_id: The file_id of the file.
297
:param path: The path of the file.
299
If both file_id and path are supplied, an implementation may use
302
:returns: A single byte string for the whole file.
304
my_file = self.get_file(file_id, path)
306
return my_file.read()
310
def get_file_lines(self, file_id, path=None):
311
"""Return the content of a file, as lines.
313
:param file_id: The file_id of the file.
314
:param path: The path of the file.
316
If both file_id and path are supplied, an implementation may use
319
return osutils.split_lines(self.get_file_text(file_id, path))
321
def get_file_verifier(self, file_id, path=None, stat_value=None):
322
"""Return a verifier for a file.
324
The default implementation returns a sha1.
326
:param file_id: The handle for this file.
327
:param path: The path that this file can be found at.
328
These must point to the same object.
329
:param stat_value: Optional stat value for the object
330
:return: Tuple with verifier name and verifier data
332
return ("SHA1", self.get_file_sha1(file_id, path=path,
333
stat_value=stat_value))
335
def get_file_sha1(self, file_id, path=None, stat_value=None):
336
"""Return the SHA1 file for a file.
338
:note: callers should use get_file_verifier instead
339
where possible, as the underlying repository implementation may
340
have quicker access to a non-sha1 verifier.
342
:param file_id: The handle for this file.
343
:param path: The path that this file can be found at.
344
These must point to the same object.
345
:param stat_value: Optional stat value for the object
347
raise NotImplementedError(self.get_file_sha1)
349
def get_file_mtime(self, file_id, path=None):
350
"""Return the modification time for a file.
352
:param file_id: The handle for this file.
353
:param path: The path that this file can be found at.
354
These must point to the same object.
356
raise NotImplementedError(self.get_file_mtime)
358
def get_file_size(self, file_id):
359
"""Return the size of a file in bytes.
361
This applies only to regular files. If invoked on directories or
362
symlinks, it will return None.
363
:param file_id: The file-id of the file
365
raise NotImplementedError(self.get_file_size)
367
def is_executable(self, file_id, path=None):
368
"""Check if a file is executable.
370
:param file_id: The handle for this file.
371
:param path: The path that this file can be found at.
372
These must point to the same object.
374
raise NotImplementedError(self.is_executable)
376
def iter_files_bytes(self, desired_files):
377
"""Iterate through file contents.
379
Files will not necessarily be returned in the order they occur in
380
desired_files. No specific order is guaranteed.
382
Yields pairs of identifier, bytes_iterator. identifier is an opaque
383
value supplied by the caller as part of desired_files. It should
384
uniquely identify the file version in the caller's context. (Examples:
385
an index number or a TreeTransform trans_id.)
387
bytes_iterator is an iterable of bytestrings for the file. The
388
kind of iterable and length of the bytestrings are unspecified, but for
389
this implementation, it is a tuple containing a single bytestring with
390
the complete text of the file.
392
:param desired_files: a list of (file_id, identifier) pairs
394
for file_id, identifier in desired_files:
395
# We wrap the string in a tuple so that we can return an iterable
396
# of bytestrings. (Technically, a bytestring is also an iterable
397
# of bytestrings, but iterating through each character is not
399
cur_file = (self.get_file_text(file_id),)
400
yield identifier, cur_file
402
def get_symlink_target(self, file_id, path=None):
403
"""Get the target for a given file_id.
405
It is assumed that the caller already knows that file_id is referencing
407
:param file_id: Handle for the symlink entry.
408
:param path: The path of the file.
409
If both file_id and path are supplied, an implementation may use
411
:return: The path the symlink points to.
413
raise NotImplementedError(self.get_symlink_target)
415
def get_root_id(self):
416
"""Return the file_id for the root of this tree."""
417
raise NotImplementedError(self.get_root_id)
419
def annotate_iter(self, file_id,
420
default_revision=_mod_revision.CURRENT_REVISION):
421
"""Return an iterator of revision_id, line tuples.
423
For working trees (and mutable trees in general), the special
424
revision_id 'current:' will be used for lines that are new in this
425
tree, e.g. uncommitted changes.
426
:param file_id: The file to produce an annotated version from
427
:param default_revision: For lines that don't match a basis, mark them
428
with this revision id. Not all implementations will make use of
431
raise NotImplementedError(self.annotate_iter)
433
def _get_plan_merge_data(self, file_id, other, base):
434
from .bzr import versionedfile
435
vf = versionedfile._PlanMergeVersionedFile(file_id)
436
last_revision_a = self._get_file_revision(file_id, vf, 'this:')
437
last_revision_b = other._get_file_revision(file_id, vf, 'other:')
439
last_revision_base = None
441
last_revision_base = base._get_file_revision(file_id, vf, 'base:')
442
return vf, last_revision_a, last_revision_b, last_revision_base
444
def plan_file_merge(self, file_id, other, base=None):
445
"""Generate a merge plan based on annotations.
447
If the file contains uncommitted changes in this tree, they will be
448
attributed to the 'current:' pseudo-revision. If the file contains
449
uncommitted changes in the other tree, they will be assigned to the
450
'other:' pseudo-revision.
452
data = self._get_plan_merge_data(file_id, other, base)
453
vf, last_revision_a, last_revision_b, last_revision_base = data
454
return vf.plan_merge(last_revision_a, last_revision_b,
457
def plan_file_lca_merge(self, file_id, other, base=None):
458
"""Generate a merge plan based lca-newness.
460
If the file contains uncommitted changes in this tree, they will be
461
attributed to the 'current:' pseudo-revision. If the file contains
462
uncommitted changes in the other tree, they will be assigned to the
463
'other:' pseudo-revision.
465
data = self._get_plan_merge_data(file_id, other, base)
466
vf, last_revision_a, last_revision_b, last_revision_base = data
467
return vf.plan_lca_merge(last_revision_a, last_revision_b,
470
def _iter_parent_trees(self):
471
"""Iterate through parent trees, defaulting to Tree.revision_tree."""
472
for revision_id in self.get_parent_ids():
474
yield self.revision_tree(revision_id)
475
except errors.NoSuchRevisionInTree:
476
yield self.repository.revision_tree(revision_id)
478
def _get_file_revision(self, file_id, vf, tree_revision):
479
"""Ensure that file_id, tree_revision is in vf to plan the merge."""
481
if getattr(self, '_repository', None) is None:
482
last_revision = tree_revision
483
parent_keys = [(file_id, t.get_file_revision(file_id)) for t in
484
self._iter_parent_trees()]
485
vf.add_lines((file_id, last_revision), parent_keys,
486
self.get_file_lines(file_id))
487
repo = self.branch.repository
490
last_revision = self.get_file_revision(file_id)
491
base_vf = self._repository.texts
492
if base_vf not in vf.fallback_versionedfiles:
493
vf.fallback_versionedfiles.append(base_vf)
496
def _check_retrieved(self, ie, f):
499
fp = osutils.fingerprint_file(f)
502
if ie.text_size is not None:
503
if ie.text_size != fp['size']:
504
raise errors.BzrError(
505
"mismatched size for file %r in %r" %
506
(ie.file_id, self._store),
507
["inventory expects %d bytes" % ie.text_size,
508
"file is actually %d bytes" % fp['size'],
509
"store is probably damaged/corrupt"])
511
if ie.text_sha1 != fp['sha1']:
512
raise errors.BzrError("wrong SHA-1 for file %r in %r" %
513
(ie.file_id, self._store),
514
["inventory expects %s" % ie.text_sha1,
515
"file is actually %s" % fp['sha1'],
516
"store is probably damaged/corrupt"])
518
def path2id(self, path):
519
"""Return the id for path in this tree."""
520
raise NotImplementedError(self.path2id)
522
def paths2ids(self, paths, trees=[], require_versioned=True):
523
"""Return all the ids that can be reached by walking from paths.
525
Each path is looked up in this tree and any extras provided in
526
trees, and this is repeated recursively: the children in an extra tree
527
of a directory that has been renamed under a provided path in this tree
528
are all returned, even if none exist under a provided path in this
529
tree, and vice versa.
531
:param paths: An iterable of paths to start converting to ids from.
532
Alternatively, if paths is None, no ids should be calculated and None
533
will be returned. This is offered to make calling the api unconditional
534
for code that *might* take a list of files.
535
:param trees: Additional trees to consider.
536
:param require_versioned: If False, do not raise NotVersionedError if
537
an element of paths is not versioned in this tree and all of trees.
539
return find_ids_across_trees(paths, [self] + list(trees), require_versioned)
541
def iter_children(self, file_id):
542
"""Iterate over the file ids of the children of an entry.
544
:param file_id: File id of the entry
545
:return: Iterator over child file ids.
547
raise NotImplementedError(self.iter_children)
550
"""Lock this tree for multiple read only operations.
552
:return: A breezy.lock.LogicalLockResult.
556
def revision_tree(self, revision_id):
557
"""Obtain a revision tree for the revision revision_id.
559
The intention of this method is to allow access to possibly cached
560
tree data. Implementors of this method should raise NoSuchRevision if
561
the tree is not locally available, even if they could obtain the
562
tree via a repository or some other means. Callers are responsible
563
for finding the ultimate source for a revision tree.
565
:param revision_id: The revision_id of the requested tree.
567
:raises: NoSuchRevision if the tree cannot be obtained.
569
raise errors.NoSuchRevisionInTree(self, revision_id)
572
"""What files are present in this tree and unknown.
574
:return: an iterator over the unknown files.
581
def filter_unversioned_files(self, paths):
582
"""Filter out paths that are versioned.
584
:return: set of paths.
586
raise NotImplementedError(self.filter_unversioned_files)
588
def walkdirs(self, prefix=""):
589
"""Walk the contents of this tree from path down.
591
This yields all the data about the contents of a directory at a time.
592
After each directory has been yielded, if the caller has mutated the
593
list to exclude some directories, they are then not descended into.
595
The data yielded is of the form:
596
((directory-relpath, directory-path-from-root, directory-fileid),
597
[(relpath, basename, kind, lstat, path_from_tree_root, file_id,
598
versioned_kind), ...]),
599
- directory-relpath is the containing dirs relpath from prefix
600
- directory-path-from-root is the containing dirs path from /
601
- directory-fileid is the id of the directory if it is versioned.
602
- relpath is the relative path within the subtree being walked.
603
- basename is the basename
604
- kind is the kind of the file now. If unknonwn then the file is not
605
present within the tree - but it may be recorded as versioned. See
607
- lstat is the stat data *if* the file was statted.
608
- path_from_tree_root is the path from the root of the tree.
609
- file_id is the file_id if the entry is versioned.
610
- versioned_kind is the kind of the file as last recorded in the
611
versioning system. If 'unknown' the file is not versioned.
612
One of 'kind' and 'versioned_kind' must not be 'unknown'.
614
:param prefix: Start walking from prefix within the tree rather than
615
at the root. This allows one to walk a subtree but get paths that are
616
relative to a tree rooted higher up.
617
:return: an iterator over the directory data.
619
raise NotImplementedError(self.walkdirs)
621
def supports_content_filtering(self):
624
def _content_filter_stack(self, path=None, file_id=None):
625
"""The stack of content filters for a path if filtering is supported.
627
Readers will be applied in first-to-last order.
628
Writers will be applied in last-to-first order.
629
Either the path or the file-id needs to be provided.
631
:param path: path relative to the root of the tree
633
:param file_id: file_id or None if unknown
634
:return: the list of filters - [] if there are none
636
filter_pref_names = filters._get_registered_names()
637
if len(filter_pref_names) == 0:
640
path = self.id2path(file_id)
641
prefs = next(self.iter_search_rules([path], filter_pref_names))
642
stk = filters._get_filter_stack_for(prefs)
643
if 'filters' in debug.debug_flags:
644
trace.note(gettext("*** {0} content-filter: {1} => {2!r}").format(path,prefs,stk))
647
def _content_filter_stack_provider(self):
648
"""A function that returns a stack of ContentFilters.
650
The function takes a path (relative to the top of the tree) and a
651
file-id as parameters.
653
:return: None if content filtering is not supported by this tree.
655
if self.supports_content_filtering():
656
return lambda path, file_id: \
657
self._content_filter_stack(path, file_id)
661
def iter_search_rules(self, path_names, pref_names=None,
662
_default_searcher=None):
663
"""Find the preferences for filenames in a tree.
665
:param path_names: an iterable of paths to find attributes for.
666
Paths are given relative to the root of the tree.
667
:param pref_names: the list of preferences to lookup - None for all
668
:param _default_searcher: private parameter to assist testing - don't use
669
:return: an iterator of tuple sequences, one per path-name.
670
See _RulesSearcher.get_items for details on the tuple sequence.
672
if _default_searcher is None:
673
_default_searcher = rules._per_user_searcher
674
searcher = self._get_rules_searcher(_default_searcher)
675
if searcher is not None:
676
if pref_names is not None:
677
for path in path_names:
678
yield searcher.get_selected_items(path, pref_names)
680
for path in path_names:
681
yield searcher.get_items(path)
683
def _get_rules_searcher(self, default_searcher):
684
"""Get the RulesSearcher for this tree given the default one."""
685
searcher = default_searcher
689
class InventoryTree(Tree):
690
"""A tree that relies on an inventory for its metadata.
692
Trees contain an `Inventory` object, and also know how to retrieve
693
file texts mentioned in the inventory, either from a working
694
directory or from a store.
696
It is possible for trees to contain files that are not described
697
in their inventory or vice versa; for this use `filenames()`.
699
Subclasses should set the _inventory attribute, which is considered
700
private to external API users.
703
def get_canonical_inventory_paths(self, paths):
704
"""Like get_canonical_inventory_path() but works on multiple items.
706
:param paths: A sequence of paths relative to the root of the tree.
707
:return: A list of paths, with each item the corresponding input path
708
adjusted to account for existing elements that match case
711
return list(self._yield_canonical_inventory_paths(paths))
713
def get_canonical_inventory_path(self, path):
714
"""Returns the first inventory item that case-insensitively matches path.
716
If a path matches exactly, it is returned. If no path matches exactly
717
but more than one path matches case-insensitively, it is implementation
718
defined which is returned.
720
If no path matches case-insensitively, the input path is returned, but
721
with as many path entries that do exist changed to their canonical
724
If you need to resolve many names from the same tree, you should
725
use get_canonical_inventory_paths() to avoid O(N) behaviour.
727
:param path: A paths relative to the root of the tree.
728
:return: The input path adjusted to account for existing elements
729
that match case insensitively.
731
return next(self._yield_canonical_inventory_paths([path]))
733
def _yield_canonical_inventory_paths(self, paths):
735
# First, if the path as specified exists exactly, just use it.
736
if self.path2id(path) is not None:
740
cur_id = self.get_root_id()
742
bit_iter = iter(path.split("/"))
746
for child in self.iter_children(cur_id):
748
# XXX: it seem like if the child is known to be in the
749
# tree, we shouldn't need to go from its id back to
750
# its path -- mbp 2010-02-11
752
# XXX: it seems like we could be more efficient
753
# by just directly looking up the original name and
754
# only then searching all children; also by not
755
# chopping paths so much. -- mbp 2010-02-11
756
child_base = os.path.basename(self.id2path(child))
757
if (child_base == elt):
758
# if we found an exact match, we can stop now; if
759
# we found an approximate match we need to keep
760
# searching because there might be an exact match
763
new_path = osutils.pathjoin(cur_path, child_base)
765
elif child_base.lower() == lelt:
767
new_path = osutils.pathjoin(cur_path, child_base)
768
except errors.NoSuchId:
769
# before a change is committed we can see this error...
774
# got to the end of this directory and no entries matched.
775
# Return what matched so far, plus the rest as specified.
776
cur_path = osutils.pathjoin(cur_path, elt, *list(bit_iter))
781
def _get_root_inventory(self):
782
return self._inventory
784
root_inventory = property(_get_root_inventory,
785
doc="Root inventory of this tree")
787
def _unpack_file_id(self, file_id):
788
"""Find the inventory and inventory file id for a tree file id.
790
:param file_id: The tree file id, as bytestring or tuple
791
:return: Inventory and inventory file id
793
if isinstance(file_id, tuple):
794
if len(file_id) != 1:
795
raise ValueError("nested trees not yet supported: %r" % file_id)
797
return self.root_inventory, file_id
800
def path2id(self, path):
801
"""Return the id for path in this tree."""
802
return self._path2inv_file_id(path)[1]
804
def _path2inv_file_id(self, path):
805
"""Lookup a inventory and inventory file id by path.
807
:param path: Path to look up
808
:return: tuple with inventory and inventory file id
810
# FIXME: Support nested trees
811
return self.root_inventory, self.root_inventory.path2id(path)
813
def id2path(self, file_id):
814
"""Return the path for a file id.
818
inventory, file_id = self._unpack_file_id(file_id)
819
return inventory.id2path(file_id)
821
def has_id(self, file_id):
822
inventory, file_id = self._unpack_file_id(file_id)
823
return inventory.has_id(file_id)
825
def has_or_had_id(self, file_id):
826
inventory, file_id = self._unpack_file_id(file_id)
827
return inventory.has_id(file_id)
829
def all_file_ids(self):
830
return {entry.file_id for path, entry in self.iter_entries_by_dir()}
832
def filter_unversioned_files(self, paths):
833
"""Filter out paths that are versioned.
835
:return: set of paths.
837
# NB: we specifically *don't* call self.has_filename, because for
838
# WorkingTrees that can indicate files that exist on disk but that
840
return set((p for p in paths if self.path2id(p) is None))
843
def iter_entries_by_dir(self, specific_file_ids=None, yield_parents=False):
844
"""Walk the tree in 'by_dir' order.
846
This will yield each entry in the tree as a (path, entry) tuple.
847
The order that they are yielded is:
849
See Tree.iter_entries_by_dir for details.
851
:param yield_parents: If True, yield the parents from the root leading
852
down to specific_file_ids that have been requested. This has no
853
impact if specific_file_ids is None.
855
if specific_file_ids is None:
856
inventory_file_ids = None
858
inventory_file_ids = []
859
for tree_file_id in specific_file_ids:
860
inventory, inv_file_id = self._unpack_file_id(tree_file_id)
861
if not inventory is self.root_inventory: # for now
862
raise AssertionError("%r != %r" % (
863
inventory, self.root_inventory))
864
inventory_file_ids.append(inv_file_id)
865
# FIXME: Handle nested trees
866
return self.root_inventory.iter_entries_by_dir(
867
specific_file_ids=inventory_file_ids, yield_parents=yield_parents)
870
def iter_child_entries(self, file_id, path=None):
871
inv, inv_file_id = self._unpack_file_id(file_id)
872
return iter(viewvalues(inv[inv_file_id].children))
874
def iter_children(self, file_id, path=None):
875
"""See Tree.iter_children."""
876
entry = self.iter_entries_by_dir([file_id]).next()[1]
877
for child in viewvalues(getattr(entry, 'children', {})):
881
def find_ids_across_trees(filenames, trees, require_versioned=True):
882
"""Find the ids corresponding to specified filenames.
884
All matches in all trees will be used, and all children of matched
885
directories will be used.
887
:param filenames: The filenames to find file_ids for (if None, returns
889
:param trees: The trees to find file_ids within
890
:param require_versioned: if true, all specified filenames must occur in
892
:return: a set of file ids for the specified filenames and their children.
896
specified_path_ids = _find_ids_across_trees(filenames, trees,
898
return _find_children_across_trees(specified_path_ids, trees)
901
def _find_ids_across_trees(filenames, trees, require_versioned):
902
"""Find the ids corresponding to specified filenames.
904
All matches in all trees will be used, but subdirectories are not scanned.
906
:param filenames: The filenames to find file_ids for
907
:param trees: The trees to find file_ids within
908
:param require_versioned: if true, all specified filenames must occur in
910
:return: a set of file ids for the specified filenames
913
interesting_ids = set()
914
for tree_path in filenames:
917
file_id = tree.path2id(tree_path)
918
if file_id is not None:
919
interesting_ids.add(file_id)
922
not_versioned.append(tree_path)
923
if len(not_versioned) > 0 and require_versioned:
924
raise errors.PathsNotVersionedError(not_versioned)
925
return interesting_ids
928
def _find_children_across_trees(specified_ids, trees):
929
"""Return a set including specified ids and their children.
931
All matches in all trees will be used.
933
:param trees: The trees to find file_ids within
934
:return: a set containing all specified ids and their children
936
interesting_ids = set(specified_ids)
937
pending = interesting_ids
938
# now handle children of interesting ids
939
# we loop so that we handle all children of each id in both trees
940
while len(pending) > 0:
942
for file_id in pending:
944
if not tree.has_or_had_id(file_id):
946
for child_id in tree.iter_children(file_id):
947
if child_id not in interesting_ids:
948
new_pending.add(child_id)
949
interesting_ids.update(new_pending)
950
pending = new_pending
951
return interesting_ids
954
class InterTree(InterObject):
955
"""This class represents operations taking place between two Trees.
957
Its instances have methods like 'compare' and contain references to the
958
source and target trees these operations are to be carried out on.
960
Clients of breezy should not need to use InterTree directly, rather they
961
should use the convenience methods on Tree such as 'Tree.compare()' which
962
will pass through to InterTree as appropriate.
965
# Formats that will be used to test this InterTree. If both are
966
# None, this InterTree will not be tested (e.g. because a complex
968
_matching_from_tree_format = None
969
_matching_to_tree_format = None
974
def is_compatible(kls, source, target):
975
# The default implementation is naive and uses the public API, so
976
# it works for all trees.
979
def _changes_from_entries(self, source_entry, target_entry,
980
source_path=None, target_path=None):
981
"""Generate a iter_changes tuple between source_entry and target_entry.
983
:param source_entry: An inventory entry from self.source, or None.
984
:param target_entry: An inventory entry from self.target, or None.
985
:param source_path: The path of source_entry, if known. If not known
986
it will be looked up.
987
:param target_path: The path of target_entry, if known. If not known
988
it will be looked up.
989
:return: A tuple, item 0 of which is an iter_changes result tuple, and
990
item 1 is True if there are any changes in the result tuple.
992
if source_entry is None:
993
if target_entry is None:
995
file_id = target_entry.file_id
997
file_id = source_entry.file_id
998
if source_entry is not None:
999
source_versioned = True
1000
source_name = source_entry.name
1001
source_parent = source_entry.parent_id
1002
if source_path is None:
1003
source_path = self.source.id2path(file_id)
1004
source_kind, source_executable, source_stat = \
1005
self.source._comparison_data(source_entry, source_path)
1007
source_versioned = False
1009
source_parent = None
1011
source_executable = None
1012
if target_entry is not None:
1013
target_versioned = True
1014
target_name = target_entry.name
1015
target_parent = target_entry.parent_id
1016
if target_path is None:
1017
target_path = self.target.id2path(file_id)
1018
target_kind, target_executable, target_stat = \
1019
self.target._comparison_data(target_entry, target_path)
1021
target_versioned = False
1023
target_parent = None
1025
target_executable = None
1026
versioned = (source_versioned, target_versioned)
1027
kind = (source_kind, target_kind)
1028
changed_content = False
1029
if source_kind != target_kind:
1030
changed_content = True
1031
elif source_kind == 'file':
1032
if not self.file_content_matches(file_id, file_id, source_path,
1033
target_path, source_stat, target_stat):
1034
changed_content = True
1035
elif source_kind == 'symlink':
1036
if (self.source.get_symlink_target(file_id) !=
1037
self.target.get_symlink_target(file_id)):
1038
changed_content = True
1039
elif source_kind == 'tree-reference':
1040
if (self.source.get_reference_revision(file_id, source_path)
1041
!= self.target.get_reference_revision(file_id, target_path)):
1042
changed_content = True
1043
parent = (source_parent, target_parent)
1044
name = (source_name, target_name)
1045
executable = (source_executable, target_executable)
1046
if (changed_content is not False or versioned[0] != versioned[1]
1047
or parent[0] != parent[1] or name[0] != name[1] or
1048
executable[0] != executable[1]):
1052
return (file_id, (source_path, target_path), changed_content,
1053
versioned, parent, name, kind, executable), changes
1056
def compare(self, want_unchanged=False, specific_files=None,
1057
extra_trees=None, require_versioned=False, include_root=False,
1058
want_unversioned=False):
1059
"""Return the changes from source to target.
1061
:return: A TreeDelta.
1062
:param specific_files: An optional list of file paths to restrict the
1063
comparison to. When mapping filenames to ids, all matches in all
1064
trees (including optional extra_trees) are used, and all children of
1065
matched directories are included.
1066
:param want_unchanged: An optional boolean requesting the inclusion of
1067
unchanged entries in the result.
1068
:param extra_trees: An optional list of additional trees to use when
1069
mapping the contents of specific_files (paths) to file_ids.
1070
:param require_versioned: An optional boolean (defaults to False). When
1071
supplied and True all the 'specific_files' must be versioned, or
1072
a PathsNotVersionedError will be thrown.
1073
:param want_unversioned: Scan for unversioned paths.
1075
trees = (self.source,)
1076
if extra_trees is not None:
1077
trees = trees + tuple(extra_trees)
1078
# target is usually the newer tree:
1079
specific_file_ids = self.target.paths2ids(specific_files, trees,
1080
require_versioned=require_versioned)
1081
if specific_files and not specific_file_ids:
1082
# All files are unversioned, so just return an empty delta
1083
# _compare_trees would think we want a complete delta
1084
result = delta.TreeDelta()
1085
fake_entry = inventory.InventoryFile('unused', 'unused', 'unused')
1086
result.unversioned = [(path, None,
1087
self.target._comparison_data(fake_entry, path)[0]) for path in
1090
return delta._compare_trees(self.source, self.target, want_unchanged,
1091
specific_files, include_root, extra_trees=extra_trees,
1092
require_versioned=require_versioned,
1093
want_unversioned=want_unversioned)
1095
def iter_changes(self, include_unchanged=False,
1096
specific_files=None, pb=None, extra_trees=[],
1097
require_versioned=True, want_unversioned=False):
1098
"""Generate an iterator of changes between trees.
1100
A tuple is returned:
1101
(file_id, (path_in_source, path_in_target),
1102
changed_content, versioned, parent, name, kind,
1105
Changed_content is True if the file's content has changed. This
1106
includes changes to its kind, and to a symlink's target.
1108
versioned, parent, name, kind, executable are tuples of (from, to).
1109
If a file is missing in a tree, its kind is None.
1111
Iteration is done in parent-to-child order, relative to the target
1114
There is no guarantee that all paths are in sorted order: the
1115
requirement to expand the search due to renames may result in children
1116
that should be found early being found late in the search, after
1117
lexically later results have been returned.
1118
:param require_versioned: Raise errors.PathsNotVersionedError if a
1119
path in the specific_files list is not versioned in one of
1120
source, target or extra_trees.
1121
:param specific_files: An optional list of file paths to restrict the
1122
comparison to. When mapping filenames to ids, all matches in all
1123
trees (including optional extra_trees) are used, and all children
1124
of matched directories are included. The parents in the target tree
1125
of the specific files up to and including the root of the tree are
1126
always evaluated for changes too.
1127
:param want_unversioned: Should unversioned files be returned in the
1128
output. An unversioned file is defined as one with (False, False)
1129
for the versioned pair.
1131
lookup_trees = [self.source]
1133
lookup_trees.extend(extra_trees)
1134
# The ids of items we need to examine to insure delta consistency.
1135
precise_file_ids = set()
1136
changed_file_ids = []
1137
if specific_files == []:
1138
specific_file_ids = []
1140
specific_file_ids = self.target.paths2ids(specific_files,
1141
lookup_trees, require_versioned=require_versioned)
1142
if specific_files is not None:
1143
# reparented or added entries must have their parents included
1144
# so that valid deltas can be created. The seen_parents set
1145
# tracks the parents that we need to have.
1146
# The seen_dirs set tracks directory entries we've yielded.
1147
# After outputting version object in to_entries we set difference
1148
# the two seen sets and start checking parents.
1149
seen_parents = set()
1151
if want_unversioned:
1152
all_unversioned = sorted([(p.split('/'), p) for p in
1153
self.target.extras()
1154
if specific_files is None or
1155
osutils.is_inside_any(specific_files, p)])
1156
all_unversioned = collections.deque(all_unversioned)
1158
all_unversioned = collections.deque()
1160
from_entries_by_dir = list(self.source.iter_entries_by_dir(
1161
specific_file_ids=specific_file_ids))
1162
from_data = dict((e.file_id, (p, e)) for p, e in from_entries_by_dir)
1163
to_entries_by_dir = list(self.target.iter_entries_by_dir(
1164
specific_file_ids=specific_file_ids))
1165
num_entries = len(from_entries_by_dir) + len(to_entries_by_dir)
1167
# the unversioned path lookup only occurs on real trees - where there
1168
# can be extras. So the fake_entry is solely used to look up
1169
# executable it values when execute is not supported.
1170
fake_entry = inventory.InventoryFile('unused', 'unused', 'unused')
1171
for target_path, target_entry in to_entries_by_dir:
1172
while (all_unversioned and
1173
all_unversioned[0][0] < target_path.split('/')):
1174
unversioned_path = all_unversioned.popleft()
1175
target_kind, target_executable, target_stat = \
1176
self.target._comparison_data(fake_entry, unversioned_path[1])
1177
yield (None, (None, unversioned_path[1]), True, (False, False),
1179
(None, unversioned_path[0][-1]),
1180
(None, target_kind),
1181
(None, target_executable))
1182
source_path, source_entry = from_data.get(target_entry.file_id,
1184
result, changes = self._changes_from_entries(source_entry,
1185
target_entry, source_path=source_path, target_path=target_path)
1186
to_paths[result[0]] = result[1][1]
1191
pb.update('comparing files', entry_count, num_entries)
1192
if changes or include_unchanged:
1193
if specific_file_ids is not None:
1194
new_parent_id = result[4][1]
1195
precise_file_ids.add(new_parent_id)
1196
changed_file_ids.append(result[0])
1198
# Ensure correct behaviour for reparented/added specific files.
1199
if specific_files is not None:
1200
# Record output dirs
1201
if result[6][1] == 'directory':
1202
seen_dirs.add(result[0])
1203
# Record parents of reparented/added entries.
1204
versioned = result[3]
1206
if not versioned[0] or parents[0] != parents[1]:
1207
seen_parents.add(parents[1])
1208
while all_unversioned:
1209
# yield any trailing unversioned paths
1210
unversioned_path = all_unversioned.popleft()
1211
to_kind, to_executable, to_stat = \
1212
self.target._comparison_data(fake_entry, unversioned_path[1])
1213
yield (None, (None, unversioned_path[1]), True, (False, False),
1215
(None, unversioned_path[0][-1]),
1217
(None, to_executable))
1218
# Yield all remaining source paths
1219
for path, from_entry in from_entries_by_dir:
1220
file_id = from_entry.file_id
1221
if file_id in to_paths:
1224
if not self.target.has_id(file_id):
1225
# common case - paths we have not emitted are not present in
1229
to_path = self.target.id2path(file_id)
1232
pb.update('comparing files', entry_count, num_entries)
1233
versioned = (True, False)
1234
parent = (from_entry.parent_id, None)
1235
name = (from_entry.name, None)
1236
from_kind, from_executable, stat_value = \
1237
self.source._comparison_data(from_entry, path)
1238
kind = (from_kind, None)
1239
executable = (from_executable, None)
1240
changed_content = from_kind is not None
1241
# the parent's path is necessarily known at this point.
1242
changed_file_ids.append(file_id)
1243
yield(file_id, (path, to_path), changed_content, versioned, parent,
1244
name, kind, executable)
1245
changed_file_ids = set(changed_file_ids)
1246
if specific_file_ids is not None:
1247
for result in self._handle_precise_ids(precise_file_ids,
1251
def _get_entry(self, tree, file_id):
1252
"""Get an inventory entry from a tree, with missing entries as None.
1254
If the tree raises NotImplementedError on accessing .inventory, then
1255
this is worked around using iter_entries_by_dir on just the file id
1258
:param tree: The tree to lookup the entry in.
1259
:param file_id: The file_id to lookup.
1262
inventory = tree.root_inventory
1263
except NotImplementedError:
1264
# No inventory available.
1266
iterator = tree.iter_entries_by_dir(specific_file_ids=[file_id])
1267
return iterator.next()[1]
1268
except StopIteration:
1272
return inventory[file_id]
1273
except errors.NoSuchId:
1276
def _handle_precise_ids(self, precise_file_ids, changed_file_ids,
1277
discarded_changes=None):
1278
"""Fill out a partial iter_changes to be consistent.
1280
:param precise_file_ids: The file ids of parents that were seen during
1282
:param changed_file_ids: The file ids of already emitted items.
1283
:param discarded_changes: An optional dict of precalculated
1284
iter_changes items which the partial iter_changes had not output
1286
:return: A generator of iter_changes items to output.
1288
# process parents of things that had changed under the users
1289
# requested paths to prevent incorrect paths or parent ids which
1290
# aren't in the tree.
1291
while precise_file_ids:
1292
precise_file_ids.discard(None)
1293
# Don't emit file_ids twice
1294
precise_file_ids.difference_update(changed_file_ids)
1295
if not precise_file_ids:
1297
# If the there was something at a given output path in source, we
1298
# have to include the entry from source in the delta, or we would
1299
# be putting this entry into a used path.
1301
for parent_id in precise_file_ids:
1303
paths.append(self.target.id2path(parent_id))
1304
except errors.NoSuchId:
1305
# This id has been dragged in from the source by delta
1306
# expansion and isn't present in target at all: we don't
1307
# need to check for path collisions on it.
1310
old_id = self.source.path2id(path)
1311
precise_file_ids.add(old_id)
1312
precise_file_ids.discard(None)
1313
current_ids = precise_file_ids
1314
precise_file_ids = set()
1315
# We have to emit all of precise_file_ids that have been altered.
1316
# We may have to output the children of some of those ids if any
1317
# directories have stopped being directories.
1318
for file_id in current_ids:
1320
if discarded_changes:
1321
result = discarded_changes.get(file_id)
1326
old_entry = self._get_entry(self.source, file_id)
1327
new_entry = self._get_entry(self.target, file_id)
1328
result, changes = self._changes_from_entries(
1329
old_entry, new_entry)
1332
# Get this parents parent to examine.
1333
new_parent_id = result[4][1]
1334
precise_file_ids.add(new_parent_id)
1336
if (result[6][0] == 'directory' and
1337
result[6][1] != 'directory'):
1338
# This stopped being a directory, the old children have
1340
if old_entry is None:
1341
# Reusing a discarded change.
1342
old_entry = self._get_entry(self.source, file_id)
1343
precise_file_ids.update(
1344
self.source.iter_children(file_id))
1345
changed_file_ids.add(result[0])
1349
def file_content_matches(self, source_file_id, target_file_id,
1350
source_path=None, target_path=None, source_stat=None, target_stat=None):
1351
"""Check if two files are the same in the source and target trees.
1353
This only checks that the contents of the files are the same,
1354
it does not touch anything else.
1356
:param source_file_id: File id of the file in the source tree
1357
:param target_file_id: File id of the file in the target tree
1358
:param source_path: Path of the file in the source tree
1359
:param target_path: Path of the file in the target tree
1360
:param source_stat: Optional stat value of the file in the source tree
1361
:param target_stat: Optional stat value of the file in the target tree
1362
:return: Boolean indicating whether the files have the same contents
1364
source_verifier_kind, source_verifier_data = self.source.get_file_verifier(
1365
source_file_id, source_path, source_stat)
1366
target_verifier_kind, target_verifier_data = self.target.get_file_verifier(
1367
target_file_id, target_path, target_stat)
1368
if source_verifier_kind == target_verifier_kind:
1369
return (source_verifier_data == target_verifier_data)
1370
# Fall back to SHA1 for now
1371
if source_verifier_kind != "SHA1":
1372
source_sha1 = self.source.get_file_sha1(source_file_id,
1373
source_path, source_stat)
1375
source_sha1 = source_verifier_data
1376
if target_verifier_kind != "SHA1":
1377
target_sha1 = self.target.get_file_sha1(target_file_id,
1378
target_path, target_stat)
1380
target_sha1 = target_verifier_data
1381
return (source_sha1 == target_sha1)
1383
InterTree.register_optimiser(InterTree)
1386
class MultiWalker(object):
1387
"""Walk multiple trees simultaneously, getting combined results."""
1389
# Note: This could be written to not assume you can do out-of-order
1390
# lookups. Instead any nodes that don't match in all trees could be
1391
# marked as 'deferred', and then returned in the final cleanup loop.
1392
# For now, I think it is "nicer" to return things as close to the
1393
# "master_tree" order as we can.
1395
def __init__(self, master_tree, other_trees):
1396
"""Create a new MultiWalker.
1398
All trees being walked must implement "iter_entries_by_dir()", such
1399
that they yield (path, object) tuples, where that object will have a
1400
'.file_id' member, that can be used to check equality.
1402
:param master_tree: All trees will be 'slaved' to the master_tree such
1403
that nodes in master_tree will be used as 'first-pass' sync points.
1404
Any nodes that aren't in master_tree will be merged in a second
1406
:param other_trees: A list of other trees to walk simultaneously.
1408
self._master_tree = master_tree
1409
self._other_trees = other_trees
1411
# Keep track of any nodes that were properly processed just out of
1412
# order, that way we don't return them at the end, we don't have to
1413
# track *all* processed file_ids, just the out-of-order ones
1414
self._out_of_order_processed = set()
1417
def _step_one(iterator):
1418
"""Step an iter_entries_by_dir iterator.
1420
:return: (has_more, path, ie)
1421
If has_more is False, path and ie will be None.
1424
path, ie = next(iterator)
1425
except StopIteration:
1426
return False, None, None
1428
return True, path, ie
1431
def _cmp_path_by_dirblock(path1, path2):
1432
"""Compare two paths based on what directory they are in.
1434
This generates a sort order, such that all children of a directory are
1435
sorted together, and grandchildren are in the same order as the
1436
children appear. But all grandchildren come after all children.
1438
:param path1: first path
1439
:param path2: the second path
1440
:return: negative number if ``path1`` comes first,
1441
0 if paths are equal
1442
and a positive number if ``path2`` sorts first
1444
# Shortcut this special case
1447
# This is stolen from _dirstate_helpers_py.py, only switching it to
1448
# Unicode objects. Consider using encode_utf8() and then using the
1449
# optimized versions, or maybe writing optimized unicode versions.
1450
if not isinstance(path1, unicode):
1451
raise TypeError("'path1' must be a unicode string, not %s: %r"
1452
% (type(path1), path1))
1453
if not isinstance(path2, unicode):
1454
raise TypeError("'path2' must be a unicode string, not %s: %r"
1455
% (type(path2), path2))
1456
return cmp(MultiWalker._path_to_key(path1),
1457
MultiWalker._path_to_key(path2))
1460
def _path_to_key(path):
1461
dirname, basename = osutils.split(path)
1462
return (dirname.split(u'/'), basename)
1464
def _lookup_by_file_id(self, extra_entries, other_tree, file_id):
1465
"""Lookup an inventory entry by file_id.
1467
This is called when an entry is missing in the normal order.
1468
Generally this is because a file was either renamed, or it was
1469
deleted/added. If the entry was found in the inventory and not in
1470
extra_entries, it will be added to self._out_of_order_processed
1472
:param extra_entries: A dictionary of {file_id: (path, ie)}. This
1473
should be filled with entries that were found before they were
1474
used. If file_id is present, it will be removed from the
1476
:param other_tree: The Tree to search, in case we didn't find the entry
1478
:param file_id: The file_id to look for
1479
:return: (path, ie) if found or (None, None) if not present.
1481
if file_id in extra_entries:
1482
return extra_entries.pop(file_id)
1483
# TODO: Is id2path better as the first call, or is
1484
# inventory[file_id] better as a first check?
1486
cur_path = other_tree.id2path(file_id)
1487
except errors.NoSuchId:
1489
if cur_path is None:
1492
self._out_of_order_processed.add(file_id)
1493
cur_ie = other_tree.root_inventory[file_id]
1494
return (cur_path, cur_ie)
1497
"""Match up the values in the different trees."""
1498
for result in self._walk_master_tree():
1500
self._finish_others()
1501
for result in self._walk_others():
1504
def _walk_master_tree(self):
1505
"""First pass, walk all trees in lock-step.
1507
When we are done, all nodes in the master_tree will have been
1508
processed. _other_walkers, _other_entries, and _others_extra will be
1509
set on 'self' for future processing.
1511
# This iterator has the most "inlining" done, because it tends to touch
1512
# every file in the tree, while the others only hit nodes that don't
1514
master_iterator = self._master_tree.iter_entries_by_dir()
1516
other_walkers = [other.iter_entries_by_dir()
1517
for other in self._other_trees]
1518
other_entries = [self._step_one(walker) for walker in other_walkers]
1519
# Track extra nodes in the other trees
1520
others_extra = [{} for _ in range(len(self._other_trees))]
1522
master_has_more = True
1523
step_one = self._step_one
1524
lookup_by_file_id = self._lookup_by_file_id
1525
out_of_order_processed = self._out_of_order_processed
1527
while master_has_more:
1528
(master_has_more, path, master_ie) = step_one(master_iterator)
1529
if not master_has_more:
1532
file_id = master_ie.file_id
1534
other_values_append = other_values.append
1535
next_other_entries = []
1536
next_other_entries_append = next_other_entries.append
1537
for idx, (other_has_more, other_path, other_ie) in enumerate(other_entries):
1538
if not other_has_more:
1539
other_values_append(lookup_by_file_id(
1540
others_extra[idx], self._other_trees[idx], file_id))
1541
next_other_entries_append((False, None, None))
1542
elif file_id == other_ie.file_id:
1543
# This is the critical code path, as most of the entries
1544
# should match between most trees.
1545
other_values_append((other_path, other_ie))
1546
next_other_entries_append(step_one(other_walkers[idx]))
1548
# This walker did not match, step it until it either
1549
# matches, or we know we are past the current walker.
1550
other_walker = other_walkers[idx]
1551
other_extra = others_extra[idx]
1552
while (other_has_more and
1553
self._cmp_path_by_dirblock(other_path, path) < 0):
1554
other_file_id = other_ie.file_id
1555
if other_file_id not in out_of_order_processed:
1556
other_extra[other_file_id] = (other_path, other_ie)
1557
other_has_more, other_path, other_ie = \
1558
step_one(other_walker)
1559
if other_has_more and other_ie.file_id == file_id:
1560
# We ended up walking to this point, match and step
1562
other_values_append((other_path, other_ie))
1563
other_has_more, other_path, other_ie = \
1564
step_one(other_walker)
1566
# This record isn't in the normal order, see if it
1568
other_values_append(lookup_by_file_id(
1569
other_extra, self._other_trees[idx], file_id))
1570
next_other_entries_append((other_has_more, other_path,
1572
other_entries = next_other_entries
1574
# We've matched all the walkers, yield this datapoint
1575
yield path, file_id, master_ie, other_values
1576
self._other_walkers = other_walkers
1577
self._other_entries = other_entries
1578
self._others_extra = others_extra
1580
def _finish_others(self):
1581
"""Finish walking the other iterators, so we get all entries."""
1582
for idx, info in enumerate(self._other_entries):
1583
other_extra = self._others_extra[idx]
1584
(other_has_more, other_path, other_ie) = info
1585
while other_has_more:
1586
other_file_id = other_ie.file_id
1587
if other_file_id not in self._out_of_order_processed:
1588
other_extra[other_file_id] = (other_path, other_ie)
1589
other_has_more, other_path, other_ie = \
1590
self._step_one(self._other_walkers[idx])
1591
del self._other_entries
1593
def _walk_others(self):
1594
"""Finish up by walking all the 'deferred' nodes."""
1595
# TODO: One alternative would be to grab all possible unprocessed
1596
# file_ids, and then sort by path, and then yield them. That
1597
# might ensure better ordering, in case a caller strictly
1598
# requires parents before children.
1599
for idx, other_extra in enumerate(self._others_extra):
1600
others = sorted(viewvalues(other_extra),
1601
key=lambda x: self._path_to_key(x[0]))
1602
for other_path, other_ie in others:
1603
file_id = other_ie.file_id
1604
# We don't need to check out_of_order_processed here, because
1605
# the lookup_by_file_id will be removing anything processed
1606
# from the extras cache
1607
other_extra.pop(file_id)
1608
other_values = [(None, None)] * idx
1609
other_values.append((other_path, other_ie))
1610
for alt_idx, alt_extra in enumerate(self._others_extra[idx+1:]):
1611
alt_idx = alt_idx + idx + 1
1612
alt_extra = self._others_extra[alt_idx]
1613
alt_tree = self._other_trees[alt_idx]
1614
other_values.append(self._lookup_by_file_id(
1615
alt_extra, alt_tree, file_id))
1616
yield other_path, file_id, None, other_values