1
# Copyright (C) 2005 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Tree classes, representing directory at point in time.
21
from cStringIO import StringIO
29
from bzrlib.decorators import needs_read_lock
30
from bzrlib.errors import BzrError, BzrCheckError
31
from bzrlib import errors
32
from bzrlib.inventory import Inventory
33
from bzrlib.inter import InterObject
34
from bzrlib.osutils import fingerprint_file
35
import bzrlib.revision
36
from bzrlib.trace import mutter, note
40
"""Abstract file tree.
42
There are several subclasses:
44
* `WorkingTree` exists as files on disk editable by the user.
46
* `RevisionTree` is a tree as recorded at some point in the past.
48
Trees contain an `Inventory` object, and also know how to retrieve
49
file texts mentioned in the inventory, either from a working
50
directory or from a store.
52
It is possible for trees to contain files that are not described
53
in their inventory or vice versa; for this use `filenames()`.
55
Trees can be compared, etc, regardless of whether they are working
56
trees or versioned trees.
59
def changes_from(self, other, want_unchanged=False, specific_files=None,
60
extra_trees=None, require_versioned=False, include_root=False):
61
"""Return a TreeDelta of the changes from other to this tree.
63
:param other: A tree to compare with.
64
:param specific_files: An optional list of file paths to restrict the
65
comparison to. When mapping filenames to ids, all matches in all
66
trees (including optional extra_trees) are used, and all children of
67
matched directories are included.
68
:param want_unchanged: An optional boolean requesting the inclusion of
69
unchanged entries in the result.
70
:param extra_trees: An optional list of additional trees to use when
71
mapping the contents of specific_files (paths) to file_ids.
72
:param require_versioned: An optional boolean (defaults to False). When
73
supplied and True all the 'specific_files' must be versioned, or
74
a PathsNotVersionedError will be thrown.
76
The comparison will be performed by an InterTree object looked up on
79
# Martin observes that Tree.changes_from returns a TreeDelta and this
80
# may confuse people, because the class name of the returned object is
81
# a synonym of the object referenced in the method name.
82
return InterTree.get(other, self).compare(
83
want_unchanged=want_unchanged,
84
specific_files=specific_files,
85
extra_trees=extra_trees,
86
require_versioned=require_versioned,
87
include_root=include_root
90
def _iter_changes(self, from_tree, include_unchanged=False,
91
specific_file_ids=None, pb=None):
92
intertree = InterTree.get(from_tree, self)
93
return intertree._iter_changes(include_unchanged,
94
specific_file_ids, pb)
97
"""Get a list of the conflicts in the tree.
99
Each conflict is an instance of bzrlib.conflicts.Conflict.
103
def get_parent_ids(self):
104
"""Get the parent ids for this tree.
106
:return: a list of parent ids. [] is returned to indicate
107
a tree with no parents.
108
:raises: BzrError if the parents are not known.
110
raise NotImplementedError(self.get_parent_ids)
112
def has_filename(self, filename):
113
"""True if the tree has given filename."""
114
raise NotImplementedError()
116
def has_id(self, file_id):
117
file_id = osutils.safe_file_id(file_id)
118
return self.inventory.has_id(file_id)
120
__contains__ = has_id
122
def has_or_had_id(self, file_id):
123
file_id = osutils.safe_file_id(file_id)
124
if file_id == self.inventory.root.file_id:
126
return self.inventory.has_id(file_id)
129
return iter(self.inventory)
131
def id2path(self, file_id):
132
file_id = osutils.safe_file_id(file_id)
133
return self.inventory.id2path(file_id)
135
def is_control_filename(self, filename):
136
"""True if filename is the name of a control file in this tree.
138
:param filename: A filename within the tree. This is a relative path
139
from the root of this tree.
141
This is true IF and ONLY IF the filename is part of the meta data
142
that bzr controls in this tree. I.E. a random .bzr directory placed
143
on disk will not be a control file for this tree.
145
return self.bzrdir.is_control_filename(filename)
148
def iter_entries_by_dir(self, specific_file_ids=None):
149
"""Walk the tree in 'by_dir' order.
151
This will yield each entry in the tree as a (path, entry) tuple. The
152
order that they are yielded is: the contents of a directory are
153
preceeded by the parent of a directory, and all the contents of a
154
directory are grouped together.
156
return self.inventory.iter_entries_by_dir(
157
specific_file_ids=specific_file_ids)
159
def iter_reference_entries(self):
160
for path, entry in self.iter_entries_by_dir():
161
if entry.kind == 'tree-reference':
164
def kind(self, file_id):
165
raise NotImplementedError("subclasses must implement kind")
167
def get_reference_revision(self, entry, path=None):
168
raise NotImplementedError("subclasses must implement "
169
"get_reference_revision")
171
def _comparison_data(self, entry, path):
172
"""Return a tuple of kind, executable, stat_value for a file.
174
entry may be None if there is no inventory entry for the file, but
175
path must always be supplied.
177
kind is None if there is no file present (even if an inventory id is
178
present). executable is False for non-file entries.
180
raise NotImplementedError(self._comparison_data)
182
def _file_size(self, entry, stat_value):
183
raise NotImplementedError(self._file_size)
185
def _get_inventory(self):
186
return self._inventory
188
def get_file(self, file_id):
189
"""Return a file object for the file file_id in the tree."""
190
raise NotImplementedError(self.get_file)
192
def get_file_by_path(self, path):
193
return self.get_file(self._inventory.path2id(path))
195
def annotate_iter(self, file_id):
196
"""Return an iterator of revision_id, line tuples
198
For working trees (and mutable trees in general), the special
199
revision_id 'current:' will be used for lines that are new in this
200
tree, e.g. uncommitted changes.
201
:param file_id: The file to produce an annotated version from
203
raise NotImplementedError(self.annotate_iter)
205
inventory = property(_get_inventory,
206
doc="Inventory of this Tree")
208
def _check_retrieved(self, ie, f):
211
fp = fingerprint_file(f)
214
if ie.text_size is not None:
215
if ie.text_size != fp['size']:
216
raise BzrError("mismatched size for file %r in %r" % (ie.file_id, self._store),
217
["inventory expects %d bytes" % ie.text_size,
218
"file is actually %d bytes" % fp['size'],
219
"store is probably damaged/corrupt"])
221
if ie.text_sha1 != fp['sha1']:
222
raise BzrError("wrong SHA-1 for file %r in %r" % (ie.file_id, self._store),
223
["inventory expects %s" % ie.text_sha1,
224
"file is actually %s" % fp['sha1'],
225
"store is probably damaged/corrupt"])
227
def path2id(self, path):
228
"""Return the id for path in this tree."""
229
return self._inventory.path2id(path)
231
def paths2ids(self, paths, trees=[], require_versioned=True):
232
"""Return all the ids that can be reached by walking from paths.
234
Each path is looked up in each this tree and any extras provided in
235
trees, and this is repeated recursively: the children in an extra tree
236
of a directory that has been renamed under a provided path in this tree
237
are all returned, even if none exist until a provided path in this
238
tree, and vice versa.
240
:param paths: An iterable of paths to start converting to ids from.
241
Alternatively, if paths is None, no ids should be calculated and None
242
will be returned. This is offered to make calling the api unconditional
243
for code that *might* take a list of files.
244
:param trees: Additional trees to consider.
245
:param require_versioned: If False, do not raise NotVersionedError if
246
an element of paths is not versioned in this tree and all of trees.
248
return find_ids_across_trees(paths, [self] + list(trees), require_versioned)
250
def print_file(self, file_id):
251
"""Print file with id `file_id` to stdout."""
252
file_id = osutils.safe_file_id(file_id)
254
sys.stdout.write(self.get_file_text(file_id))
259
def revision_tree(self, revision_id):
260
"""Obtain a revision tree for the revision revision_id.
262
The intention of this method is to allow access to possibly cached
263
tree data. Implementors of this method should raise NoSuchRevision if
264
the tree is not locally available, even if they could obtain the
265
tree via a repository or some other means. Callers are responsible
266
for finding the ultimate source for a revision tree.
268
:param revision_id: The revision_id of the requested tree.
270
:raises: NoSuchRevision if the tree cannot be obtained.
272
raise errors.NoSuchRevisionInTree(self, revision_id)
275
"""What files are present in this tree and unknown.
277
:return: an iterator over the unknown files.
284
def filter_unversioned_files(self, paths):
285
"""Filter out paths that are not versioned.
287
:return: set of paths.
289
# NB: we specifically *don't* call self.has_filename, because for
290
# WorkingTrees that can indicate files that exist on disk but that
292
pred = self.inventory.has_filename
293
return set((p for p in paths if not pred(p)))
295
def walkdirs(self, prefix=""):
296
"""Walk the contents of this tree from path down.
298
This yields all the data about the contents of a directory at a time.
299
After each directory has been yielded, if the caller has mutated the
300
list to exclude some directories, they are then not descended into.
302
The data yielded is of the form:
303
((directory-relpath, directory-path-from-root, directory-fileid),
304
[(relpath, basename, kind, lstat, path_from_tree_root, file_id,
305
versioned_kind), ...]),
306
- directory-relpath is the containing dirs relpath from prefix
307
- directory-path-from-root is the containing dirs path from /
308
- directory-fileid is the id of the directory if it is versioned.
309
- relpath is the relative path within the subtree being walked.
310
- basename is the basename
311
- kind is the kind of the file now. If unknonwn then the file is not
312
present within the tree - but it may be recorded as versioned. See
314
- lstat is the stat data *if* the file was statted.
315
- path_from_tree_root is the path from the root of the tree.
316
- file_id is the file_id is the entry is versioned.
317
- versioned_kind is the kind of the file as last recorded in the
318
versioning system. If 'unknown' the file is not versioned.
319
One of 'kind' and 'versioned_kind' must not be 'unknown'.
321
:param prefix: Start walking from prefix within the tree rather than
322
at the root. This allows one to walk a subtree but get paths that are
323
relative to a tree rooted higher up.
324
:return: an iterator over the directory data.
326
raise NotImplementedError(self.walkdirs)
329
class EmptyTree(Tree):
332
self._inventory = Inventory(root_id=None)
333
symbol_versioning.warn('EmptyTree is deprecated as of bzr 0.9 please'
334
' use repository.revision_tree instead.',
335
DeprecationWarning, stacklevel=2)
337
def get_parent_ids(self):
340
def get_symlink_target(self, file_id):
343
def has_filename(self, filename):
346
def kind(self, file_id):
347
file_id = osutils.safe_file_id(file_id)
348
assert self._inventory[file_id].kind == "directory"
351
def list_files(self, include_root=False):
354
def __contains__(self, file_id):
355
file_id = osutils.safe_file_id(file_id)
356
return (file_id in self._inventory)
358
def get_file_sha1(self, file_id, path=None, stat_value=None):
362
######################################################################
365
# TODO: Merge these two functions into a single one that can operate
366
# on either a whole tree or a set of files.
368
# TODO: Return the diff in order by filename, not by category or in
369
# random order. Can probably be done by lock-stepping through the
370
# filenames from both trees.
373
def file_status(filename, old_tree, new_tree):
374
"""Return single-letter status, old and new names for a file.
376
The complexity here is in deciding how to represent renames;
377
many complex cases are possible.
379
old_inv = old_tree.inventory
380
new_inv = new_tree.inventory
381
new_id = new_inv.path2id(filename)
382
old_id = old_inv.path2id(filename)
384
if not new_id and not old_id:
385
# easy: doesn't exist in either; not versioned at all
386
if new_tree.is_ignored(filename):
387
return 'I', None, None
389
return '?', None, None
391
# There is now a file of this name, great.
394
# There is no longer a file of this name, but we can describe
395
# what happened to the file that used to have
396
# this name. There are two possibilities: either it was
397
# deleted entirely, or renamed.
399
if new_inv.has_id(old_id):
400
return 'X', old_inv.id2path(old_id), new_inv.id2path(old_id)
402
return 'D', old_inv.id2path(old_id), None
404
# if the file_id is new in this revision, it is added
405
if new_id and not old_inv.has_id(new_id):
408
# if there used to be a file of this name, but that ID has now
409
# disappeared, it is deleted
410
if old_id and not new_inv.has_id(old_id):
417
def find_renames(old_inv, new_inv):
418
for file_id in old_inv:
419
if file_id not in new_inv:
421
old_name = old_inv.id2path(file_id)
422
new_name = new_inv.id2path(file_id)
423
if old_name != new_name:
424
yield (old_name, new_name)
427
def find_ids_across_trees(filenames, trees, require_versioned=True):
428
"""Find the ids corresponding to specified filenames.
430
All matches in all trees will be used, and all children of matched
431
directories will be used.
433
:param filenames: The filenames to find file_ids for (if None, returns
435
:param trees: The trees to find file_ids within
436
:param require_versioned: if true, all specified filenames must occur in
438
:return: a set of file ids for the specified filenames and their children.
442
specified_path_ids = _find_ids_across_trees(filenames, trees,
444
return _find_children_across_trees(specified_path_ids, trees)
445
# specified_ids = [id for path, id in _find_path_ids_across_trees(filenames, trees, require_versioned)]
446
# return _find_children_across_trees(specified_ids, trees)
448
def find_path_ids_across_trees(filenames, trees, require_versioned=True):
449
"""Find the paths and ids corresponding to specified filenames.
451
All matches in all trees will be used, and all children of matched
452
directories will be included
454
:param filenames: The filenames to find file_ids for
455
:param trees: The trees to find file_ids within
456
:param require_versioned: if true, all specified filenames must occur in
458
:return: a set of (path, file ids) for the specified filenames and their
459
children. The returned path is the path of the id in the first tree
460
that contains it. This matters when files have been moved
464
# This function needs to know the ids for filenames in all trees, then
465
# search for those same files and children in all the other trees.
466
# it is complicated by the same path in two trees being able to have
467
# different ids, which might both be present in both trees.
468
# consider two trees, which have had 'mv foo bar' and 'mv baz foo' done
469
# in this case, a diff of 'foo' should should changes to both the current
470
# 'bar' and the current 'foo' which was baz. Its arguable that if
471
# the situation is 'mv parent/foo bar' and 'mv baz parent/foo', that
472
# we should return the current bar and the current parent/foo' - at the
473
# moment we do, but we loop around all ids and all trees: I*T checks.
475
# Updating this algorithm to be fast in the common case:
476
# nothing has moved, all files have the same id in parent, child and there
477
# are only two trees (or one is working tree and the others are parents).
478
# walk the dirstate. as we find each path, gather the paths of that
479
# id in all trees. add a mapping from the id to the path in those trees.
480
# now lookup children by id, again in all trees; for these trees that
481
# nothing has moved in, the id->path mapping will allow us to find the
482
# parent trivially. To answer 'has anything been moved' in one of the
483
# dirstate parent trees though, we will need to stare harder at it.
485
# Now, given a path index, that is trivial for any one tree, and given
486
# that we can ask for additional data from a dirstate tree, its a single
487
# pass, though it will require scanning the entire tree to find paths
488
# that were at the current location.
489
# ideal results?: There are three things: tree, path, id. Pathologically
490
# we can have completely disjoint ids for each tree; but we cannot have
491
# disjoin paths for each tree, except if we scan each tree for the
492
# different ids from other trees.
494
specified_path_ids = _find_ids_across_trees(filenames, trees,
496
return _find_path_id_children_across_trees(specified_path_ids, trees)
499
def _find_ids_across_trees(filenames, trees, require_versioned):
500
"""Find the ids corresponding to specified filenames.
502
All matches in all trees will be used, but subdirectories are not scanned.
504
:param filenames: The filenames to find file_ids for
505
:param trees: The trees to find file_ids within
506
:param require_versioned: if true, all specified filenames must occur in
508
:return: a set of (path, file ids) for the specified filenames
511
interesting_ids = set()
512
for tree_path in filenames:
515
file_id = tree.path2id(tree_path)
516
if file_id is not None:
517
interesting_ids.add(file_id)
520
not_versioned.append(tree_path)
521
if len(not_versioned) > 0 and require_versioned:
522
raise errors.PathsNotVersionedError(not_versioned)
523
return interesting_ids
526
def _find_children_across_trees(specified_ids, trees):
527
"""Return a set including specified ids and their children
529
All matches in all trees will be used.
531
:param trees: The trees to find file_ids within
532
:return: a set containing all specified ids and their children
534
interesting_ids = set(specified_ids)
535
pending = interesting_ids
536
# now handle children of interesting ids
537
# we loop so that we handle all children of each id in both trees
538
while len(pending) > 0:
540
for file_id in pending:
542
if not tree.has_id(file_id):
544
entry = tree.inventory[file_id]
545
for child in getattr(entry, 'children', {}).itervalues():
546
if child.file_id not in interesting_ids:
547
new_pending.add(child.file_id)
548
interesting_ids.update(new_pending)
549
pending = new_pending
550
return interesting_ids
553
class InterTree(InterObject):
554
"""This class represents operations taking place between two Trees.
556
Its instances have methods like 'compare' and contain references to the
557
source and target trees these operations are to be carried out on.
559
clients of bzrlib should not need to use InterTree directly, rather they
560
should use the convenience methods on Tree such as 'Tree.compare()' which
561
will pass through to InterTree as appropriate.
567
def compare(self, want_unchanged=False, specific_files=None,
568
extra_trees=None, require_versioned=False, include_root=False):
569
"""Return the changes from source to target.
571
:return: A TreeDelta.
572
:param specific_files: An optional list of file paths to restrict the
573
comparison to. When mapping filenames to ids, all matches in all
574
trees (including optional extra_trees) are used, and all children of
575
matched directories are included.
576
:param want_unchanged: An optional boolean requesting the inclusion of
577
unchanged entries in the result.
578
:param extra_trees: An optional list of additional trees to use when
579
mapping the contents of specific_files (paths) to file_ids.
580
:param require_versioned: An optional boolean (defaults to False). When
581
supplied and True all the 'specific_files' must be versioned, or
582
a PathsNotVersionedError will be thrown.
584
# NB: show_status depends on being able to pass in non-versioned files
585
# and report them as unknown
586
trees = (self.source,)
587
if extra_trees is not None:
588
trees = trees + tuple(extra_trees)
589
# target is usually the newer tree:
590
specific_file_ids = self.target.paths2ids(specific_files, trees,
591
require_versioned=require_versioned)
592
if specific_files and not specific_file_ids:
593
# All files are unversioned, so just return an empty delta
594
# _compare_trees would think we want a complete delta
595
return delta.TreeDelta()
596
return delta._compare_trees(self.source, self.target, want_unchanged,
597
specific_file_ids, include_root)
599
def _iter_changes(self, include_unchanged=False,
600
specific_file_ids=None, pb=None):
601
"""Generate an iterator of changes between trees.
604
(file_id, path, changed_content, versioned, parent, name, kind,
607
Path is relative to the target tree. changed_content is True if the
608
file's content has changed. This includes changes to its kind, and to
611
versioned, parent, name, kind, executable are tuples of (from, to).
612
If a file is missing in a tree, its kind is None.
614
Iteration is done in parent-to-child order, relative to the target
618
from_entries_by_dir = list(self.source.inventory.iter_entries_by_dir(
619
specific_file_ids=specific_file_ids))
620
from_data = dict((e.file_id, (p, e)) for p, e in from_entries_by_dir)
621
to_entries_by_dir = list(self.target.inventory.iter_entries_by_dir(
622
specific_file_ids=specific_file_ids))
623
num_entries = len(from_entries_by_dir) + len(to_entries_by_dir)
625
for to_path, to_entry in to_entries_by_dir:
626
file_id = to_entry.file_id
627
to_paths[file_id] = to_path
629
changed_content = False
630
from_path, from_entry = from_data.get(file_id, (None, None))
631
from_versioned = (from_entry is not None)
632
if from_entry is not None:
633
from_versioned = True
634
from_name = from_entry.name
635
from_parent = from_entry.parent_id
636
from_kind, from_executable, from_stat = \
637
self.source._comparison_data(from_entry, from_path)
640
from_versioned = False
644
from_executable = None
645
versioned = (from_versioned, True)
646
to_kind, to_executable, to_stat = \
647
self.target._comparison_data(to_entry, to_path)
648
kind = (from_kind, to_kind)
649
if kind[0] != kind[1]:
650
changed_content = True
651
elif from_kind == 'file':
652
from_size = self.source._file_size(from_entry, from_stat)
653
to_size = self.target._file_size(to_entry, to_stat)
654
if from_size != to_size:
655
changed_content = True
656
elif (self.source.get_file_sha1(file_id, from_path, from_stat) !=
657
self.target.get_file_sha1(file_id, to_path, to_stat)):
658
changed_content = True
659
elif from_kind == 'symlink':
660
if (self.source.get_symlink_target(file_id) !=
661
self.target.get_symlink_target(file_id)):
662
changed_content = True
663
elif from_kind == 'tree-reference':
664
if (self.source.get_reference_revision(from_entry, from_path)
665
!= self.target.get_reference_revision(to_entry, to_path)):
666
changed_content = True
667
parent = (from_parent, to_entry.parent_id)
668
name = (from_name, to_entry.name)
669
executable = (from_executable, to_executable)
671
pb.update('comparing files', entry_count, num_entries)
672
if (changed_content is not False or versioned[0] != versioned[1]
673
or parent[0] != parent[1] or name[0] != name[1] or
674
executable[0] != executable[1] or include_unchanged):
675
yield (file_id, to_path, changed_content, versioned, parent,
676
name, kind, executable)
678
def get_to_path(from_entry):
679
if from_entry.parent_id is None:
682
if from_entry.parent_id not in to_paths:
683
get_to_path(self.source.inventory[from_entry.parent_id])
684
to_path = osutils.pathjoin(to_paths[from_entry.parent_id],
686
to_paths[from_entry.file_id] = to_path
689
for path, from_entry in from_entries_by_dir:
690
file_id = from_entry.file_id
691
if file_id in to_paths:
693
to_path = get_to_path(from_entry)
696
pb.update('comparing files', entry_count, num_entries)
697
versioned = (True, False)
698
parent = (from_entry.parent_id, None)
699
name = (from_entry.name, None)
700
from_kind, from_executable, stat_value = \
701
self.source._comparison_data(from_entry, path)
702
kind = (from_kind, None)
703
executable = (from_executable, None)
704
changed_content = True
705
# the parent's path is necessarily known at this point.
706
yield(file_id, to_path, changed_content, versioned, parent,
707
name, kind, executable)
710
# This was deprecated before 0.12, but did not have an official warning
711
@symbol_versioning.deprecated_function(symbol_versioning.zero_twelve)
712
def RevisionTree(*args, **kwargs):
713
"""RevisionTree has moved to bzrlib.revisiontree.RevisionTree()
715
Accessing it as bzrlib.tree.RevisionTree has been deprecated as of
718
from bzrlib.revisiontree import RevisionTree as _RevisionTree
719
return _RevisionTree(*args, **kwargs)