/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/_dirstate_helpers_c.pyx

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2008-09-26 05:14:51 UTC
  • mfrom: (3737.1.3 trivial_python_compat)
  • Revision ID: pqm@pqm.ubuntu.com-20080926051451-dvc1qg5inn7msjvr
(jam) Some win32 tweaks for the faster iter_changes code.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2007, 2008, 2010 Canonical Ltd
 
1
# Copyright (C) 2007, 2008 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
12
12
#
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
16
 
17
17
"""Helper functions for DirState.
18
18
 
24
24
import errno
25
25
import os
26
26
import stat
27
 
import sys
28
27
 
29
28
from bzrlib import cache_utf8, errors, osutils
30
29
from bzrlib.dirstate import DirState
31
 
from bzrlib.osutils import parent_directories, pathjoin, splitpath
 
30
from bzrlib.osutils import pathjoin, splitpath
32
31
 
33
32
 
34
33
# This is the Windows equivalent of ENOTDIR
54
53
cdef extern from *:
55
54
    ctypedef unsigned long size_t
56
55
 
57
 
cdef extern from "_dirstate_helpers_pyx.h":
 
56
cdef extern from "_dirstate_helpers_c.h":
58
57
    ctypedef int intptr_t
59
58
 
60
59
 
119
118
    # void *memrchr(void *s, int c, size_t len)
120
119
 
121
120
 
122
 
cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
 
121
cdef void* _my_memrchr(void *s, int c, size_t n):
123
122
    # memrchr seems to be a GNU extension, so we have to implement it ourselves
124
123
    cdef char *pos
125
124
    cdef char *start
156
155
        return None
157
156
    return <char*>found - <char*>_s
158
157
 
159
 
 
160
158
cdef object safe_string_from_size(char *s, Py_ssize_t size):
161
159
    if size < 0:
 
160
        # XXX: On 64-bit machines the <int> cast causes a C compiler warning.
162
161
        raise AssertionError(
163
 
            'tried to create a string with an invalid size: %d'
164
 
            % (size))
 
162
            'tried to create a string with an invalid size: %d @0x%x'
 
163
            % (size, <int>s))
165
164
    return PyString_FromStringAndSize(s, size)
166
165
 
167
166
 
168
 
cdef int _is_aligned(void *ptr): # cannot_raise
 
167
cdef int _is_aligned(void *ptr):
169
168
    """Is this pointer aligned to an integer size offset?
170
169
 
171
170
    :return: 1 if this pointer is aligned, 0 otherwise.
173
172
    return ((<intptr_t>ptr) & ((sizeof(int))-1)) == 0
174
173
 
175
174
 
176
 
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2): # cannot_raise
 
175
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2):
177
176
    cdef unsigned char *cur1
178
177
    cdef unsigned char *cur2
179
178
    cdef unsigned char *end1
237
236
    return 0
238
237
 
239
238
 
240
 
def cmp_by_dirs(path1, path2):
 
239
def cmp_by_dirs_c(path1, path2):
241
240
    """Compare two paths directory by directory.
242
241
 
243
242
    This is equivalent to doing::
266
265
                        PyString_Size(path2))
267
266
 
268
267
 
269
 
def _cmp_path_by_dirblock(path1, path2):
 
268
def _cmp_path_by_dirblock_c(path1, path2):
270
269
    """Compare two paths based on what directory they are in.
271
270
 
272
271
    This generates a sort order, such that all children of a directory are
288
287
    if not PyString_CheckExact(path2):
289
288
        raise TypeError("'path2' must be a plain string, not %s: %r"
290
289
                        % (type(path2), path2))
291
 
    return _cmp_path_by_dirblock_intern(PyString_AsString(path1),
292
 
                                        PyString_Size(path1),
293
 
                                        PyString_AsString(path2),
294
 
                                        PyString_Size(path2))
295
 
 
296
 
 
297
 
cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
298
 
                                      char *path2, int path2_len): # cannot_raise
 
290
    return _cmp_path_by_dirblock(PyString_AsString(path1),
 
291
                                 PyString_Size(path1),
 
292
                                 PyString_AsString(path2),
 
293
                                 PyString_Size(path2))
 
294
 
 
295
 
 
296
cdef int _cmp_path_by_dirblock(char *path1, int path1_len,
 
297
                               char *path2, int path2_len):
299
298
    """Compare two paths by what directory they are in.
300
299
 
301
 
    see ``_cmp_path_by_dirblock`` for details.
 
300
    see ``_cmp_path_by_dirblock_c`` for details.
302
301
    """
303
302
    cdef char *dirname1
304
303
    cdef int dirname1_len
368
367
    return 1
369
368
 
370
369
 
371
 
def _bisect_path_left(paths, path):
 
370
def _bisect_path_left_c(paths, path):
372
371
    """Return the index where to insert path into paths.
373
372
 
374
373
    This uses a path-wise comparison so we get::
413
412
        cur = PyList_GetItem_object_void(paths, _mid)
414
413
        cur_cstr = PyString_AS_STRING_void(cur)
415
414
        cur_size = PyString_GET_SIZE_void(cur)
416
 
        if _cmp_path_by_dirblock_intern(cur_cstr, cur_size,
417
 
                                        path_cstr, path_size) < 0:
 
415
        if _cmp_path_by_dirblock(cur_cstr, cur_size, path_cstr, path_size) < 0:
418
416
            _lo = _mid + 1
419
417
        else:
420
418
            _hi = _mid
421
419
    return _lo
422
420
 
423
421
 
424
 
def _bisect_path_right(paths, path):
 
422
def _bisect_path_right_c(paths, path):
425
423
    """Return the index where to insert path into paths.
426
424
 
427
425
    This uses a path-wise comparison so we get::
466
464
        cur = PyList_GetItem_object_void(paths, _mid)
467
465
        cur_cstr = PyString_AS_STRING_void(cur)
468
466
        cur_size = PyString_GET_SIZE_void(cur)
469
 
        if _cmp_path_by_dirblock_intern(path_cstr, path_size,
470
 
                                        cur_cstr, cur_size) < 0:
 
467
        if _cmp_path_by_dirblock(path_cstr, path_size, cur_cstr, cur_size) < 0:
471
468
            _hi = _mid
472
469
        else:
473
470
            _lo = _mid + 1
474
471
    return _lo
475
472
 
476
473
 
477
 
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache=None):
 
474
def bisect_dirblock_c(dirblocks, dirname, lo=0, hi=None, cache=None):
478
475
    """Return the index where to insert dirname into the dirblocks.
479
476
 
480
477
    The return value idx is such that all directories blocks in dirblock[:idx]
746
743
        self.state._split_root_dirblock_into_contents()
747
744
 
748
745
 
749
 
def _read_dirblocks(state):
 
746
def _read_dirblocks_c(state):
750
747
    """Read in the dirblocks for the given DirState object.
751
748
 
752
749
    This is tightly bound to the DirState internal representation. It should be
768
765
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
769
766
 
770
767
 
771
 
cdef int minikind_from_mode(int mode): # cannot_raise
 
768
cdef int minikind_from_mode(int mode):
772
769
    # in order of frequency:
773
770
    if S_ISREG(mode):
774
771
        return c"f"
805
802
def update_entry(self, entry, abspath, stat_value):
806
803
    """Update the entry based on what is actually on disk.
807
804
 
808
 
    This function only calculates the sha if it needs to - if the entry is
809
 
    uncachable, or clearly different to the first parent's entry, no sha
810
 
    is calculated, and None is returned.
811
 
 
812
805
    :param entry: This is the dirblock entry for the file in question.
813
806
    :param abspath: The path on disk for this file.
814
807
    :param stat_value: (optional) if we already have done a stat on the
815
808
        file, re-use it.
816
 
    :return: None, or The sha1 hexdigest of the file (40 bytes) or link
817
 
        target of a symlink.
 
809
    :return: The sha1 hexdigest of the file (40 bytes) or link target of a
 
810
            symlink.
818
811
    """
819
812
    return _update_entry(self, entry, abspath, stat_value)
820
813
 
822
815
cdef _update_entry(self, entry, abspath, stat_value):
823
816
    """Update the entry based on what is actually on disk.
824
817
 
825
 
    This function only calculates the sha if it needs to - if the entry is
826
 
    uncachable, or clearly different to the first parent's entry, no sha
827
 
    is calculated, and None is returned.
828
 
 
829
818
    :param self: The dirstate object this is operating on.
830
819
    :param entry: This is the dirblock entry for the file in question.
831
820
    :param abspath: The path on disk for this file.
832
821
    :param stat_value: The stat value done on the path.
833
 
    :return: None, or The sha1 hexdigest of the file (40 bytes) or link
834
 
        target of a symlink.
 
822
    :return: The sha1 hexdigest of the file (40 bytes) or link target of a
 
823
            symlink.
835
824
    """
836
825
    # TODO - require pyrex 0.9.8, then use a pyd file to define access to the
837
826
    # _st mode of the compiled stat objects.
843
832
    packed_stat = _pack_stat(stat_value)
844
833
    details = PyList_GetItem_void_void(PyTuple_GetItem_void_void(<void *>entry, 1), 0)
845
834
    saved_minikind = PyString_AsString_obj(<PyObject *>PyTuple_GetItem_void_void(details, 0))[0]
846
 
    if minikind == c'd' and saved_minikind == c't':
847
 
        minikind = c't'
848
835
    saved_link_or_sha1 = PyTuple_GetItem_void_object(details, 1)
849
836
    saved_file_size = PyTuple_GetItem_void_object(details, 2)
850
837
    saved_executable = PyTuple_GetItem_void_object(details, 3)
872
859
    # process this entry.
873
860
    link_or_sha1 = None
874
861
    if minikind == c'f':
 
862
        link_or_sha1 = self._sha1_file(abspath)
875
863
        executable = self._is_executable(stat_value.st_mode,
876
864
                                         saved_executable)
877
865
        if self._cutoff_time is None:
878
866
            self._sha_cutoff_time()
879
867
        if (stat_value.st_mtime < self._cutoff_time
880
 
            and stat_value.st_ctime < self._cutoff_time
881
 
            and len(entry[1]) > 1
882
 
            and entry[1][1][0] != 'a'):
883
 
                # Could check for size changes for further optimised
884
 
                # avoidance of sha1's. However the most prominent case of
885
 
                # over-shaing is during initial add, which this catches.
886
 
            link_or_sha1 = self._sha1_file(abspath)
 
868
            and stat_value.st_ctime < self._cutoff_time):
887
869
            entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
888
870
                           executable, packed_stat)
889
871
        else:
915
897
    return link_or_sha1
916
898
 
917
899
 
918
 
# TODO: Do we want to worry about exceptions here?
919
 
cdef char _minikind_from_string(object string) except? -1:
 
900
cdef char _minikind_from_string(object string):
920
901
    """Convert a python string to a char."""
921
902
    return PyString_AsString(string)[0]
922
903
 
954
935
    raise KeyError(PyString_FromStringAndSize(_minikind, 1))
955
936
 
956
937
 
957
 
cdef int _versioned_minikind(char minikind): # cannot_raise
 
938
cdef int _versioned_minikind(char minikind):
958
939
    """Return non-zero if minikind is in fltd"""
959
940
    return (minikind == c'f' or
960
941
            minikind == c'd' or
964
945
 
965
946
cdef class ProcessEntryC:
966
947
 
967
 
    cdef int doing_consistency_expansion
968
948
    cdef object old_dirname_to_file_id # dict
969
949
    cdef object new_dirname_to_file_id # dict
 
950
    cdef readonly object uninteresting
970
951
    cdef object last_source_parent
971
952
    cdef object last_target_parent
972
 
    cdef int include_unchanged
973
 
    cdef int partial
 
953
    cdef object include_unchanged
974
954
    cdef object use_filesystem_for_exec
975
955
    cdef object utf8_decode
976
956
    cdef readonly object searched_specific_files
977
 
    cdef readonly object searched_exact_paths
978
957
    cdef object search_specific_files
979
 
    # The parents up to the root of the paths we are searching.
980
 
    # After all normal paths are returned, these specific items are returned.
981
 
    cdef object search_specific_file_parents
982
958
    cdef object state
983
959
    # Current iteration variables:
984
960
    cdef object current_root
996
972
    cdef object current_block_list
997
973
    cdef object current_dir_info
998
974
    cdef object current_dir_list
999
 
    cdef object _pending_consistent_entries # list
1000
975
    cdef int path_index
1001
976
    cdef object root_dir_info
1002
977
    cdef object bisect_left
1003
978
    cdef object pathjoin
1004
 
    cdef object fstat
1005
 
    # A set of the ids we've output when doing partial output.
1006
 
    cdef object seen_ids
1007
 
    cdef object sha_file
1008
979
 
1009
980
    def __init__(self, include_unchanged, use_filesystem_for_exec,
1010
981
        search_specific_files, state, source_index, target_index,
1011
982
        want_unversioned, tree):
1012
 
        self.doing_consistency_expansion = 0
1013
983
        self.old_dirname_to_file_id = {}
1014
984
        self.new_dirname_to_file_id = {}
1015
 
        # Are we doing a partial iter_changes?
1016
 
        self.partial = set(['']).__ne__(search_specific_files)
 
985
        # Just a sentry, so that _process_entry can say that this
 
986
        # record is handled, but isn't interesting to process (unchanged)
 
987
        self.uninteresting = object()
1017
988
        # Using a list so that we can access the values and change them in
1018
989
        # nested scope. Each one is [path, file_id, entry]
1019
990
        self.last_source_parent = [None, None]
1020
991
        self.last_target_parent = [None, None]
1021
 
        if include_unchanged is None:
1022
 
            self.include_unchanged = False
1023
 
        else:
1024
 
            self.include_unchanged = int(include_unchanged)
 
992
        self.include_unchanged = include_unchanged
1025
993
        self.use_filesystem_for_exec = use_filesystem_for_exec
1026
994
        self.utf8_decode = cache_utf8._utf8_decode
1027
995
        # for all search_indexs in each path at or under each element of
1028
 
        # search_specific_files, if the detail is relocated: add the id, and
1029
 
        # add the relocated path as one to search if it's not searched already.
1030
 
        # If the detail is not relocated, add the id.
 
996
        # search_specific_files, if the detail is relocated: add the id, and add the
 
997
        # relocated path as one to search if it's not searched already. If the
 
998
        # detail is not relocated, add the id.
1031
999
        self.searched_specific_files = set()
1032
 
        # When we search exact paths without expanding downwards, we record
1033
 
        # that here.
1034
 
        self.searched_exact_paths = set()
1035
1000
        self.search_specific_files = search_specific_files
1036
 
        # The parents up to the root of the paths we are searching.
1037
 
        # After all normal paths are returned, these specific items are returned.
1038
 
        self.search_specific_file_parents = set()
1039
 
        # The ids we've sent out in the delta.
1040
 
        self.seen_ids = set()
1041
1001
        self.state = state
1042
1002
        self.current_root = None
1043
1003
        self.current_root_unicode = None
1059
1019
        self.current_block_pos = -1
1060
1020
        self.current_dir_info = None
1061
1021
        self.current_dir_list = None
1062
 
        self._pending_consistent_entries = []
1063
1022
        self.path_index = 0
1064
1023
        self.root_dir_info = None
1065
1024
        self.bisect_left = bisect.bisect_left
1066
1025
        self.pathjoin = osutils.pathjoin
1067
 
        self.fstat = os.fstat
1068
 
        self.sha_file = osutils.sha_file
1069
 
        if target_index != 0:
1070
 
            # A lot of code in here depends on target_index == 0
1071
 
            raise errors.BzrError('unsupported target index')
1072
1026
 
1073
1027
    cdef _process_entry(self, entry, path_info):
1074
1028
        """Compare an entry and real disk to generate delta information.
1075
1029
 
1076
1030
        :param path_info: top_relpath, basename, kind, lstat, abspath for
1077
 
            the path of entry. If None, then the path is considered absent in 
1078
 
            the target (Perhaps we should pass in a concrete entry for this ?)
 
1031
            the path of entry. If None, then the path is considered absent.
 
1032
            (Perhaps we should pass in a concrete entry for this ?)
1079
1033
            Basename is returned as a utf8 string because we expect this
1080
1034
            tuple will be ignored, and don't want to take the time to
1081
1035
            decode.
1082
 
        :return: (iter_changes_result, changed). If the entry has not been
1083
 
            handled then changed is None. Otherwise it is False if no content
1084
 
            or metadata changes have occurred, and True if any content or
1085
 
            metadata change has occurred. If self.include_unchanged is True then
1086
 
            if changed is not None, iter_changes_result will always be a result
1087
 
            tuple. Otherwise, iter_changes_result is None unless changed is
1088
 
            True.
 
1036
        :return: None if these don't match
 
1037
                 A tuple of information about the change, or
 
1038
                 the object 'uninteresting' if these match, but are
 
1039
                 basically identical.
1089
1040
        """
1090
1041
        cdef char target_minikind
1091
1042
        cdef char source_minikind
1102
1053
        target_minikind = _minikind_from_string(target_details[0])
1103
1054
        if path_info is not None and _versioned_minikind(target_minikind):
1104
1055
            if self.target_index != 0:
1105
 
                raise AssertionError("Unsupported target index %d" %
1106
 
                                     self.target_index)
 
1056
                raise AssertionError("Unsupported target index %d" % target_index)
1107
1057
            link_or_sha1 = _update_entry(self.state, entry, path_info[4], path_info[3])
1108
1058
            # The entry may have been modified by update_entry
1109
1059
            target_details = details_list[self.target_index]
1127
1077
            else:
1128
1078
                # add the source to the search path to find any children it
1129
1079
                # has.  TODO ? : only add if it is a container ?
1130
 
                if (not self.doing_consistency_expansion and 
1131
 
                    not osutils.is_inside_any(self.searched_specific_files,
1132
 
                                             source_details[1])):
 
1080
                if not osutils.is_inside_any(self.searched_specific_files,
 
1081
                                             source_details[1]):
1133
1082
                    self.search_specific_files.add(source_details[1])
1134
 
                    # expanding from a user requested path, parent expansion
1135
 
                    # for delta consistency happens later.
1136
1083
                # generate the old path; this is needed for stating later
1137
1084
                # as well.
1138
1085
                old_path = source_details[1]
1172
1119
                    if source_minikind != c'f':
1173
1120
                        content_change = 1
1174
1121
                    else:
1175
 
                        # Check the sha. We can't just rely on the size as
1176
 
                        # content filtering may mean different sizes actually
1177
 
                        # map to the same content
1178
 
                        if link_or_sha1 is None:
1179
 
                            # Stat cache miss:
1180
 
                            statvalue, link_or_sha1 = \
1181
 
                                self.state._sha1_provider.stat_and_sha1(
1182
 
                                path_info[4])
1183
 
                            self.state._observed_sha1(entry, link_or_sha1,
1184
 
                                statvalue)
 
1122
                        # We could check the size, but we already have the
 
1123
                        # sha1 hash.
1185
1124
                        content_change = (link_or_sha1 != source_details[1])
1186
1125
                    # Target details is updated at update_entry time
1187
1126
                    if self.use_filesystem_for_exec:
1203
1142
                        content_change = 0
1204
1143
                    target_exec = False
1205
1144
                else:
1206
 
                    if path is None:
1207
 
                        path = self.pathjoin(old_dirname, old_basename)
1208
 
                    raise errors.BadFileKindError(path, path_info[2])
 
1145
                    raise Exception, "unknown kind %s" % path_info[2]
1209
1146
            if source_minikind == c'd':
1210
1147
                if path is None:
1211
1148
                    old_path = path = self.pathjoin(old_dirname, old_basename)
1213
1150
                    file_id = entry[0][2]
1214
1151
                self.old_dirname_to_file_id[old_path] = file_id
1215
1152
            # parent id is the entry for the path in the target tree
1216
 
            if old_basename and old_dirname == self.last_source_parent[0]:
1217
 
                # use a cached hit for non-root source entries.
 
1153
            if old_dirname == self.last_source_parent[0]:
1218
1154
                source_parent_id = self.last_source_parent[1]
1219
1155
            else:
1220
1156
                try:
1230
1166
                    self.last_source_parent[0] = old_dirname
1231
1167
                    self.last_source_parent[1] = source_parent_id
1232
1168
            new_dirname = entry[0][0]
1233
 
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
1234
 
                # use a cached hit for non-root target entries.
 
1169
            if new_dirname == self.last_target_parent[0]:
1235
1170
                target_parent_id = self.last_target_parent[1]
1236
1171
            else:
1237
1172
                try:
1254
1189
                    self.last_target_parent[1] = target_parent_id
1255
1190
 
1256
1191
            source_exec = source_details[3]
1257
 
            changed = (content_change
 
1192
            if (self.include_unchanged
 
1193
                or content_change
1258
1194
                or source_parent_id != target_parent_id
1259
1195
                or old_basename != entry[0][1]
1260
1196
                or source_exec != target_exec
1261
 
                )
1262
 
            if not changed and not self.include_unchanged:
1263
 
                return None, False
1264
 
            else:
 
1197
                ):
1265
1198
                if old_path is None:
1266
1199
                    path = self.pathjoin(old_dirname, old_basename)
1267
1200
                    old_path = path
1281
1214
                       (source_parent_id, target_parent_id),
1282
1215
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
1283
1216
                       (source_kind, target_kind),
1284
 
                       (source_exec, target_exec)), changed
 
1217
                       (source_exec, target_exec))
 
1218
            else:
 
1219
                return self.uninteresting
1285
1220
        elif source_minikind == c'a' and _versioned_minikind(target_minikind):
1286
1221
            # looks like a new file
1287
1222
            path = self.pathjoin(entry[0][0], entry[0][1])
1288
1223
            # parent id is the entry for the path in the target tree
1289
1224
            # TODO: these are the same for an entire directory: cache em.
1290
 
            parent_entry = self.state._get_entry(self.target_index,
1291
 
                                                 path_utf8=entry[0][0])
1292
 
            if parent_entry is None:
1293
 
                raise errors.DirstateCorrupt(self.state,
1294
 
                    "We could not find the parent entry in index %d"
1295
 
                    " for the entry: %s"
1296
 
                    % (self.target_index, entry[0]))
1297
 
            parent_id = parent_entry[0][2]
 
1225
            parent_id = self.state._get_entry(self.target_index,
 
1226
                                         path_utf8=entry[0][0])[0][2]
1298
1227
            if parent_id == entry[0][2]:
1299
1228
                parent_id = None
1300
1229
            if path_info is not None:
1314
1243
                       (None, parent_id),
1315
1244
                       (None, self.utf8_decode(entry[0][1])[0]),
1316
1245
                       (None, path_info[2]),
1317
 
                       (None, target_exec)), True
 
1246
                       (None, target_exec))
1318
1247
            else:
1319
1248
                # It's a missing file, report it as such.
1320
1249
                return (entry[0][2],
1324
1253
                       (None, parent_id),
1325
1254
                       (None, self.utf8_decode(entry[0][1])[0]),
1326
1255
                       (None, None),
1327
 
                       (None, False)), True
 
1256
                       (None, False))
1328
1257
        elif _versioned_minikind(source_minikind) and target_minikind == c'a':
1329
1258
            # unversioned, possibly, or possibly not deleted: we don't care.
1330
1259
            # if it's still on disk, *and* there's no other entry at this
1342
1271
                   (parent_id, None),
1343
1272
                   (self.utf8_decode(entry[0][1])[0], None),
1344
1273
                   (_minikind_to_kind(source_minikind), None),
1345
 
                   (source_details[3], None)), True
 
1274
                   (source_details[3], None))
1346
1275
        elif _versioned_minikind(source_minikind) and target_minikind == c'r':
1347
1276
            # a rename; could be a true rename, or a rename inherited from
1348
1277
            # a renamed parent. TODO: handle this efficiently. It's not
1349
1278
            # common case to rename dirs though, so a correct but slow
1350
1279
            # implementation will do.
1351
 
            if (not self.doing_consistency_expansion and 
1352
 
                not osutils.is_inside_any(self.searched_specific_files,
1353
 
                    target_details[1])):
 
1280
            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
1354
1281
                self.search_specific_files.add(target_details[1])
1355
 
                # We don't expand the specific files parents list here as
1356
 
                # the path is absent in target and won't create a delta with
1357
 
                # missing parent.
1358
1282
        elif ((source_minikind == c'r' or source_minikind == c'a') and
1359
1283
              (target_minikind == c'r' or target_minikind == c'a')):
1360
1284
            # neither of the selected trees contain this path,
1366
1290
                "source_minikind=%r, target_minikind=%r"
1367
1291
                % (source_minikind, target_minikind))
1368
1292
            ## import pdb;pdb.set_trace()
1369
 
        return None, None
 
1293
        return None
1370
1294
 
1371
1295
    def __iter__(self):
1372
1296
        return self
1374
1298
    def iter_changes(self):
1375
1299
        return self
1376
1300
 
1377
 
    cdef int _gather_result_for_consistency(self, result) except -1:
1378
 
        """Check a result we will yield to make sure we are consistent later.
1379
 
        
1380
 
        This gathers result's parents into a set to output later.
1381
 
 
1382
 
        :param result: A result tuple.
1383
 
        """
1384
 
        if not self.partial or not result[0]:
1385
 
            return 0
1386
 
        self.seen_ids.add(result[0])
1387
 
        new_path = result[1][1]
1388
 
        if new_path:
1389
 
            # Not the root and not a delete: queue up the parents of the path.
1390
 
            self.search_specific_file_parents.update(
1391
 
                osutils.parent_directories(new_path.encode('utf8')))
1392
 
            # Add the root directory which parent_directories does not
1393
 
            # provide.
1394
 
            self.search_specific_file_parents.add('')
1395
 
        return 0
1396
 
 
1397
 
    cdef int _update_current_block(self) except -1:
 
1301
    cdef void _update_current_block(self):
1398
1302
        if (self.block_index < len(self.state._dirblocks) and
1399
1303
            osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])):
1400
1304
            self.current_block = self.state._dirblocks[self.block_index]
1403
1307
        else:
1404
1308
            self.current_block = None
1405
1309
            self.current_block_list = None
1406
 
        return 0
1407
1310
 
1408
1311
    def __next__(self):
1409
1312
        # Simple thunk to allow tail recursion without pyrex confusion
1461
1364
        cdef char * current_dirname_c, * current_blockname_c
1462
1365
        cdef int advance_entry, advance_path
1463
1366
        cdef int path_handled
 
1367
        uninteresting = self.uninteresting
1464
1368
        searched_specific_files = self.searched_specific_files
1465
1369
        # Are we walking a root?
1466
1370
        while self.root_entries_pos < self.root_entries_len:
1467
1371
            entry = self.root_entries[self.root_entries_pos]
1468
1372
            self.root_entries_pos = self.root_entries_pos + 1
1469
 
            result, changed = self._process_entry(entry, self.root_dir_info)
1470
 
            if changed is not None:
1471
 
                if changed:
1472
 
                    self._gather_result_for_consistency(result)
1473
 
                if changed or self.include_unchanged:
1474
 
                    return result
 
1373
            result = self._process_entry(entry, self.root_dir_info)
 
1374
            if result is not None and result is not self.uninteresting:
 
1375
                return result
1475
1376
        # Have we finished the prior root, or never started one ?
1476
1377
        if self.current_root is None:
1477
1378
            # TODO: the pending list should be lexically sorted?  the
1480
1381
                self.current_root = self.search_specific_files.pop()
1481
1382
            except KeyError:
1482
1383
                raise StopIteration()
 
1384
            self.current_root_unicode = self.current_root.decode('utf8')
1483
1385
            self.searched_specific_files.add(self.current_root)
1484
1386
            # process the entries for this containing directory: the rest will be
1485
1387
            # found by their parents recursively.
1486
1388
            self.root_entries = self.state._entries_for_path(self.current_root)
1487
1389
            self.root_entries_len = len(self.root_entries)
1488
 
            self.current_root_unicode = self.current_root.decode('utf8')
1489
1390
            self.root_abspath = self.tree.abspath(self.current_root_unicode)
1490
1391
            try:
1491
1392
                root_stat = os.lstat(self.root_abspath)
1519
1420
            while self.root_entries_pos < self.root_entries_len:
1520
1421
                entry = self.root_entries[self.root_entries_pos]
1521
1422
                self.root_entries_pos = self.root_entries_pos + 1
1522
 
                result, changed = self._process_entry(entry, self.root_dir_info)
1523
 
                if changed is not None:
 
1423
                result = self._process_entry(entry, self.root_dir_info)
 
1424
                if result is not None:
1524
1425
                    path_handled = -1
1525
 
                    if changed:
1526
 
                        self._gather_result_for_consistency(result)
1527
 
                    if changed or self.include_unchanged:
 
1426
                    if result is not self.uninteresting:
1528
1427
                        return result
1529
1428
            # handle unversioned specified paths:
1530
1429
            if self.want_unversioned and not path_handled and self.root_dir_info:
1542
1441
                      )
1543
1442
            # If we reach here, the outer flow continues, which enters into the
1544
1443
            # per-root setup logic.
1545
 
        if (self.current_dir_info is None and self.current_block is None and not
1546
 
            self.doing_consistency_expansion):
 
1444
        if self.current_dir_info is None and self.current_block is None:
1547
1445
            # setup iteration of this root:
1548
1446
            self.current_dir_list = None
1549
1447
            if self.root_dir_info and self.root_dir_info[2] == 'tree-reference':
1671
1569
                        self.current_block_pos = self.current_block_pos + 1
1672
1570
                        # entry referring to file not present on disk.
1673
1571
                        # advance the entry only, after processing.
1674
 
                        result, changed = self._process_entry(current_entry, None)
1675
 
                        if changed is not None:
1676
 
                            if changed:
1677
 
                                self._gather_result_for_consistency(result)
1678
 
                            if changed or self.include_unchanged:
 
1572
                        result = self._process_entry(current_entry, None)
 
1573
                        if result is not None:
 
1574
                            if result is not self.uninteresting:
1679
1575
                                return result
1680
1576
                    self.block_index = self.block_index + 1
1681
1577
                    self._update_current_block()
1687
1583
            # More supplied paths to process
1688
1584
            self.current_root = None
1689
1585
            return self._iter_next()
1690
 
        # Start expanding more conservatively, adding paths the user may not
1691
 
        # have intended but required for consistent deltas.
1692
 
        self.doing_consistency_expansion = 1
1693
 
        if not self._pending_consistent_entries:
1694
 
            self._pending_consistent_entries = self._next_consistent_entries()
1695
 
        while self._pending_consistent_entries:
1696
 
            result, changed = self._pending_consistent_entries.pop()
1697
 
            if changed is not None:
1698
 
                return result
1699
1586
        raise StopIteration()
1700
1587
 
1701
1588
    cdef object _maybe_tree_ref(self, current_path_info):
1751
1638
                    pass
1752
1639
                elif current_path_info is None:
1753
1640
                    # no path is fine: the per entry code will handle it.
1754
 
                    result, changed = self._process_entry(current_entry,
1755
 
                        current_path_info)
 
1641
                    result = self._process_entry(current_entry, current_path_info)
 
1642
                    if result is not None:
 
1643
                        if result is self.uninteresting:
 
1644
                            result = None
1756
1645
                else:
1757
1646
                    minikind = _minikind_from_string(
1758
1647
                        current_entry[1][self.target_index][0])
1773
1662
                        else:
1774
1663
                            # entry referring to file not present on disk.
1775
1664
                            # advance the entry only, after processing.
1776
 
                            result, changed = self._process_entry(current_entry,
1777
 
                                None)
 
1665
                            result = self._process_entry(current_entry, None)
 
1666
                            if result is not None:
 
1667
                                if result is self.uninteresting:
 
1668
                                    result = None
1778
1669
                            advance_path = 0
1779
1670
                    else:
1780
1671
                        # paths are the same, and the dirstate entry is not
1781
1672
                        # absent or renamed.
1782
 
                        result, changed = self._process_entry(current_entry,
1783
 
                            current_path_info)
1784
 
                        if changed is not None:
 
1673
                        result = self._process_entry(current_entry, current_path_info)
 
1674
                        if result is not None:
1785
1675
                            path_handled = -1
1786
 
                            if not changed and not self.include_unchanged:
1787
 
                                changed = None
 
1676
                            if result is self.uninteresting:
 
1677
                                result = None
1788
1678
                # >- loop control starts here:
1789
1679
                # >- entry
1790
1680
                if advance_entry and current_entry is not None:
1806
1696
                            except UnicodeDecodeError:
1807
1697
                                raise errors.BadFilenameEncoding(
1808
1698
                                    current_path_info[0], osutils._fs_enc)
1809
 
                            if changed is not None:
 
1699
                            if result is not None:
1810
1700
                                raise AssertionError(
1811
1701
                                    "result is not None: %r" % result)
1812
1702
                            result = (None,
1817
1707
                                (None, self.utf8_decode(current_path_info[1])[0]),
1818
1708
                                (None, current_path_info[2]),
1819
1709
                                (None, new_executable))
1820
 
                            changed = True
1821
1710
                        # don't descend into this unversioned path if it is
1822
1711
                        # a dir
1823
1712
                        if current_path_info[2] in ('directory'):
1836
1725
                                current_path_info)
1837
1726
                    else:
1838
1727
                        current_path_info = None
1839
 
                if changed is not None:
 
1728
                if result is not None:
1840
1729
                    # Found a result on this pass, yield it
1841
 
                    if changed:
1842
 
                        self._gather_result_for_consistency(result)
1843
 
                    if changed or self.include_unchanged:
1844
 
                        return result
 
1730
                    return result
1845
1731
            if self.current_block is not None:
1846
1732
                self.block_index = self.block_index + 1
1847
1733
                self._update_current_block()
1853
1739
                    self.current_dir_list = self.current_dir_info[1]
1854
1740
                except StopIteration:
1855
1741
                    self.current_dir_info = None
1856
 
 
1857
 
    cdef object _next_consistent_entries(self):
1858
 
        """Grabs the next specific file parent case to consider.
1859
 
        
1860
 
        :return: A list of the results, each of which is as for _process_entry.
1861
 
        """
1862
 
        results = []
1863
 
        while self.search_specific_file_parents:
1864
 
            # Process the parent directories for the paths we were iterating.
1865
 
            # Even in extremely large trees this should be modest, so currently
1866
 
            # no attempt is made to optimise.
1867
 
            path_utf8 = self.search_specific_file_parents.pop()
1868
 
            if path_utf8 in self.searched_exact_paths:
1869
 
                # We've examined this path.
1870
 
                continue
1871
 
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
1872
 
                # We've examined this path.
1873
 
                continue
1874
 
            path_entries = self.state._entries_for_path(path_utf8)
1875
 
            # We need either one or two entries. If the path in
1876
 
            # self.target_index has moved (so the entry in source_index is in
1877
 
            # 'ar') then we need to also look for the entry for this path in
1878
 
            # self.source_index, to output the appropriate delete-or-rename.
1879
 
            selected_entries = []
1880
 
            found_item = False
1881
 
            for candidate_entry in path_entries:
1882
 
                # Find entries present in target at this path:
1883
 
                if candidate_entry[1][self.target_index][0] not in 'ar':
1884
 
                    found_item = True
1885
 
                    selected_entries.append(candidate_entry)
1886
 
                # Find entries present in source at this path:
1887
 
                elif (self.source_index is not None and
1888
 
                    candidate_entry[1][self.source_index][0] not in 'ar'):
1889
 
                    found_item = True
1890
 
                    if candidate_entry[1][self.target_index][0] == 'a':
1891
 
                        # Deleted, emit it here.
1892
 
                        selected_entries.append(candidate_entry)
1893
 
                    else:
1894
 
                        # renamed, emit it when we process the directory it
1895
 
                        # ended up at.
1896
 
                        self.search_specific_file_parents.add(
1897
 
                            candidate_entry[1][self.target_index][1])
1898
 
            if not found_item:
1899
 
                raise AssertionError(
1900
 
                    "Missing entry for specific path parent %r, %r" % (
1901
 
                    path_utf8, path_entries))
1902
 
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
1903
 
            for entry in selected_entries:
1904
 
                if entry[0][2] in self.seen_ids:
1905
 
                    continue
1906
 
                result, changed = self._process_entry(entry, path_info)
1907
 
                if changed is None:
1908
 
                    raise AssertionError(
1909
 
                        "Got entry<->path mismatch for specific path "
1910
 
                        "%r entry %r path_info %r " % (
1911
 
                        path_utf8, entry, path_info))
1912
 
                # Only include changes - we're outside the users requested
1913
 
                # expansion.
1914
 
                if changed:
1915
 
                    self._gather_result_for_consistency(result)
1916
 
                    if (result[6][0] == 'directory' and
1917
 
                        result[6][1] != 'directory'):
1918
 
                        # This stopped being a directory, the old children have
1919
 
                        # to be included.
1920
 
                        if entry[1][self.source_index][0] == 'r':
1921
 
                            # renamed, take the source path
1922
 
                            entry_path_utf8 = entry[1][self.source_index][1]
1923
 
                        else:
1924
 
                            entry_path_utf8 = path_utf8
1925
 
                        initial_key = (entry_path_utf8, '', '')
1926
 
                        block_index, _ = self.state._find_block_index_from_key(
1927
 
                            initial_key)
1928
 
                        if block_index == 0:
1929
 
                            # The children of the root are in block index 1.
1930
 
                            block_index = block_index + 1
1931
 
                        current_block = None
1932
 
                        if block_index < len(self.state._dirblocks):
1933
 
                            current_block = self.state._dirblocks[block_index]
1934
 
                            if not osutils.is_inside(
1935
 
                                entry_path_utf8, current_block[0]):
1936
 
                                # No entries for this directory at all.
1937
 
                                current_block = None
1938
 
                        if current_block is not None:
1939
 
                            for entry in current_block[1]:
1940
 
                                if entry[1][self.source_index][0] in 'ar':
1941
 
                                    # Not in the source tree, so doesn't have to be
1942
 
                                    # included.
1943
 
                                    continue
1944
 
                                # Path of the entry itself.
1945
 
                                self.search_specific_file_parents.add(
1946
 
                                    self.pathjoin(*entry[0][:2]))
1947
 
                if changed or self.include_unchanged:
1948
 
                    results.append((result, changed))
1949
 
            self.searched_exact_paths.add(path_utf8)
1950
 
        return results
1951
 
 
1952
 
    cdef object _path_info(self, utf8_path, unicode_path):
1953
 
        """Generate path_info for unicode_path.
1954
 
 
1955
 
        :return: None if unicode_path does not exist, or a path_info tuple.
1956
 
        """
1957
 
        abspath = self.tree.abspath(unicode_path)
1958
 
        try:
1959
 
            stat = os.lstat(abspath)
1960
 
        except OSError, e:
1961
 
            if e.errno == errno.ENOENT:
1962
 
                # the path does not exist.
1963
 
                return None
1964
 
            else:
1965
 
                raise
1966
 
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
1967
 
        dir_info = (utf8_path, utf8_basename,
1968
 
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
1969
 
            abspath)
1970
 
        if dir_info[2] == 'directory':
1971
 
            if self.tree._directory_is_tree_reference(
1972
 
                unicode_path):
1973
 
                self.root_dir_info = self.root_dir_info[:2] + \
1974
 
                    ('tree-reference',) + self.root_dir_info[3:]
1975
 
        return dir_info