/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/_dirstate_helpers_c.pyx

  • Committer: John Arbash Meinel
  • Date: 2008-10-14 21:35:27 UTC
  • mto: This revision was merged to the branch mainline in revision 3805.
  • Revision ID: john@arbash-meinel.com-20081014213527-4j9uc93aq1qmn43b
Add in a shortcut when we haven't cached much yet.

Document the current algorithm more completely, including the proper
justification for the various steps.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2007, 2008, 2010 Canonical Ltd
 
1
# Copyright (C) 2007, 2008 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
12
12
#
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
16
 
17
17
"""Helper functions for DirState.
18
18
 
24
24
import errno
25
25
import os
26
26
import stat
27
 
import sys
28
27
 
29
28
from bzrlib import cache_utf8, errors, osutils
30
29
from bzrlib.dirstate import DirState
31
 
from bzrlib.osutils import parent_directories, pathjoin, splitpath
 
30
from bzrlib.osutils import pathjoin, splitpath
32
31
 
33
32
 
34
33
# This is the Windows equivalent of ENOTDIR
54
53
cdef extern from *:
55
54
    ctypedef unsigned long size_t
56
55
 
57
 
cdef extern from "_dirstate_helpers_pyx.h":
 
56
cdef extern from "_dirstate_helpers_c.h":
58
57
    ctypedef int intptr_t
59
58
 
60
59
 
119
118
    # void *memrchr(void *s, int c, size_t len)
120
119
 
121
120
 
122
 
cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
 
121
cdef void* _my_memrchr(void *s, int c, size_t n):
123
122
    # memrchr seems to be a GNU extension, so we have to implement it ourselves
124
123
    cdef char *pos
125
124
    cdef char *start
156
155
        return None
157
156
    return <char*>found - <char*>_s
158
157
 
159
 
 
160
158
cdef object safe_string_from_size(char *s, Py_ssize_t size):
161
159
    if size < 0:
 
160
        # XXX: On 64-bit machines the <int> cast causes a C compiler warning.
162
161
        raise AssertionError(
163
 
            'tried to create a string with an invalid size: %d'
164
 
            % (size))
 
162
            'tried to create a string with an invalid size: %d @0x%x'
 
163
            % (size, <int>s))
165
164
    return PyString_FromStringAndSize(s, size)
166
165
 
167
166
 
168
 
cdef int _is_aligned(void *ptr): # cannot_raise
 
167
cdef int _is_aligned(void *ptr):
169
168
    """Is this pointer aligned to an integer size offset?
170
169
 
171
170
    :return: 1 if this pointer is aligned, 0 otherwise.
173
172
    return ((<intptr_t>ptr) & ((sizeof(int))-1)) == 0
174
173
 
175
174
 
176
 
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2): # cannot_raise
 
175
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2):
177
176
    cdef unsigned char *cur1
178
177
    cdef unsigned char *cur2
179
178
    cdef unsigned char *end1
237
236
    return 0
238
237
 
239
238
 
240
 
def cmp_by_dirs(path1, path2):
 
239
def cmp_by_dirs_c(path1, path2):
241
240
    """Compare two paths directory by directory.
242
241
 
243
242
    This is equivalent to doing::
266
265
                        PyString_Size(path2))
267
266
 
268
267
 
269
 
def _cmp_path_by_dirblock(path1, path2):
 
268
def _cmp_path_by_dirblock_c(path1, path2):
270
269
    """Compare two paths based on what directory they are in.
271
270
 
272
271
    This generates a sort order, such that all children of a directory are
288
287
    if not PyString_CheckExact(path2):
289
288
        raise TypeError("'path2' must be a plain string, not %s: %r"
290
289
                        % (type(path2), path2))
291
 
    return _cmp_path_by_dirblock_intern(PyString_AsString(path1),
292
 
                                        PyString_Size(path1),
293
 
                                        PyString_AsString(path2),
294
 
                                        PyString_Size(path2))
295
 
 
296
 
 
297
 
cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
298
 
                                      char *path2, int path2_len): # cannot_raise
 
290
    return _cmp_path_by_dirblock(PyString_AsString(path1),
 
291
                                 PyString_Size(path1),
 
292
                                 PyString_AsString(path2),
 
293
                                 PyString_Size(path2))
 
294
 
 
295
 
 
296
cdef int _cmp_path_by_dirblock(char *path1, int path1_len,
 
297
                               char *path2, int path2_len):
299
298
    """Compare two paths by what directory they are in.
300
299
 
301
 
    see ``_cmp_path_by_dirblock`` for details.
 
300
    see ``_cmp_path_by_dirblock_c`` for details.
302
301
    """
303
302
    cdef char *dirname1
304
303
    cdef int dirname1_len
368
367
    return 1
369
368
 
370
369
 
371
 
def _bisect_path_left(paths, path):
 
370
def _bisect_path_left_c(paths, path):
372
371
    """Return the index where to insert path into paths.
373
372
 
374
373
    This uses a path-wise comparison so we get::
413
412
        cur = PyList_GetItem_object_void(paths, _mid)
414
413
        cur_cstr = PyString_AS_STRING_void(cur)
415
414
        cur_size = PyString_GET_SIZE_void(cur)
416
 
        if _cmp_path_by_dirblock_intern(cur_cstr, cur_size,
417
 
                                        path_cstr, path_size) < 0:
 
415
        if _cmp_path_by_dirblock(cur_cstr, cur_size, path_cstr, path_size) < 0:
418
416
            _lo = _mid + 1
419
417
        else:
420
418
            _hi = _mid
421
419
    return _lo
422
420
 
423
421
 
424
 
def _bisect_path_right(paths, path):
 
422
def _bisect_path_right_c(paths, path):
425
423
    """Return the index where to insert path into paths.
426
424
 
427
425
    This uses a path-wise comparison so we get::
466
464
        cur = PyList_GetItem_object_void(paths, _mid)
467
465
        cur_cstr = PyString_AS_STRING_void(cur)
468
466
        cur_size = PyString_GET_SIZE_void(cur)
469
 
        if _cmp_path_by_dirblock_intern(path_cstr, path_size,
470
 
                                        cur_cstr, cur_size) < 0:
 
467
        if _cmp_path_by_dirblock(path_cstr, path_size, cur_cstr, cur_size) < 0:
471
468
            _hi = _mid
472
469
        else:
473
470
            _lo = _mid + 1
474
471
    return _lo
475
472
 
476
473
 
477
 
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache=None):
 
474
def bisect_dirblock_c(dirblocks, dirname, lo=0, hi=None, cache=None):
478
475
    """Return the index where to insert dirname into the dirblocks.
479
476
 
480
477
    The return value idx is such that all directory blocks in dirblocks[:idx]
746
743
        self.state._split_root_dirblock_into_contents()
747
744
 
748
745
 
749
 
def _read_dirblocks(state):
 
746
def _read_dirblocks_c(state):
750
747
    """Read in the dirblocks for the given DirState object.
751
748
 
752
749
    This is tightly bound to the DirState internal representation. It should be
768
765
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
769
766
 
770
767
 
771
 
cdef int minikind_from_mode(int mode): # cannot_raise
 
768
cdef int minikind_from_mode(int mode):
772
769
    # in order of frequency:
773
770
    if S_ISREG(mode):
774
771
        return c"f"
843
840
    packed_stat = _pack_stat(stat_value)
844
841
    details = PyList_GetItem_void_void(PyTuple_GetItem_void_void(<void *>entry, 1), 0)
845
842
    saved_minikind = PyString_AsString_obj(<PyObject *>PyTuple_GetItem_void_void(details, 0))[0]
846
 
    if minikind == c'd' and saved_minikind == c't':
847
 
        minikind = c't'
848
843
    saved_link_or_sha1 = PyTuple_GetItem_void_object(details, 1)
849
844
    saved_file_size = PyTuple_GetItem_void_object(details, 2)
850
845
    saved_executable = PyTuple_GetItem_void_object(details, 3)
915
910
    return link_or_sha1
916
911
 
917
912
 
918
 
# TODO: Do we want to worry about exceptions here?
919
 
cdef char _minikind_from_string(object string) except? -1:
 
913
cdef char _minikind_from_string(object string):
920
914
    """Convert a python string to a char."""
921
915
    return PyString_AsString(string)[0]
922
916
 
954
948
    raise KeyError(PyString_FromStringAndSize(_minikind, 1))
955
949
 
956
950
 
957
 
cdef int _versioned_minikind(char minikind): # cannot_raise
 
951
cdef int _versioned_minikind(char minikind):
958
952
    """Return non-zero if minikind is in fltd"""
959
953
    return (minikind == c'f' or
960
954
            minikind == c'd' or
964
958
 
965
959
cdef class ProcessEntryC:
966
960
 
967
 
    cdef int doing_consistency_expansion
968
961
    cdef object old_dirname_to_file_id # dict
969
962
    cdef object new_dirname_to_file_id # dict
 
963
    cdef readonly object uninteresting
970
964
    cdef object last_source_parent
971
965
    cdef object last_target_parent
972
 
    cdef int include_unchanged
973
 
    cdef int partial
 
966
    cdef object include_unchanged
974
967
    cdef object use_filesystem_for_exec
975
968
    cdef object utf8_decode
976
969
    cdef readonly object searched_specific_files
977
 
    cdef readonly object searched_exact_paths
978
970
    cdef object search_specific_files
979
 
    # The parents up to the root of the paths we are searching.
980
 
    # After all normal paths are returned, these specific items are returned.
981
 
    cdef object search_specific_file_parents
982
971
    cdef object state
983
972
    # Current iteration variables:
984
973
    cdef object current_root
996
985
    cdef object current_block_list
997
986
    cdef object current_dir_info
998
987
    cdef object current_dir_list
999
 
    cdef object _pending_consistent_entries # list
1000
988
    cdef int path_index
1001
989
    cdef object root_dir_info
1002
990
    cdef object bisect_left
1003
991
    cdef object pathjoin
1004
992
    cdef object fstat
1005
 
    # A set of the ids we've output when doing partial output.
1006
 
    cdef object seen_ids
1007
993
    cdef object sha_file
1008
994
 
1009
995
    def __init__(self, include_unchanged, use_filesystem_for_exec,
1010
996
        search_specific_files, state, source_index, target_index,
1011
997
        want_unversioned, tree):
1012
 
        self.doing_consistency_expansion = 0
1013
998
        self.old_dirname_to_file_id = {}
1014
999
        self.new_dirname_to_file_id = {}
1015
 
        # Are we doing a partial iter_changes?
1016
 
        self.partial = set(['']).__ne__(search_specific_files)
 
1000
        # Just a sentry, so that _process_entry can say that this
 
1001
        # record is handled, but isn't interesting to process (unchanged)
 
1002
        self.uninteresting = object()
1017
1003
        # Using a list so that we can access the values and change them in
1018
1004
        # nested scope. Each one is [path, file_id, entry]
1019
1005
        self.last_source_parent = [None, None]
1020
1006
        self.last_target_parent = [None, None]
1021
 
        if include_unchanged is None:
1022
 
            self.include_unchanged = False
1023
 
        else:
1024
 
            self.include_unchanged = int(include_unchanged)
 
1007
        self.include_unchanged = include_unchanged
1025
1008
        self.use_filesystem_for_exec = use_filesystem_for_exec
1026
1009
        self.utf8_decode = cache_utf8._utf8_decode
1027
1010
        # for all search_indexs in each path at or under each element of
1028
 
        # search_specific_files, if the detail is relocated: add the id, and
1029
 
        # add the relocated path as one to search if it's not searched already.
1030
 
        # If the detail is not relocated, add the id.
 
1011
        # search_specific_files, if the detail is relocated: add the id, and add the
 
1012
        # relocated path as one to search if it's not searched already. If the
 
1013
        # detail is not relocated, add the id.
1031
1014
        self.searched_specific_files = set()
1032
 
        # When we search exact paths without expanding downwards, we record
1033
 
        # that here.
1034
 
        self.searched_exact_paths = set()
1035
1015
        self.search_specific_files = search_specific_files
1036
 
        # The parents up to the root of the paths we are searching.
1037
 
        # After all normal paths are returned, these specific items are returned.
1038
 
        self.search_specific_file_parents = set()
1039
 
        # The ids we've sent out in the delta.
1040
 
        self.seen_ids = set()
1041
1016
        self.state = state
1042
1017
        self.current_root = None
1043
1018
        self.current_root_unicode = None
1059
1034
        self.current_block_pos = -1
1060
1035
        self.current_dir_info = None
1061
1036
        self.current_dir_list = None
1062
 
        self._pending_consistent_entries = []
1063
1037
        self.path_index = 0
1064
1038
        self.root_dir_info = None
1065
1039
        self.bisect_left = bisect.bisect_left
1066
1040
        self.pathjoin = osutils.pathjoin
1067
1041
        self.fstat = os.fstat
1068
1042
        self.sha_file = osutils.sha_file
1069
 
        if target_index != 0:
1070
 
            # A lot of code in here depends on target_index == 0
1071
 
            raise errors.BzrError('unsupported target index')
1072
1043
 
1073
1044
    cdef _process_entry(self, entry, path_info):
1074
1045
        """Compare an entry and real disk to generate delta information.
1075
1046
 
1076
1047
        :param path_info: top_relpath, basename, kind, lstat, abspath for
1077
 
            the path of entry. If None, then the path is considered absent in 
1078
 
            the target (Perhaps we should pass in a concrete entry for this ?)
 
1048
            the path of entry. If None, then the path is considered absent.
 
1049
            (Perhaps we should pass in a concrete entry for this ?)
1079
1050
            Basename is returned as a utf8 string because we expect this
1080
1051
            tuple will be ignored, and don't want to take the time to
1081
1052
            decode.
1082
 
        :return: (iter_changes_result, changed). If the entry has not been
1083
 
            handled then changed is None. Otherwise it is False if no content
1084
 
            or metadata changes have occurred, and True if any content or
1085
 
            metadata change has occurred. If self.include_unchanged is True then
1086
 
            if changed is not None, iter_changes_result will always be a result
1087
 
            tuple. Otherwise, iter_changes_result is None unless changed is
1088
 
            True.
 
1053
        :return: None if these don't match
 
1054
                 A tuple of information about the change, or
 
1055
                 the object 'uninteresting' if these match, but are
 
1056
                 basically identical.
1089
1057
        """
1090
1058
        cdef char target_minikind
1091
1059
        cdef char source_minikind
1102
1070
        target_minikind = _minikind_from_string(target_details[0])
1103
1071
        if path_info is not None and _versioned_minikind(target_minikind):
1104
1072
            if self.target_index != 0:
1105
 
                raise AssertionError("Unsupported target index %d" %
1106
 
                                     self.target_index)
 
1073
                raise AssertionError("Unsupported target index %d" % target_index)
1107
1074
            link_or_sha1 = _update_entry(self.state, entry, path_info[4], path_info[3])
1108
1075
            # The entry may have been modified by update_entry
1109
1076
            target_details = details_list[self.target_index]
1127
1094
            else:
1128
1095
                # add the source to the search path to find any children it
1129
1096
                # has.  TODO ? : only add if it is a container ?
1130
 
                if (not self.doing_consistency_expansion and 
1131
 
                    not osutils.is_inside_any(self.searched_specific_files,
1132
 
                                             source_details[1])):
 
1097
                if not osutils.is_inside_any(self.searched_specific_files,
 
1098
                                             source_details[1]):
1133
1099
                    self.search_specific_files.add(source_details[1])
1134
 
                    # expanding from a user requested path, parent expansion
1135
 
                    # for delta consistency happens later.
1136
1100
                # generate the old path; this is needed for stating later
1137
1101
                # as well.
1138
1102
                old_path = source_details[1]
1172
1136
                    if source_minikind != c'f':
1173
1137
                        content_change = 1
1174
1138
                    else:
1175
 
                        # Check the sha. We can't just rely on the size as
1176
 
                        # content filtering may mean different sizes actually
1177
 
                        # map to the same content
1178
 
                        if link_or_sha1 is None:
1179
 
                            # Stat cache miss:
1180
 
                            statvalue, link_or_sha1 = \
1181
 
                                self.state._sha1_provider.stat_and_sha1(
1182
 
                                path_info[4])
1183
 
                            self.state._observed_sha1(entry, link_or_sha1,
1184
 
                                statvalue)
1185
 
                        content_change = (link_or_sha1 != source_details[1])
 
1139
                        # If the size is the same, check the sha:
 
1140
                        if target_details[2] == source_details[2]:
 
1141
                            if link_or_sha1 is None:
 
1142
                                # Stat cache miss:
 
1143
                                file_obj = file(path_info[4], 'rb')
 
1144
                                try:
 
1145
                                    # XXX: TODO: Use lower level file IO rather
 
1146
                                    # than python objects for sha-misses.
 
1147
                                    statvalue = self.fstat(file_obj.fileno())
 
1148
                                    link_or_sha1 = self.sha_file(file_obj)
 
1149
                                finally:
 
1150
                                    file_obj.close()
 
1151
                                self.state._observed_sha1(entry, link_or_sha1,
 
1152
                                    statvalue)
 
1153
                            content_change = (link_or_sha1 != source_details[1])
 
1154
                        else:
 
1155
                            # Size changed, so must be different
 
1156
                            content_change = 1
1186
1157
                    # Target details is updated at update_entry time
1187
1158
                    if self.use_filesystem_for_exec:
1188
1159
                        # We don't need S_ISREG here, because we are sure
1203
1174
                        content_change = 0
1204
1175
                    target_exec = False
1205
1176
                else:
1206
 
                    if path is None:
1207
 
                        path = self.pathjoin(old_dirname, old_basename)
1208
 
                    raise errors.BadFileKindError(path, path_info[2])
 
1177
                    raise Exception, "unknown kind %s" % path_info[2]
1209
1178
            if source_minikind == c'd':
1210
1179
                if path is None:
1211
1180
                    old_path = path = self.pathjoin(old_dirname, old_basename)
1213
1182
                    file_id = entry[0][2]
1214
1183
                self.old_dirname_to_file_id[old_path] = file_id
1215
1184
            # parent id is the entry for the path in the target tree
1216
 
            if old_basename and old_dirname == self.last_source_parent[0]:
1217
 
                # use a cached hit for non-root source entries.
 
1185
            if old_dirname == self.last_source_parent[0]:
1218
1186
                source_parent_id = self.last_source_parent[1]
1219
1187
            else:
1220
1188
                try:
1230
1198
                    self.last_source_parent[0] = old_dirname
1231
1199
                    self.last_source_parent[1] = source_parent_id
1232
1200
            new_dirname = entry[0][0]
1233
 
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
1234
 
                # use a cached hit for non-root target entries.
 
1201
            if new_dirname == self.last_target_parent[0]:
1235
1202
                target_parent_id = self.last_target_parent[1]
1236
1203
            else:
1237
1204
                try:
1254
1221
                    self.last_target_parent[1] = target_parent_id
1255
1222
 
1256
1223
            source_exec = source_details[3]
1257
 
            changed = (content_change
 
1224
            if (self.include_unchanged
 
1225
                or content_change
1258
1226
                or source_parent_id != target_parent_id
1259
1227
                or old_basename != entry[0][1]
1260
1228
                or source_exec != target_exec
1261
 
                )
1262
 
            if not changed and not self.include_unchanged:
1263
 
                return None, False
1264
 
            else:
 
1229
                ):
1265
1230
                if old_path is None:
1266
1231
                    path = self.pathjoin(old_dirname, old_basename)
1267
1232
                    old_path = path
1281
1246
                       (source_parent_id, target_parent_id),
1282
1247
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
1283
1248
                       (source_kind, target_kind),
1284
 
                       (source_exec, target_exec)), changed
 
1249
                       (source_exec, target_exec))
 
1250
            else:
 
1251
                return self.uninteresting
1285
1252
        elif source_minikind == c'a' and _versioned_minikind(target_minikind):
1286
1253
            # looks like a new file
1287
1254
            path = self.pathjoin(entry[0][0], entry[0][1])
1288
1255
            # parent id is the entry for the path in the target tree
1289
1256
            # TODO: these are the same for an entire directory: cache em.
1290
 
            parent_entry = self.state._get_entry(self.target_index,
1291
 
                                                 path_utf8=entry[0][0])
1292
 
            if parent_entry is None:
1293
 
                raise errors.DirstateCorrupt(self.state,
1294
 
                    "We could not find the parent entry in index %d"
1295
 
                    " for the entry: %s"
1296
 
                    % (self.target_index, entry[0]))
1297
 
            parent_id = parent_entry[0][2]
 
1257
            parent_id = self.state._get_entry(self.target_index,
 
1258
                                         path_utf8=entry[0][0])[0][2]
1298
1259
            if parent_id == entry[0][2]:
1299
1260
                parent_id = None
1300
1261
            if path_info is not None:
1314
1275
                       (None, parent_id),
1315
1276
                       (None, self.utf8_decode(entry[0][1])[0]),
1316
1277
                       (None, path_info[2]),
1317
 
                       (None, target_exec)), True
 
1278
                       (None, target_exec))
1318
1279
            else:
1319
1280
                # It's a missing file, report it as such.
1320
1281
                return (entry[0][2],
1324
1285
                       (None, parent_id),
1325
1286
                       (None, self.utf8_decode(entry[0][1])[0]),
1326
1287
                       (None, None),
1327
 
                       (None, False)), True
 
1288
                       (None, False))
1328
1289
        elif _versioned_minikind(source_minikind) and target_minikind == c'a':
1329
1290
            # unversioned, possibly, or possibly not deleted: we don't care.
1330
1291
            # if it's still on disk, *and* there's no other entry at this
1342
1303
                   (parent_id, None),
1343
1304
                   (self.utf8_decode(entry[0][1])[0], None),
1344
1305
                   (_minikind_to_kind(source_minikind), None),
1345
 
                   (source_details[3], None)), True
 
1306
                   (source_details[3], None))
1346
1307
        elif _versioned_minikind(source_minikind) and target_minikind == c'r':
1347
1308
            # a rename; could be a true rename, or a rename inherited from
1348
1309
            # a renamed parent. TODO: handle this efficiently. It's not
1349
1310
            # common case to rename dirs though, so a correct but slow
1350
1311
            # implementation will do.
1351
 
            if (not self.doing_consistency_expansion and 
1352
 
                not osutils.is_inside_any(self.searched_specific_files,
1353
 
                    target_details[1])):
 
1312
            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
1354
1313
                self.search_specific_files.add(target_details[1])
1355
 
                # We don't expand the specific files parents list here as
1356
 
                # the path is absent in target and won't create a delta with
1357
 
                # missing parent.
1358
1314
        elif ((source_minikind == c'r' or source_minikind == c'a') and
1359
1315
              (target_minikind == c'r' or target_minikind == c'a')):
1360
1316
            # neither of the selected trees contain this path,
1366
1322
                "source_minikind=%r, target_minikind=%r"
1367
1323
                % (source_minikind, target_minikind))
1368
1324
            ## import pdb;pdb.set_trace()
1369
 
        return None, None
 
1325
        return None
1370
1326
 
1371
1327
    def __iter__(self):
1372
1328
        return self
1374
1330
    def iter_changes(self):
1375
1331
        return self
1376
1332
 
1377
 
    cdef int _gather_result_for_consistency(self, result) except -1:
1378
 
        """Check a result we will yield to make sure we are consistent later.
1379
 
        
1380
 
        This gathers result's parents into a set to output later.
1381
 
 
1382
 
        :param result: A result tuple.
1383
 
        """
1384
 
        if not self.partial or not result[0]:
1385
 
            return 0
1386
 
        self.seen_ids.add(result[0])
1387
 
        new_path = result[1][1]
1388
 
        if new_path:
1389
 
            # Not the root and not a delete: queue up the parents of the path.
1390
 
            self.search_specific_file_parents.update(
1391
 
                osutils.parent_directories(new_path.encode('utf8')))
1392
 
            # Add the root directory which parent_directories does not
1393
 
            # provide.
1394
 
            self.search_specific_file_parents.add('')
1395
 
        return 0
1396
 
 
1397
 
    cdef int _update_current_block(self) except -1:
 
1333
    cdef void _update_current_block(self):
1398
1334
        if (self.block_index < len(self.state._dirblocks) and
1399
1335
            osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])):
1400
1336
            self.current_block = self.state._dirblocks[self.block_index]
1403
1339
        else:
1404
1340
            self.current_block = None
1405
1341
            self.current_block_list = None
1406
 
        return 0
1407
1342
 
1408
1343
    def __next__(self):
1409
1344
        # Simple thunk to allow tail recursion without pyrex confusion
1461
1396
        cdef char * current_dirname_c, * current_blockname_c
1462
1397
        cdef int advance_entry, advance_path
1463
1398
        cdef int path_handled
 
1399
        uninteresting = self.uninteresting
1464
1400
        searched_specific_files = self.searched_specific_files
1465
1401
        # Are we walking a root?
1466
1402
        while self.root_entries_pos < self.root_entries_len:
1467
1403
            entry = self.root_entries[self.root_entries_pos]
1468
1404
            self.root_entries_pos = self.root_entries_pos + 1
1469
 
            result, changed = self._process_entry(entry, self.root_dir_info)
1470
 
            if changed is not None:
1471
 
                if changed:
1472
 
                    self._gather_result_for_consistency(result)
1473
 
                if changed or self.include_unchanged:
1474
 
                    return result
 
1405
            result = self._process_entry(entry, self.root_dir_info)
 
1406
            if result is not None and result is not self.uninteresting:
 
1407
                return result
1475
1408
        # Have we finished the prior root, or never started one ?
1476
1409
        if self.current_root is None:
1477
1410
            # TODO: the pending list should be lexically sorted?  the
1480
1413
                self.current_root = self.search_specific_files.pop()
1481
1414
            except KeyError:
1482
1415
                raise StopIteration()
 
1416
            self.current_root_unicode = self.current_root.decode('utf8')
1483
1417
            self.searched_specific_files.add(self.current_root)
1484
1418
            # process the entries for this containing directory: the rest will be
1485
1419
            # found by their parents recursively.
1486
1420
            self.root_entries = self.state._entries_for_path(self.current_root)
1487
1421
            self.root_entries_len = len(self.root_entries)
1488
 
            self.current_root_unicode = self.current_root.decode('utf8')
1489
1422
            self.root_abspath = self.tree.abspath(self.current_root_unicode)
1490
1423
            try:
1491
1424
                root_stat = os.lstat(self.root_abspath)
1519
1452
            while self.root_entries_pos < self.root_entries_len:
1520
1453
                entry = self.root_entries[self.root_entries_pos]
1521
1454
                self.root_entries_pos = self.root_entries_pos + 1
1522
 
                result, changed = self._process_entry(entry, self.root_dir_info)
1523
 
                if changed is not None:
 
1455
                result = self._process_entry(entry, self.root_dir_info)
 
1456
                if result is not None:
1524
1457
                    path_handled = -1
1525
 
                    if changed:
1526
 
                        self._gather_result_for_consistency(result)
1527
 
                    if changed or self.include_unchanged:
 
1458
                    if result is not self.uninteresting:
1528
1459
                        return result
1529
1460
            # handle unversioned specified paths:
1530
1461
            if self.want_unversioned and not path_handled and self.root_dir_info:
1542
1473
                      )
1543
1474
            # If we reach here, the outer flow continues, which enters into the
1544
1475
            # per-root setup logic.
1545
 
        if (self.current_dir_info is None and self.current_block is None and not
1546
 
            self.doing_consistency_expansion):
 
1476
        if self.current_dir_info is None and self.current_block is None:
1547
1477
            # setup iteration of this root:
1548
1478
            self.current_dir_list = None
1549
1479
            if self.root_dir_info and self.root_dir_info[2] == 'tree-reference':
1671
1601
                        self.current_block_pos = self.current_block_pos + 1
1672
1602
                        # entry referring to file not present on disk.
1673
1603
                        # advance the entry only, after processing.
1674
 
                        result, changed = self._process_entry(current_entry, None)
1675
 
                        if changed is not None:
1676
 
                            if changed:
1677
 
                                self._gather_result_for_consistency(result)
1678
 
                            if changed or self.include_unchanged:
 
1604
                        result = self._process_entry(current_entry, None)
 
1605
                        if result is not None:
 
1606
                            if result is not self.uninteresting:
1679
1607
                                return result
1680
1608
                    self.block_index = self.block_index + 1
1681
1609
                    self._update_current_block()
1687
1615
            # More supplied paths to process
1688
1616
            self.current_root = None
1689
1617
            return self._iter_next()
1690
 
        # Start expanding more conservatively, adding paths the user may not
1691
 
        # have intended but required for consistent deltas.
1692
 
        self.doing_consistency_expansion = 1
1693
 
        if not self._pending_consistent_entries:
1694
 
            self._pending_consistent_entries = self._next_consistent_entries()
1695
 
        while self._pending_consistent_entries:
1696
 
            result, changed = self._pending_consistent_entries.pop()
1697
 
            if changed is not None:
1698
 
                return result
1699
1618
        raise StopIteration()
1700
1619
 
1701
1620
    cdef object _maybe_tree_ref(self, current_path_info):
1751
1670
                    pass
1752
1671
                elif current_path_info is None:
1753
1672
                    # no path is fine: the per entry code will handle it.
1754
 
                    result, changed = self._process_entry(current_entry,
1755
 
                        current_path_info)
 
1673
                    result = self._process_entry(current_entry, current_path_info)
 
1674
                    if result is not None:
 
1675
                        if result is self.uninteresting:
 
1676
                            result = None
1756
1677
                else:
1757
1678
                    minikind = _minikind_from_string(
1758
1679
                        current_entry[1][self.target_index][0])
1773
1694
                        else:
1774
1695
                            # entry referring to file not present on disk.
1775
1696
                            # advance the entry only, after processing.
1776
 
                            result, changed = self._process_entry(current_entry,
1777
 
                                None)
 
1697
                            result = self._process_entry(current_entry, None)
 
1698
                            if result is not None:
 
1699
                                if result is self.uninteresting:
 
1700
                                    result = None
1778
1701
                            advance_path = 0
1779
1702
                    else:
1780
1703
                        # paths are the same,and the dirstate entry is not
1781
1704
                        # absent or renamed.
1782
 
                        result, changed = self._process_entry(current_entry,
1783
 
                            current_path_info)
1784
 
                        if changed is not None:
 
1705
                        result = self._process_entry(current_entry, current_path_info)
 
1706
                        if result is not None:
1785
1707
                            path_handled = -1
1786
 
                            if not changed and not self.include_unchanged:
1787
 
                                changed = None
 
1708
                            if result is self.uninteresting:
 
1709
                                result = None
1788
1710
                # >- loop control starts here:
1789
1711
                # >- entry
1790
1712
                if advance_entry and current_entry is not None:
1806
1728
                            except UnicodeDecodeError:
1807
1729
                                raise errors.BadFilenameEncoding(
1808
1730
                                    current_path_info[0], osutils._fs_enc)
1809
 
                            if changed is not None:
 
1731
                            if result is not None:
1810
1732
                                raise AssertionError(
1811
1733
                                    "result is not None: %r" % result)
1812
1734
                            result = (None,
1817
1739
                                (None, self.utf8_decode(current_path_info[1])[0]),
1818
1740
                                (None, current_path_info[2]),
1819
1741
                                (None, new_executable))
1820
 
                            changed = True
1821
1742
                        # dont descend into this unversioned path if it is
1822
1743
                        # a dir
1823
1744
                        if current_path_info[2] in ('directory'):
1836
1757
                                current_path_info)
1837
1758
                    else:
1838
1759
                        current_path_info = None
1839
 
                if changed is not None:
 
1760
                if result is not None:
1840
1761
                    # Found a result on this pass, yield it
1841
 
                    if changed:
1842
 
                        self._gather_result_for_consistency(result)
1843
 
                    if changed or self.include_unchanged:
1844
 
                        return result
 
1762
                    return result
1845
1763
            if self.current_block is not None:
1846
1764
                self.block_index = self.block_index + 1
1847
1765
                self._update_current_block()
1853
1771
                    self.current_dir_list = self.current_dir_info[1]
1854
1772
                except StopIteration:
1855
1773
                    self.current_dir_info = None
1856
 
 
1857
 
    cdef object _next_consistent_entries(self):
1858
 
        """Grabs the next specific file parent case to consider.
1859
 
        
1860
 
        :return: A list of the results, each of which is as for _process_entry.
1861
 
        """
1862
 
        results = []
1863
 
        while self.search_specific_file_parents:
1864
 
            # Process the parent directories for the paths we were iterating.
1865
 
            # Even in extremely large trees this should be modest, so currently
1866
 
            # no attempt is made to optimise.
1867
 
            path_utf8 = self.search_specific_file_parents.pop()
1868
 
            if path_utf8 in self.searched_exact_paths:
1869
 
                # We've examined this path.
1870
 
                continue
1871
 
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
1872
 
                # We've examined this path.
1873
 
                continue
1874
 
            path_entries = self.state._entries_for_path(path_utf8)
1875
 
            # We need either one or two entries. If the path in
1876
 
            # self.target_index has moved (so the entry in source_index is in
1877
 
            # 'ar') then we need to also look for the entry for this path in
1878
 
            # self.source_index, to output the appropriate delete-or-rename.
1879
 
            selected_entries = []
1880
 
            found_item = False
1881
 
            for candidate_entry in path_entries:
1882
 
                # Find entries present in target at this path:
1883
 
                if candidate_entry[1][self.target_index][0] not in 'ar':
1884
 
                    found_item = True
1885
 
                    selected_entries.append(candidate_entry)
1886
 
                # Find entries present in source at this path:
1887
 
                elif (self.source_index is not None and
1888
 
                    candidate_entry[1][self.source_index][0] not in 'ar'):
1889
 
                    found_item = True
1890
 
                    if candidate_entry[1][self.target_index][0] == 'a':
1891
 
                        # Deleted, emit it here.
1892
 
                        selected_entries.append(candidate_entry)
1893
 
                    else:
1894
 
                        # renamed, emit it when we process the directory it
1895
 
                        # ended up at.
1896
 
                        self.search_specific_file_parents.add(
1897
 
                            candidate_entry[1][self.target_index][1])
1898
 
            if not found_item:
1899
 
                raise AssertionError(
1900
 
                    "Missing entry for specific path parent %r, %r" % (
1901
 
                    path_utf8, path_entries))
1902
 
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
1903
 
            for entry in selected_entries:
1904
 
                if entry[0][2] in self.seen_ids:
1905
 
                    continue
1906
 
                result, changed = self._process_entry(entry, path_info)
1907
 
                if changed is None:
1908
 
                    raise AssertionError(
1909
 
                        "Got entry<->path mismatch for specific path "
1910
 
                        "%r entry %r path_info %r " % (
1911
 
                        path_utf8, entry, path_info))
1912
 
                # Only include changes - we're outside the users requested
1913
 
                # expansion.
1914
 
                if changed:
1915
 
                    self._gather_result_for_consistency(result)
1916
 
                    if (result[6][0] == 'directory' and
1917
 
                        result[6][1] != 'directory'):
1918
 
                        # This stopped being a directory, the old children have
1919
 
                        # to be included.
1920
 
                        if entry[1][self.source_index][0] == 'r':
1921
 
                            # renamed, take the source path
1922
 
                            entry_path_utf8 = entry[1][self.source_index][1]
1923
 
                        else:
1924
 
                            entry_path_utf8 = path_utf8
1925
 
                        initial_key = (entry_path_utf8, '', '')
1926
 
                        block_index, _ = self.state._find_block_index_from_key(
1927
 
                            initial_key)
1928
 
                        if block_index == 0:
1929
 
                            # The children of the root are in block index 1.
1930
 
                            block_index = block_index + 1
1931
 
                        current_block = None
1932
 
                        if block_index < len(self.state._dirblocks):
1933
 
                            current_block = self.state._dirblocks[block_index]
1934
 
                            if not osutils.is_inside(
1935
 
                                entry_path_utf8, current_block[0]):
1936
 
                                # No entries for this directory at all.
1937
 
                                current_block = None
1938
 
                        if current_block is not None:
1939
 
                            for entry in current_block[1]:
1940
 
                                if entry[1][self.source_index][0] in 'ar':
1941
 
                                    # Not in the source tree, so doesn't have to be
1942
 
                                    # included.
1943
 
                                    continue
1944
 
                                # Path of the entry itself.
1945
 
                                self.search_specific_file_parents.add(
1946
 
                                    self.pathjoin(*entry[0][:2]))
1947
 
                if changed or self.include_unchanged:
1948
 
                    results.append((result, changed))
1949
 
            self.searched_exact_paths.add(path_utf8)
1950
 
        return results
1951
 
 
1952
 
    cdef object _path_info(self, utf8_path, unicode_path):
1953
 
        """Generate path_info for unicode_path.
1954
 
 
1955
 
        :return: None if unicode_path does not exist, or a path_info tuple.
1956
 
        """
1957
 
        abspath = self.tree.abspath(unicode_path)
1958
 
        try:
1959
 
            stat = os.lstat(abspath)
1960
 
        except OSError, e:
1961
 
            if e.errno == errno.ENOENT:
1962
 
                # the path does not exist.
1963
 
                return None
1964
 
            else:
1965
 
                raise
1966
 
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
1967
 
        dir_info = (utf8_path, utf8_basename,
1968
 
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
1969
 
            abspath)
1970
 
        if dir_info[2] == 'directory':
1971
 
            if self.tree._directory_is_tree_reference(
1972
 
                unicode_path):
1973
 
                self.root_dir_info = self.root_dir_info[:2] + \
1974
 
                    ('tree-reference',) + self.root_dir_info[3:]
1975
 
        return dir_info