/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/_dirstate_helpers_pyx.pyx

  • Committer: Martin von Gagern
  • Date: 2010-04-20 08:47:38 UTC
  • mfrom: (5167 +trunk)
  • mto: This revision was merged to the branch mainline in revision 5195.
  • Revision ID: martin.vgagern@gmx.net-20100420084738-ygymnqmdllzrhpfn
merge trunk

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2007, 2008 Canonical Ltd
 
1
# Copyright (C) 2007, 2008, 2010 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
12
12
#
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
"""Helper functions for DirState.
18
18
 
24
24
import errno
25
25
import os
26
26
import stat
 
27
import sys
27
28
 
28
29
from bzrlib import cache_utf8, errors, osutils
29
30
from bzrlib.dirstate import DirState
30
 
from bzrlib.osutils import pathjoin, splitpath
 
31
from bzrlib.osutils import parent_directories, pathjoin, splitpath
31
32
 
32
33
 
33
34
# This is the Windows equivalent of ENOTDIR
53
54
cdef extern from *:
54
55
    ctypedef unsigned long size_t
55
56
 
56
 
cdef extern from "_dirstate_helpers_c.h":
 
57
cdef extern from "_dirstate_helpers_pyx.h":
57
58
    ctypedef int intptr_t
58
59
 
59
60
 
118
119
    # void *memrchr(void *s, int c, size_t len)
119
120
 
120
121
 
121
 
cdef void* _my_memrchr(void *s, int c, size_t n):
 
122
cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
122
123
    # memrchr seems to be a GNU extension, so we have to implement it ourselves
123
124
    cdef char *pos
124
125
    cdef char *start
155
156
        return None
156
157
    return <char*>found - <char*>_s
157
158
 
 
159
 
158
160
cdef object safe_string_from_size(char *s, Py_ssize_t size):
159
161
    if size < 0:
160
 
        # XXX: On 64-bit machines the <int> cast causes a C compiler warning.
161
162
        raise AssertionError(
162
 
            'tried to create a string with an invalid size: %d @0x%x'
163
 
            % (size, <int>s))
 
163
            'tried to create a string with an invalid size: %d'
 
164
            % (size))
164
165
    return PyString_FromStringAndSize(s, size)
165
166
 
166
167
 
167
 
cdef int _is_aligned(void *ptr):
 
168
cdef int _is_aligned(void *ptr): # cannot_raise
168
169
    """Is this pointer aligned to an integer size offset?
169
170
 
170
171
    :return: 1 if this pointer is aligned, 0 otherwise.
172
173
    return ((<intptr_t>ptr) & ((sizeof(int))-1)) == 0
173
174
 
174
175
 
175
 
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2):
 
176
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2): # cannot_raise
176
177
    cdef unsigned char *cur1
177
178
    cdef unsigned char *cur2
178
179
    cdef unsigned char *end1
236
237
    return 0
237
238
 
238
239
 
239
 
def cmp_by_dirs_c(path1, path2):
 
240
def cmp_by_dirs(path1, path2):
240
241
    """Compare two paths directory by directory.
241
242
 
242
243
    This is equivalent to doing::
265
266
                        PyString_Size(path2))
266
267
 
267
268
 
268
 
def _cmp_path_by_dirblock_c(path1, path2):
 
269
def _cmp_path_by_dirblock(path1, path2):
269
270
    """Compare two paths based on what directory they are in.
270
271
 
271
272
    This generates a sort order, such that all children of a directory are
287
288
    if not PyString_CheckExact(path2):
288
289
        raise TypeError("'path2' must be a plain string, not %s: %r"
289
290
                        % (type(path2), path2))
290
 
    return _cmp_path_by_dirblock(PyString_AsString(path1),
291
 
                                 PyString_Size(path1),
292
 
                                 PyString_AsString(path2),
293
 
                                 PyString_Size(path2))
294
 
 
295
 
 
296
 
cdef int _cmp_path_by_dirblock(char *path1, int path1_len,
297
 
                               char *path2, int path2_len):
 
291
    return _cmp_path_by_dirblock_intern(PyString_AsString(path1),
 
292
                                        PyString_Size(path1),
 
293
                                        PyString_AsString(path2),
 
294
                                        PyString_Size(path2))
 
295
 
 
296
 
 
297
cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
 
298
                                      char *path2, int path2_len): # cannot_raise
298
299
    """Compare two paths by what directory they are in.
299
300
 
300
 
    see ``_cmp_path_by_dirblock_c`` for details.
 
301
    see ``_cmp_path_by_dirblock`` for details.
301
302
    """
302
303
    cdef char *dirname1
303
304
    cdef int dirname1_len
367
368
    return 1
368
369
 
369
370
 
370
 
def _bisect_path_left_c(paths, path):
 
371
def _bisect_path_left(paths, path):
371
372
    """Return the index where to insert path into paths.
372
373
 
373
374
    This uses a path-wise comparison so we get::
412
413
        cur = PyList_GetItem_object_void(paths, _mid)
413
414
        cur_cstr = PyString_AS_STRING_void(cur)
414
415
        cur_size = PyString_GET_SIZE_void(cur)
415
 
        if _cmp_path_by_dirblock(cur_cstr, cur_size, path_cstr, path_size) < 0:
 
416
        if _cmp_path_by_dirblock_intern(cur_cstr, cur_size,
 
417
                                        path_cstr, path_size) < 0:
416
418
            _lo = _mid + 1
417
419
        else:
418
420
            _hi = _mid
419
421
    return _lo
420
422
 
421
423
 
422
 
def _bisect_path_right_c(paths, path):
 
424
def _bisect_path_right(paths, path):
423
425
    """Return the index where to insert path into paths.
424
426
 
425
427
    This uses a path-wise comparison so we get::
464
466
        cur = PyList_GetItem_object_void(paths, _mid)
465
467
        cur_cstr = PyString_AS_STRING_void(cur)
466
468
        cur_size = PyString_GET_SIZE_void(cur)
467
 
        if _cmp_path_by_dirblock(path_cstr, path_size, cur_cstr, cur_size) < 0:
 
469
        if _cmp_path_by_dirblock_intern(path_cstr, path_size,
 
470
                                        cur_cstr, cur_size) < 0:
468
471
            _hi = _mid
469
472
        else:
470
473
            _lo = _mid + 1
471
474
    return _lo
472
475
 
473
476
 
474
 
def bisect_dirblock_c(dirblocks, dirname, lo=0, hi=None, cache=None):
 
477
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache=None):
475
478
    """Return the index where to insert dirname into the dirblocks.
476
479
 
477
480
    The return value idx is such that all directories blocks in dirblock[:idx]
743
746
        self.state._split_root_dirblock_into_contents()
744
747
 
745
748
 
746
 
def _read_dirblocks_c(state):
 
749
def _read_dirblocks(state):
747
750
    """Read in the dirblocks for the given DirState object.
748
751
 
749
752
    This is tightly bound to the DirState internal representation. It should be
765
768
    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
766
769
 
767
770
 
768
 
cdef int minikind_from_mode(int mode):
 
771
cdef int minikind_from_mode(int mode): # cannot_raise
769
772
    # in order of frequency:
770
773
    if S_ISREG(mode):
771
774
        return c"f"
840
843
    packed_stat = _pack_stat(stat_value)
841
844
    details = PyList_GetItem_void_void(PyTuple_GetItem_void_void(<void *>entry, 1), 0)
842
845
    saved_minikind = PyString_AsString_obj(<PyObject *>PyTuple_GetItem_void_void(details, 0))[0]
 
846
    if minikind == c'd' and saved_minikind == c't':
 
847
        minikind = c't'
843
848
    saved_link_or_sha1 = PyTuple_GetItem_void_object(details, 1)
844
849
    saved_file_size = PyTuple_GetItem_void_object(details, 2)
845
850
    saved_executable = PyTuple_GetItem_void_object(details, 3)
910
915
    return link_or_sha1
911
916
 
912
917
 
913
 
cdef char _minikind_from_string(object string):
 
918
# TODO: Do we want to worry about exceptions here?
 
919
cdef char _minikind_from_string(object string) except? -1:
914
920
    """Convert a python string to a char."""
915
921
    return PyString_AsString(string)[0]
916
922
 
948
954
    raise KeyError(PyString_FromStringAndSize(_minikind, 1))
949
955
 
950
956
 
951
 
cdef int _versioned_minikind(char minikind):
 
957
cdef int _versioned_minikind(char minikind): # cannot_raise
952
958
    """Return non-zero if minikind is in fltd"""
953
959
    return (minikind == c'f' or
954
960
            minikind == c'd' or
958
964
 
959
965
cdef class ProcessEntryC:
960
966
 
 
967
    cdef int doing_consistency_expansion
961
968
    cdef object old_dirname_to_file_id # dict
962
969
    cdef object new_dirname_to_file_id # dict
963
 
    cdef readonly object uninteresting
964
970
    cdef object last_source_parent
965
971
    cdef object last_target_parent
966
 
    cdef object include_unchanged
 
972
    cdef int include_unchanged
 
973
    cdef int partial
967
974
    cdef object use_filesystem_for_exec
968
975
    cdef object utf8_decode
969
976
    cdef readonly object searched_specific_files
 
977
    cdef readonly object searched_exact_paths
970
978
    cdef object search_specific_files
 
979
    # The parents up to the root of the paths we are searching.
 
980
    # After all normal paths are returned, these specific items are returned.
 
981
    cdef object search_specific_file_parents
971
982
    cdef object state
972
983
    # Current iteration variables:
973
984
    cdef object current_root
985
996
    cdef object current_block_list
986
997
    cdef object current_dir_info
987
998
    cdef object current_dir_list
 
999
    cdef object _pending_consistent_entries # list
988
1000
    cdef int path_index
989
1001
    cdef object root_dir_info
990
1002
    cdef object bisect_left
991
1003
    cdef object pathjoin
992
1004
    cdef object fstat
 
1005
    # A set of the ids we've output when doing partial output.
 
1006
    cdef object seen_ids
993
1007
    cdef object sha_file
994
1008
 
995
1009
    def __init__(self, include_unchanged, use_filesystem_for_exec,
996
1010
        search_specific_files, state, source_index, target_index,
997
1011
        want_unversioned, tree):
 
1012
        self.doing_consistency_expansion = 0
998
1013
        self.old_dirname_to_file_id = {}
999
1014
        self.new_dirname_to_file_id = {}
1000
 
        # Just a sentry, so that _process_entry can say that this
1001
 
        # record is handled, but isn't interesting to process (unchanged)
1002
 
        self.uninteresting = object()
 
1015
        # Are we doing a partial iter_changes?
 
1016
        self.partial = set(['']).__ne__(search_specific_files)
1003
1017
        # Using a list so that we can access the values and change them in
1004
1018
        # nested scope. Each one is [path, file_id, entry]
1005
1019
        self.last_source_parent = [None, None]
1006
1020
        self.last_target_parent = [None, None]
1007
 
        self.include_unchanged = include_unchanged
 
1021
        if include_unchanged is None:
 
1022
            self.include_unchanged = False
 
1023
        else:
 
1024
            self.include_unchanged = int(include_unchanged)
1008
1025
        self.use_filesystem_for_exec = use_filesystem_for_exec
1009
1026
        self.utf8_decode = cache_utf8._utf8_decode
1010
1027
        # for all search_indexs in each path at or under each element of
1011
 
        # search_specific_files, if the detail is relocated: add the id, and add the
1012
 
        # relocated path as one to search if its not searched already. If the
1013
 
        # detail is not relocated, add the id.
 
1028
        # search_specific_files, if the detail is relocated: add the id, and
 
1029
        # add the relocated path as one to search if its not searched already.
 
1030
        # If the detail is not relocated, add the id.
1014
1031
        self.searched_specific_files = set()
 
1032
        # When we search exact paths without expanding downwards, we record
 
1033
        # that here.
 
1034
        self.searched_exact_paths = set()
1015
1035
        self.search_specific_files = search_specific_files
 
1036
        # The parents up to the root of the paths we are searching.
 
1037
        # After all normal paths are returned, these specific items are returned.
 
1038
        self.search_specific_file_parents = set()
 
1039
        # The ids we've sent out in the delta.
 
1040
        self.seen_ids = set()
1016
1041
        self.state = state
1017
1042
        self.current_root = None
1018
1043
        self.current_root_unicode = None
1034
1059
        self.current_block_pos = -1
1035
1060
        self.current_dir_info = None
1036
1061
        self.current_dir_list = None
 
1062
        self._pending_consistent_entries = []
1037
1063
        self.path_index = 0
1038
1064
        self.root_dir_info = None
1039
1065
        self.bisect_left = bisect.bisect_left
1040
1066
        self.pathjoin = osutils.pathjoin
1041
1067
        self.fstat = os.fstat
1042
1068
        self.sha_file = osutils.sha_file
 
1069
        if target_index != 0:
 
1070
            # A lot of code in here depends on target_index == 0
 
1071
            raise errors.BzrError('unsupported target index')
1043
1072
 
1044
1073
    cdef _process_entry(self, entry, path_info):
1045
1074
        """Compare an entry and real disk to generate delta information.
1046
1075
 
1047
1076
        :param path_info: top_relpath, basename, kind, lstat, abspath for
1048
 
            the path of entry. If None, then the path is considered absent.
1049
 
            (Perhaps we should pass in a concrete entry for this ?)
 
1077
            the path of entry. If None, then the path is considered absent in 
 
1078
            the target (Perhaps we should pass in a concrete entry for this ?)
1050
1079
            Basename is returned as a utf8 string because we expect this
1051
1080
            tuple will be ignored, and don't want to take the time to
1052
1081
            decode.
1053
 
        :return: None if the these don't match
1054
 
                 A tuple of information about the change, or
1055
 
                 the object 'uninteresting' if these match, but are
1056
 
                 basically identical.
 
1082
        :return: (iter_changes_result, changed). If the entry has not been
 
1083
            handled then changed is None. Otherwise it is False if no content
 
1084
            or metadata changes have occurred, and True if any content or
 
1085
            metadata change has occurred. If self.include_unchanged is True then
 
1086
            if changed is not None, iter_changes_result will always be a result
 
1087
            tuple. Otherwise, iter_changes_result is None unless changed is
 
1088
            True.
1057
1089
        """
1058
1090
        cdef char target_minikind
1059
1091
        cdef char source_minikind
1070
1102
        target_minikind = _minikind_from_string(target_details[0])
1071
1103
        if path_info is not None and _versioned_minikind(target_minikind):
1072
1104
            if self.target_index != 0:
1073
 
                raise AssertionError("Unsupported target index %d" % target_index)
 
1105
                raise AssertionError("Unsupported target index %d" %
 
1106
                                     self.target_index)
1074
1107
            link_or_sha1 = _update_entry(self.state, entry, path_info[4], path_info[3])
1075
1108
            # The entry may have been modified by update_entry
1076
1109
            target_details = details_list[self.target_index]
1094
1127
            else:
1095
1128
                # add the source to the search path to find any children it
1096
1129
                # has.  TODO ? : only add if it is a container ?
1097
 
                if not osutils.is_inside_any(self.searched_specific_files,
1098
 
                                             source_details[1]):
 
1130
                if (not self.doing_consistency_expansion and 
 
1131
                    not osutils.is_inside_any(self.searched_specific_files,
 
1132
                                             source_details[1])):
1099
1133
                    self.search_specific_files.add(source_details[1])
 
1134
                    # expanding from a user requested path, parent expansion
 
1135
                    # for delta consistency happens later.
1100
1136
                # generate the old path; this is needed for stating later
1101
1137
                # as well.
1102
1138
                old_path = source_details[1]
1136
1172
                    if source_minikind != c'f':
1137
1173
                        content_change = 1
1138
1174
                    else:
1139
 
                        # If the size is the same, check the sha:
1140
 
                        if target_details[2] == source_details[2]:
1141
 
                            if link_or_sha1 is None:
1142
 
                                # Stat cache miss:
1143
 
                                file_obj = file(path_info[4], 'rb')
1144
 
                                try:
1145
 
                                    # XXX: TODO: Use lower level file IO rather
1146
 
                                    # than python objects for sha-misses.
1147
 
                                    statvalue = self.fstat(file_obj.fileno())
1148
 
                                    link_or_sha1 = self.sha_file(file_obj)
1149
 
                                finally:
1150
 
                                    file_obj.close()
1151
 
                                self.state._observed_sha1(entry, link_or_sha1,
1152
 
                                    statvalue)
1153
 
                            content_change = (link_or_sha1 != source_details[1])
1154
 
                        else:
1155
 
                            # Size changed, so must be different
1156
 
                            content_change = 1
 
1175
                        # Check the sha. We can't just rely on the size as
 
1176
                        # content filtering may mean different sizes actually
 
1177
                        # map to the same content
 
1178
                        if link_or_sha1 is None:
 
1179
                            # Stat cache miss:
 
1180
                            statvalue, link_or_sha1 = \
 
1181
                                self.state._sha1_provider.stat_and_sha1(
 
1182
                                path_info[4])
 
1183
                            self.state._observed_sha1(entry, link_or_sha1,
 
1184
                                statvalue)
 
1185
                        content_change = (link_or_sha1 != source_details[1])
1157
1186
                    # Target details is updated at update_entry time
1158
1187
                    if self.use_filesystem_for_exec:
1159
1188
                        # We don't need S_ISREG here, because we are sure
1174
1203
                        content_change = 0
1175
1204
                    target_exec = False
1176
1205
                else:
1177
 
                    raise Exception, "unknown kind %s" % path_info[2]
 
1206
                    if path is None:
 
1207
                        path = self.pathjoin(old_dirname, old_basename)
 
1208
                    raise errors.BadFileKindError(path, path_info[2])
1178
1209
            if source_minikind == c'd':
1179
1210
                if path is None:
1180
1211
                    old_path = path = self.pathjoin(old_dirname, old_basename)
1182
1213
                    file_id = entry[0][2]
1183
1214
                self.old_dirname_to_file_id[old_path] = file_id
1184
1215
            # parent id is the entry for the path in the target tree
1185
 
            if old_dirname == self.last_source_parent[0]:
 
1216
            if old_basename and old_dirname == self.last_source_parent[0]:
 
1217
                # use a cached hit for non-root source entries.
1186
1218
                source_parent_id = self.last_source_parent[1]
1187
1219
            else:
1188
1220
                try:
1198
1230
                    self.last_source_parent[0] = old_dirname
1199
1231
                    self.last_source_parent[1] = source_parent_id
1200
1232
            new_dirname = entry[0][0]
1201
 
            if new_dirname == self.last_target_parent[0]:
 
1233
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
 
1234
                # use a cached hit for non-root target entries.
1202
1235
                target_parent_id = self.last_target_parent[1]
1203
1236
            else:
1204
1237
                try:
1221
1254
                    self.last_target_parent[1] = target_parent_id
1222
1255
 
1223
1256
            source_exec = source_details[3]
1224
 
            if (self.include_unchanged
1225
 
                or content_change
 
1257
            changed = (content_change
1226
1258
                or source_parent_id != target_parent_id
1227
1259
                or old_basename != entry[0][1]
1228
1260
                or source_exec != target_exec
1229
 
                ):
 
1261
                )
 
1262
            if not changed and not self.include_unchanged:
 
1263
                return None, False
 
1264
            else:
1230
1265
                if old_path is None:
1231
1266
                    path = self.pathjoin(old_dirname, old_basename)
1232
1267
                    old_path = path
1246
1281
                       (source_parent_id, target_parent_id),
1247
1282
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
1248
1283
                       (source_kind, target_kind),
1249
 
                       (source_exec, target_exec))
1250
 
            else:
1251
 
                return self.uninteresting
 
1284
                       (source_exec, target_exec)), changed
1252
1285
        elif source_minikind == c'a' and _versioned_minikind(target_minikind):
1253
1286
            # looks like a new file
1254
1287
            path = self.pathjoin(entry[0][0], entry[0][1])
1255
1288
            # parent id is the entry for the path in the target tree
1256
1289
            # TODO: these are the same for an entire directory: cache em.
1257
 
            parent_id = self.state._get_entry(self.target_index,
1258
 
                                         path_utf8=entry[0][0])[0][2]
 
1290
            parent_entry = self.state._get_entry(self.target_index,
 
1291
                                                 path_utf8=entry[0][0])
 
1292
            if parent_entry is None:
 
1293
                raise errors.DirstateCorrupt(self.state,
 
1294
                    "We could not find the parent entry in index %d"
 
1295
                    " for the entry: %s"
 
1296
                    % (self.target_index, entry[0]))
 
1297
            parent_id = parent_entry[0][2]
1259
1298
            if parent_id == entry[0][2]:
1260
1299
                parent_id = None
1261
1300
            if path_info is not None:
1275
1314
                       (None, parent_id),
1276
1315
                       (None, self.utf8_decode(entry[0][1])[0]),
1277
1316
                       (None, path_info[2]),
1278
 
                       (None, target_exec))
 
1317
                       (None, target_exec)), True
1279
1318
            else:
1280
1319
                # It's a missing file, report it as such.
1281
1320
                return (entry[0][2],
1285
1324
                       (None, parent_id),
1286
1325
                       (None, self.utf8_decode(entry[0][1])[0]),
1287
1326
                       (None, None),
1288
 
                       (None, False))
 
1327
                       (None, False)), True
1289
1328
        elif _versioned_minikind(source_minikind) and target_minikind == c'a':
1290
1329
            # unversioned, possibly, or possibly not deleted: we don't care.
1291
1330
            # if it's still on disk, *and* there's no other entry at this
1303
1342
                   (parent_id, None),
1304
1343
                   (self.utf8_decode(entry[0][1])[0], None),
1305
1344
                   (_minikind_to_kind(source_minikind), None),
1306
 
                   (source_details[3], None))
 
1345
                   (source_details[3], None)), True
1307
1346
        elif _versioned_minikind(source_minikind) and target_minikind == c'r':
1308
1347
            # a rename; could be a true rename, or a rename inherited from
1309
1348
            # a renamed parent. TODO: handle this efficiently. Its not
1310
1349
            # common case to rename dirs though, so a correct but slow
1311
1350
            # implementation will do.
1312
 
            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
 
1351
            if (not self.doing_consistency_expansion and 
 
1352
                not osutils.is_inside_any(self.searched_specific_files,
 
1353
                    target_details[1])):
1313
1354
                self.search_specific_files.add(target_details[1])
 
1355
                # We don't expand the specific files parents list here as
 
1356
                # the path is absent in target and won't create a delta with
 
1357
                # missing parent.
1314
1358
        elif ((source_minikind == c'r' or source_minikind == c'a') and
1315
1359
              (target_minikind == c'r' or target_minikind == c'a')):
1316
1360
            # neither of the selected trees contain this path,
1322
1366
                "source_minikind=%r, target_minikind=%r"
1323
1367
                % (source_minikind, target_minikind))
1324
1368
            ## import pdb;pdb.set_trace()
1325
 
        return None
 
1369
        return None, None
1326
1370
 
1327
1371
    def __iter__(self):
1328
1372
        return self
1330
1374
    def iter_changes(self):
1331
1375
        return self
1332
1376
 
1333
 
    cdef void _update_current_block(self):
 
1377
    cdef int _gather_result_for_consistency(self, result) except -1:
 
1378
        """Check a result we will yield to make sure we are consistent later.
 
1379
        
 
1380
        This gathers result's parents into a set to output later.
 
1381
 
 
1382
        :param result: A result tuple.
 
1383
        """
 
1384
        if not self.partial or not result[0]:
 
1385
            return 0
 
1386
        self.seen_ids.add(result[0])
 
1387
        new_path = result[1][1]
 
1388
        if new_path:
 
1389
            # Not the root and not a delete: queue up the parents of the path.
 
1390
            self.search_specific_file_parents.update(
 
1391
                osutils.parent_directories(new_path.encode('utf8')))
 
1392
            # Add the root directory which parent_directories does not
 
1393
            # provide.
 
1394
            self.search_specific_file_parents.add('')
 
1395
        return 0
 
1396
 
 
1397
    cdef int _update_current_block(self) except -1:
1334
1398
        if (self.block_index < len(self.state._dirblocks) and
1335
1399
            osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])):
1336
1400
            self.current_block = self.state._dirblocks[self.block_index]
1339
1403
        else:
1340
1404
            self.current_block = None
1341
1405
            self.current_block_list = None
 
1406
        return 0
1342
1407
 
1343
1408
    def __next__(self):
1344
1409
        # Simple thunk to allow tail recursion without pyrex confusion
1396
1461
        cdef char * current_dirname_c, * current_blockname_c
1397
1462
        cdef int advance_entry, advance_path
1398
1463
        cdef int path_handled
1399
 
        uninteresting = self.uninteresting
1400
1464
        searched_specific_files = self.searched_specific_files
1401
1465
        # Are we walking a root?
1402
1466
        while self.root_entries_pos < self.root_entries_len:
1403
1467
            entry = self.root_entries[self.root_entries_pos]
1404
1468
            self.root_entries_pos = self.root_entries_pos + 1
1405
 
            result = self._process_entry(entry, self.root_dir_info)
1406
 
            if result is not None and result is not self.uninteresting:
1407
 
                return result
 
1469
            result, changed = self._process_entry(entry, self.root_dir_info)
 
1470
            if changed is not None:
 
1471
                if changed:
 
1472
                    self._gather_result_for_consistency(result)
 
1473
                if changed or self.include_unchanged:
 
1474
                    return result
1408
1475
        # Have we finished the prior root, or never started one ?
1409
1476
        if self.current_root is None:
1410
1477
            # TODO: the pending list should be lexically sorted?  the
1413
1480
                self.current_root = self.search_specific_files.pop()
1414
1481
            except KeyError:
1415
1482
                raise StopIteration()
1416
 
            self.current_root_unicode = self.current_root.decode('utf8')
1417
1483
            self.searched_specific_files.add(self.current_root)
1418
1484
            # process the entries for this containing directory: the rest will be
1419
1485
            # found by their parents recursively.
1420
1486
            self.root_entries = self.state._entries_for_path(self.current_root)
1421
1487
            self.root_entries_len = len(self.root_entries)
 
1488
            self.current_root_unicode = self.current_root.decode('utf8')
1422
1489
            self.root_abspath = self.tree.abspath(self.current_root_unicode)
1423
1490
            try:
1424
1491
                root_stat = os.lstat(self.root_abspath)
1452
1519
            while self.root_entries_pos < self.root_entries_len:
1453
1520
                entry = self.root_entries[self.root_entries_pos]
1454
1521
                self.root_entries_pos = self.root_entries_pos + 1
1455
 
                result = self._process_entry(entry, self.root_dir_info)
1456
 
                if result is not None:
 
1522
                result, changed = self._process_entry(entry, self.root_dir_info)
 
1523
                if changed is not None:
1457
1524
                    path_handled = -1
1458
 
                    if result is not self.uninteresting:
 
1525
                    if changed:
 
1526
                        self._gather_result_for_consistency(result)
 
1527
                    if changed or self.include_unchanged:
1459
1528
                        return result
1460
1529
            # handle unversioned specified paths:
1461
1530
            if self.want_unversioned and not path_handled and self.root_dir_info:
1473
1542
                      )
1474
1543
            # If we reach here, the outer flow continues, which enters into the
1475
1544
            # per-root setup logic.
1476
 
        if self.current_dir_info is None and self.current_block is None:
 
1545
        if (self.current_dir_info is None and self.current_block is None and not
 
1546
            self.doing_consistency_expansion):
1477
1547
            # setup iteration of this root:
1478
1548
            self.current_dir_list = None
1479
1549
            if self.root_dir_info and self.root_dir_info[2] == 'tree-reference':
1601
1671
                        self.current_block_pos = self.current_block_pos + 1
1602
1672
                        # entry referring to file not present on disk.
1603
1673
                        # advance the entry only, after processing.
1604
 
                        result = self._process_entry(current_entry, None)
1605
 
                        if result is not None:
1606
 
                            if result is not self.uninteresting:
 
1674
                        result, changed = self._process_entry(current_entry, None)
 
1675
                        if changed is not None:
 
1676
                            if changed:
 
1677
                                self._gather_result_for_consistency(result)
 
1678
                            if changed or self.include_unchanged:
1607
1679
                                return result
1608
1680
                    self.block_index = self.block_index + 1
1609
1681
                    self._update_current_block()
1615
1687
            # More supplied paths to process
1616
1688
            self.current_root = None
1617
1689
            return self._iter_next()
 
1690
        # Start expanding more conservatively, adding paths the user may not
 
1691
        # have intended but required for consistent deltas.
 
1692
        self.doing_consistency_expansion = 1
 
1693
        if not self._pending_consistent_entries:
 
1694
            self._pending_consistent_entries = self._next_consistent_entries()
 
1695
        while self._pending_consistent_entries:
 
1696
            result, changed = self._pending_consistent_entries.pop()
 
1697
            if changed is not None:
 
1698
                return result
1618
1699
        raise StopIteration()
1619
1700
 
1620
1701
    cdef object _maybe_tree_ref(self, current_path_info):
1670
1751
                    pass
1671
1752
                elif current_path_info is None:
1672
1753
                    # no path is fine: the per entry code will handle it.
1673
 
                    result = self._process_entry(current_entry, current_path_info)
1674
 
                    if result is not None:
1675
 
                        if result is self.uninteresting:
1676
 
                            result = None
 
1754
                    result, changed = self._process_entry(current_entry,
 
1755
                        current_path_info)
1677
1756
                else:
1678
1757
                    minikind = _minikind_from_string(
1679
1758
                        current_entry[1][self.target_index][0])
1694
1773
                        else:
1695
1774
                            # entry referring to file not present on disk.
1696
1775
                            # advance the entry only, after processing.
1697
 
                            result = self._process_entry(current_entry, None)
1698
 
                            if result is not None:
1699
 
                                if result is self.uninteresting:
1700
 
                                    result = None
 
1776
                            result, changed = self._process_entry(current_entry,
 
1777
                                None)
1701
1778
                            advance_path = 0
1702
1779
                    else:
1703
1780
                        # paths are the same,and the dirstate entry is not
1704
1781
                        # absent or renamed.
1705
 
                        result = self._process_entry(current_entry, current_path_info)
1706
 
                        if result is not None:
 
1782
                        result, changed = self._process_entry(current_entry,
 
1783
                            current_path_info)
 
1784
                        if changed is not None:
1707
1785
                            path_handled = -1
1708
 
                            if result is self.uninteresting:
1709
 
                                result = None
 
1786
                            if not changed and not self.include_unchanged:
 
1787
                                changed = None
1710
1788
                # >- loop control starts here:
1711
1789
                # >- entry
1712
1790
                if advance_entry and current_entry is not None:
1728
1806
                            except UnicodeDecodeError:
1729
1807
                                raise errors.BadFilenameEncoding(
1730
1808
                                    current_path_info[0], osutils._fs_enc)
1731
 
                            if result is not None:
 
1809
                            if changed is not None:
1732
1810
                                raise AssertionError(
1733
1811
                                    "result is not None: %r" % result)
1734
1812
                            result = (None,
1739
1817
                                (None, self.utf8_decode(current_path_info[1])[0]),
1740
1818
                                (None, current_path_info[2]),
1741
1819
                                (None, new_executable))
 
1820
                            changed = True
1742
1821
                        # dont descend into this unversioned path if it is
1743
1822
                        # a dir
1744
1823
                        if current_path_info[2] in ('directory'):
1757
1836
                                current_path_info)
1758
1837
                    else:
1759
1838
                        current_path_info = None
1760
 
                if result is not None:
 
1839
                if changed is not None:
1761
1840
                    # Found a result on this pass, yield it
1762
 
                    return result
 
1841
                    if changed:
 
1842
                        self._gather_result_for_consistency(result)
 
1843
                    if changed or self.include_unchanged:
 
1844
                        return result
1763
1845
            if self.current_block is not None:
1764
1846
                self.block_index = self.block_index + 1
1765
1847
                self._update_current_block()
1771
1853
                    self.current_dir_list = self.current_dir_info[1]
1772
1854
                except StopIteration:
1773
1855
                    self.current_dir_info = None
 
1856
 
 
1857
    cdef object _next_consistent_entries(self):
 
1858
        """Grabs the next specific file parent case to consider.
 
1859
        
 
1860
        :return: A list of the results, each of which is as for _process_entry.
 
1861
        """
 
1862
        results = []
 
1863
        while self.search_specific_file_parents:
 
1864
            # Process the parent directories for the paths we were iterating.
 
1865
            # Even in extremely large trees this should be modest, so currently
 
1866
            # no attempt is made to optimise.
 
1867
            path_utf8 = self.search_specific_file_parents.pop()
 
1868
            if path_utf8 in self.searched_exact_paths:
 
1869
                # We've examined this path.
 
1870
                continue
 
1871
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
 
1872
                # We've examined this path.
 
1873
                continue
 
1874
            path_entries = self.state._entries_for_path(path_utf8)
 
1875
            # We need either one or two entries. If the path in
 
1876
            # self.target_index has moved (so the entry in source_index is in
 
1877
            # 'ar') then we need to also look for the entry for this path in
 
1878
            # self.source_index, to output the appropriate delete-or-rename.
 
1879
            selected_entries = []
 
1880
            found_item = False
 
1881
            for candidate_entry in path_entries:
 
1882
                # Find entries present in target at this path:
 
1883
                if candidate_entry[1][self.target_index][0] not in 'ar':
 
1884
                    found_item = True
 
1885
                    selected_entries.append(candidate_entry)
 
1886
                # Find entries present in source at this path:
 
1887
                elif (self.source_index is not None and
 
1888
                    candidate_entry[1][self.source_index][0] not in 'ar'):
 
1889
                    found_item = True
 
1890
                    if candidate_entry[1][self.target_index][0] == 'a':
 
1891
                        # Deleted, emit it here.
 
1892
                        selected_entries.append(candidate_entry)
 
1893
                    else:
 
1894
                        # renamed, emit it when we process the directory it
 
1895
                        # ended up at.
 
1896
                        self.search_specific_file_parents.add(
 
1897
                            candidate_entry[1][self.target_index][1])
 
1898
            if not found_item:
 
1899
                raise AssertionError(
 
1900
                    "Missing entry for specific path parent %r, %r" % (
 
1901
                    path_utf8, path_entries))
 
1902
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
 
1903
            for entry in selected_entries:
 
1904
                if entry[0][2] in self.seen_ids:
 
1905
                    continue
 
1906
                result, changed = self._process_entry(entry, path_info)
 
1907
                if changed is None:
 
1908
                    raise AssertionError(
 
1909
                        "Got entry<->path mismatch for specific path "
 
1910
                        "%r entry %r path_info %r " % (
 
1911
                        path_utf8, entry, path_info))
 
1912
                # Only include changes - we're outside the users requested
 
1913
                # expansion.
 
1914
                if changed:
 
1915
                    self._gather_result_for_consistency(result)
 
1916
                    if (result[6][0] == 'directory' and
 
1917
                        result[6][1] != 'directory'):
 
1918
                        # This stopped being a directory, the old children have
 
1919
                        # to be included.
 
1920
                        if entry[1][self.source_index][0] == 'r':
 
1921
                            # renamed, take the source path
 
1922
                            entry_path_utf8 = entry[1][self.source_index][1]
 
1923
                        else:
 
1924
                            entry_path_utf8 = path_utf8
 
1925
                        initial_key = (entry_path_utf8, '', '')
 
1926
                        block_index, _ = self.state._find_block_index_from_key(
 
1927
                            initial_key)
 
1928
                        if block_index == 0:
 
1929
                            # The children of the root are in block index 1.
 
1930
                            block_index = block_index + 1
 
1931
                        current_block = None
 
1932
                        if block_index < len(self.state._dirblocks):
 
1933
                            current_block = self.state._dirblocks[block_index]
 
1934
                            if not osutils.is_inside(
 
1935
                                entry_path_utf8, current_block[0]):
 
1936
                                # No entries for this directory at all.
 
1937
                                current_block = None
 
1938
                        if current_block is not None:
 
1939
                            for entry in current_block[1]:
 
1940
                                if entry[1][self.source_index][0] in 'ar':
 
1941
                                    # Not in the source tree, so doesn't have to be
 
1942
                                    # included.
 
1943
                                    continue
 
1944
                                # Path of the entry itself.
 
1945
                                self.search_specific_file_parents.add(
 
1946
                                    self.pathjoin(*entry[0][:2]))
 
1947
                if changed or self.include_unchanged:
 
1948
                    results.append((result, changed))
 
1949
            self.searched_exact_paths.add(path_utf8)
 
1950
        return results
 
1951
 
 
1952
    cdef object _path_info(self, utf8_path, unicode_path):
 
1953
        """Generate path_info for unicode_path.
 
1954
 
 
1955
        :return: None if unicode_path does not exist, or a path_info tuple.
 
1956
        """
 
1957
        abspath = self.tree.abspath(unicode_path)
 
1958
        try:
 
1959
            stat = os.lstat(abspath)
 
1960
        except OSError, e:
 
1961
            if e.errno == errno.ENOENT:
 
1962
                # the path does not exist.
 
1963
                return None
 
1964
            else:
 
1965
                raise
 
1966
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
 
1967
        dir_info = (utf8_path, utf8_basename,
 
1968
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
 
1969
            abspath)
 
1970
        if dir_info[2] == 'directory':
 
1971
            if self.tree._directory_is_tree_reference(
 
1972
                unicode_path):
 
1973
                self.root_dir_info = self.root_dir_info[:2] + \
 
1974
                    ('tree-reference',) + self.root_dir_info[3:]
 
1975
        return dir_info