/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

1st cut merge of bzr.dev r3907

Show diffs side-by-side

added added

removed removed

Lines of Context:
223
223
    )
224
224
 
225
225
 
226
 
def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):
    """Convert stat values into a packed representation.

    The size, mtime, ctime, device, inode and mode of ``st`` are packed as
    six big-endian unsigned 32-bit integers and the result is base64
    encoded.

    Every field except the mode is reduced to its low 32 bits first, so a
    file larger than 4GiB, a wide device or inode number, or a timestamp
    before the epoch cannot overflow the 'L' fields.  Values that already
    fit are packed exactly as before.

    :param st: An object with st_size, st_mtime, st_ctime, st_dev, st_ino
        and st_mode attributes.
    :param _encode: Encoder applied to the packed bytes; bound as a default
        argument so that it is a fast local lookup.
    :param _pack: Packer called with the format string followed by the six
        values; bound as a default argument for the same reason.
    :return: The encoded stat, without the trailing newline.
    """
    # jam 20060614 it isn't really worth removing more entries if we
    # are going to leave it in packed form.
    # With only st_mtime and st_mode filesize is 5.5M and read time is 275ms
    # With all entries, filesize is 5.9M and read time is maybe 280ms
    # well within the noise margin
    #
    # Alternatives that were measured: encoding only st_mtime and st_mode
    # was 0.060s / 1.520s faster by not encoding as much information, and
    # formatting the fields with '%X' was not strictly faster than
    # _encode(_pack())[:-1].

    # base64 encoding always adds a final newline, so strip it off
    return _encode(_pack('>LLLLLL',
        st.st_size & 0xFFFFFFFF,
        int(st.st_mtime) & 0xFFFFFFFF,
        int(st.st_ctime) & 0xFFFFFFFF,
        st.st_dev & 0xFFFFFFFF,
        st.st_ino & 0xFFFFFFFF,
        st.st_mode))[:-1]
 
226
# This is the Windows equivalent of ENOTDIR
# It is defined in pywin32.winerror, but we don't want a strong dependency for
# just an error code.
ERROR_PATH_NOT_FOUND = 3
ERROR_DIRECTORY = 267


# Pick the pack_stat implementation once, at import time.  The public
# struct.Struct class is used to detect (and provide) format pre-compilation,
# rather than private helpers of the struct module.
if not getattr(struct, 'Struct', None):
    # Cannot pre-compile the dirstate pack_stat
    def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):
        """Convert stat values into a packed representation.

        The size, mtime, ctime, device, inode and mode of ``st`` are packed
        as six big-endian unsigned 32-bit integers and base64 encoded.
        Every field except the mode is reduced to its low 32 bits first so
        that large files, wide device numbers or pre-epoch timestamps cannot
        overflow the 'L' fields.

        :param st: An object with st_size, st_mtime, st_ctime, st_dev,
            st_ino and st_mode attributes.
        :param _encode: Encoder applied to the packed bytes.
        :param _pack: Packer called with the format string and six values.
        :return: The encoded stat, without the trailing newline.
        """
        return _encode(_pack('>LLLLLL',
            st.st_size & 0xFFFFFFFF,
            int(st.st_mtime) & 0xFFFFFFFF,
            int(st.st_ctime) & 0xFFFFFFFF,
            st.st_dev & 0xFFFFFFFF,
            st.st_ino & 0xFFFFFFFF,
            st.st_mode))[:-1]
else:
    # compile the struct compiler we need, so as to only do it once
    from struct import Struct
    _compiled_pack = Struct('>LLLLLL').pack
    def pack_stat(st, _encode=binascii.b2a_base64, _pack=_compiled_pack):
        """Convert stat values into a packed representation.

        The size, mtime, ctime, device, inode and mode of ``st`` are packed
        as six big-endian unsigned 32-bit integers and base64 encoded.
        Every field except the mode is reduced to its low 32 bits first so
        that large files, wide device numbers or pre-epoch timestamps cannot
        overflow the 'L' fields.

        :param st: An object with st_size, st_mtime, st_ctime, st_dev,
            st_ino and st_mode attributes.
        :param _encode: Encoder applied to the packed bytes.
        :param _pack: Pre-compiled packer called with the six values only.
        :return: The encoded stat, without the trailing newline.
        """
        # jam 20060614 it isn't really worth removing more entries if we
        # are going to leave it in packed form.
        # With only st_mtime and st_mode filesize is 5.5M and read time is 275ms
        # With all entries, filesize is 5.9M and read time is maybe 280ms
        # well within the noise margin
        #
        # Alternatives that were measured: encoding only st_mtime and
        # st_mode was 0.060s / 1.520s faster by not encoding as much
        # information, and formatting the fields with '%X' was not strictly
        # faster than _encode(_pack())[:-1].

        # base64 encoding always adds a final newline, so strip it off
        return _encode(_pack(
            st.st_size & 0xFFFFFFFF,
            int(st.st_mtime) & 0xFFFFFFFF,
            int(st.st_ctime) & 0xFFFFFFFF,
            st.st_dev & 0xFFFFFFFF,
            st.st_ino & 0xFFFFFFFF,
            st.st_mode))[:-1]
247
264
 
248
265
 
249
266
class DirState(object):
1040
1057
        self._dirblocks[0] = ('', root_block)
1041
1058
        self._dirblocks[1] = ('', contents_of_root_block)
1042
1059
 
 
1060
    def _entries_for_path(self, path):
 
1061
        """Return a list with all the entries that match path for all ids."""
 
1062
        dirname, basename = os.path.split(path)
 
1063
        key = (dirname, basename, '')
 
1064
        block_index, present = self._find_block_index_from_key(key)
 
1065
        if not present:
 
1066
            # the block which should contain path is absent.
 
1067
            return []
 
1068
        result = []
 
1069
        block = self._dirblocks[block_index][1]
 
1070
        entry_index, _ = self._find_entry_index(key, block)
 
1071
        # we may need to look at multiple entries at this path: walk while the specific_files match.
 
1072
        while (entry_index < len(block) and
 
1073
            block[entry_index][0][0:2] == key[0:2]):
 
1074
            result.append(block[entry_index])
 
1075
            entry_index += 1
 
1076
        return result
 
1077
 
1043
1078
    def _entry_to_line(self, entry):
1044
1079
        """Serialize entry to a NULL delimited line ready for _get_output_lines.
1045
1080
 
1479
1514
                    # it is being resurrected here, so blank it out temporarily.
1480
1515
                    self._dirblocks[block_index][1][entry_index][1][1] = null
1481
1516
 
1482
 
    def update_entry(self, entry, abspath, stat_value,
1483
 
                     _stat_to_minikind=_stat_to_minikind,
1484
 
                     _pack_stat=pack_stat):
1485
 
        """Update the entry based on what is actually on disk.
 
1517
    def _observed_sha1(self, entry, sha1, stat_value,
 
1518
        _stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
 
1519
        """Note the sha1 of a file.
1486
1520
 
1487
 
        :param entry: This is the dirblock entry for the file in question.
1488
 
        :param abspath: The path on disk for this file.
1489
 
        :param stat_value: (optional) if we already have done a stat on the
1490
 
            file, re-use it.
1491
 
        :return: The sha1 hexdigest of the file (40 bytes) or link target of a
1492
 
                symlink.
 
1521
        :param entry: The entry the sha1 is for.
 
1522
        :param sha1: The observed sha1.
 
1523
        :param stat_value: The os.lstat for the file.
1493
1524
        """
1494
1525
        try:
1495
1526
            minikind = _stat_to_minikind[stat_value.st_mode & 0170000]
1497
1528
            # Unhandled kind
1498
1529
            return None
1499
1530
        packed_stat = _pack_stat(stat_value)
1500
 
        (saved_minikind, saved_link_or_sha1, saved_file_size,
1501
 
         saved_executable, saved_packed_stat) = entry[1][0]
1502
 
 
1503
 
        if (minikind == saved_minikind
1504
 
            and packed_stat == saved_packed_stat):
1505
 
            # The stat hasn't changed since we saved, so we can re-use the
1506
 
            # saved sha hash.
1507
 
            if minikind == 'd':
1508
 
                return None
1509
 
 
1510
 
            # size should also be in packed_stat
1511
 
            if saved_file_size == stat_value.st_size:
1512
 
                return saved_link_or_sha1
1513
 
 
1514
 
        # If we have gotten this far, that means that we need to actually
1515
 
        # process this entry.
1516
 
        link_or_sha1 = None
1517
1531
        if minikind == 'f':
1518
 
            if self._filter_provider is None:
1519
 
                filter_list = []
1520
 
            else:
1521
 
                relpath = osutils.pathjoin(entry[0][0], entry[0][1])
1522
 
                file_id = entry[0][2]
1523
 
                filter_list = self._filter_provider(relpath, file_id)
1524
 
            link_or_sha1 = self._size_sha1_file(abspath, filter_list)[1]
1525
 
            executable = self._is_executable(stat_value.st_mode,
1526
 
                                             saved_executable)
1527
 
            if self._cutoff_time is None:
1528
 
                self._sha_cutoff_time()
1529
 
            if (stat_value.st_mtime < self._cutoff_time
1530
 
                and stat_value.st_ctime < self._cutoff_time):
1531
 
                entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
1532
 
                               executable, packed_stat)
1533
 
            else:
1534
 
                entry[1][0] = ('f', '', stat_value.st_size,
1535
 
                               executable, DirState.NULLSTAT)
1536
 
        elif minikind == 'd':
1537
 
            link_or_sha1 = None
1538
 
            entry[1][0] = ('d', '', 0, False, packed_stat)
1539
 
            if saved_minikind != 'd':
1540
 
                # This changed from something into a directory. Make sure we
1541
 
                # have a directory block for it. This doesn't happen very
1542
 
                # often, so this doesn't have to be super fast.
1543
 
                block_index, entry_index, dir_present, file_present = \
1544
 
                    self._get_block_entry_index(entry[0][0], entry[0][1], 0)
1545
 
                self._ensure_block(block_index, entry_index,
1546
 
                                   osutils.pathjoin(entry[0][0], entry[0][1]))
1547
 
        elif minikind == 'l':
1548
 
            link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
1549
 
            if self._cutoff_time is None:
1550
 
                self._sha_cutoff_time()
1551
 
            if (stat_value.st_mtime < self._cutoff_time
1552
 
                and stat_value.st_ctime < self._cutoff_time):
1553
 
                entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
1554
 
                               False, packed_stat)
1555
 
            else:
1556
 
                entry[1][0] = ('l', '', stat_value.st_size,
1557
 
                               False, DirState.NULLSTAT)
1558
 
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1559
 
        return link_or_sha1
 
1532
            if self._cutoff_time is None:
 
1533
                self._sha_cutoff_time()
 
1534
            if (stat_value.st_mtime < self._cutoff_time
 
1535
                and stat_value.st_ctime < self._cutoff_time):
 
1536
                entry[1][0] = ('f', sha1, entry[1][0][2], entry[1][0][3],
 
1537
                    packed_stat)
 
1538
                self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1560
1539
 
1561
1540
    def _sha_cutoff_time(self):
1562
1541
        """Return cutoff time.
2791
2770
            raise errors.ObjectNotLocked(self)
2792
2771
 
2793
2772
 
 
2773
def py_update_entry(state, entry, abspath, stat_value,
 
2774
                 _stat_to_minikind=DirState._stat_to_minikind,
 
2775
                 _pack_stat=pack_stat):
 
2776
    """Update the entry based on what is actually on disk.
 
2777
 
 
2778
    This function only calculates the sha if it needs to - if the entry is
 
2779
    uncachable, or clearly different to the first parent's entry, no sha
 
2780
    is calculated, and None is returned.
 
2781
 
 
2782
    :param state: The dirstate this entry is in.
 
2783
    :param entry: This is the dirblock entry for the file in question.
 
2784
    :param abspath: The path on disk for this file.
 
2785
    :param stat_value: The stat value done on the path.
 
2786
    :return: None, or The sha1 hexdigest of the file (40 bytes) or link
 
2787
        target of a symlink.
 
2788
    """
 
2789
    try:
 
2790
        minikind = _stat_to_minikind[stat_value.st_mode & 0170000]
 
2791
    except KeyError:
 
2792
        # Unhandled kind
 
2793
        return None
 
2794
    packed_stat = _pack_stat(stat_value)
 
2795
    (saved_minikind, saved_link_or_sha1, saved_file_size,
 
2796
     saved_executable, saved_packed_stat) = entry[1][0]
 
2797
 
 
2798
    if (minikind == saved_minikind
 
2799
        and packed_stat == saved_packed_stat):
 
2800
        # The stat hasn't changed since we saved, so we can re-use the
 
2801
        # saved sha hash.
 
2802
        if minikind == 'd':
 
2803
            return None
 
2804
 
 
2805
        # size should also be in packed_stat
 
2806
        if saved_file_size == stat_value.st_size:
 
2807
            return saved_link_or_sha1
 
2808
 
 
2809
    # If we have gotten this far, that means that we need to actually
 
2810
    # process this entry.
 
2811
    link_or_sha1 = None
 
2812
    if minikind == 'f':
 
2813
        executable = state._is_executable(stat_value.st_mode,
 
2814
                                         saved_executable)
 
2815
        if state._cutoff_time is None:
 
2816
            state._sha_cutoff_time()
 
2817
        if (stat_value.st_mtime < state._cutoff_time
 
2818
            and stat_value.st_ctime < state._cutoff_time
 
2819
            and len(entry[1]) > 1
 
2820
            and entry[1][1][0] != 'a'):
 
2821
                # Could check for size changes for further optimised
 
2822
                # avoidance of sha1's. However the most prominent case of
 
2823
                # over-shaing is during initial add, which this catches.
 
2824
                # Besides, if content filtering happens, size and sha
 
2825
                # need to be checked together - checking just the size
 
2826
                # would be wrong.
 
2827
            if state._filter_provider is None:
 
2828
                filter_list = []
 
2829
            else:
 
2830
                relpath = osutils.pathjoin(entry[0][0], entry[0][1])
 
2831
                file_id = entry[0][2]
 
2832
                filter_list = state._filter_provider(relpath, file_id)
 
2833
            link_or_sha1 = state._size_sha1_file(abspath, filter_list)[1]
 
2834
            entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
 
2835
                           executable, packed_stat)
 
2836
        else:
 
2837
            entry[1][0] = ('f', '', stat_value.st_size,
 
2838
                           executable, DirState.NULLSTAT)
 
2839
    elif minikind == 'd':
 
2840
        link_or_sha1 = None
 
2841
        entry[1][0] = ('d', '', 0, False, packed_stat)
 
2842
        if saved_minikind != 'd':
 
2843
            # This changed from something into a directory. Make sure we
 
2844
            # have a directory block for it. This doesn't happen very
 
2845
            # often, so this doesn't have to be super fast.
 
2846
            block_index, entry_index, dir_present, file_present = \
 
2847
                state._get_block_entry_index(entry[0][0], entry[0][1], 0)
 
2848
            state._ensure_block(block_index, entry_index,
 
2849
                               osutils.pathjoin(entry[0][0], entry[0][1]))
 
2850
    elif minikind == 'l':
 
2851
        link_or_sha1 = state._read_link(abspath, saved_link_or_sha1)
 
2852
        if state._cutoff_time is None:
 
2853
            state._sha_cutoff_time()
 
2854
        if (stat_value.st_mtime < state._cutoff_time
 
2855
            and stat_value.st_ctime < state._cutoff_time):
 
2856
            entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
 
2857
                           False, packed_stat)
 
2858
        else:
 
2859
            entry[1][0] = ('l', '', stat_value.st_size,
 
2860
                           False, DirState.NULLSTAT)
 
2861
    state._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
2862
    return link_or_sha1
 
2863
update_entry = py_update_entry
 
2864
 
 
2865
 
 
2866
class ProcessEntryPython(object):
 
2867
 
 
2868
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
 
2869
        "last_source_parent", "last_target_parent", "include_unchanged",
 
2870
        "use_filesystem_for_exec", "utf8_decode", "searched_specific_files",
 
2871
        "search_specific_files", "state", "source_index", "target_index",
 
2872
        "want_unversioned", "tree"]
 
2873
 
 
2874
    def __init__(self, include_unchanged, use_filesystem_for_exec,
 
2875
        search_specific_files, state, source_index, target_index,
 
2876
        want_unversioned, tree):
 
2877
        self.old_dirname_to_file_id = {}
 
2878
        self.new_dirname_to_file_id = {}
 
2879
        # Just a sentry, so that _process_entry can say that this
 
2880
        # record is handled, but isn't interesting to process (unchanged)
 
2881
        self.uninteresting = object()
 
2882
        # Using a list so that we can access the values and change them in
 
2883
        # nested scope. Each one is [path, file_id, entry]
 
2884
        self.last_source_parent = [None, None]
 
2885
        self.last_target_parent = [None, None]
 
2886
        self.include_unchanged = include_unchanged
 
2887
        self.use_filesystem_for_exec = use_filesystem_for_exec
 
2888
        self.utf8_decode = cache_utf8._utf8_decode
 
2889
        # for all search_indexs in each path at or under each element of
 
2890
        # search_specific_files, if the detail is relocated: add the id, and add the
 
2891
        # relocated path as one to search if its not searched already. If the
 
2892
        # detail is not relocated, add the id.
 
2893
        self.searched_specific_files = set()
 
2894
        self.search_specific_files = search_specific_files
 
2895
        self.state = state
 
2896
        self.source_index = source_index
 
2897
        self.target_index = target_index
 
2898
        self.want_unversioned = want_unversioned
 
2899
        self.tree = tree
 
2900
 
 
2901
    def _process_entry(self, entry, path_info, pathjoin=osutils.pathjoin):
 
2902
        """Compare an entry and real disk to generate delta information.
 
2903
 
 
2904
        :param path_info: top_relpath, basename, kind, lstat, abspath for
 
2905
            the path of entry. If None, then the path is considered absent.
 
2906
            (Perhaps we should pass in a concrete entry for this ?)
 
2907
            Basename is returned as a utf8 string because we expect this
 
2908
            tuple will be ignored, and don't want to take the time to
 
2909
            decode.
 
2910
        :return: None if these don't match
 
2911
                 A tuple of information about the change, or
 
2912
                 the object 'uninteresting' if these match, but are
 
2913
                 basically identical.
 
2914
        """
 
2915
        if self.source_index is None:
 
2916
            source_details = DirState.NULL_PARENT_DETAILS
 
2917
        else:
 
2918
            source_details = entry[1][self.source_index]
 
2919
        target_details = entry[1][self.target_index]
 
2920
        target_minikind = target_details[0]
 
2921
        if path_info is not None and target_minikind in 'fdlt':
 
2922
            if not (self.target_index == 0):
 
2923
                raise AssertionError()
 
2924
            link_or_sha1 = update_entry(self.state, entry,
 
2925
                abspath=path_info[4], stat_value=path_info[3])
 
2926
            # The entry may have been modified by update_entry
 
2927
            target_details = entry[1][self.target_index]
 
2928
            target_minikind = target_details[0]
 
2929
        else:
 
2930
            link_or_sha1 = None
 
2931
        file_id = entry[0][2]
 
2932
        source_minikind = source_details[0]
 
2933
        if source_minikind in 'fdltr' and target_minikind in 'fdlt':
 
2934
            # claimed content in both: diff
 
2935
            #   r    | fdlt   |      | add source to search, add id path move and perform
 
2936
            #        |        |      | diff check on source-target
 
2937
            #   r    | fdlt   |  a   | dangling file that was present in the basis.
 
2938
            #        |        |      | ???
 
2939
            if source_minikind in 'r':
 
2940
                # add the source to the search path to find any children it
 
2941
                # has.  TODO ? : only add if it is a container ?
 
2942
                if not osutils.is_inside_any(self.searched_specific_files,
 
2943
                                             source_details[1]):
 
2944
                    self.search_specific_files.add(source_details[1])
 
2945
                # generate the old path; this is needed for stating later
 
2946
                # as well.
 
2947
                old_path = source_details[1]
 
2948
                old_dirname, old_basename = os.path.split(old_path)
 
2949
                path = pathjoin(entry[0][0], entry[0][1])
 
2950
                old_entry = self.state._get_entry(self.source_index,
 
2951
                                             path_utf8=old_path)
 
2952
                # update the source details variable to be the real
 
2953
                # location.
 
2954
                if old_entry == (None, None):
 
2955
                    raise errors.CorruptDirstate(self.state._filename,
 
2956
                        "entry '%s/%s' is considered renamed from %r"
 
2957
                        " but source does not exist\n"
 
2958
                        "entry: %s" % (entry[0][0], entry[0][1], old_path, entry))
 
2959
                source_details = old_entry[1][self.source_index]
 
2960
                source_minikind = source_details[0]
 
2961
            else:
 
2962
                old_dirname = entry[0][0]
 
2963
                old_basename = entry[0][1]
 
2964
                old_path = path = None
 
2965
            if path_info is None:
 
2966
                # the file is missing on disk, show as removed.
 
2967
                content_change = True
 
2968
                target_kind = None
 
2969
                target_exec = False
 
2970
            else:
 
2971
                # source and target are both versioned and disk file is present.
 
2972
                target_kind = path_info[2]
 
2973
                if target_kind == 'directory':
 
2974
                    if path is None:
 
2975
                        old_path = path = pathjoin(old_dirname, old_basename)
 
2976
                    self.new_dirname_to_file_id[path] = file_id
 
2977
                    if source_minikind != 'd':
 
2978
                        content_change = True
 
2979
                    else:
 
2980
                        # directories have no fingerprint
 
2981
                        content_change = False
 
2982
                    target_exec = False
 
2983
                elif target_kind == 'file':
 
2984
                    if source_minikind != 'f':
 
2985
                        content_change = True
 
2986
                    else:
 
2987
                        # If the size is the same, check the sha:
 
2988
                        if target_details[2] == source_details[2]:
 
2989
                            if link_or_sha1 is None:
 
2990
                                # Stat cache miss:
 
2991
                                file_obj = file(path_info[4], 'rb')
 
2992
                                try:
 
2993
                                    statvalue = os.fstat(file_obj.fileno())
 
2994
                                    link_or_sha1 = osutils.sha_file(file_obj)
 
2995
                                finally:
 
2996
                                    file_obj.close()
 
2997
                                self.state._observed_sha1(entry, link_or_sha1,
 
2998
                                    statvalue)
 
2999
                            content_change = (link_or_sha1 != source_details[1])
 
3000
                        else:
 
3001
                            # Size changed, so must be different
 
3002
                            content_change = True
 
3003
                    # Target details is updated at update_entry time
 
3004
                    if self.use_filesystem_for_exec:
 
3005
                        # We don't need S_ISREG here, because we are sure
 
3006
                        # we are dealing with a file.
 
3007
                        target_exec = bool(stat.S_IEXEC & path_info[3].st_mode)
 
3008
                    else:
 
3009
                        target_exec = target_details[3]
 
3010
                elif target_kind == 'symlink':
 
3011
                    if source_minikind != 'l':
 
3012
                        content_change = True
 
3013
                    else:
 
3014
                        content_change = (link_or_sha1 != source_details[1])
 
3015
                    target_exec = False
 
3016
                elif target_kind == 'tree-reference':
 
3017
                    if source_minikind != 't':
 
3018
                        content_change = True
 
3019
                    else:
 
3020
                        content_change = False
 
3021
                    target_exec = False
 
3022
                else:
 
3023
                    raise Exception, "unknown kind %s" % path_info[2]
 
3024
            if source_minikind == 'd':
 
3025
                if path is None:
 
3026
                    old_path = path = pathjoin(old_dirname, old_basename)
 
3027
                self.old_dirname_to_file_id[old_path] = file_id
 
3028
            # parent id is the entry for the path in the target tree
 
3029
            if old_dirname == self.last_source_parent[0]:
 
3030
                source_parent_id = self.last_source_parent[1]
 
3031
            else:
 
3032
                try:
 
3033
                    source_parent_id = self.old_dirname_to_file_id[old_dirname]
 
3034
                except KeyError:
 
3035
                    source_parent_entry = self.state._get_entry(self.source_index,
 
3036
                                                           path_utf8=old_dirname)
 
3037
                    source_parent_id = source_parent_entry[0][2]
 
3038
                if source_parent_id == entry[0][2]:
 
3039
                    # This is the root, so the parent is None
 
3040
                    source_parent_id = None
 
3041
                else:
 
3042
                    self.last_source_parent[0] = old_dirname
 
3043
                    self.last_source_parent[1] = source_parent_id
 
3044
            new_dirname = entry[0][0]
 
3045
            if new_dirname == self.last_target_parent[0]:
 
3046
                target_parent_id = self.last_target_parent[1]
 
3047
            else:
 
3048
                try:
 
3049
                    target_parent_id = self.new_dirname_to_file_id[new_dirname]
 
3050
                except KeyError:
 
3051
                    # TODO: We don't always need to do the lookup, because the
 
3052
                    #       parent entry will be the same as the source entry.
 
3053
                    target_parent_entry = self.state._get_entry(self.target_index,
 
3054
                                                           path_utf8=new_dirname)
 
3055
                    if target_parent_entry == (None, None):
 
3056
                        raise AssertionError(
 
3057
                            "Could not find target parent in wt: %s\nparent of: %s"
 
3058
                            % (new_dirname, entry))
 
3059
                    target_parent_id = target_parent_entry[0][2]
 
3060
                if target_parent_id == entry[0][2]:
 
3061
                    # This is the root, so the parent is None
 
3062
                    target_parent_id = None
 
3063
                else:
 
3064
                    self.last_target_parent[0] = new_dirname
 
3065
                    self.last_target_parent[1] = target_parent_id
 
3066
 
 
3067
            source_exec = source_details[3]
 
3068
            if (self.include_unchanged
 
3069
                or content_change
 
3070
                or source_parent_id != target_parent_id
 
3071
                or old_basename != entry[0][1]
 
3072
                or source_exec != target_exec
 
3073
                ):
 
3074
                if old_path is None:
 
3075
                    old_path = path = pathjoin(old_dirname, old_basename)
 
3076
                    old_path_u = self.utf8_decode(old_path)[0]
 
3077
                    path_u = old_path_u
 
3078
                else:
 
3079
                    old_path_u = self.utf8_decode(old_path)[0]
 
3080
                    if old_path == path:
 
3081
                        path_u = old_path_u
 
3082
                    else:
 
3083
                        path_u = self.utf8_decode(path)[0]
 
3084
                source_kind = DirState._minikind_to_kind[source_minikind]
 
3085
                return (entry[0][2],
 
3086
                       (old_path_u, path_u),
 
3087
                       content_change,
 
3088
                       (True, True),
 
3089
                       (source_parent_id, target_parent_id),
 
3090
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
 
3091
                       (source_kind, target_kind),
 
3092
                       (source_exec, target_exec))
 
3093
            else:
 
3094
                return self.uninteresting
 
3095
        elif source_minikind in 'a' and target_minikind in 'fdlt':
 
3096
            # looks like a new file
 
3097
            path = pathjoin(entry[0][0], entry[0][1])
 
3098
            # parent id is the entry for the path in the target tree
 
3099
            # TODO: these are the same for an entire directory: cache em.
 
3100
            parent_id = self.state._get_entry(self.target_index,
 
3101
                                         path_utf8=entry[0][0])[0][2]
 
3102
            if parent_id == entry[0][2]:
 
3103
                parent_id = None
 
3104
            if path_info is not None:
 
3105
                # Present on disk:
 
3106
                if self.use_filesystem_for_exec:
 
3107
                    # We need S_ISREG here, because we aren't sure if this
 
3108
                    # is a file or not.
 
3109
                    target_exec = bool(
 
3110
                        stat.S_ISREG(path_info[3].st_mode)
 
3111
                        and stat.S_IEXEC & path_info[3].st_mode)
 
3112
                else:
 
3113
                    target_exec = target_details[3]
 
3114
                return (entry[0][2],
 
3115
                       (None, self.utf8_decode(path)[0]),
 
3116
                       True,
 
3117
                       (False, True),
 
3118
                       (None, parent_id),
 
3119
                       (None, self.utf8_decode(entry[0][1])[0]),
 
3120
                       (None, path_info[2]),
 
3121
                       (None, target_exec))
 
3122
            else:
 
3123
                # Its a missing file, report it as such.
 
3124
                return (entry[0][2],
 
3125
                       (None, self.utf8_decode(path)[0]),
 
3126
                       False,
 
3127
                       (False, True),
 
3128
                       (None, parent_id),
 
3129
                       (None, self.utf8_decode(entry[0][1])[0]),
 
3130
                       (None, None),
 
3131
                       (None, False))
 
3132
        elif source_minikind in 'fdlt' and target_minikind in 'a':
 
3133
            # unversioned, possibly, or possibly not deleted: we dont care.
 
3134
            # if its still on disk, *and* theres no other entry at this
 
3135
            # path [we dont know this in this routine at the moment -
 
3136
            # perhaps we should change this - then it would be an unknown.
 
3137
            old_path = pathjoin(entry[0][0], entry[0][1])
 
3138
            # parent id is the entry for the path in the target tree
 
3139
            parent_id = self.state._get_entry(self.source_index, path_utf8=entry[0][0])[0][2]
 
3140
            if parent_id == entry[0][2]:
 
3141
                parent_id = None
 
3142
            return (entry[0][2],
 
3143
                   (self.utf8_decode(old_path)[0], None),
 
3144
                   True,
 
3145
                   (True, False),
 
3146
                   (parent_id, None),
 
3147
                   (self.utf8_decode(entry[0][1])[0], None),
 
3148
                   (DirState._minikind_to_kind[source_minikind], None),
 
3149
                   (source_details[3], None))
 
3150
        elif source_minikind in 'fdlt' and target_minikind in 'r':
 
3151
            # a rename; could be a true rename, or a rename inherited from
 
3152
            # a renamed parent. TODO: handle this efficiently. Its not
 
3153
            # common case to rename dirs though, so a correct but slow
 
3154
            # implementation will do.
 
3155
            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
 
3156
                self.search_specific_files.add(target_details[1])
 
3157
        elif source_minikind in 'ra' and target_minikind in 'ra':
 
3158
            # neither of the selected trees contain this file,
 
3159
            # so skip over it. This is not currently directly tested, but
 
3160
            # is indirectly via test_too_much.TestCommands.test_conflicts.
 
3161
            pass
 
3162
        else:
 
3163
            raise AssertionError("don't know how to compare "
 
3164
                "source_minikind=%r, target_minikind=%r"
 
3165
                % (source_minikind, target_minikind))
 
3166
            ## import pdb;pdb.set_trace()
 
3167
        return None
 
3168
 
 
3169
    def __iter__(self):
 
3170
        return self
 
3171
 
 
3172
    def iter_changes(self):
 
3173
        """Iterate over the changes."""
 
3174
        utf8_decode = cache_utf8._utf8_decode
 
3175
        _cmp_by_dirs = cmp_by_dirs
 
3176
        _process_entry = self._process_entry
 
3177
        uninteresting = self.uninteresting
 
3178
        search_specific_files = self.search_specific_files
 
3179
        searched_specific_files = self.searched_specific_files
 
3180
        splitpath = osutils.splitpath
 
3181
        # sketch: 
 
3182
        # compare source_index and target_index at or under each element of search_specific_files.
 
3183
        # follow the following comparison table. Note that we only want to do diff operations when
 
3184
        # the target is fdl because thats when the walkdirs logic will have exposed the pathinfo 
 
3185
        # for the target.
 
3186
        # cases:
 
3187
        # 
 
3188
        # Source | Target | disk | action
 
3189
        #   r    | fdlt   |      | add source to search, add id path move and perform
 
3190
        #        |        |      | diff check on source-target
 
3191
        #   r    | fdlt   |  a   | dangling file that was present in the basis. 
 
3192
        #        |        |      | ???
 
3193
        #   r    |  a     |      | add source to search
 
3194
        #   r    |  a     |  a   | 
 
3195
        #   r    |  r     |      | this path is present in a non-examined tree, skip.
 
3196
        #   r    |  r     |  a   | this path is present in a non-examined tree, skip.
 
3197
        #   a    | fdlt   |      | add new id
 
3198
        #   a    | fdlt   |  a   | dangling locally added file, skip
 
3199
        #   a    |  a     |      | not present in either tree, skip
 
3200
        #   a    |  a     |  a   | not present in any tree, skip
 
3201
        #   a    |  r     |      | not present in either tree at this path, skip as it
 
3202
        #        |        |      | may not be selected by the users list of paths.
 
3203
        #   a    |  r     |  a   | not present in either tree at this path, skip as it
 
3204
        #        |        |      | may not be selected by the users list of paths.
 
3205
        #  fdlt  | fdlt   |      | content in both: diff them
 
3206
        #  fdlt  | fdlt   |  a   | deleted locally, but not unversioned - show as deleted ?
 
3207
        #  fdlt  |  a     |      | unversioned: output deleted id for now
 
3208
        #  fdlt  |  a     |  a   | unversioned and deleted: output deleted id
 
3209
        #  fdlt  |  r     |      | relocated in this tree, so add target to search.
 
3210
        #        |        |      | Dont diff, we will see an r,fd; pair when we reach
 
3211
        #        |        |      | this id at the other path.
 
3212
        #  fdlt  |  r     |  a   | relocated in this tree, so add target to search.
 
3213
        #        |        |      | Dont diff, we will see an r,fd; pair when we reach
 
3214
        #        |        |      | this id at the other path.
 
3215
 
 
3216
        # TODO: jam 20070516 - Avoid the _get_entry lookup overhead by
 
3217
        #       keeping a cache of directories that we have seen.
 
3218
 
 
3219
        while search_specific_files:
 
3220
            # TODO: the pending list should be lexically sorted?  the
 
3221
            # interface doesn't require it.
 
3222
            current_root = search_specific_files.pop()
 
3223
            current_root_unicode = current_root.decode('utf8')
 
3224
            searched_specific_files.add(current_root)
 
3225
            # process the entries for this containing directory: the rest will be
 
3226
            # found by their parents recursively.
 
3227
            root_entries = self.state._entries_for_path(current_root)
 
3228
            root_abspath = self.tree.abspath(current_root_unicode)
 
3229
            try:
 
3230
                root_stat = os.lstat(root_abspath)
 
3231
            except OSError, e:
 
3232
                if e.errno == errno.ENOENT:
 
3233
                    # the path does not exist: let _process_entry know that.
 
3234
                    root_dir_info = None
 
3235
                else:
 
3236
                    # some other random error: hand it up.
 
3237
                    raise
 
3238
            else:
 
3239
                root_dir_info = ('', current_root,
 
3240
                    osutils.file_kind_from_stat_mode(root_stat.st_mode), root_stat,
 
3241
                    root_abspath)
 
3242
                if root_dir_info[2] == 'directory':
 
3243
                    if self.tree._directory_is_tree_reference(
 
3244
                        current_root.decode('utf8')):
 
3245
                        root_dir_info = root_dir_info[:2] + \
 
3246
                            ('tree-reference',) + root_dir_info[3:]
 
3247
 
 
3248
            if not root_entries and not root_dir_info:
 
3249
                # this specified path is not present at all, skip it.
 
3250
                continue
 
3251
            path_handled = False
 
3252
            for entry in root_entries:
 
3253
                result = _process_entry(entry, root_dir_info)
 
3254
                if result is not None:
 
3255
                    path_handled = True
 
3256
                    if result is not uninteresting:
 
3257
                        yield result
 
3258
            if self.want_unversioned and not path_handled and root_dir_info:
 
3259
                new_executable = bool(
 
3260
                    stat.S_ISREG(root_dir_info[3].st_mode)
 
3261
                    and stat.S_IEXEC & root_dir_info[3].st_mode)
 
3262
                yield (None,
 
3263
                       (None, current_root_unicode),
 
3264
                       True,
 
3265
                       (False, False),
 
3266
                       (None, None),
 
3267
                       (None, splitpath(current_root_unicode)[-1]),
 
3268
                       (None, root_dir_info[2]),
 
3269
                       (None, new_executable)
 
3270
                      )
 
3271
            initial_key = (current_root, '', '')
 
3272
            block_index, _ = self.state._find_block_index_from_key(initial_key)
 
3273
            if block_index == 0:
 
3274
                # we have processed the total root already, but because the
 
3275
                # initial key matched it we should skip it here.
 
3276
                block_index +=1
 
3277
            if root_dir_info and root_dir_info[2] == 'tree-reference':
 
3278
                current_dir_info = None
 
3279
            else:
 
3280
                dir_iterator = osutils._walkdirs_utf8(root_abspath, prefix=current_root)
 
3281
                try:
 
3282
                    current_dir_info = dir_iterator.next()
 
3283
                except OSError, e:
 
3284
                    # on win32, python2.4 has e.errno == ERROR_DIRECTORY, but
 
3285
                    # python 2.5 has e.errno == EINVAL,
 
3286
                    #            and e.winerror == ERROR_DIRECTORY
 
3287
                    e_winerror = getattr(e, 'winerror', None)
 
3288
                    win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND)
 
3289
                    # there may be directories in the inventory even though
 
3290
                    # this path is not a file on disk: so mark it as end of
 
3291
                    # iterator
 
3292
                    if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
 
3293
                        current_dir_info = None
 
3294
                    elif (sys.platform == 'win32'
 
3295
                          and (e.errno in win_errors
 
3296
                               or e_winerror in win_errors)):
 
3297
                        current_dir_info = None
 
3298
                    else:
 
3299
                        raise
 
3300
                else:
 
3301
                    if current_dir_info[0][0] == '':
 
3302
                        # remove .bzr from iteration
 
3303
                        bzr_index = bisect.bisect_left(current_dir_info[1], ('.bzr',))
 
3304
                        if current_dir_info[1][bzr_index][0] != '.bzr':
 
3305
                            raise AssertionError()
 
3306
                        del current_dir_info[1][bzr_index]
 
3307
            # walk until both the directory listing and the versioned metadata
 
3308
            # are exhausted. 
 
3309
            if (block_index < len(self.state._dirblocks) and
 
3310
                osutils.is_inside(current_root, self.state._dirblocks[block_index][0])):
 
3311
                current_block = self.state._dirblocks[block_index]
 
3312
            else:
 
3313
                current_block = None
 
3314
            while (current_dir_info is not None or
 
3315
                   current_block is not None):
 
3316
                if (current_dir_info and current_block
 
3317
                    and current_dir_info[0][0] != current_block[0]):
 
3318
                    if _cmp_by_dirs(current_dir_info[0][0], current_block[0]) < 0:
 
3319
                        # filesystem data refers to paths not covered by the dirblock.
 
3320
                        # this has two possibilities:
 
3321
                        # A) it is versioned but empty, so there is no block for it
 
3322
                        # B) it is not versioned.
 
3323
 
 
3324
                        # if (A) then we need to recurse into it to check for
 
3325
                        # new unknown files or directories.
 
3326
                        # if (B) then we should ignore it, because we don't
 
3327
                        # recurse into unknown directories.
 
3328
                        path_index = 0
 
3329
                        while path_index < len(current_dir_info[1]):
 
3330
                                current_path_info = current_dir_info[1][path_index]
 
3331
                                if self.want_unversioned:
 
3332
                                    if current_path_info[2] == 'directory':
 
3333
                                        if self.tree._directory_is_tree_reference(
 
3334
                                            current_path_info[0].decode('utf8')):
 
3335
                                            current_path_info = current_path_info[:2] + \
 
3336
                                                ('tree-reference',) + current_path_info[3:]
 
3337
                                    new_executable = bool(
 
3338
                                        stat.S_ISREG(current_path_info[3].st_mode)
 
3339
                                        and stat.S_IEXEC & current_path_info[3].st_mode)
 
3340
                                    yield (None,
 
3341
                                        (None, utf8_decode(current_path_info[0])[0]),
 
3342
                                        True,
 
3343
                                        (False, False),
 
3344
                                        (None, None),
 
3345
                                        (None, utf8_decode(current_path_info[1])[0]),
 
3346
                                        (None, current_path_info[2]),
 
3347
                                        (None, new_executable))
 
3348
                                # dont descend into this unversioned path if it is
 
3349
                                # a dir
 
3350
                                if current_path_info[2] in ('directory',
 
3351
                                                            'tree-reference'):
 
3352
                                    del current_dir_info[1][path_index]
 
3353
                                    path_index -= 1
 
3354
                                path_index += 1
 
3355
 
 
3356
                        # This dir info has been handled, go to the next
 
3357
                        try:
 
3358
                            current_dir_info = dir_iterator.next()
 
3359
                        except StopIteration:
 
3360
                            current_dir_info = None
 
3361
                    else:
 
3362
                        # We have a dirblock entry for this location, but there
 
3363
                        # is no filesystem path for this. This is most likely
 
3364
                        # because a directory was removed from the disk.
 
3365
                        # We don't have to report the missing directory,
 
3366
                        # because that should have already been handled, but we
 
3367
                        # need to handle all of the files that are contained
 
3368
                        # within.
 
3369
                        for current_entry in current_block[1]:
 
3370
                            # entry referring to file not present on disk.
 
3371
                            # advance the entry only, after processing.
 
3372
                            result = _process_entry(current_entry, None)
 
3373
                            if result is not None:
 
3374
                                if result is not uninteresting:
 
3375
                                    yield result
 
3376
                        block_index +=1
 
3377
                        if (block_index < len(self.state._dirblocks) and
 
3378
                            osutils.is_inside(current_root,
 
3379
                                              self.state._dirblocks[block_index][0])):
 
3380
                            current_block = self.state._dirblocks[block_index]
 
3381
                        else:
 
3382
                            current_block = None
 
3383
                    continue
 
3384
                entry_index = 0
 
3385
                if current_block and entry_index < len(current_block[1]):
 
3386
                    current_entry = current_block[1][entry_index]
 
3387
                else:
 
3388
                    current_entry = None
 
3389
                advance_entry = True
 
3390
                path_index = 0
 
3391
                if current_dir_info and path_index < len(current_dir_info[1]):
 
3392
                    current_path_info = current_dir_info[1][path_index]
 
3393
                    if current_path_info[2] == 'directory':
 
3394
                        if self.tree._directory_is_tree_reference(
 
3395
                            current_path_info[0].decode('utf8')):
 
3396
                            current_path_info = current_path_info[:2] + \
 
3397
                                ('tree-reference',) + current_path_info[3:]
 
3398
                else:
 
3399
                    current_path_info = None
 
3400
                advance_path = True
 
3401
                path_handled = False
 
3402
                while (current_entry is not None or
 
3403
                    current_path_info is not None):
 
3404
                    if current_entry is None:
 
3405
                        # the check for path_handled when the path is adnvaced
 
3406
                        # will yield this path if needed.
 
3407
                        pass
 
3408
                    elif current_path_info is None:
 
3409
                        # no path is fine: the per entry code will handle it.
 
3410
                        result = _process_entry(current_entry, current_path_info)
 
3411
                        if result is not None:
 
3412
                            if result is not uninteresting:
 
3413
                                yield result
 
3414
                    elif (current_entry[0][1] != current_path_info[1]
 
3415
                          or current_entry[1][self.target_index][0] in 'ar'):
 
3416
                        # The current path on disk doesn't match the dirblock
 
3417
                        # record. Either the dirblock is marked as absent, or
 
3418
                        # the file on disk is not present at all in the
 
3419
                        # dirblock. Either way, report about the dirblock
 
3420
                        # entry, and let other code handle the filesystem one.
 
3421
 
 
3422
                        # Compare the basename for these files to determine
 
3423
                        # which comes first
 
3424
                        if current_path_info[1] < current_entry[0][1]:
 
3425
                            # extra file on disk: pass for now, but only
 
3426
                            # increment the path, not the entry
 
3427
                            advance_entry = False
 
3428
                        else:
 
3429
                            # entry referring to file not present on disk.
 
3430
                            # advance the entry only, after processing.
 
3431
                            result = _process_entry(current_entry, None)
 
3432
                            if result is not None:
 
3433
                                if result is not uninteresting:
 
3434
                                    yield result
 
3435
                            advance_path = False
 
3436
                    else:
 
3437
                        result = _process_entry(current_entry, current_path_info)
 
3438
                        if result is not None:
 
3439
                            path_handled = True
 
3440
                            if result is not uninteresting:
 
3441
                                yield result
 
3442
                    if advance_entry and current_entry is not None:
 
3443
                        entry_index += 1
 
3444
                        if entry_index < len(current_block[1]):
 
3445
                            current_entry = current_block[1][entry_index]
 
3446
                        else:
 
3447
                            current_entry = None
 
3448
                    else:
 
3449
                        advance_entry = True # reset the advance flaga
 
3450
                    if advance_path and current_path_info is not None:
 
3451
                        if not path_handled:
 
3452
                            # unversioned in all regards
 
3453
                            if self.want_unversioned:
 
3454
                                new_executable = bool(
 
3455
                                    stat.S_ISREG(current_path_info[3].st_mode)
 
3456
                                    and stat.S_IEXEC & current_path_info[3].st_mode)
 
3457
                                try:
 
3458
                                    relpath_unicode = utf8_decode(current_path_info[0])[0]
 
3459
                                except UnicodeDecodeError:
 
3460
                                    raise errors.BadFilenameEncoding(
 
3461
                                        current_path_info[0], osutils._fs_enc)
 
3462
                                yield (None,
 
3463
                                    (None, relpath_unicode),
 
3464
                                    True,
 
3465
                                    (False, False),
 
3466
                                    (None, None),
 
3467
                                    (None, utf8_decode(current_path_info[1])[0]),
 
3468
                                    (None, current_path_info[2]),
 
3469
                                    (None, new_executable))
 
3470
                            # dont descend into this unversioned path if it is
 
3471
                            # a dir
 
3472
                            if current_path_info[2] in ('directory'):
 
3473
                                del current_dir_info[1][path_index]
 
3474
                                path_index -= 1
 
3475
                        # dont descend the disk iterator into any tree 
 
3476
                        # paths.
 
3477
                        if current_path_info[2] == 'tree-reference':
 
3478
                            del current_dir_info[1][path_index]
 
3479
                            path_index -= 1
 
3480
                        path_index += 1
 
3481
                        if path_index < len(current_dir_info[1]):
 
3482
                            current_path_info = current_dir_info[1][path_index]
 
3483
                            if current_path_info[2] == 'directory':
 
3484
                                if self.tree._directory_is_tree_reference(
 
3485
                                    current_path_info[0].decode('utf8')):
 
3486
                                    current_path_info = current_path_info[:2] + \
 
3487
                                        ('tree-reference',) + current_path_info[3:]
 
3488
                        else:
 
3489
                            current_path_info = None
 
3490
                        path_handled = False
 
3491
                    else:
 
3492
                        advance_path = True # reset the advance flagg.
 
3493
                if current_block is not None:
 
3494
                    block_index += 1
 
3495
                    if (block_index < len(self.state._dirblocks) and
 
3496
                        osutils.is_inside(current_root, self.state._dirblocks[block_index][0])):
 
3497
                        current_block = self.state._dirblocks[block_index]
 
3498
                    else:
 
3499
                        current_block = None
 
3500
                if current_dir_info is not None:
 
3501
                    try:
 
3502
                        current_dir_info = dir_iterator.next()
 
3503
                    except StopIteration:
 
3504
                        current_dir_info = None
 
3505
# Start with the pure-Python implementation; the import attempt below rebinds
# _process_entry to ProcessEntryC when the compiled helpers can be loaded.
_process_entry = ProcessEntryPython
 
3506
 
 
3507
 
2794
3508
# Try to load the compiled form if possible
2795
3509
try:
2796
3510
    from bzrlib._dirstate_helpers_c import (
2799
3513
        _bisect_path_left_c as _bisect_path_left,
2800
3514
        _bisect_path_right_c as _bisect_path_right,
2801
3515
        cmp_by_dirs_c as cmp_by_dirs,
 
3516
        ProcessEntryC as _process_entry,
 
3517
        update_entry as update_entry,
2802
3518
        )
2803
3519
except ImportError:
2804
3520
    from bzrlib._dirstate_helpers_py import (