/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

  • Committer: Andrew Bennetts
  • Date: 2008-10-27 06:14:45 UTC
  • mfrom: (3793 +trunk)
  • mto: This revision was merged to the branch mainline in revision 3795.
  • Revision ID: andrew.bennetts@canonical.com-20081027061445-eqt9lz6uw1mbvq4g
Merge from bzr.dev.

Show diffs side-by-side

added added

removed removed

Lines of Context:
222
222
    )
223
223
 
224
224
 
225
 
def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):
226
 
    """Convert stat values into a packed representation."""
227
 
    # jam 20060614 it isn't really worth removing more entries if we
228
 
    # are going to leave it in packed form.
229
 
    # With only st_mtime and st_mode filesize is 5.5M and read time is 275ms
230
 
    # With all entries, filesize is 5.9M and read time is maybe 280ms
231
 
    # well within the noise margin
232
 
 
233
 
    # base64 encoding always adds a final newline, so strip it off
234
 
    # The current version
235
 
    return _encode(_pack('>LLLLLL'
236
 
        , st.st_size, int(st.st_mtime), int(st.st_ctime)
237
 
        , st.st_dev, st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1]
238
 
    # This is 0.060s / 1.520s faster by not encoding as much information
239
 
    # return _encode(_pack('>LL', int(st.st_mtime), st.st_mode))[:-1]
240
 
    # This is not strictly faster than _encode(_pack())[:-1]
241
 
    # return '%X.%X.%X.%X.%X.%X' % (
242
 
    #      st.st_size, int(st.st_mtime), int(st.st_ctime),
243
 
    #      st.st_dev, st.st_ino, st.st_mode)
244
 
    # Similar to the _encode(_pack('>LL'))
245
 
    # return '%X.%X' % (int(st.st_mtime), st.st_mode)
 
225
# This is the Windows equivalent of ENOTDIR
 
226
# It is defined in pywin32.winerror, but we don't want a strong dependency for
 
227
# just an error code.
 
228
ERROR_PATH_NOT_FOUND = 3
 
229
ERROR_DIRECTORY = 267
 
230
 
 
231
 
 
232
if not getattr(struct, '_compile', None):
 
233
    # Cannot pre-compile the dirstate pack_stat
 
234
    def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):
 
235
        """Convert stat values into a packed representation."""
 
236
        return _encode(_pack('>LLLLLL', st.st_size, int(st.st_mtime),
 
237
            int(st.st_ctime), st.st_dev, st.st_ino & 0xFFFFFFFF,
 
238
            st.st_mode))[:-1]
 
239
else:
 
240
    # compile the struct compiler we need, so as to only do it once
 
241
    from _struct import Struct
 
242
    _compiled_pack = Struct('>LLLLLL').pack
 
243
    def pack_stat(st, _encode=binascii.b2a_base64, _pack=_compiled_pack):
 
244
        """Convert stat values into a packed representation."""
 
245
        # jam 20060614 it isn't really worth removing more entries if we
 
246
        # are going to leave it in packed form.
 
247
        # With only st_mtime and st_mode filesize is 5.5M and read time is 275ms
 
248
        # With all entries, filesize is 5.9M and read time is maybe 280ms
 
249
        # well within the noise margin
 
250
 
 
251
        # base64 encoding always adds a final newline, so strip it off
 
252
        # The current version
 
253
        return _encode(_pack(st.st_size, int(st.st_mtime), int(st.st_ctime),
 
254
            st.st_dev, st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1]
 
255
        # This is 0.060s / 1.520s faster by not encoding as much information
 
256
        # return _encode(_pack('>LL', int(st.st_mtime), st.st_mode))[:-1]
 
257
        # This is not strictly faster than _encode(_pack())[:-1]
 
258
        # return '%X.%X.%X.%X.%X.%X' % (
 
259
        #      st.st_size, int(st.st_mtime), int(st.st_ctime),
 
260
        #      st.st_dev, st.st_ino, st.st_mode)
 
261
        # Similar to the _encode(_pack('>LL'))
 
262
        # return '%X.%X' % (int(st.st_mtime), st.st_mode)
246
263
 
247
264
 
248
265
class DirState(object):
1033
1050
        self._dirblocks[0] = ('', root_block)
1034
1051
        self._dirblocks[1] = ('', contents_of_root_block)
1035
1052
 
 
1053
    def _entries_for_path(self, path):
 
1054
        """Return a list with all the entries that match path for all ids."""
 
1055
        dirname, basename = os.path.split(path)
 
1056
        key = (dirname, basename, '')
 
1057
        block_index, present = self._find_block_index_from_key(key)
 
1058
        if not present:
 
1059
            # the block which should contain path is absent.
 
1060
            return []
 
1061
        result = []
 
1062
        block = self._dirblocks[block_index][1]
 
1063
        entry_index, _ = self._find_entry_index(key, block)
 
1064
        # we may need to look at multiple entries at this path: walk while the (dirname, basename) of each entry matches the key.
 
1065
        while (entry_index < len(block) and
 
1066
            block[entry_index][0][0:2] == key[0:2]):
 
1067
            result.append(block[entry_index])
 
1068
            entry_index += 1
 
1069
        return result
 
1070
 
1036
1071
    def _entry_to_line(self, entry):
1037
1072
        """Serialize entry to a NULL delimited line ready for _get_output_lines.
1038
1073
 
1472
1507
                    # it is being resurrected here, so blank it out temporarily.
1473
1508
                    self._dirblocks[block_index][1][entry_index][1][1] = null
1474
1509
 
1475
 
    def update_entry(self, entry, abspath, stat_value,
1476
 
                     _stat_to_minikind=_stat_to_minikind,
1477
 
                     _pack_stat=pack_stat):
1478
 
        """Update the entry based on what is actually on disk.
 
1510
    def _observed_sha1(self, entry, sha1, stat_value,
 
1511
        _stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
 
1512
        """Note the sha1 of a file.
1479
1513
 
1480
 
        :param entry: This is the dirblock entry for the file in question.
1481
 
        :param abspath: The path on disk for this file.
1482
 
        :param stat_value: (optional) if we already have done a stat on the
1483
 
            file, re-use it.
1484
 
        :return: The sha1 hexdigest of the file (40 bytes) or link target of a
1485
 
                symlink.
 
1514
        :param entry: The entry the sha1 is for.
 
1515
        :param sha1: The observed sha1.
 
1516
        :param stat_value: The os.lstat for the file.
1486
1517
        """
1487
1518
        try:
1488
1519
            minikind = _stat_to_minikind[stat_value.st_mode & 0170000]
1490
1521
            # Unhandled kind
1491
1522
            return None
1492
1523
        packed_stat = _pack_stat(stat_value)
1493
 
        (saved_minikind, saved_link_or_sha1, saved_file_size,
1494
 
         saved_executable, saved_packed_stat) = entry[1][0]
1495
 
 
1496
 
        if (minikind == saved_minikind
1497
 
            and packed_stat == saved_packed_stat):
1498
 
            # The stat hasn't changed since we saved, so we can re-use the
1499
 
            # saved sha hash.
1500
 
            if minikind == 'd':
1501
 
                return None
1502
 
 
1503
 
            # size should also be in packed_stat
1504
 
            if saved_file_size == stat_value.st_size:
1505
 
                return saved_link_or_sha1
1506
 
 
1507
 
        # If we have gotten this far, that means that we need to actually
1508
 
        # process this entry.
1509
 
        link_or_sha1 = None
1510
1524
        if minikind == 'f':
1511
 
            link_or_sha1 = self._sha1_file(abspath)
1512
 
            executable = self._is_executable(stat_value.st_mode,
1513
 
                                             saved_executable)
1514
 
            if self._cutoff_time is None:
1515
 
                self._sha_cutoff_time()
1516
 
            if (stat_value.st_mtime < self._cutoff_time
1517
 
                and stat_value.st_ctime < self._cutoff_time):
1518
 
                entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
1519
 
                               executable, packed_stat)
1520
 
            else:
1521
 
                entry[1][0] = ('f', '', stat_value.st_size,
1522
 
                               executable, DirState.NULLSTAT)
1523
 
        elif minikind == 'd':
1524
 
            link_or_sha1 = None
1525
 
            entry[1][0] = ('d', '', 0, False, packed_stat)
1526
 
            if saved_minikind != 'd':
1527
 
                # This changed from something into a directory. Make sure we
1528
 
                # have a directory block for it. This doesn't happen very
1529
 
                # often, so this doesn't have to be super fast.
1530
 
                block_index, entry_index, dir_present, file_present = \
1531
 
                    self._get_block_entry_index(entry[0][0], entry[0][1], 0)
1532
 
                self._ensure_block(block_index, entry_index,
1533
 
                                   osutils.pathjoin(entry[0][0], entry[0][1]))
1534
 
        elif minikind == 'l':
1535
 
            link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
1536
 
            if self._cutoff_time is None:
1537
 
                self._sha_cutoff_time()
1538
 
            if (stat_value.st_mtime < self._cutoff_time
1539
 
                and stat_value.st_ctime < self._cutoff_time):
1540
 
                entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
1541
 
                               False, packed_stat)
1542
 
            else:
1543
 
                entry[1][0] = ('l', '', stat_value.st_size,
1544
 
                               False, DirState.NULLSTAT)
1545
 
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1546
 
        return link_or_sha1
 
1525
            if self._cutoff_time is None:
 
1526
                self._sha_cutoff_time()
 
1527
            if (stat_value.st_mtime < self._cutoff_time
 
1528
                and stat_value.st_ctime < self._cutoff_time):
 
1529
                entry[1][0] = ('f', sha1, entry[1][0][2], entry[1][0][3],
 
1530
                    packed_stat)
 
1531
                self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1547
1532
 
1548
1533
    def _sha_cutoff_time(self):
1549
1534
        """Return cutoff time.
2774
2759
            raise errors.ObjectNotLocked(self)
2775
2760
 
2776
2761
 
 
2762
def py_update_entry(state, entry, abspath, stat_value,
 
2763
                 _stat_to_minikind=DirState._stat_to_minikind,
 
2764
                 _pack_stat=pack_stat):
 
2765
    """Update the entry based on what is actually on disk.
 
2766
 
 
2767
    This function only calculates the sha if it needs to - if the entry is
 
2768
    uncachable, or clearly different to the first parent's entry, no sha
 
2769
    is calculated, and None is returned.
 
2770
 
 
2771
    :param state: The dirstate this entry is in.
 
2772
    :param entry: This is the dirblock entry for the file in question.
 
2773
    :param abspath: The path on disk for this file.
 
2774
    :param stat_value: The stat value done on the path.
 
2775
    :return: None, or the sha1 hexdigest of the file (40 bytes) or link
 
2776
        target of a symlink.
 
2777
    """
 
2778
    try:
 
2779
        minikind = _stat_to_minikind[stat_value.st_mode & 0170000]
 
2780
    except KeyError:
 
2781
        # Unhandled kind
 
2782
        return None
 
2783
    packed_stat = _pack_stat(stat_value)
 
2784
    (saved_minikind, saved_link_or_sha1, saved_file_size,
 
2785
     saved_executable, saved_packed_stat) = entry[1][0]
 
2786
 
 
2787
    if (minikind == saved_minikind
 
2788
        and packed_stat == saved_packed_stat):
 
2789
        # The stat hasn't changed since we saved, so we can re-use the
 
2790
        # saved sha hash.
 
2791
        if minikind == 'd':
 
2792
            return None
 
2793
 
 
2794
        # size should also be in packed_stat
 
2795
        if saved_file_size == stat_value.st_size:
 
2796
            return saved_link_or_sha1
 
2797
 
 
2798
    # If we have gotten this far, that means that we need to actually
 
2799
    # process this entry.
 
2800
    link_or_sha1 = None
 
2801
    if minikind == 'f':
 
2802
        executable = state._is_executable(stat_value.st_mode,
 
2803
                                         saved_executable)
 
2804
        if state._cutoff_time is None:
 
2805
            state._sha_cutoff_time()
 
2806
        if (stat_value.st_mtime < state._cutoff_time
 
2807
            and stat_value.st_ctime < state._cutoff_time
 
2808
            and len(entry[1]) > 1
 
2809
            and entry[1][1][0] != 'a'):
 
2810
                # Could check for size changes for further optimised
 
2811
                # avoidance of sha1's. However the most prominent case of
 
2812
                # over-shaing is during initial add, which this catches.
 
2813
            link_or_sha1 = state._sha1_file(abspath)
 
2814
            entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
 
2815
                           executable, packed_stat)
 
2816
        else:
 
2817
            entry[1][0] = ('f', '', stat_value.st_size,
 
2818
                           executable, DirState.NULLSTAT)
 
2819
    elif minikind == 'd':
 
2820
        link_or_sha1 = None
 
2821
        entry[1][0] = ('d', '', 0, False, packed_stat)
 
2822
        if saved_minikind != 'd':
 
2823
            # This changed from something into a directory. Make sure we
 
2824
            # have a directory block for it. This doesn't happen very
 
2825
            # often, so this doesn't have to be super fast.
 
2826
            block_index, entry_index, dir_present, file_present = \
 
2827
                state._get_block_entry_index(entry[0][0], entry[0][1], 0)
 
2828
            state._ensure_block(block_index, entry_index,
 
2829
                               osutils.pathjoin(entry[0][0], entry[0][1]))
 
2830
    elif minikind == 'l':
 
2831
        link_or_sha1 = state._read_link(abspath, saved_link_or_sha1)
 
2832
        if state._cutoff_time is None:
 
2833
            state._sha_cutoff_time()
 
2834
        if (stat_value.st_mtime < state._cutoff_time
 
2835
            and stat_value.st_ctime < state._cutoff_time):
 
2836
            entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
 
2837
                           False, packed_stat)
 
2838
        else:
 
2839
            entry[1][0] = ('l', '', stat_value.st_size,
 
2840
                           False, DirState.NULLSTAT)
 
2841
    state._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
2842
    return link_or_sha1
 
2843
update_entry = py_update_entry
 
2844
 
 
2845
 
 
2846
class ProcessEntryPython(object):
 
2847
 
 
2848
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
 
2849
        "last_source_parent", "last_target_parent", "include_unchanged",
 
2850
        "use_filesystem_for_exec", "utf8_decode", "searched_specific_files",
 
2851
        "search_specific_files", "state", "source_index", "target_index",
 
2852
        "want_unversioned", "tree"]
 
2853
 
 
2854
    def __init__(self, include_unchanged, use_filesystem_for_exec,
 
2855
        search_specific_files, state, source_index, target_index,
 
2856
        want_unversioned, tree):
 
2857
        self.old_dirname_to_file_id = {}
 
2858
        self.new_dirname_to_file_id = {}
 
2859
        # Just a sentinel, so that _process_entry can say that this
 
2860
        # record is handled, but isn't interesting to process (unchanged)
 
2861
        self.uninteresting = object()
 
2862
        # Using a list so that we can access the values and change them in
 
2863
        # nested scope. Each one is [dirname, parent_file_id]
 
2864
        self.last_source_parent = [None, None]
 
2865
        self.last_target_parent = [None, None]
 
2866
        self.include_unchanged = include_unchanged
 
2867
        self.use_filesystem_for_exec = use_filesystem_for_exec
 
2868
        self.utf8_decode = cache_utf8._utf8_decode
 
2869
        # for all search_indexes in each path at or under each element of
 
2870
        # search_specific_files, if the detail is relocated: add the id, and add the
 
2871
        # relocated path as one to search if it's not searched already. If the
 
2872
        # detail is not relocated, add the id.
 
2873
        self.searched_specific_files = set()
 
2874
        self.search_specific_files = search_specific_files
 
2875
        self.state = state
 
2876
        self.source_index = source_index
 
2877
        self.target_index = target_index
 
2878
        self.want_unversioned = want_unversioned
 
2879
        self.tree = tree
 
2880
 
 
2881
    def _process_entry(self, entry, path_info, pathjoin=osutils.pathjoin):
 
2882
        """Compare an entry and real disk to generate delta information.
 
2883
 
 
2884
        :param path_info: top_relpath, basename, kind, lstat, abspath for
 
2885
            the path of entry. If None, then the path is considered absent.
 
2886
            (Perhaps we should pass in a concrete entry for this ?)
 
2887
            Basename is returned as a utf8 string because we expect this
 
2888
            tuple will be ignored, and don't want to take the time to
 
2889
            decode.
 
2890
        :return: None if these don't match
 
2891
                 A tuple of information about the change, or
 
2892
                 the object 'uninteresting' if these match, but are
 
2893
                 basically identical.
 
2894
        """
 
2895
        if self.source_index is None:
 
2896
            source_details = DirState.NULL_PARENT_DETAILS
 
2897
        else:
 
2898
            source_details = entry[1][self.source_index]
 
2899
        target_details = entry[1][self.target_index]
 
2900
        target_minikind = target_details[0]
 
2901
        if path_info is not None and target_minikind in 'fdlt':
 
2902
            if not (self.target_index == 0):
 
2903
                raise AssertionError()
 
2904
            link_or_sha1 = update_entry(self.state, entry,
 
2905
                abspath=path_info[4], stat_value=path_info[3])
 
2906
            # The entry may have been modified by update_entry
 
2907
            target_details = entry[1][self.target_index]
 
2908
            target_minikind = target_details[0]
 
2909
        else:
 
2910
            link_or_sha1 = None
 
2911
        file_id = entry[0][2]
 
2912
        source_minikind = source_details[0]
 
2913
        if source_minikind in 'fdltr' and target_minikind in 'fdlt':
 
2914
            # claimed content in both: diff
 
2915
            #   r    | fdlt   |      | add source to search, add id path move and perform
 
2916
            #        |        |      | diff check on source-target
 
2917
            #   r    | fdlt   |  a   | dangling file that was present in the basis.
 
2918
            #        |        |      | ???
 
2919
            if source_minikind in 'r':
 
2920
                # add the source to the search path to find any children it
 
2921
                # has.  TODO ? : only add if it is a container ?
 
2922
                if not osutils.is_inside_any(self.searched_specific_files,
 
2923
                                             source_details[1]):
 
2924
                    self.search_specific_files.add(source_details[1])
 
2925
                # generate the old path; this is needed for stating later
 
2926
                # as well.
 
2927
                old_path = source_details[1]
 
2928
                old_dirname, old_basename = os.path.split(old_path)
 
2929
                path = pathjoin(entry[0][0], entry[0][1])
 
2930
                old_entry = self.state._get_entry(self.source_index,
 
2931
                                             path_utf8=old_path)
 
2932
                # update the source details variable to be the real
 
2933
                # location.
 
2934
                if old_entry == (None, None):
 
2935
                    raise errors.CorruptDirstate(self.state._filename,
 
2936
                        "entry '%s/%s' is considered renamed from %r"
 
2937
                        " but source does not exist\n"
 
2938
                        "entry: %s" % (entry[0][0], entry[0][1], old_path, entry))
 
2939
                source_details = old_entry[1][self.source_index]
 
2940
                source_minikind = source_details[0]
 
2941
            else:
 
2942
                old_dirname = entry[0][0]
 
2943
                old_basename = entry[0][1]
 
2944
                old_path = path = None
 
2945
            if path_info is None:
 
2946
                # the file is missing on disk, show as removed.
 
2947
                content_change = True
 
2948
                target_kind = None
 
2949
                target_exec = False
 
2950
            else:
 
2951
                # source and target are both versioned and disk file is present.
 
2952
                target_kind = path_info[2]
 
2953
                if target_kind == 'directory':
 
2954
                    if path is None:
 
2955
                        old_path = path = pathjoin(old_dirname, old_basename)
 
2956
                    self.new_dirname_to_file_id[path] = file_id
 
2957
                    if source_minikind != 'd':
 
2958
                        content_change = True
 
2959
                    else:
 
2960
                        # directories have no fingerprint
 
2961
                        content_change = False
 
2962
                    target_exec = False
 
2963
                elif target_kind == 'file':
 
2964
                    if source_minikind != 'f':
 
2965
                        content_change = True
 
2966
                    else:
 
2967
                        # If the size is the same, check the sha:
 
2968
                        if target_details[2] == source_details[2]:
 
2969
                            if link_or_sha1 is None:
 
2970
                                # Stat cache miss:
 
2971
                                file_obj = file(path_info[4], 'rb')
 
2972
                                try:
 
2973
                                    statvalue = os.fstat(file_obj.fileno())
 
2974
                                    link_or_sha1 = osutils.sha_file(file_obj)
 
2975
                                finally:
 
2976
                                    file_obj.close()
 
2977
                                self.state._observed_sha1(entry, link_or_sha1,
 
2978
                                    statvalue)
 
2979
                            content_change = (link_or_sha1 != source_details[1])
 
2980
                        else:
 
2981
                            # Size changed, so must be different
 
2982
                            content_change = True
 
2983
                    # Target details is updated at update_entry time
 
2984
                    if self.use_filesystem_for_exec:
 
2985
                        # We don't need S_ISREG here, because we are sure
 
2986
                        # we are dealing with a file.
 
2987
                        target_exec = bool(stat.S_IEXEC & path_info[3].st_mode)
 
2988
                    else:
 
2989
                        target_exec = target_details[3]
 
2990
                elif target_kind == 'symlink':
 
2991
                    if source_minikind != 'l':
 
2992
                        content_change = True
 
2993
                    else:
 
2994
                        content_change = (link_or_sha1 != source_details[1])
 
2995
                    target_exec = False
 
2996
                elif target_kind == 'tree-reference':
 
2997
                    if source_minikind != 't':
 
2998
                        content_change = True
 
2999
                    else:
 
3000
                        content_change = False
 
3001
                    target_exec = False
 
3002
                else:
 
3003
                    raise Exception, "unknown kind %s" % path_info[2]
 
3004
            if source_minikind == 'd':
 
3005
                if path is None:
 
3006
                    old_path = path = pathjoin(old_dirname, old_basename)
 
3007
                self.old_dirname_to_file_id[old_path] = file_id
 
3008
            # parent id is the entry for the path in the target tree
 
3009
            if old_dirname == self.last_source_parent[0]:
 
3010
                source_parent_id = self.last_source_parent[1]
 
3011
            else:
 
3012
                try:
 
3013
                    source_parent_id = self.old_dirname_to_file_id[old_dirname]
 
3014
                except KeyError:
 
3015
                    source_parent_entry = self.state._get_entry(self.source_index,
 
3016
                                                           path_utf8=old_dirname)
 
3017
                    source_parent_id = source_parent_entry[0][2]
 
3018
                if source_parent_id == entry[0][2]:
 
3019
                    # This is the root, so the parent is None
 
3020
                    source_parent_id = None
 
3021
                else:
 
3022
                    self.last_source_parent[0] = old_dirname
 
3023
                    self.last_source_parent[1] = source_parent_id
 
3024
            new_dirname = entry[0][0]
 
3025
            if new_dirname == self.last_target_parent[0]:
 
3026
                target_parent_id = self.last_target_parent[1]
 
3027
            else:
 
3028
                try:
 
3029
                    target_parent_id = self.new_dirname_to_file_id[new_dirname]
 
3030
                except KeyError:
 
3031
                    # TODO: We don't always need to do the lookup, because the
 
3032
                    #       parent entry will be the same as the source entry.
 
3033
                    target_parent_entry = self.state._get_entry(self.target_index,
 
3034
                                                           path_utf8=new_dirname)
 
3035
                    if target_parent_entry == (None, None):
 
3036
                        raise AssertionError(
 
3037
                            "Could not find target parent in wt: %s\nparent of: %s"
 
3038
                            % (new_dirname, entry))
 
3039
                    target_parent_id = target_parent_entry[0][2]
 
3040
                if target_parent_id == entry[0][2]:
 
3041
                    # This is the root, so the parent is None
 
3042
                    target_parent_id = None
 
3043
                else:
 
3044
                    self.last_target_parent[0] = new_dirname
 
3045
                    self.last_target_parent[1] = target_parent_id
 
3046
 
 
3047
            source_exec = source_details[3]
 
3048
            if (self.include_unchanged
 
3049
                or content_change
 
3050
                or source_parent_id != target_parent_id
 
3051
                or old_basename != entry[0][1]
 
3052
                or source_exec != target_exec
 
3053
                ):
 
3054
                if old_path is None:
 
3055
                    old_path = path = pathjoin(old_dirname, old_basename)
 
3056
                    old_path_u = self.utf8_decode(old_path)[0]
 
3057
                    path_u = old_path_u
 
3058
                else:
 
3059
                    old_path_u = self.utf8_decode(old_path)[0]
 
3060
                    if old_path == path:
 
3061
                        path_u = old_path_u
 
3062
                    else:
 
3063
                        path_u = self.utf8_decode(path)[0]
 
3064
                source_kind = DirState._minikind_to_kind[source_minikind]
 
3065
                return (entry[0][2],
 
3066
                       (old_path_u, path_u),
 
3067
                       content_change,
 
3068
                       (True, True),
 
3069
                       (source_parent_id, target_parent_id),
 
3070
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
 
3071
                       (source_kind, target_kind),
 
3072
                       (source_exec, target_exec))
 
3073
            else:
 
3074
                return self.uninteresting
 
3075
        elif source_minikind in 'a' and target_minikind in 'fdlt':
 
3076
            # looks like a new file
 
3077
            path = pathjoin(entry[0][0], entry[0][1])
 
3078
            # parent id is the entry for the path in the target tree
 
3079
            # TODO: these are the same for an entire directory: cache em.
 
3080
            parent_id = self.state._get_entry(self.target_index,
 
3081
                                         path_utf8=entry[0][0])[0][2]
 
3082
            if parent_id == entry[0][2]:
 
3083
                parent_id = None
 
3084
            if path_info is not None:
 
3085
                # Present on disk:
 
3086
                if self.use_filesystem_for_exec:
 
3087
                    # We need S_ISREG here, because we aren't sure if this
 
3088
                    # is a file or not.
 
3089
                    target_exec = bool(
 
3090
                        stat.S_ISREG(path_info[3].st_mode)
 
3091
                        and stat.S_IEXEC & path_info[3].st_mode)
 
3092
                else:
 
3093
                    target_exec = target_details[3]
 
3094
                return (entry[0][2],
 
3095
                       (None, self.utf8_decode(path)[0]),
 
3096
                       True,
 
3097
                       (False, True),
 
3098
                       (None, parent_id),
 
3099
                       (None, self.utf8_decode(entry[0][1])[0]),
 
3100
                       (None, path_info[2]),
 
3101
                       (None, target_exec))
 
3102
            else:
 
3103
                # It's a missing file, report it as such.
 
3104
                return (entry[0][2],
 
3105
                       (None, self.utf8_decode(path)[0]),
 
3106
                       False,
 
3107
                       (False, True),
 
3108
                       (None, parent_id),
 
3109
                       (None, self.utf8_decode(entry[0][1])[0]),
 
3110
                       (None, None),
 
3111
                       (None, False))
 
3112
        elif source_minikind in 'fdlt' and target_minikind in 'a':
 
3113
            # unversioned, possibly, or possibly not deleted: we don't care.
 
3114
            # if it's still on disk, *and* there's no other entry at this
 
3115
            # path [we don't know this in this routine at the moment -
 
3116
            # perhaps we should change this] - then it would be an unknown.
 
3117
            old_path = pathjoin(entry[0][0], entry[0][1])
 
3118
            # parent id is the entry for the path in the source tree
 
3119
            parent_id = self.state._get_entry(self.source_index, path_utf8=entry[0][0])[0][2]
 
3120
            if parent_id == entry[0][2]:
 
3121
                parent_id = None
 
3122
            return (entry[0][2],
 
3123
                   (self.utf8_decode(old_path)[0], None),
 
3124
                   True,
 
3125
                   (True, False),
 
3126
                   (parent_id, None),
 
3127
                   (self.utf8_decode(entry[0][1])[0], None),
 
3128
                   (DirState._minikind_to_kind[source_minikind], None),
 
3129
                   (source_details[3], None))
 
3130
        elif source_minikind in 'fdlt' and target_minikind in 'r':
 
3131
            # a rename; could be a true rename, or a rename inherited from
 
3132
            # a renamed parent. TODO: handle this efficiently. It's not a
 
3133
            # common case to rename dirs though, so a correct but slow
 
3134
            # implementation will do.
 
3135
            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
 
3136
                self.search_specific_files.add(target_details[1])
 
3137
        elif source_minikind in 'ra' and target_minikind in 'ra':
 
3138
            # neither of the selected trees contain this file,
 
3139
            # so skip over it. This is not currently directly tested, but
 
3140
            # is indirectly via test_too_much.TestCommands.test_conflicts.
 
3141
            pass
 
3142
        else:
 
3143
            raise AssertionError("don't know how to compare "
 
3144
                "source_minikind=%r, target_minikind=%r"
 
3145
                % (source_minikind, target_minikind))
 
3146
            ## import pdb;pdb.set_trace()
 
3147
        return None
 
3148
 
 
3149
    def __iter__(self):
        """Return this object itself as the iterator."""
        return self
 
3151
 
 
3152
    def iter_changes(self):
 
3153
        """Iterate over the changes."""
 
3154
        utf8_decode = cache_utf8._utf8_decode
 
3155
        _cmp_by_dirs = cmp_by_dirs
 
3156
        _process_entry = self._process_entry
 
3157
        uninteresting = self.uninteresting
 
3158
        search_specific_files = self.search_specific_files
 
3159
        searched_specific_files = self.searched_specific_files
 
3160
        splitpath = osutils.splitpath
 
3161
        # sketch: 
 
3162
        # compare source_index and target_index at or under each element of search_specific_files.
 
3163
        # follow the following comparison table. Note that we only want to do diff operations when
 
3164
        # the target is fdl because thats when the walkdirs logic will have exposed the pathinfo 
 
3165
        # for the target.
 
3166
        # cases:
 
3167
        # 
 
3168
        # Source | Target | disk | action
 
3169
        #   r    | fdlt   |      | add source to search, add id path move and perform
 
3170
        #        |        |      | diff check on source-target
 
3171
        #   r    | fdlt   |  a   | dangling file that was present in the basis. 
 
3172
        #        |        |      | ???
 
3173
        #   r    |  a     |      | add source to search
 
3174
        #   r    |  a     |  a   | 
 
3175
        #   r    |  r     |      | this path is present in a non-examined tree, skip.
 
3176
        #   r    |  r     |  a   | this path is present in a non-examined tree, skip.
 
3177
        #   a    | fdlt   |      | add new id
 
3178
        #   a    | fdlt   |  a   | dangling locally added file, skip
 
3179
        #   a    |  a     |      | not present in either tree, skip
 
3180
        #   a    |  a     |  a   | not present in any tree, skip
 
3181
        #   a    |  r     |      | not present in either tree at this path, skip as it
 
3182
        #        |        |      | may not be selected by the users list of paths.
 
3183
        #   a    |  r     |  a   | not present in either tree at this path, skip as it
 
3184
        #        |        |      | may not be selected by the users list of paths.
 
3185
        #  fdlt  | fdlt   |      | content in both: diff them
 
3186
        #  fdlt  | fdlt   |  a   | deleted locally, but not unversioned - show as deleted ?
 
3187
        #  fdlt  |  a     |      | unversioned: output deleted id for now
 
3188
        #  fdlt  |  a     |  a   | unversioned and deleted: output deleted id
 
3189
        #  fdlt  |  r     |      | relocated in this tree, so add target to search.
 
3190
        #        |        |      | Dont diff, we will see an r,fd; pair when we reach
 
3191
        #        |        |      | this id at the other path.
 
3192
        #  fdlt  |  r     |  a   | relocated in this tree, so add target to search.
 
3193
        #        |        |      | Dont diff, we will see an r,fd; pair when we reach
 
3194
        #        |        |      | this id at the other path.
 
3195
 
 
3196
        # TODO: jam 20070516 - Avoid the _get_entry lookup overhead by
 
3197
        #       keeping a cache of directories that we have seen.
 
3198
 
 
3199
        while search_specific_files:
 
3200
            # TODO: the pending list should be lexically sorted?  the
 
3201
            # interface doesn't require it.
 
3202
            current_root = search_specific_files.pop()
 
3203
            current_root_unicode = current_root.decode('utf8')
 
3204
            searched_specific_files.add(current_root)
 
3205
            # process the entries for this containing directory: the rest will be
 
3206
            # found by their parents recursively.
 
3207
            root_entries = self.state._entries_for_path(current_root)
 
3208
            root_abspath = self.tree.abspath(current_root_unicode)
 
3209
            try:
 
3210
                root_stat = os.lstat(root_abspath)
 
3211
            except OSError, e:
 
3212
                if e.errno == errno.ENOENT:
 
3213
                    # the path does not exist: let _process_entry know that.
 
3214
                    root_dir_info = None
 
3215
                else:
 
3216
                    # some other random error: hand it up.
 
3217
                    raise
 
3218
            else:
 
3219
                root_dir_info = ('', current_root,
 
3220
                    osutils.file_kind_from_stat_mode(root_stat.st_mode), root_stat,
 
3221
                    root_abspath)
 
3222
                if root_dir_info[2] == 'directory':
 
3223
                    if self.tree._directory_is_tree_reference(
 
3224
                        current_root.decode('utf8')):
 
3225
                        root_dir_info = root_dir_info[:2] + \
 
3226
                            ('tree-reference',) + root_dir_info[3:]
 
3227
 
 
3228
            if not root_entries and not root_dir_info:
 
3229
                # this specified path is not present at all, skip it.
 
3230
                continue
 
3231
            path_handled = False
 
3232
            for entry in root_entries:
 
3233
                result = _process_entry(entry, root_dir_info)
 
3234
                if result is not None:
 
3235
                    path_handled = True
 
3236
                    if result is not uninteresting:
 
3237
                        yield result
 
3238
            if self.want_unversioned and not path_handled and root_dir_info:
 
3239
                new_executable = bool(
 
3240
                    stat.S_ISREG(root_dir_info[3].st_mode)
 
3241
                    and stat.S_IEXEC & root_dir_info[3].st_mode)
 
3242
                yield (None,
 
3243
                       (None, current_root_unicode),
 
3244
                       True,
 
3245
                       (False, False),
 
3246
                       (None, None),
 
3247
                       (None, splitpath(current_root_unicode)[-1]),
 
3248
                       (None, root_dir_info[2]),
 
3249
                       (None, new_executable)
 
3250
                      )
 
3251
            initial_key = (current_root, '', '')
 
3252
            block_index, _ = self.state._find_block_index_from_key(initial_key)
 
3253
            if block_index == 0:
 
3254
                # we have processed the total root already, but because the
 
3255
                # initial key matched it we should skip it here.
 
3256
                block_index +=1
 
3257
            if root_dir_info and root_dir_info[2] == 'tree-reference':
 
3258
                current_dir_info = None
 
3259
            else:
 
3260
                dir_iterator = osutils._walkdirs_utf8(root_abspath, prefix=current_root)
 
3261
                try:
 
3262
                    current_dir_info = dir_iterator.next()
 
3263
                except OSError, e:
 
3264
                    # on win32, python2.4 has e.errno == ERROR_DIRECTORY, but
 
3265
                    # python 2.5 has e.errno == EINVAL,
 
3266
                    #            and e.winerror == ERROR_DIRECTORY
 
3267
                    e_winerror = getattr(e, 'winerror', None)
 
3268
                    win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND)
 
3269
                    # there may be directories in the inventory even though
 
3270
                    # this path is not a file on disk: so mark it as end of
 
3271
                    # iterator
 
3272
                    if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
 
3273
                        current_dir_info = None
 
3274
                    elif (sys.platform == 'win32'
 
3275
                          and (e.errno in win_errors
 
3276
                               or e_winerror in win_errors)):
 
3277
                        current_dir_info = None
 
3278
                    else:
 
3279
                        raise
 
3280
                else:
 
3281
                    if current_dir_info[0][0] == '':
 
3282
                        # remove .bzr from iteration
 
3283
                        bzr_index = bisect.bisect_left(current_dir_info[1], ('.bzr',))
 
3284
                        if current_dir_info[1][bzr_index][0] != '.bzr':
 
3285
                            raise AssertionError()
 
3286
                        del current_dir_info[1][bzr_index]
 
3287
            # walk until both the directory listing and the versioned metadata
 
3288
            # are exhausted. 
 
3289
            if (block_index < len(self.state._dirblocks) and
 
3290
                osutils.is_inside(current_root, self.state._dirblocks[block_index][0])):
 
3291
                current_block = self.state._dirblocks[block_index]
 
3292
            else:
 
3293
                current_block = None
 
3294
            while (current_dir_info is not None or
 
3295
                   current_block is not None):
 
3296
                if (current_dir_info and current_block
 
3297
                    and current_dir_info[0][0] != current_block[0]):
 
3298
                    if _cmp_by_dirs(current_dir_info[0][0], current_block[0]) < 0:
 
3299
                        # filesystem data refers to paths not covered by the dirblock.
 
3300
                        # this has two possibilities:
 
3301
                        # A) it is versioned but empty, so there is no block for it
 
3302
                        # B) it is not versioned.
 
3303
 
 
3304
                        # if (A) then we need to recurse into it to check for
 
3305
                        # new unknown files or directories.
 
3306
                        # if (B) then we should ignore it, because we don't
 
3307
                        # recurse into unknown directories.
 
3308
                        path_index = 0
 
3309
                        while path_index < len(current_dir_info[1]):
 
3310
                                current_path_info = current_dir_info[1][path_index]
 
3311
                                if self.want_unversioned:
 
3312
                                    if current_path_info[2] == 'directory':
 
3313
                                        if self.tree._directory_is_tree_reference(
 
3314
                                            current_path_info[0].decode('utf8')):
 
3315
                                            current_path_info = current_path_info[:2] + \
 
3316
                                                ('tree-reference',) + current_path_info[3:]
 
3317
                                    new_executable = bool(
 
3318
                                        stat.S_ISREG(current_path_info[3].st_mode)
 
3319
                                        and stat.S_IEXEC & current_path_info[3].st_mode)
 
3320
                                    yield (None,
 
3321
                                        (None, utf8_decode(current_path_info[0])[0]),
 
3322
                                        True,
 
3323
                                        (False, False),
 
3324
                                        (None, None),
 
3325
                                        (None, utf8_decode(current_path_info[1])[0]),
 
3326
                                        (None, current_path_info[2]),
 
3327
                                        (None, new_executable))
 
3328
                                # dont descend into this unversioned path if it is
 
3329
                                # a dir
 
3330
                                if current_path_info[2] in ('directory',
 
3331
                                                            'tree-reference'):
 
3332
                                    del current_dir_info[1][path_index]
 
3333
                                    path_index -= 1
 
3334
                                path_index += 1
 
3335
 
 
3336
                        # This dir info has been handled, go to the next
 
3337
                        try:
 
3338
                            current_dir_info = dir_iterator.next()
 
3339
                        except StopIteration:
 
3340
                            current_dir_info = None
 
3341
                    else:
 
3342
                        # We have a dirblock entry for this location, but there
 
3343
                        # is no filesystem path for this. This is most likely
 
3344
                        # because a directory was removed from the disk.
 
3345
                        # We don't have to report the missing directory,
 
3346
                        # because that should have already been handled, but we
 
3347
                        # need to handle all of the files that are contained
 
3348
                        # within.
 
3349
                        for current_entry in current_block[1]:
 
3350
                            # entry referring to file not present on disk.
 
3351
                            # advance the entry only, after processing.
 
3352
                            result = _process_entry(current_entry, None)
 
3353
                            if result is not None:
 
3354
                                if result is not uninteresting:
 
3355
                                    yield result
 
3356
                        block_index +=1
 
3357
                        if (block_index < len(self.state._dirblocks) and
 
3358
                            osutils.is_inside(current_root,
 
3359
                                              self.state._dirblocks[block_index][0])):
 
3360
                            current_block = self.state._dirblocks[block_index]
 
3361
                        else:
 
3362
                            current_block = None
 
3363
                    continue
 
3364
                entry_index = 0
 
3365
                if current_block and entry_index < len(current_block[1]):
 
3366
                    current_entry = current_block[1][entry_index]
 
3367
                else:
 
3368
                    current_entry = None
 
3369
                advance_entry = True
 
3370
                path_index = 0
 
3371
                if current_dir_info and path_index < len(current_dir_info[1]):
 
3372
                    current_path_info = current_dir_info[1][path_index]
 
3373
                    if current_path_info[2] == 'directory':
 
3374
                        if self.tree._directory_is_tree_reference(
 
3375
                            current_path_info[0].decode('utf8')):
 
3376
                            current_path_info = current_path_info[:2] + \
 
3377
                                ('tree-reference',) + current_path_info[3:]
 
3378
                else:
 
3379
                    current_path_info = None
 
3380
                advance_path = True
 
3381
                path_handled = False
 
3382
                while (current_entry is not None or
 
3383
                    current_path_info is not None):
 
3384
                    if current_entry is None:
 
3385
                        # the check for path_handled when the path is adnvaced
 
3386
                        # will yield this path if needed.
 
3387
                        pass
 
3388
                    elif current_path_info is None:
 
3389
                        # no path is fine: the per entry code will handle it.
 
3390
                        result = _process_entry(current_entry, current_path_info)
 
3391
                        if result is not None:
 
3392
                            if result is not uninteresting:
 
3393
                                yield result
 
3394
                    elif (current_entry[0][1] != current_path_info[1]
 
3395
                          or current_entry[1][self.target_index][0] in 'ar'):
 
3396
                        # The current path on disk doesn't match the dirblock
 
3397
                        # record. Either the dirblock is marked as absent, or
 
3398
                        # the file on disk is not present at all in the
 
3399
                        # dirblock. Either way, report about the dirblock
 
3400
                        # entry, and let other code handle the filesystem one.
 
3401
 
 
3402
                        # Compare the basename for these files to determine
 
3403
                        # which comes first
 
3404
                        if current_path_info[1] < current_entry[0][1]:
 
3405
                            # extra file on disk: pass for now, but only
 
3406
                            # increment the path, not the entry
 
3407
                            advance_entry = False
 
3408
                        else:
 
3409
                            # entry referring to file not present on disk.
 
3410
                            # advance the entry only, after processing.
 
3411
                            result = _process_entry(current_entry, None)
 
3412
                            if result is not None:
 
3413
                                if result is not uninteresting:
 
3414
                                    yield result
 
3415
                            advance_path = False
 
3416
                    else:
 
3417
                        result = _process_entry(current_entry, current_path_info)
 
3418
                        if result is not None:
 
3419
                            path_handled = True
 
3420
                            if result is not uninteresting:
 
3421
                                yield result
 
3422
                    if advance_entry and current_entry is not None:
 
3423
                        entry_index += 1
 
3424
                        if entry_index < len(current_block[1]):
 
3425
                            current_entry = current_block[1][entry_index]
 
3426
                        else:
 
3427
                            current_entry = None
 
3428
                    else:
 
3429
                        advance_entry = True # reset the advance flaga
 
3430
                    if advance_path and current_path_info is not None:
 
3431
                        if not path_handled:
 
3432
                            # unversioned in all regards
 
3433
                            if self.want_unversioned:
 
3434
                                new_executable = bool(
 
3435
                                    stat.S_ISREG(current_path_info[3].st_mode)
 
3436
                                    and stat.S_IEXEC & current_path_info[3].st_mode)
 
3437
                                try:
 
3438
                                    relpath_unicode = utf8_decode(current_path_info[0])[0]
 
3439
                                except UnicodeDecodeError:
 
3440
                                    raise errors.BadFilenameEncoding(
 
3441
                                        current_path_info[0], osutils._fs_enc)
 
3442
                                yield (None,
 
3443
                                    (None, relpath_unicode),
 
3444
                                    True,
 
3445
                                    (False, False),
 
3446
                                    (None, None),
 
3447
                                    (None, utf8_decode(current_path_info[1])[0]),
 
3448
                                    (None, current_path_info[2]),
 
3449
                                    (None, new_executable))
 
3450
                            # dont descend into this unversioned path if it is
 
3451
                            # a dir
 
3452
                            if current_path_info[2] in ('directory'):
 
3453
                                del current_dir_info[1][path_index]
 
3454
                                path_index -= 1
 
3455
                        # dont descend the disk iterator into any tree 
 
3456
                        # paths.
 
3457
                        if current_path_info[2] == 'tree-reference':
 
3458
                            del current_dir_info[1][path_index]
 
3459
                            path_index -= 1
 
3460
                        path_index += 1
 
3461
                        if path_index < len(current_dir_info[1]):
 
3462
                            current_path_info = current_dir_info[1][path_index]
 
3463
                            if current_path_info[2] == 'directory':
 
3464
                                if self.tree._directory_is_tree_reference(
 
3465
                                    current_path_info[0].decode('utf8')):
 
3466
                                    current_path_info = current_path_info[:2] + \
 
3467
                                        ('tree-reference',) + current_path_info[3:]
 
3468
                        else:
 
3469
                            current_path_info = None
 
3470
                        path_handled = False
 
3471
                    else:
 
3472
                        advance_path = True # reset the advance flagg.
 
3473
                if current_block is not None:
 
3474
                    block_index += 1
 
3475
                    if (block_index < len(self.state._dirblocks) and
 
3476
                        osutils.is_inside(current_root, self.state._dirblocks[block_index][0])):
 
3477
                        current_block = self.state._dirblocks[block_index]
 
3478
                    else:
 
3479
                        current_block = None
 
3480
                if current_dir_info is not None:
 
3481
                    try:
 
3482
                        current_dir_info = dir_iterator.next()
 
3483
                    except StopIteration:
 
3484
                        current_dir_info = None
 
3485
# Default to the pure-Python entry processor; the import attempted below
# rebinds _process_entry to ProcessEntryC when the compiled helpers load.
_process_entry = ProcessEntryPython
 
3486
 
 
3487
 
2777
3488
# Try to load the compiled form if possible
2778
3489
try:
2779
3490
    from bzrlib._dirstate_helpers_c import (
2782
3493
        _bisect_path_left_c as _bisect_path_left,
2783
3494
        _bisect_path_right_c as _bisect_path_right,
2784
3495
        cmp_by_dirs_c as cmp_by_dirs,
 
3496
        ProcessEntryC as _process_entry,
 
3497
        update_entry as update_entry,
2785
3498
        )
2786
3499
except ImportError:
2787
3500
    from bzrlib._dirstate_helpers_py import (