bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
3398.1.1
by Ian Clatworthy
 simplify the custom Globster to only care about ordering  | 
1  | 
# Copyright (C) 2006, 2008 Canonical Ltd
 | 
| 
2135.2.1
by Kent Gibson
 Added glob module to replace broken fnmatch based ignore pattern matching (#57637)  | 
2  | 
|
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
15  | 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
|
16  | 
||
17  | 
"""Tools for converting globs to regular expressions.
 | 
|
18  | 
||
19  | 
This module provides functions for converting shell-like globs to regular
 | 
|
20  | 
expressions.
 | 
|
21  | 
"""
 | 
|
22  | 
||
23  | 
import re  | 
|
24  | 
||
25  | 
from bzrlib.trace import (  | 
|
26  | 
    warning
 | 
|
27  | 
    )
 | 
|
28  | 
||
29  | 
||
30  | 
class Replacer(object):
    """Do a multiple-pattern substitution.

    The patterns and substitutions are combined into one, so the result of
    one replacement is never substituted again. Add the patterns and
    replacements via the add method and then call the object. The patterns
    must not contain capturing groups.

    Patterns are tried in the order they were added: they are joined into a
    single alternation, one capturing group per pattern, and the index of the
    group that matched selects the replacement.
    """

    # Matches the two-character placeholder ``\&`` in a string replacement.
    # Written as a plain (non-raw) unicode literal so that the same source is
    # valid on both Python 2 and Python 3; the value equals ur'\\&'.
    _expand = re.compile(u'\\\\&')

    def __init__(self, source=None):
        """Create a replacer, optionally copying the rules of another one.

        :param source: an existing Replacer whose patterns and replacements
            are copied (the lists are duplicated, not shared), or None to
            start empty.
        """
        # Compiled lazily on first call and reset whenever rules change.
        self._pat = None
        if source:
            self._pats = list(source._pats)
            self._funs = list(source._funs)
        else:
            self._pats = []
            self._funs = []

    def add(self, pat, fun):
        r"""Add a pattern and replacement.

        The pattern must not contain capturing groups.
        The replacement might be either a string template in which \& will be
        replaced with the match, or a function that will get the matching text
        as argument. It does not get match object, because capturing is
        forbidden anyway.

        :param pat: regular expression source text.
        :param fun: replacement template string or callable.
        """
        self._pat = None
        self._pats.append(pat)
        self._funs.append(fun)

    def add_replacer(self, replacer):
        r"""Add all patterns from another replacer.

        All patterns and replacements from replacer are appended to the ones
        already defined.

        :param replacer: the Replacer whose rules are appended.
        """
        self._pat = None
        self._pats.extend(replacer._pats)
        self._funs.extend(replacer._funs)

    def __call__(self, text):
        """Apply all registered substitutions to text in a single pass.

        :param text: the string to transform.
        :return: the transformed string; text itself when no patterns have
            been registered.
        """
        if not self._pats:
            # An empty alternation would match the empty string everywhere
            # with no group set, which _do_sub cannot map to a replacement.
            return text
        if self._pat is None:
            self._pat = re.compile(
                u'|'.join([u'(%s)' % p for p in self._pats]),
                re.UNICODE)
        return self._pat.sub(self._do_sub, text)

    def _do_sub(self, m):
        """Return the replacement for one match of the combined pattern.

        :param m: match object produced by the combined pattern.
        """
        # Each pattern owns exactly one group, so lastindex identifies it.
        fun = self._funs[m.lastindex - 1]
        matched = m.group(0)
        if callable(fun):
            return fun(matched)
        # Substitute via a function so the matched text is inserted verbatim;
        # passing it as a template string would re-interpret any backslash
        # sequences it contains (e.g. '\\\\' -> '\\', '\\1' -> group ref).
        return self._expand.sub(lambda _placeholder: matched, fun)
|
86  | 
||
87  | 
||
88  | 
_sub_named = Replacer()  | 
|
89  | 
_sub_named.add(ur'\[:digit:\]', ur'\d')  | 
|
90  | 
_sub_named.add(ur'\[:space:\]', ur'\s')  | 
|
91  | 
_sub_named.add(ur'\[:alnum:\]', ur'\w')  | 
|
92  | 
_sub_named.add(ur'\[:ascii:\]', ur'\0-\x7f')  | 
|
93  | 
_sub_named.add(ur'\[:blank:\]', ur' \t')  | 
|
94  | 
_sub_named.add(ur'\[:cntrl:\]', ur'\0-\x1f\x7f-\x9f')  | 
|
95  | 
||
96  | 
||
97  | 
def _sub_group(m):
    """Translate a glob character group into a regex character class.

    :param m: the matched group text, including the enclosing brackets.
    :return: a regex character class; a leading '!' or '^' inside the
        brackets yields a negated class, and named classes in the body are
        translated by _sub_named.
    """
    if m[1] in (u'!', u'^'):
        # Skip the opening bracket and the negation marker.
        opening, body = u'[^', m[2:-1]
    else:
        opening, body = u'[', m[1:-1]
    return opening + _sub_named(body) + u']'
|
101  | 
||
102  | 
||
103  | 
def _invalid_regex(repl):  | 
|
104  | 
def _(m):  | 
|
| 
2135.2.7
by Kent Gibson
 Implement JAM's review suggestions.  | 
105  | 
warning(u"'%s' not allowed within a regular expression. "  | 
106  | 
"Replacing with '%s'" % (m, repl))  | 
|
| 
2135.2.1
by Kent Gibson
 Added glob module to replace broken fnmatch based ignore pattern matching (#57637)  | 
107  | 
return repl  | 
108  | 
return _  | 
|
109  | 
||
110  | 
||
| 
2298.8.1
by Kent Gibson
 Normalise ignore patterns to use '/' path separator.  | 
111  | 
def _trailing_backslashes_regex(m):  | 
| 
2298.8.2
by Kent Gibson
 Review fixes for lp86451 patch.  | 
112  | 
"""Check trailing backslashes.  | 
113  | 
||
114  | 
    Does a head count on trailing backslashes to ensure there isn't an odd
 | 
|
115  | 
    one on the end that would escape the brackets we wrap the RE in.
 | 
|
116  | 
    """
 | 
|
117  | 
if (len(m) % 2) != 0:  | 
|
| 
2298.8.1
by Kent Gibson
 Normalise ignore patterns to use '/' path separator.  | 
118  | 
warning(u"Regular expressions cannot end with an odd number of '\\'. "  | 
119  | 
"Dropping the final '\\'.")  | 
|
120  | 
return m[:-1]  | 
|
121  | 
return m  | 
|
122  | 
||
123  | 
||
| 
2135.2.1
by Kent Gibson
 Added glob module to replace broken fnmatch based ignore pattern matching (#57637)  | 
124  | 
# Sanitises a user supplied 'RE:' pattern so that it can be embedded as one
# alternative of a larger regular expression.  Backslashes are written out
# explicitly (no raw-string prefix) so each literal means the same thing on
# every Python version.
_sub_re = Replacer()
# Strip the 'RE:' marker.
_sub_re.add(u'^RE:', u'')
# Plain groups become non-capturing: the combined expressions rely on one
# capturing group per pattern.
_sub_re.add(u'\\((?!\\?)', u'(?:')
# Named groups are not allowed.  Only the group header up to the first '>'
# is replaced; a greedy '.*' here would swallow everything up to the last
# '>' in the expression and silently delete parts of the pattern.
_sub_re.add(u'\\(\\?P<[^>]*>', _invalid_regex(u'(?:'))
# Named back-references are dropped altogether.
_sub_re.add(u'\\(\\?P=[^)]*\\)', _invalid_regex(u''))
# A dangling backslash at the end would escape the wrapping bracket.
_sub_re.add(u'\\\\+$', _trailing_backslashes_regex)
| 
2135.2.1
by Kent Gibson
 Added glob module to replace broken fnmatch based ignore pattern matching (#57637)  | 
130  | 
|
131  | 
||
| 
2135.2.2
by Kent Gibson
 Ignore pattern matcher (glob.py) patches:  | 
132  | 
_sub_fullpath = Replacer()  | 
133  | 
_sub_fullpath.add(ur'^RE:.*', _sub_re) # RE:<anything> is a regex  | 
|
134  | 
_sub_fullpath.add(ur'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]', _sub_group) # char group  | 
|
135  | 
_sub_fullpath.add(ur'(?:(?<=/)|^)(?:\.?/)+', u'') # canonicalize path  | 
|
136  | 
_sub_fullpath.add(ur'\\.', ur'\&') # keep anything backslashed  | 
|
137  | 
_sub_fullpath.add(ur'[(){}|^$+.]', ur'\\&') # escape specials  | 
|
138  | 
_sub_fullpath.add(ur'(?:(?<=/)|^)\*\*+/', ur'(?:.*/)?') # **/ after ^ or /  | 
|
139  | 
_sub_fullpath.add(ur'\*+', ur'[^/]*') # * elsewhere  | 
|
140  | 
_sub_fullpath.add(ur'\?', ur'[^/]') # ? everywhere  | 
|
141  | 
||
142  | 
||
143  | 
_sub_basename = Replacer()  | 
|
144  | 
_sub_basename.add(ur'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]', _sub_group) # char group  | 
|
145  | 
_sub_basename.add(ur'\\.', ur'\&') # keep anything backslashed  | 
|
146  | 
_sub_basename.add(ur'[(){}|^$+.]', ur'\\&') # escape specials  | 
|
147  | 
_sub_basename.add(ur'\*+', ur'.*') # * everywhere  | 
|
148  | 
_sub_basename.add(ur'\?', ur'.') # ? everywhere  | 
|
149  | 
||
150  | 
||
151  | 
def _sub_extension(pattern):
    """Translate an extension pattern into a regular expression fragment.

    :param pattern: a glob whose first two characters are skipped before
        translation (Globster routes patterns starting with '*.' here).
    :return: the remainder of the pattern translated by _sub_basename.
    """
    remainder = pattern[2:]
    return _sub_basename(remainder)
|
| 
2135.2.1
by Kent Gibson
 Added glob module to replace broken fnmatch based ignore pattern matching (#57637)  | 
153  | 
|
154  | 
||
155  | 
class Globster(object):
    """A simple wrapper for a set of glob patterns.

    Provides the capability to search the patterns to find a match for
    a given filename (including the full path).

    Patterns are translated to regular expressions to expedite matching.

    The regular expressions for multiple patterns are aggregated into
    a super-regex containing groups of up to 99 patterns.
    The 99 limitation is due to the grouping limit of the Python re module.
    The resulting super-regex and associated patterns are stored as a list of
    (regex,[patterns]) in _regex_patterns.

    For performance reasons the patterns are categorised as extension patterns
    (those that match against a file extension), basename patterns
    (those that match against the basename of the filename),
    and fullpath patterns (those that match against the full path).
    The translations used for extensions and basenames are relatively simpler
    and therefore faster to perform than the fullpath patterns.

    Also, the extension patterns are more likely to find a match and
    so are matched first, then the basename patterns, then the fullpath
    patterns.
    """

    def __init__(self, patterns):
        """Constructor.

        :param patterns: sequence of glob patterns
        """
        self._regex_patterns = []
        ext_patterns = []
        base_patterns = []
        path_patterns = []
        for raw_pattern in patterns:
            pat = normalize_pattern(raw_pattern)
            # Pick the category first, then file the pattern under it.
            if pat.startswith(u'RE:') or u'/' in pat:
                bucket = path_patterns
            elif pat.startswith(u'*.'):
                bucket = ext_patterns
            else:
                bucket = base_patterns
            bucket.append(pat)
        # Registration order is the order in which match() tries them.
        self._add_patterns(ext_patterns, _sub_extension,
            prefix=r'(?:.*/)?(?!.*/)(?:.*\.)')
        self._add_patterns(base_patterns, _sub_basename,
            prefix=r'(?:.*/)?(?!.*/)')
        self._add_patterns(path_patterns, _sub_fullpath)

    def _add_patterns(self, patterns, translator, prefix=''):
        """Compile patterns into super-regexes of at most 99 alternatives.

        :param patterns: the glob patterns to add.
        :param translator: callable converting one glob into regex text.
        :param prefix: regex text placed in front of the alternatives.
        """
        for start in range(0, len(patterns), 99):
            chunk = patterns[start:start + 99]
            # One capturing group per pattern, so that lastindex of a match
            # identifies which pattern of the chunk matched.
            alternatives = '|'.join(
                ['(%s)' % translator(pat) for pat in chunk])
            joined_rule = '%s(?:%s)$' % (prefix, alternatives)
            compiled = re.compile(joined_rule, re.UNICODE)
            self._regex_patterns.append((compiled, chunk))

    def match(self, filename):
        """Searches for a pattern that matches the given filename.

        :return A matching pattern or None if there is no matching pattern.
        """
        for regex, patterns in self._regex_patterns:
            found = regex.match(filename)
            if found is not None:
                return patterns[found.lastindex - 1]
        return None
|
| 
3398.1.1
by Ian Clatworthy
 simplify the custom Globster to only care about ordering  | 
217  | 
|
218  | 
||
219  | 
class _OrderedGlobster(Globster):
    """A Globster that keeps pattern order."""

    def __init__(self, patterns):
        """Constructor.

        :param patterns: sequence of glob patterns
        """
        # Note: This could be smarter by running like sequences together
        # Every pattern gets its own single-entry regex, so the patterns are
        # tried in exactly the order in which they were given.
        self._regex_patterns = []
        for raw_pattern in patterns:
            pat = normalize_pattern(raw_pattern)
            if pat.startswith(u'RE:') or u'/' in pat:
                translator, prefix = _sub_fullpath, ''
            elif pat.startswith(u'*.'):
                translator, prefix = (_sub_extension,
                                      r'(?:.*/)?(?!.*/)(?:.*\.)')
            else:
                translator, prefix = _sub_basename, r'(?:.*/)?(?!.*/)'
            self._add_patterns([pat], translator, prefix=prefix)
|
239  | 
||
| 
2298.8.1
by Kent Gibson
 Normalise ignore patterns to use '/' path separator.  | 
240  | 
|
241  | 
def normalize_pattern(pattern):
    """Converts backslashes in path patterns to forward slashes.

    Patterns starting with 'RE:' keep their backslashes - they may contain
    escapes. Trailing forward slashes are stripped from every pattern.

    :param pattern: the glob or 'RE:' pattern to normalize.
    :return: the normalized pattern.
    """
    if pattern.startswith('RE:'):
        normalized = pattern
    else:
        normalized = pattern.replace('\\', '/')
    return normalized.rstrip('/')