bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
4763.2.4
by John Arbash Meinel
 merge bzr.2.1 in preparation for NEWS entry.  | 
1  | 
# Copyright (C) 2006-2010 Canonical Ltd
 | 
| 
2135.2.1
by Kent Gibson
 Added glob module to replace broken fnmatch based ignore pattern matching (#57637)  | 
2  | 
|
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
| 
4183.7.1
by Sabin Iacob
 update FSF mailing address  | 
15  | 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
| 
2135.2.1
by Kent Gibson
 Added glob module to replace broken fnmatch based ignore pattern matching (#57637)  | 
16  | 
|
17  | 
"""Tools for converting globs to regular expressions.
 | 
|
18  | 
||
19  | 
This module provides functions for converting shell-like globs to regular
 | 
|
20  | 
expressions.
 | 
|
21  | 
"""
 | 
|
22  | 
||
23  | 
import re  | 
|
24  | 
||
25  | 
from bzrlib.trace import (  | 
|
26  | 
    warning
 | 
|
27  | 
    )
 | 
|
28  | 
||
29  | 
||
30  | 
class Replacer(object):
    """Do a multiple-pattern substitution.

    The patterns and substitutions are combined into one, so the result of
    one replacement is never substituted again. Add the patterns and
    replacements via the add method and then call the object. The patterns
    must not contain capturing groups.
    """

    # Matches the literal two-character marker \& inside a string template.
    # Spelled without the ``ur`` prefix so the literal is accepted by both
    # Python 2 and Python 3; the compiled pattern is unchanged.
    _expand = re.compile(u'\\\\&')

    def __init__(self, source=None):
        """Create a replacer, optionally copying the rules of another one.

        :param source: a Replacer whose patterns and replacements are copied
            into the new object, or None to start with no rules.
        """
        # The combined regex is compiled lazily on first call.
        self._pat = None
        if source:
            self._pats = list(source._pats)
            self._funs = list(source._funs)
        else:
            self._pats = []
            self._funs = []

    def add(self, pat, fun):
        r"""Add a pattern and replacement.

        The pattern must not contain capturing groups.
        The replacement might be either a string template in which \& will be
        replaced with the match, or a function that will get the matching text
        as argument. It does not get match object, because capturing is
        forbidden anyway.
        """
        # Invalidate the cached combined regex; it is rebuilt on next call.
        self._pat = None
        self._pats.append(pat)
        self._funs.append(fun)

    def add_replacer(self, replacer):
        r"""Add all patterns from another replacer.

        All patterns and replacements from replacer are appended to the ones
        already defined.
        """
        self._pat = None
        self._pats.extend(replacer._pats)
        self._funs.extend(replacer._funs)

    def __call__(self, text):
        """Apply every registered replacement to text in a single pass.

        :param text: the string to transform.
        :return: the transformed string; text itself when no patterns have
            been added.
        """
        if not self._pats:
            # An empty alternation would match the empty string everywhere
            # with no group set, so there is nothing sensible to substitute.
            return text
        if self._pat is None:
            # Each pattern gets its own capturing group so that
            # match.lastindex identifies which rule fired.
            self._pat = re.compile(
                u'|'.join([u'(%s)' % p for p in self._pats]),
                re.UNICODE)
        return self._pat.sub(self._do_sub, text)

    def _do_sub(self, m):
        """Return the replacement for one match of the combined regex."""
        fun = self._funs[m.lastindex - 1]
        matched = m.group(0)
        if hasattr(fun, '__call__'):
            return fun(matched)
        # Use a function replacement so the matched text is inserted
        # verbatim.  Passing it as a replacement *template* would make re
        # interpret its backslashes (e.g. '\\\\' collapsing to '\\', or '\\1'
        # being treated as a group reference).
        return self._expand.sub(lambda _: matched, fun)
|
86  | 
||
87  | 
||
88  | 
_sub_named = Replacer()  | 
|
89  | 
_sub_named.add(ur'\[:digit:\]', ur'\d')  | 
|
90  | 
_sub_named.add(ur'\[:space:\]', ur'\s')  | 
|
91  | 
_sub_named.add(ur'\[:alnum:\]', ur'\w')  | 
|
92  | 
_sub_named.add(ur'\[:ascii:\]', ur'\0-\x7f')  | 
|
93  | 
_sub_named.add(ur'\[:blank:\]', ur' \t')  | 
|
94  | 
_sub_named.add(ur'\[:cntrl:\]', ur'\0-\x1f\x7f-\x9f')  | 
|
95  | 
||
96  | 
||
97  | 
def _sub_group(m):
    """Translate a glob character group into a regex character class.

    :param m: the matched text of the whole group, including the enclosing
        square brackets.
    :return: the regex character class, negated when the group starts with
        '!' or '^', with named classes expanded via _sub_named.
    """
    if m[1] in (u'!', u'^'):
        opening, body_start = u'[^', 2
    else:
        opening, body_start = u'[', 1
    # Drop the closing bracket (and the negation marker, if any) before
    # expanding named classes, then re-wrap the result.
    return opening + _sub_named(m[body_start:-1]) + u']'
|
101  | 
||
102  | 
||
103  | 
def _invalid_regex(repl):
    """Build a replacement callback for a construct not allowed in a regex.

    :param repl: the text to substitute for the offending construct.
    :return: a function that takes the matched text, emits a warning naming
        the match and its replacement, and returns repl.
    """
    def _warn_and_replace(matched):
        warning(u"'%s' not allowed within a regular expression. "
                "Replacing with '%s'" % (matched, repl))
        return repl
    return _warn_and_replace
|
109  | 
||
110  | 
||
| 
2298.8.1
by Kent Gibson
 Normalise ignore patterns to use '/' path separator.  | 
111  | 
def _trailing_backslashes_regex(m):
    """Check trailing backslashes.

    Counts the backslashes at the end of a regular expression; an odd count
    would leave one that escapes the brackets the RE is later wrapped in.

    :param m: the matched run of trailing backslashes.
    :return: m unchanged when its length is even, otherwise m without its
        last backslash (a warning is emitted in that case).
    """
    if len(m) % 2 == 0:
        return m
    warning(u"Regular expressions cannot end with an odd number of '\\'. "
            "Dropping the final '\\'.")
    return m[:-1]
|
122  | 
||
123  | 
||
| 
2135.2.1
by Kent Gibson
 Added glob module to replace broken fnmatch based ignore pattern matching (#57637)  | 
124  | 
_sub_re = Replacer()  | 
125  | 
_sub_re.add(u'^RE:', u'')  | 
|
126  | 
_sub_re.add(u'\((?!\?)', u'(?:')  | 
|
127  | 
_sub_re.add(u'\(\?P<.*>', _invalid_regex(u'(?:'))  | 
|
128  | 
_sub_re.add(u'\(\?P=[^)]*\)', _invalid_regex(u''))  | 
|
| 
2298.8.1
by Kent Gibson
 Normalise ignore patterns to use '/' path separator.  | 
129  | 
_sub_re.add(ur'\\+$', _trailing_backslashes_regex)  | 
| 
2135.2.1
by Kent Gibson
 Added glob module to replace broken fnmatch based ignore pattern matching (#57637)  | 
130  | 
|
131  | 
||
| 
2135.2.2
by Kent Gibson
 Ignore pattern matcher (glob.py) patches:  | 
132  | 
_sub_fullpath = Replacer()  | 
133  | 
_sub_fullpath.add(ur'^RE:.*', _sub_re) # RE:<anything> is a regex  | 
|
134  | 
_sub_fullpath.add(ur'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]', _sub_group) # char group  | 
|
135  | 
_sub_fullpath.add(ur'(?:(?<=/)|^)(?:\.?/)+', u'') # canonicalize path  | 
|
136  | 
_sub_fullpath.add(ur'\\.', ur'\&') # keep anything backslashed  | 
|
137  | 
_sub_fullpath.add(ur'[(){}|^$+.]', ur'\\&') # escape specials  | 
|
138  | 
_sub_fullpath.add(ur'(?:(?<=/)|^)\*\*+/', ur'(?:.*/)?') # **/ after ^ or /  | 
|
139  | 
_sub_fullpath.add(ur'\*+', ur'[^/]*') # * elsewhere  | 
|
140  | 
_sub_fullpath.add(ur'\?', ur'[^/]') # ? everywhere  | 
|
141  | 
||
142  | 
||
143  | 
_sub_basename = Replacer()  | 
|
144  | 
_sub_basename.add(ur'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]', _sub_group) # char group  | 
|
145  | 
_sub_basename.add(ur'\\.', ur'\&') # keep anything backslashed  | 
|
146  | 
_sub_basename.add(ur'[(){}|^$+.]', ur'\\&') # escape specials  | 
|
147  | 
_sub_basename.add(ur'\*+', ur'.*') # * everywhere  | 
|
148  | 
_sub_basename.add(ur'\?', ur'.') # ? everywhere  | 
|
149  | 
||
150  | 
||
151  | 
def _sub_extension(pattern):
    """Translate an extension pattern into a regex fragment.

    :param pattern: a glob whose first two characters are skipped (callers
        in this module pass patterns starting with '*.').
    :return: the remainder of the pattern translated with the basename rules.
    """
    remainder = pattern[2:]
    return _sub_basename(remainder)
|
| 
2135.2.1
by Kent Gibson
 Added glob module to replace broken fnmatch based ignore pattern matching (#57637)  | 
153  | 
|
154  | 
||
155  | 
class Globster(object):
    """A simple wrapper for a set of glob patterns.

    Provides the capability to search the patterns to find a match for
    a given filename (including the full path).

    Patterns are translated to regular expressions to expedite matching.

    The regular expressions for multiple patterns are aggregated into
    a super-regex containing groups of up to 99 patterns.
    The 99 limitation is due to the grouping limit of the Python re module.
    The resulting super-regex and associated patterns are stored as a list of
    (regex,[patterns]) in _regex_patterns.

    For performance reasons the patterns are categorised as extension patterns
    (those that match against a file extension), basename patterns
    (those that match against the basename of the filename),
    and fullpath patterns (those that match against the full path).
    The translations used for extensions and basenames are relatively simpler
    and therefore faster to perform than the fullpath patterns.

    Also, the extension patterns are more likely to find a match and
    so are matched first, then the basename patterns, then the fullpath
    patterns.
    """

    def __init__(self, patterns):
        """Sort the patterns into categories and compile them.

        :param patterns: iterable of glob (or 'RE:' prefixed) patterns.
        """
        self._regex_patterns = []
        by_path, by_base, by_ext = [], [], []
        for raw in patterns:
            normalized = normalize_pattern(raw)
            if normalized.startswith(u'RE:') or u'/' in normalized:
                bucket = by_path
            elif normalized.startswith(u'*.'):
                bucket = by_ext
            else:
                bucket = by_base
            bucket.append(normalized)
        # Registration order is the matching order: extensions, basenames,
        # then full paths.
        self._add_patterns(by_ext, _sub_extension,
            prefix=r'(?:.*/)?(?!.*/)(?:.*\.)')
        self._add_patterns(by_base, _sub_basename,
            prefix=r'(?:.*/)?(?!.*/)')
        self._add_patterns(by_path, _sub_fullpath)

    def _add_patterns(self, patterns, translator, prefix=''):
        """Compile patterns in chunks of at most 99 and record them.

        :param patterns: list of normalized patterns of one category.
        :param translator: callable converting one pattern to a regex string.
        :param prefix: regex text placed before the alternation of patterns.
        """
        remaining = patterns
        while remaining:
            chunk = remaining[:99]
            remaining = remaining[99:]
            # One capturing group per pattern, so match.lastindex maps back
            # to the pattern's position within the chunk.
            alternatives = '|'.join(['(%s)' % translator(p) for p in chunk])
            compiled = re.compile('%s(?:%s)$' % (prefix, alternatives),
                                  re.UNICODE)
            self._regex_patterns.append((compiled, chunk))

    def match(self, filename):
        """Searches for a pattern that matches the given filename.

        :return: A matching pattern or None if there is no matching pattern.
        """
        for compiled, originals in self._regex_patterns:
            found = compiled.match(filename)
            if found:
                return originals[found.lastindex - 1]
        return None
|
| 
3398.1.1
by Ian Clatworthy
 simplify the custom Globster to only care about ordering  | 
217  | 
|
| 
4948.5.7
by John Whitley
 Terminology change: exclusion => exception.  | 
218  | 
class ExceptionGlobster(object):
    """A Globster that supports exception patterns.

    Exceptions are ignore patterns prefixed with '!'.  Exception
    patterns take precedence over regular patterns and cause a
    matching filename to return None from the match() function.
    Patterns using a '!!' prefix are highest precedence, and act
    as regular ignores. '!!' patterns are useful to establish ignores
    that apply under paths specified by '!' exception patterns.
    """

    def __init__(self,patterns):
        """Split the patterns by prefix and build one Globster per group.

        :param patterns: iterable of patterns, optionally prefixed with '!'
            or '!!'.
        """
        regular, exceptions, overrides = [], [], []
        for pattern in patterns:
            if pattern.startswith(u'!!'):
                overrides.append(pattern[2:])
            elif pattern.startswith(u'!'):
                exceptions.append(pattern[1:])
            else:
                regular.append(pattern)
        # Index 0: regular ignores, 1: '!' exceptions, 2: '!!' overrides.
        self._ignores = [Globster(group)
                         for group in (regular, exceptions, overrides)]

    def match(self, filename):
        """Searches for a pattern that matches the given filename.

        :return: A matching pattern or None if there is no matching pattern.
            A match against a '!!' pattern is returned with its '!!' prefix.
        """
        regular, exceptions, overrides = self._ignores
        forced = overrides.match(filename)
        if forced:
            return "!!%s" % forced
        if exceptions.match(filename):
            return None
        return regular.match(filename)
| 
3398.1.1
by Ian Clatworthy
 simplify the custom Globster to only care about ordering  | 
252  | 
|
253  | 
class _OrderedGlobster(Globster):
    """A Globster that keeps pattern order."""

    def __init__(self, patterns):
        """Constructor.

        :param patterns: sequence of glob patterns
        """
        # Note: This could be smarter by running like sequences together
        self._regex_patterns = []
        for raw in patterns:
            pat = normalize_pattern(raw)
            if pat.startswith(u'RE:') or u'/' in pat:
                translator, prefix = _sub_fullpath, ''
            elif pat.startswith(u'*.'):
                translator, prefix = (_sub_extension,
                                      r'(?:.*/)?(?!.*/)(?:.*\.)')
            else:
                translator, prefix = _sub_basename, r'(?:.*/)?(?!.*/)'
            # One regex per pattern, so the caller's ordering is preserved.
            self._add_patterns([pat], translator, prefix=prefix)
273  | 
||
| 
2298.8.1
by Kent Gibson
 Normalise ignore patterns to use '/' path separator.  | 
274  | 
|
| 
4792.4.1
by Gordon Tyler
 Fixed globbing.normalize_pattern to not strip '/' down to '' and normalize multiple slashes.  | 
275  | 
# One or more consecutive path separators of either kind.
_slashes = re.compile(r'[\\/]+')


def normalize_pattern(pattern):
    """Converts backslashes in path patterns to forward slashes.

    Runs of slashes and backslashes are collapsed into a single '/'.
    Doesn't normalize regular expressions - they may contain escapes. This
    covers plain 'RE:' patterns as well as their '!' (exception) and '!!'
    prefixed forms.

    :param pattern: the ignore pattern to normalize.
    :return: the normalized pattern, with trailing '/' characters removed
        unless the pattern is a single character.
    """
    if not pattern.startswith(('RE:', '!RE:', '!!RE:')):
        pattern = _slashes.sub('/', pattern)
    # A lone '/' is kept so that the pattern is not reduced to ''.
    if len(pattern) > 1:
        pattern = pattern.rstrip('/')
    return pattern