1
# Copyright (C) 2006 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Tools for converting globs to regular expressions.
19
This module provides functions for converting shell-like globs to regular
expressions.
"""
25
from bzrlib.trace import (
31
class Replacer(object):
    """Do a multiple-pattern substitution.

    The patterns and substitutions are combined into one, so the result of
    one replacement is never substituted again. Add the patterns and
    replacements via the add method and then call the object. The patterns
    must not contain capturing groups.
    """

    # NOTE(review): the reviewed text of this class had lost its indentation
    # and several lines (the ``self._pat = None`` resets, the empty-list
    # branch of __init__, the ``_do_sub`` header); they are reconstructed
    # from the surrounding code -- confirm against upstream.

    # Matches the two-character placeholder ``\&`` inside a string template.
    # Same value as ur'\\&', spelled without the Python-2-only ``ur`` prefix.
    _expand = re.compile(u'\\\\&')

    def __init__(self, source=None):
        """Create a replacer, optionally seeded from another one.

        :param source: a Replacer whose patterns and replacements are copied
            (the lists are not shared), or None to start empty.
        """
        # Combined regex of all patterns; compiled lazily by __call__.
        self._pat = None
        if source is not None:
            self._pats = list(source._pats)
            self._funs = list(source._funs)
        else:
            self._pats = []
            self._funs = []

    def add(self, pat, fun):
        r"""Add a pattern and replacement.

        The pattern must not contain capturing groups.
        The replacement might be either a string template in which \& will be
        replaced with the match, or a function that will get the matching text
        as argument. It does not get the match object, because capturing
        groups are not allowed in the patterns.
        """
        self._pat = None  # the cached combined regex is now stale
        self._pats.append(pat)
        self._funs.append(fun)

    def add_replacer(self, replacer):
        r"""Add all patterns from another replacer.

        All patterns and replacements from replacer are appended to the ones
        already held by this object, keeping their order.
        """
        self._pat = None  # the cached combined regex is now stale
        self._pats.extend(replacer._pats)
        self._funs.extend(replacer._funs)

    def __call__(self, text):
        """Return ``text`` with every pattern match replaced in one pass."""
        if not self._pats:
            # Nothing registered: an empty alternation would match the empty
            # string everywhere and leave _do_sub without a group index.
            return text
        if self._pat is None:
            # One capturing group per pattern, so that m.lastindex tells
            # _do_sub which rule matched.
            self._pat = re.compile(
                u'|'.join([u'(%s)' % p for p in self._pats]),
                re.UNICODE)
        return self._pat.sub(self._do_sub, text)

    def _do_sub(self, m):
        """Produce the replacement for one match of the combined regex."""
        fun = self._funs[m.lastindex - 1]
        matched = m.group(0)
        if callable(fun):
            return fun(matched)
        # Substitute through a function so the matched text is inserted
        # literally.  Passing it as a replacement *template* would make re
        # interpret backslash escapes found in user input (``\\`` collapsing
        # to ``\``, ``\1`` raising an invalid-group error).
        return self._expand.sub(lambda _placeholder: matched, fun)
89
_sub_named = Replacer()
90
_sub_named.add(ur'\[:digit:\]', ur'\d')
91
_sub_named.add(ur'\[:space:\]', ur'\s')
92
_sub_named.add(ur'\[:alnum:\]', ur'\w')
93
_sub_named.add(ur'\[:ascii:\]', ur'\0-\x7f')
94
_sub_named.add(ur'\[:blank:\]', ur' \t')
95
_sub_named.add(ur'\[:cntrl:\]', ur'\0-\x1f\x7f-\x9f')
98
_sub_leading_named = Replacer()
99
_sub_leading_named.add(ur'\[:ascii:\]', ur'\0-\x2d\x2f-\x7f')
100
_sub_leading_named.add_replacer(_sub_named)
104
def _sub_group(m):
    """Translate a glob character group into a regex character class.

    :param m: the matched text of the whole group, brackets included
        (for example ``[!abc]``).
    :return: the regex class, with named classes expanded by _sub_named.
    """
    # NOTE(review): the ``def`` line was missing from the reviewed text; the
    # name and parameter are taken from the registrations of this callback
    # and from the body below -- confirm against upstream.
    if m[1] in (u'!', u'^'):
        # Negated group: skip the opening '[' and the negation marker.
        return u'[^' + _sub_named(m[2:-1]) + u']'
    return u'[' + _sub_named(m[1:-1]) + u']'
109
def _sub_leading_group(m):
    """Translate a character group that begins a path component.

    :param m: the matched text of the whole group, brackets included.
    :return: the regex class.  A negated group additionally excludes '.';
        a plain group has its named classes expanded by _sub_leading_named.
    """
    negated = m[1] in (u'!', u'^')
    if not negated:
        inner = _sub_leading_named(m[1:-1])
        return u'[' + inner + u']'
    # Drop the opening '[' and the negation marker before expanding.
    inner = _sub_named(m[2:-1])
    return u'[^.' + inner + u']'
115
def _invalid_regex(repl):
117
warning(u"'%s' not allowed withing regexp. Replacing with '%s'" %
124
_sub_re.add(u'^RE:', u'')
125
_sub_re.add(u'\((?!\?)', u'(?:')
126
_sub_re.add(u'\(\?P<.*>', _invalid_regex(u'(?:'))
127
_sub_re.add(u'\(\?P=[^)]*\)', _invalid_regex(u''))
130
_sub_shell = Replacer()
131
_sub_shell.add(ur'^RE:.*', _sub_re) # RE:<anything> is a regex
132
_sub_shell.add(ur'(?:(?<=/)|^)\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]',
133
_sub_leading_group) # char group
134
_sub_shell.add(ur'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]', _sub_group) # char group
135
_sub_shell.add(ur'(?:(?<=/)|^)(?:\.?/)+', u'') # canonicalize path
136
_sub_shell.add(ur'\\.', ur'\&') # keep anything backslashed
137
_sub_shell.add(ur'[(){}|^$+.]', ur'\\&') # escape specials
138
_sub_shell.add(ur'(?:(?<=/)|^)\*\.', ur'[^./][^/]*\.') # *. after /|^
139
_sub_shell.add(ur'(?:(?<=/)|^)\*', ur'(?:[^./][^/]*)?') # * after /|^
140
_sub_shell.add(ur'\*', ur'[^/]*') # * elsewhere
141
_sub_shell.add(ur'(?:(?<=/)|^)\?', ur'[^./]') # ? after /|^
142
_sub_shell.add(ur'\?', ur'[^/]') # ? elsewhere
145
_sub_shell_basename = Replacer()
146
_sub_shell_basename.add(ur'^\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]',
147
_sub_leading_group) # char group
148
_sub_shell_basename.add(ur'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]',
149
_sub_group) # char group
150
_sub_shell_basename.add(ur'\\.', ur'\&') # keep anything backslashed
151
_sub_shell_basename.add(ur'[(){}|^$+.]', ur'\\&') # escape specials
152
_sub_shell_basename.add(ur'^\*\.', ur'[^.].*\.') # *. after ^
153
_sub_shell_basename.add(ur'^\*', ur'(?:[^.].*)?') # * after ^
154
_sub_shell_basename.add(ur'\*', ur'.*') # * elsewhere
155
_sub_shell_basename.add(ur'^\?', ur'[^.]') # ? after ^
156
_sub_shell_basename.add(ur'\?', ur'.') # ? elsewhere
159
def _sub_shell_extension(pattern):
    """Translate an extension pattern, ignoring its first two characters.

    :param pattern: a glob; Globster passes those that start with ``*.``.
    :return: the remainder translated by _sub_shell_basename.
    """
    remainder = pattern[2:]
    return _sub_shell_basename(remainder)
163
class Globster(object):
    """A simple wrapper for a set of glob patterns.

    Provides the capability to search the patterns to find a match for
    a given filename (including the full path).

    Patterns are translated to regular expressions to expedite matching.

    The regular expressions for multiple patterns are aggregated into
    a super-regex containing groups of up to 99 patterns.
    The 99 limitation is due to the grouping limit of the Python re module.
    The resulting super-regex and associated patterns are stored as a list of
    (regex,[patterns]) in _regex_patterns.

    For performance reasons the patterns are categorised as extension patterns
    (those that match against a file extension), basename patterns
    (those that match against the basename of the filename),
    and fullpath patterns (those that match against the full path).
    The regexs used for extensions and basenames are relatively simpler
    and therefore faster to perform than the fullpath patterns.

    Also, the extension patterns are more likely to find a match and
    so are matched first, then the basename patterns, then the fullpath
    patterns.
    """

    # NOTE(review): the reviewed text of this class had lost its indentation
    # and several lines (list initialisers, ``for``/``else``/``while``/``if``
    # headers, the final ``return None``); they are reconstructed from the
    # surrounding code -- confirm against upstream.

    def __init__(self, patterns):
        """Categorise ``patterns`` and compile them into super-regexes.

        :param patterns: iterable of glob strings.  A pattern starting with
            ``RE:`` or containing ``/`` is a fullpath pattern, one starting
            with ``*.`` is an extension pattern, anything else is a basename
            pattern.
        """
        self._regex_patterns = []
        path_patterns = []
        base_patterns = []
        ext_patterns = []
        for pat in patterns:
            if pat.startswith(u'RE:') or u'/' in pat:
                path_patterns.append(pat)
            elif pat.startswith(u'*.'):
                ext_patterns.append(pat)
            else:
                base_patterns.append(pat)
        # Registration order is lookup order in match().
        self._add_patterns(ext_patterns, _sub_shell_extension,
                           prefix=r'(?:.*/)?(?!.*/)(?:[^.].*\.)')
        self._add_patterns(base_patterns, _sub_shell_basename,
                           prefix=r'(?:.*/)?(?!.*/)')
        self._add_patterns(path_patterns, _sub_shell)

    def _add_patterns(self, patterns, translator, prefix=''):
        """Compile ``patterns`` in chunks of at most 99 and store them.

        :param patterns: list of glob strings of one category.
        :param translator: callable turning one glob into regex text.
        :param prefix: regex text placed in front of every chunk.
        """
        while patterns:
            chunk = patterns[:99]
            # One capturing group per pattern, so that match() can map
            # lastindex back to the pattern that matched.
            grouped_rules = ['(%s)' % translator(pat) for pat in chunk]
            joined_rule = '%s(?:%s)$' % (prefix, '|'.join(grouped_rules))
            self._regex_patterns.append(
                (re.compile(joined_rule, re.UNICODE), chunk))
            patterns = patterns[99:]

    def match(self, filename):
        """Searches for a pattern that matches the given filename.

        :return: A matching pattern or None if there is no matching pattern.
        """
        for regex, patterns in self._regex_patterns:
            match = regex.match(filename)
            if match:
                return patterns[match.lastindex - 1]
        return None