92
88
# Rule table for POSIX-style named character classes: each add() call pairs
# a pattern matching a literal "[:name:]" token with the regex character-class
# fragment registered for it (e.g. "[:digit:]" is paired with "\d").
# NOTE(review): every rule is registered twice below -- once with r'' literals
# and once with ur'' literals -- with bare integer lines in between. This looks
# like two revisions of the file interleaved; confirm which set is intended
# (the ur'' prefix is not accepted by Python 3).
_sub_named = Replacer()
93
_sub_named.add(r'\[:digit:\]', r'\d')
94
_sub_named.add(r'\[:space:\]', r'\s')
95
_sub_named.add(r'\[:alnum:\]', r'\w')
96
_sub_named.add(r'\[:ascii:\]', r'\0-\x7f')
97
_sub_named.add(r'\[:blank:\]', r' \t')
98
_sub_named.add(r'\[:cntrl:\]', r'\0-\x1f\x7f-\x9f')
89
# Same six rules as above, written with ur'' literals.
_sub_named.add(ur'\[:digit:\]', ur'\d')
90
_sub_named.add(ur'\[:space:\]', ur'\s')
91
_sub_named.add(ur'\[:alnum:\]', ur'\w')
92
_sub_named.add(ur'\[:ascii:\]', ur'\0-\x7f')
93
_sub_named.add(ur'\[:blank:\]', ur' \t')
94
_sub_named.add(ur'\[:cntrl:\]', ur'\0-\x1f\x7f-\x9f')
101
97
def _sub_group(m):
128
124
# Rule table for patterns given as explicit regexes with an "RE:" prefix; it
# is registered on _sub_fullpath as the handler for r'^RE:.*'.
# NOTE(review): the group/backslash rules appear twice -- first with doubled
# backslashes in u'' literals, then with single backslashes (and a ur''
# literal). This looks like two revisions interleaved; confirm which is wanted.
_sub_re = Replacer()
129
125
# Strip the leading "RE:" marker.
_sub_re.add(u'^RE:', u'')
130
# An opening "(" that is not followed by "?" is rewritten to "(?:".
_sub_re.add(u'\\((?!\\?)', u'(?:')
131
# "(?P<...>" and "(?P=...)" constructs are routed through _invalid_regex();
# that helper is defined outside this view -- presumably it reports or
# neutralises named groups and backreferences; verify against its definition.
_sub_re.add(u'\\(\\?P<.*>', _invalid_regex(u'(?:'))
132
_sub_re.add(u'\\(\\?P=[^)]*\\)', _invalid_regex(u''))
133
# A run of backslashes at the very end of the pattern is handed to
# _trailing_backslashes_regex (defined outside this view).
_sub_re.add(r'\\+$', _trailing_backslashes_regex)
126
# Single-backslash spellings of the same rules follow.
_sub_re.add(u'\((?!\?)', u'(?:')
127
_sub_re.add(u'\(\?P<.*>', _invalid_regex(u'(?:'))
128
_sub_re.add(u'\(\?P=[^)]*\)', _invalid_regex(u''))
129
_sub_re.add(ur'\\+$', _trailing_backslashes_regex)
136
132
# Rule table used as the translator for full-path patterns, i.e. those that
# start with "RE:" or contain "/" (see the dispatch in Globster.__init__).
# Here "*" and "?" are translated so that they never match a "/".
# NOTE(review): the rules are listed twice (r'' literals, then ur'' literals)
# with bare integer lines in between, and the first char-group call is split
# around one of those integers. This looks like two revisions interleaved;
# confirm which set is intended.
_sub_fullpath = Replacer()
137
_sub_fullpath.add(r'^RE:.*', _sub_re) # RE:<anything> is a regex
138
_sub_fullpath.add(r'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]',
139
_sub_group) # char group
140
_sub_fullpath.add(r'(?:(?<=/)|^)(?:\.?/)+', u'') # canonicalize path
141
_sub_fullpath.add(r'\\.', r'\&') # keep anything backslashed
142
_sub_fullpath.add(r'[(){}|^$+.]', r'\\&') # escape specials
143
_sub_fullpath.add(r'(?:(?<=/)|^)\*\*+/', r'(?:.*/)?') # **/ after ^ or /
144
_sub_fullpath.add(r'\*+', r'[^/]*') # * elsewhere
145
_sub_fullpath.add(r'\?', r'[^/]') # ? everywhere
133
# Same rules as above, written with ur'' literals.
_sub_fullpath.add(ur'^RE:.*', _sub_re) # RE:<anything> is a regex
134
_sub_fullpath.add(ur'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]', _sub_group) # char group
135
_sub_fullpath.add(ur'(?:(?<=/)|^)(?:\.?/)+', u'') # canonicalize path
136
_sub_fullpath.add(ur'\\.', ur'\&') # keep anything backslashed
137
_sub_fullpath.add(ur'[(){}|^$+.]', ur'\\&') # escape specials
138
_sub_fullpath.add(ur'(?:(?<=/)|^)\*\*+/', ur'(?:.*/)?') # **/ after ^ or /
139
_sub_fullpath.add(ur'\*+', ur'[^/]*') # * elsewhere
140
_sub_fullpath.add(ur'\?', ur'[^/]') # ? everywhere
148
143
# Rule table used as the translator for basename patterns (the patterns that
# Globster.__init__ does not classify as full-path or "*." extension patterns).
# Unlike _sub_fullpath, "*" becomes ".*" and "?" becomes ".", so neither
# wildcard excludes "/" here.
# NOTE(review): the rules are listed twice (r'' literals, then ur'' literals)
# with bare integer lines in between, and the first char-group call is split
# around one of those integers. This looks like two revisions interleaved;
# confirm which set is intended.
_sub_basename = Replacer()
149
_sub_basename.add(r'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]',
150
_sub_group) # char group
151
_sub_basename.add(r'\\.', r'\&') # keep anything backslashed
152
_sub_basename.add(r'[(){}|^$+.]', r'\\&') # escape specials
153
_sub_basename.add(r'\*+', r'.*') # * everywhere
154
_sub_basename.add(r'\?', r'.') # ? everywhere
144
# Same rules as above, written with ur'' literals.
_sub_basename.add(ur'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]', _sub_group) # char group
145
_sub_basename.add(ur'\\.', ur'\&') # keep anything backslashed
146
_sub_basename.add(ur'[(){}|^$+.]', ur'\\&') # escape specials
147
_sub_basename.add(ur'\*+', ur'.*') # * everywhere
148
_sub_basename.add(ur'\?', ur'.') # ? everywhere
157
151
def _sub_extension(pattern):
183
177
so are matched first, then the basename patterns, then the fullpath
186
# We want to _add_patterns in a specific order (as per pattern_types below)
187
# starting with the shortest and going to the longest.
188
# As some Python versions don't support ordered dicts, the list below is
189
# used to select inputs for _add_pattern in a specific order.
190
pattern_types = ["extension", "basename", "fullpath"]
194
"translator": _sub_extension,
195
"prefix": r'(?:.*/)?(?!.*/)(?:.*\.)'
198
"translator": _sub_basename,
199
"prefix": r'(?:.*/)?(?!.*/)'
202
"translator": _sub_fullpath,
207
180
def __init__(self, patterns):
208
181
self._regex_patterns = []
214
185
for pat in patterns:
215
186
pat = normalize_pattern(pat)
216
pattern_lists[Globster.identify(pat)].append(pat)
217
pi = Globster.pattern_info
218
for t in Globster.pattern_types:
219
self._add_patterns(pattern_lists[t], pi[t]["translator"],
187
if pat.startswith(u'RE:') or u'/' in pat:
188
path_patterns.append(pat)
189
elif pat.startswith(u'*.'):
190
ext_patterns.append(pat)
192
base_patterns.append(pat)
193
self._add_patterns(ext_patterns,_sub_extension,
194
prefix=r'(?:.*/)?(?!.*/)(?:.*\.)')
195
self._add_patterns(base_patterns,_sub_basename,
196
prefix=r'(?:.*/)?(?!.*/)')
197
self._add_patterns(path_patterns,_sub_fullpath)
222
199
def _add_patterns(self, patterns, translator, prefix=''):
225
'(%s)' % translator(pat) for pat in patterns[:99]]
201
grouped_rules = ['(%s)' % translator(pat) for pat in patterns[:99]]
226
202
joined_rule = '%s(?:%s)$' % (prefix, '|'.join(grouped_rules))
227
# Explicitly use lazy_compile here, because we count on its
228
# nicer error reporting.
229
self._regex_patterns.append((
230
lazy_regex.lazy_compile(joined_rule, re.UNICODE),
203
self._regex_patterns.append((re.compile(joined_rule, re.UNICODE),
232
205
patterns = patterns[99:]
237
210
:return: A matching pattern or None if there is no matching pattern.
240
for regex, patterns in self._regex_patterns:
241
match = regex.match(filename)
243
return patterns[match.lastindex - 1]
244
except lazy_regex.InvalidPattern as e:
245
# We can't show the default e.msg to the user as that's for
246
# the combined pattern we sent to regex. Instead we indicate to
247
# the user that an ignore file needs fixing.
248
mutter('Invalid pattern found in regex: %s.', e.msg)
250
"File ~/.config/breezy/ignore or "
251
".bzrignore contains error(s).")
253
for _, patterns in self._regex_patterns:
255
if not Globster.is_pattern_valid(p):
256
bad_patterns += ('\n %s' % p)
257
e.msg += bad_patterns
212
for regex, patterns in self._regex_patterns:
213
match = regex.match(filename)
215
return patterns[match.lastindex -1]
262
def identify(pattern):
263
"""Returns pattern category.
265
:param pattern: normalized pattern.
266
Identify if a pattern is fullpath, basename or extension
267
and returns the appropriate type.
269
if pattern.startswith(u'RE:') or u'/' in pattern:
271
elif pattern.startswith(u'*.'):
277
def is_pattern_valid(pattern):
278
"""Returns True if pattern is valid.
280
:param pattern: Normalized pattern.
281
is_pattern_valid() assumes pattern to be normalized.
282
see: globbing.normalize_pattern
285
translator = Globster.pattern_info[Globster.identify(
286
pattern)]["translator"]
287
tpattern = '(%s)' % translator(pattern)
289
re_obj = lazy_regex.lazy_compile(tpattern, re.UNICODE)
290
re_obj.search("") # force compile
291
except lazy_regex.InvalidPattern:
296
class ExceptionGlobster(object):
297
"""A Globster that supports exception patterns.
299
Exceptions are ignore patterns prefixed with '!'. Exception
300
patterns take precedence over regular patterns and cause a
301
matching filename to return None from the match() function.
302
Patterns using a '!!' prefix are highest precedence, and act
303
as regular ignores. '!!' patterns are useful to establish ignores
304
that apply under paths specified by '!' exception patterns.
307
def __init__(self, patterns):
308
ignores = [[], [], []]
310
if p.startswith(u'!!'):
311
ignores[2].append(p[2:])
312
elif p.startswith(u'!'):
313
ignores[1].append(p[1:])
316
self._ignores = [Globster(i) for i in ignores]
318
def match(self, filename):
319
"""Searches for a pattern that matches the given filename.
321
:return: A matching pattern or None if there is no matching pattern.
323
double_neg = self._ignores[2].match(filename)
325
return "!!%s" % double_neg
326
elif self._ignores[1].match(filename):
329
return self._ignores[0].match(filename)
332
219
class _OrderedGlobster(Globster):
333
220
"""A Globster that keeps pattern order."""
341
228
self._regex_patterns = []
342
229
for pat in patterns:
343
230
pat = normalize_pattern(pat)
344
t = Globster.identify(pat)
345
self._add_patterns([pat], Globster.pattern_info[t]["translator"],
346
Globster.pattern_info[t]["prefix"])
349
# Pattern matching a run of one or more "/" or "\" characters.
_slashes = lazy_regex.lazy_compile(r'[\\/]+')
231
if pat.startswith(u'RE:') or u'/' in pat:
232
self._add_patterns([pat], _sub_fullpath)
233
elif pat.startswith(u'*.'):
234
self._add_patterns([pat], _sub_extension,
235
prefix=r'(?:.*/)?(?!.*/)(?:.*\.)')
237
self._add_patterns([pat], _sub_basename,
238
prefix=r'(?:.*/)?(?!.*/)')
352
241
def normalize_pattern(pattern):