/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
5967.9.3 by Martin Pool
Explicitly use lazy_regexp where we count on its error reporting behaviour
1
# Copyright (C) 2006-2011 Canonical Ltd
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
2
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
16
17
"""Tools for converting globs to regular expressions.
18
19
This module provides functions for converting shell-like globs to regular
20
expressions.
21
"""
22
23
import re
24
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
25
from . import (
5967.9.3 by Martin Pool
Explicitly use lazy_regexp where we count on its error reporting behaviour
26
    lazy_regex,
27
    )
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
28
from .trace import (
5326.2.7 by Parth Malwankar
Globster now mutters regex failure message before changing message
29
    mutter,
5326.2.1 by Parth Malwankar
added InvalidPattern error.
30
    warning,
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
31
    )
32
33
34
class Replacer(object):
    """Do a multiple-pattern substitution.

    The patterns and substitutions are combined into one, so the result of
    one replacement is never substituted again. Add the patterns and
    replacements via the add method and then call the object. The patterns
    must not contain capturing groups.
    """

    # Matches the literal two-character marker (backslash followed by '&')
    # that string templates use to stand for the matched text.
    _expand = lazy_regex.lazy_compile(u'\\\\&')

    def __init__(self, source=None):
        """Create a replacer, optionally copying the rules of another one.

        :param source: Another Replacer whose patterns and replacements are
            copied into this one, or None to start with no rules.
        """
        # The combined regex is built on first use and dropped again
        # whenever the rule list changes.
        self._pat = None
        if source:
            self._pats = list(source._pats)
            self._funs = list(source._funs)
        else:
            self._pats = []
            self._funs = []

    def add(self, pat, fun):
        r"""Add a pattern and replacement.

        The pattern must not contain capturing groups.
        The replacement might be either a string template in which \& will be
        replaced with the match, or a function that will get the matching text
        as argument. It does not get match object, because capturing is
        forbidden anyway.

        :param pat: Regular expression source for the pattern.
        :param fun: String template or callable producing the replacement.
        """
        self._pat = None
        self._pats.append(pat)
        self._funs.append(fun)

    def add_replacer(self, replacer):
        r"""Add all patterns from another replacer.

        All patterns and replacements from replacer are appended to the ones
        already defined.

        :param replacer: The Replacer whose rules are appended.
        """
        self._pat = None
        self._pats.extend(replacer._pats)
        self._funs.extend(replacer._funs)

    def __call__(self, text):
        """Apply every registered replacement to text in a single pass.

        :param text: The string to transform.
        :return: The transformed string. With no rules registered the text
            is returned unchanged.
        """
        if not self._pats:
            # An empty alternation would match the empty string everywhere
            # and leave _do_sub without a group index to dispatch on.
            return text
        if self._pat is None:
            # Each pattern gets its own capturing group so that lastindex
            # identifies which rule matched.
            self._pat = lazy_regex.lazy_compile(
                u'|'.join([u'(%s)' % p for p in self._pats]),
                re.UNICODE)
        return self._pat.sub(self._do_sub, text)

    def _do_sub(self, m):
        """Produce the replacement text for one match of the combined regex.

        :param m: Match object from the combined regex.
        :return: The replacement string for the rule that matched.
        """
        fun = self._funs[m.lastindex - 1]
        matched = m.group(0)
        if callable(fun):
            return fun(matched)
        # Substitute through a callable so the matched text is inserted
        # verbatim. Passing it as a replacement template would make the re
        # module interpret any backslash escapes it contains.
        return self._expand.sub(lambda _marker: matched, fun)
90
91
92
# Translates named character classes, as written inside a glob character
# group, into the text used inside a regex character class.
_sub_named = Replacer()
# Classes with a direct regex shorthand.
_sub_named.add(r'\[:digit:\]', r'\d')
_sub_named.add(r'\[:space:\]', r'\s')
_sub_named.add(r'\[:alnum:\]', r'\w')
# Classes spelled out as explicit characters or ranges.
_sub_named.add(r'\[:ascii:\]', r'\0-\x7f')
_sub_named.add(r'\[:blank:\]', r' \t')
_sub_named.add(r'\[:cntrl:\]', r'\0-\x1f\x7f-\x9f')
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
99
100
101
def _sub_group(m):
    """Translate a glob character group into a regex character class.

    :param m: The matched group text, including the enclosing brackets.
    :return: A regex character class. A leading '!' or '^' inside the
        brackets becomes regex negation, and the remaining content is
        passed through _sub_named.
    """
    if m[1] in (u'!', u'^'):
        opener, content = u'[^', m[2:-1]
    else:
        opener, content = u'[', m[1:-1]
    return opener + _sub_named(content) + u']'
105
106
107
def _invalid_regex(repl):
108
    def _(m):
2135.2.7 by Kent Gibson
Implement JAM's review suggestions.
109
        warning(u"'%s' not allowed within a regular expression. "
110
                "Replacing with '%s'" % (m, repl))
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
111
        return repl
112
    return _
113
114
2298.8.1 by Kent Gibson
Normalise ignore patterns to use '/' path separator.
115
def _trailing_backslashes_regex(m):
2298.8.2 by Kent Gibson
Review fixes for lp86451 patch.
116
    """Check trailing backslashes.
117
118
    Does a head count on trailing backslashes to ensure there isn't an odd
119
    one on the end that would escape the brackets we wrap the RE in.
120
    """
121
    if (len(m) % 2) != 0:
2298.8.1 by Kent Gibson
Normalise ignore patterns to use '/' path separator.
122
        warning(u"Regular expressions cannot end with an odd number of '\\'. "
123
                "Dropping the final '\\'.")
124
        return m[:-1]
125
    return m
126
127
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
128
# Rewrites a user-supplied 'RE:' pattern so it can be embedded in the
# combined regex, where each pattern must not add capturing groups.
_sub_re = Replacer()
# Drop the 'RE:' marker itself.
_sub_re.add(u'^RE:', u'')
# Plain groups become non-capturing groups.
_sub_re.add(u'\\((?!\\?)', u'(?:')
# Named groups are not allowed; warn and make them non-capturing. The name
# is matched with [^>]* so that only the group opener itself is consumed,
# rather than everything up to the last '>' in the pattern.
_sub_re.add(u'\\(\\?P<[^>]*>', _invalid_regex(u'(?:'))
# Named back-references are not allowed; warn and remove them.
_sub_re.add(u'\\(\\?P=[^)]*\\)', _invalid_regex(u''))
# An odd trailing backslash would escape the wrapping bracket.
_sub_re.add(r'\\+$', _trailing_backslashes_regex)
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
134
135
2135.2.2 by Kent Gibson
Ignore pattern matcher (glob.py) patches:
136
# Translator for patterns matched against the full path. Rules are tried
# in the order they are added.
_sub_fullpath = Replacer()
# Anything starting with 'RE:' is a regex and is handed to _sub_re.
_sub_fullpath.add(r'^RE:.*', _sub_re)
# Character groups are translated by _sub_group.
_sub_fullpath.add(
    r'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]',
    _sub_group)
# Canonicalize the path: drop '/' and './' runs at the start or after '/'.
_sub_fullpath.add(r'(?:(?<=/)|^)(?:\.?/)+', u'')
# Keep anything that is backslash-escaped.
_sub_fullpath.add(r'\\.', r'\&')
# Escape characters that are special in a regex.
_sub_fullpath.add(r'[(){}|^$+.]', r'\\&')
# '**/' at the start or after '/' matches any number of directories.
_sub_fullpath.add(r'(?:(?<=/)|^)\*\*+/', r'(?:.*/)?')
# '*' anywhere else stays within one path component.
_sub_fullpath.add(r'\*+', r'[^/]*')
# '?' matches a single character other than '/'.
_sub_fullpath.add(r'\?', r'[^/]')
2135.2.2 by Kent Gibson
Ignore pattern matcher (glob.py) patches:
146
147
148
# Translator for patterns matched against the basename only. Rules are
# tried in the order they are added.
_sub_basename = Replacer()
# Character groups are translated by _sub_group.
_sub_basename.add(
    r'\[\^?\]?(?:[^][]|\[:[^]]+:\])+\]',
    _sub_group)
# Keep anything that is backslash-escaped.
_sub_basename.add(r'\\.', r'\&')
# Escape characters that are special in a regex.
_sub_basename.add(r'[(){}|^$+.]', r'\\&')
# '*' matches any run of characters.
_sub_basename.add(r'\*+', r'.*')
# '?' matches any single character.
_sub_basename.add(r'\?', r'.')
2135.2.2 by Kent Gibson
Ignore pattern matcher (glob.py) patches:
155
156
157
def _sub_extension(pattern):
    """Translate an extension pattern into a regex fragment.

    :param pattern: A pattern whose first two characters are skipped
        (Globster.identify classifies patterns starting with '*.' as
        extension patterns).
    :return: The remainder of the pattern translated by _sub_basename.
    """
    remainder = pattern[2:]
    return _sub_basename(remainder)
2135.2.1 by Kent Gibson
Added glob module to replace broken fnmatch based ignore pattern matching (#57637)
159
160
161
class Globster(object):
    """A simple wrapper for a set of glob patterns.

    Provides the capability to search the patterns to find a match for
    a given filename (including the full path).

    Patterns are translated to regular expressions to expedite matching.

    The regular expressions for multiple patterns are aggregated into
    a super-regex containing groups of up to 99 patterns.
    The 99 limitation is due to the grouping limit of the Python re module.
    The resulting super-regex and associated patterns are stored as a list of
    (regex,[patterns]) in _regex_patterns.

    For performance reasons the patterns are categorised as extension patterns
    (those that match against a file extension), basename patterns
    (those that match against the basename of the filename),
    and fullpath patterns (those that match against the full path).
    The translations used for extensions and basenames are relatively simpler
    and therefore faster to perform than the fullpath patterns.

    Also, the extension patterns are more likely to find a match and
    so are matched first, then the basename patterns, then the fullpath
    patterns.
    """

    # Patterns are added in this fixed order, from the simplest category to
    # the most general one. A list is used because the order of the keys of
    # pattern_info below cannot be relied on in every Python version.
    pattern_types = ["extension", "basename", "fullpath"]

    # For each category: the function translating a pattern to a regex
    # fragment, and the regex prefix put in front of the combined group.
    pattern_info = {
        "extension": {
            "translator": _sub_extension,
            "prefix": r'(?:.*/)?(?!.*/)(?:.*\.)'
        },
        "basename": {
            "translator": _sub_basename,
            "prefix": r'(?:.*/)?(?!.*/)'
        },
        "fullpath": {
            "translator": _sub_fullpath,
            "prefix": r''
        },
    }

    def __init__(self, patterns):
        """Build the matcher from an iterable of patterns.

        :param patterns: Glob (or 'RE:') patterns; each one is normalized
            and sorted into its category before the regexes are built.
        """
        self._regex_patterns = []
        by_kind = {kind: [] for kind in Globster.pattern_types}
        for raw in patterns:
            normalized = normalize_pattern(raw)
            by_kind[Globster.identify(normalized)].append(normalized)
        for kind in Globster.pattern_types:
            info = Globster.pattern_info[kind]
            self._add_patterns(
                by_kind[kind], info["translator"], info["prefix"])

    def _add_patterns(self, patterns, translator, prefix=''):
        """Append combined regexes for patterns, at most 99 per regex.

        :param patterns: List of normalized patterns of one category.
        :param translator: Function turning a pattern into a regex fragment.
        :param prefix: Regex text placed before the combined alternation.
        """
        remaining = patterns
        while remaining:
            batch, remaining = remaining[:99], remaining[99:]
            # One capturing group per pattern, so that lastindex tells
            # match() which pattern was hit.
            alternatives = '|'.join(
                ['(%s)' % translator(pat) for pat in batch])
            joined_rule = '%s(?:%s)$' % (prefix, alternatives)
            # Explicitly use lazy_compile here, because we count on its
            # nicer error reporting.
            compiled = lazy_regex.lazy_compile(joined_rule, re.UNICODE)
            self._regex_patterns.append((compiled, batch))

    def match(self, filename):
        """Searches for a pattern that matches the given filename.

        :return: A matching pattern or None if there is no matching pattern.
        :raises lazy_regex.InvalidPattern: if a combined regex fails to
            compile; the message is rewritten to list the bad patterns.
        """
        try:
            for regex, patterns in self._regex_patterns:
                found = regex.match(filename)
                if found:
                    return patterns[found.lastindex - 1]
        except lazy_regex.InvalidPattern as e:
            # The default e.msg describes the combined pattern sent to the
            # regex engine, which is of no use to the user. Log it, then
            # tell the user that an ignore file needs fixing.
            mutter('Invalid pattern found in regex: %s.', e.msg)
            e.msg = (
                "File ~/.config/breezy/ignore or "
                ".bzrignore contains error(s).")
            offenders = [
                '\n  %s' % p
                for _, patterns in self._regex_patterns
                for p in patterns
                if not Globster.is_pattern_valid(p)]
            e.msg += ''.join(offenders)
            raise e
        return None

    @staticmethod
    def identify(pattern):
        """Returns pattern category.

        :param pattern: normalized pattern.
        :return: "fullpath" for patterns starting with 'RE:' or containing
            '/', "extension" for patterns starting with '*.', otherwise
            "basename".
        """
        if pattern.startswith(u'RE:') or u'/' in pattern:
            return "fullpath"
        if pattern.startswith(u'*.'):
            return "extension"
        return "basename"

    @staticmethod
    def is_pattern_valid(pattern):
        """Returns True if pattern is valid.

        :param pattern: Normalized pattern.
        is_pattern_valid() assumes pattern to be normalized.
        see: globbing.normalize_pattern
        """
        kind = Globster.identify(pattern)
        translator = Globster.pattern_info[kind]["translator"]
        tpattern = '(%s)' % translator(pattern)
        try:
            re_obj = lazy_regex.lazy_compile(tpattern, re.UNICODE)
            re_obj.search("")  # force compile
        except lazy_regex.InvalidPattern:
            return False
        return True
294
295
4948.5.7 by John Whitley
Terminology change: exclusion => exception.
296
class ExceptionGlobster(object):
    """A Globster that supports exception patterns.

    Exceptions are ignore patterns prefixed with '!'.  Exception
    patterns take precedence over regular patterns and cause a
    matching filename to return None from the match() function.
    Patterns using a '!!' prefix are highest precedence, and act
    as regular ignores. '!!' patterns are useful to establish ignores
    that apply under paths specified by '!' exception patterns.
    """

    def __init__(self, patterns):
        """Split patterns by prefix and build one Globster per group.

        :param patterns: Iterable of patterns, optionally prefixed with
            '!' or '!!'. The prefix is stripped before the pattern is
            handed to its Globster.
        """
        regular, exceptions, overrides = [], [], []
        for p in patterns:
            # '!!' has to be tested first because it also starts with '!'.
            if p.startswith(u'!!'):
                overrides.append(p[2:])
            elif p.startswith(u'!'):
                exceptions.append(p[1:])
            else:
                regular.append(p)
        # Index 0: regular ignores, 1: '!' exceptions, 2: '!!' overrides.
        self._ignores = [
            Globster(group) for group in (regular, exceptions, overrides)]

    def match(self, filename):
        """Searches for a pattern that matches the given filename.

        :return: A matching pattern or None if there is no matching pattern.
            A '!!' match is returned with its '!!' prefix restored; a '!'
            match yields None.
        """
        overrides = self._ignores[2]
        double_neg = overrides.match(filename)
        if double_neg:
            return "!!%s" % double_neg
        exceptions = self._ignores[1]
        if exceptions.match(filename):
            return None
        regular = self._ignores[0]
        return regular.match(filename)
3398.1.1 by Ian Clatworthy
simplify the custom Globster to only care about ordering
330
7143.15.2 by Jelmer Vernooij
Run autopep8.
331
3398.1.1 by Ian Clatworthy
simplify the custom Globster to only care about ordering
332
class _OrderedGlobster(Globster):
    """A Globster that keeps pattern order."""

    def __init__(self, patterns):
        """Constructor.

        Each pattern gets its own regex entry, added in the order given,
        instead of being grouped by category.

        :param patterns: sequence of glob patterns
        """
        # Note: This could be smarter by running like sequences together
        self._regex_patterns = []
        for raw in patterns:
            normalized = normalize_pattern(raw)
            info = Globster.pattern_info[Globster.identify(normalized)]
            self._add_patterns(
                [normalized], info["translator"], info["prefix"])
3398.1.1 by Ian Clatworthy
simplify the custom Globster to only care about ordering
347
2298.8.1 by Kent Gibson
Normalise ignore patterns to use '/' path separator.
348
5967.9.5 by Martin Pool
More explicit laziness
349
# Matches any run of backslashes and/or forward slashes.
_slashes = lazy_regex.lazy_compile(r'[\\/]+')
7143.15.2 by Jelmer Vernooij
Run autopep8.
350
351
2298.8.1 by Kent Gibson
Normalise ignore patterns to use '/' path separator.
352
def normalize_pattern(pattern):
    """Converts backslashes in path patterns to forward slashes.

    Every run of backslashes and/or forward slashes is replaced with a
    single '/'. Patterns starting with 'RE:' or '!RE:' are regular
    expressions and skip that step, since they may contain escapes.
    Trailing slashes are then stripped from any pattern longer than one
    character.

    :param pattern: The pattern to normalize.
    :return: The normalized pattern.
    """
    is_regex = pattern.startswith(('RE:', '!RE:'))
    if not is_regex:
        pattern = _slashes.sub('/', pattern)
    # A one-character pattern is left alone so that '/' is not reduced
    # to the empty string.
    return pattern.rstrip('/') if len(pattern) > 1 else pattern