1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
# Copyright (C) 2006, 2008-2011, 2017 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Lazily compiled regex objects.
This module defines a class which creates proxy objects for regex
compilation. This allows overriding re.compile() to return lazily compiled
objects.
We do this rather than just providing a new interface so that it will also
be used by existing Python modules that create regexs.
"""
import re
from . import errors
class InvalidPattern(errors.BzrError):
_fmt = ('Invalid pattern(s) found. %(msg)s')
def __init__(self, msg):
self.msg = msg
class LazyRegex(object):
"""A proxy around a real regex, which won't be compiled until accessed."""
# These are the parameters on a real _sre.SRE_Pattern object, which we
# will map to local members so that we don't have the proxy overhead.
_regex_attributes_to_copy = [
'__copy__', '__deepcopy__', 'findall', 'finditer', 'match',
'scanner', 'search', 'split', 'sub', 'subn'
]
# We use slots to keep the overhead low. But we need a slot entry for
# all of the attributes we will copy
__slots__ = ['_real_regex', '_regex_args', '_regex_kwargs',
] + _regex_attributes_to_copy
def __init__(self, args, kwargs):
"""Create a new proxy object, passing in the args to pass to re.compile
:param args: The `*args` to pass to re.compile
:param kwargs: The `**kwargs` to pass to re.compile
"""
self._real_regex = None
self._regex_args = args
self._regex_kwargs = kwargs
def _compile_and_collapse(self):
"""Actually compile the requested regex"""
self._real_regex = self._real_re_compile(*self._regex_args,
**self._regex_kwargs)
for attr in self._regex_attributes_to_copy:
setattr(self, attr, getattr(self._real_regex, attr))
def _real_re_compile(self, *args, **kwargs):
"""Thunk over to the original re.compile"""
try:
return re.compile(*args, **kwargs)
except re.error as e:
# raise InvalidPattern instead of re.error as this gives a
# cleaner message to the user.
raise InvalidPattern('"' + args[0] + '" ' + str(e))
def __getstate__(self):
"""Return the state to use when pickling."""
return {
"args": self._regex_args,
"kwargs": self._regex_kwargs,
}
def __setstate__(self, dict):
"""Restore from a pickled state."""
self._real_regex = None
setattr(self, "_regex_args", dict["args"])
setattr(self, "_regex_kwargs", dict["kwargs"])
def __getattr__(self, attr):
"""Return a member from the proxied regex object.
If the regex hasn't been compiled yet, compile it
"""
if self._real_regex is None:
self._compile_and_collapse()
# Once we have compiled, the only time we should come here
# is actually if the attribute is missing.
return getattr(self._real_regex, attr)
def lazy_compile(*args, **kwargs):
"""Create a proxy object which will compile the regex on demand.
:return: a LazyRegex proxy object.
"""
return LazyRegex(args, kwargs)
|