bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
4913.5.19
by Gordon Tyler
 Moved UnicodeShlex, etc. to a new module, bzrlib.cmdline, and renamed it to Parser.  | 
1  | 
# Copyright (C) 2010 Canonical Ltd
 | 
2  | 
#
 | 
|
3  | 
# This program is free software; you can redistribute it and/or modify
 | 
|
4  | 
# it under the terms of the GNU General Public License as published by
 | 
|
5  | 
# the Free Software Foundation; either version 2 of the License, or
 | 
|
6  | 
# (at your option) any later version.
 | 
|
7  | 
#
 | 
|
8  | 
# This program is distributed in the hope that it will be useful,
 | 
|
9  | 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|
10  | 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|
11  | 
# GNU General Public License for more details.
 | 
|
12  | 
#
 | 
|
13  | 
# You should have received a copy of the GNU General Public License
 | 
|
14  | 
# along with this program; if not, write to the Free Software
 | 
|
15  | 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
|
16  | 
||
| 
4913.5.23
by Gordon Tyler
 Renamed cmdline.Parser to Splitter to better match its usage.  | 
17  | 
"""Unicode-compatible command-line splitter for all platforms."""
 | 
| 
4913.5.19
by Gordon Tyler
 Moved UnicodeShlex, etc. to a new module, bzrlib.cmdline, and renamed it to Parser.  | 
18  | 
|
19  | 
import re  | 
|
20  | 
||
21  | 
||
22  | 
_whitespace_match = re.compile(u'\s', re.UNICODE).match  | 
|
23  | 
||
24  | 
||
25  | 
class _PushbackSequence(object):  | 
|
26  | 
def __init__(self, orig):  | 
|
27  | 
self._iter = iter(orig)  | 
|
28  | 
self._pushback_buffer = []  | 
|
| 
5050.1.3
by Vincent Ladeuil
 Delete spurious spaces.  | 
29  | 
|
| 
4913.5.19
by Gordon Tyler
 Moved UnicodeShlex, etc. to a new module, bzrlib.cmdline, and renamed it to Parser.  | 
30  | 
def next(self):  | 
31  | 
if len(self._pushback_buffer) > 0:  | 
|
32  | 
return self._pushback_buffer.pop()  | 
|
33  | 
else:  | 
|
34  | 
return self._iter.next()  | 
|
| 
5050.1.3
by Vincent Ladeuil
 Delete spurious spaces.  | 
35  | 
|
| 
4913.5.19
by Gordon Tyler
 Moved UnicodeShlex, etc. to a new module, bzrlib.cmdline, and renamed it to Parser.  | 
36  | 
def pushback(self, char):  | 
37  | 
self._pushback_buffer.append(char)  | 
|
| 
5050.1.3
by Vincent Ladeuil
 Delete spurious spaces.  | 
38  | 
|
| 
4913.5.19
by Gordon Tyler
 Moved UnicodeShlex, etc. to a new module, bzrlib.cmdline, and renamed it to Parser.  | 
39  | 
def __iter__(self):  | 
40  | 
return self  | 
|
41  | 
||
42  | 
||
43  | 
class _Whitespace(object):
    """Splitter state used at the start of a token and between tokens."""

    def process(self, next_char, context):
        """Consume ``next_char`` and return the state for the next character.

        ``context`` supplies the ``token`` list being built, the
        ``allowed_quote_chars`` string and the ``quoted`` flag.  Returning
        None signals that the current token is complete.
        """
        if _whitespace_match(next_char):
            # Whitespace ends a token that already has content; otherwise
            # it is skipped and scanning continues in this state.
            # NOTE(review): after an empty quoted string (e.g. "") the token
            # is still empty, so scanning continues with context.quoted
            # already set -- confirm whether that is intended.
            return None if context.token else self
        if next_char in context.allowed_quote_chars:
            context.quoted = True
            return _Quotes(next_char, self)
        if next_char == u'\\':
            return _Backslash(self)
        # Any other character starts an unquoted word.
        context.token.append(next_char)
        return _Word()
|
58  | 
||
59  | 
||
60  | 
class _Quotes(object):
    """Splitter state used while inside a quoted section."""

    def __init__(self, quote_char, exit_state):
        """Remember the opening quote and the state to resume afterwards.

        :param quote_char: the character that opened the quoted section;
            the same character closes it.
        :param exit_state: the state returned when the closing quote is seen.
        """
        self.exit_state = exit_state
        self.quote_char = quote_char

    def process(self, next_char, context):
        """Consume ``next_char`` and return the state for the next character."""
        if next_char == u'\\':
            # Backslash handling comes back to this state when it is done.
            return _Backslash(self)
        if next_char == self.quote_char:
            # Closing quote: it is not added to the token.
            return self.exit_state
        # Everything else, including whitespace, is literal inside quotes.
        context.token.append(next_char)
        return self
|
73  | 
||
74  | 
||
75  | 
class _Backslash(object):  | 
|
76  | 
    # See http://msdn.microsoft.com/en-us/library/bb776391(VS.85).aspx
 | 
|
77  | 
def __init__(self, exit_state):  | 
|
78  | 
self.exit_state = exit_state  | 
|
79  | 
self.count = 1  | 
|
| 
5050.1.3
by Vincent Ladeuil
 Delete spurious spaces.  | 
80  | 
|
| 
4913.5.21
by Gordon Tyler
 Refactored interface between Parser and states.  | 
81  | 
def process(self, next_char, context):  | 
| 
4913.5.19
by Gordon Tyler
 Moved UnicodeShlex, etc. to a new module, bzrlib.cmdline, and renamed it to Parser.  | 
82  | 
if next_char == u'\\':  | 
83  | 
self.count += 1  | 
|
84  | 
return self  | 
|
| 
4913.5.22
by Gordon Tyler
 Tweaked quote handling.  | 
85  | 
elif next_char in context.allowed_quote_chars:  | 
86  | 
            # 2N backslashes followed by a quote are N backslashes
 | 
|
| 
4913.5.19
by Gordon Tyler
 Moved UnicodeShlex, etc. to a new module, bzrlib.cmdline, and renamed it to Parser.  | 
87  | 
context.token.append(u'\\' * (self.count/2))  | 
| 
4913.5.22
by Gordon Tyler
 Tweaked quote handling.  | 
88  | 
            # 2N+1 backslashes follwed by a quote are N backslashes followed by
 | 
89  | 
            # the quote which should not be processed as the start or end of
 | 
|
90  | 
            # the quoted arg
 | 
|
| 
4913.5.19
by Gordon Tyler
 Moved UnicodeShlex, etc. to a new module, bzrlib.cmdline, and renamed it to Parser.  | 
91  | 
if self.count % 2 == 1:  | 
| 
4913.5.22
by Gordon Tyler
 Tweaked quote handling.  | 
92  | 
                # odd number of \ escapes the quote
 | 
93  | 
context.token.append(next_char)  | 
|
| 
4913.5.19
by Gordon Tyler
 Moved UnicodeShlex, etc. to a new module, bzrlib.cmdline, and renamed it to Parser.  | 
94  | 
else:  | 
| 
4913.5.22
by Gordon Tyler
 Tweaked quote handling.  | 
95  | 
                # let exit_state handle next_char
 | 
96  | 
context.seq.pushback(next_char)  | 
|
| 
4913.5.19
by Gordon Tyler
 Moved UnicodeShlex, etc. to a new module, bzrlib.cmdline, and renamed it to Parser.  | 
97  | 
self.count = 0  | 
98  | 
return self.exit_state  | 
|
99  | 
else:  | 
|
| 
4913.5.22
by Gordon Tyler
 Tweaked quote handling.  | 
100  | 
            # N backslashes not followed by a quote are just N backslashes
 | 
| 
4913.5.19
by Gordon Tyler
 Moved UnicodeShlex, etc. to a new module, bzrlib.cmdline, and renamed it to Parser.  | 
101  | 
if self.count > 0:  | 
102  | 
context.token.append(u'\\' * self.count)  | 
|
103  | 
self.count = 0  | 
|
| 
4913.5.22
by Gordon Tyler
 Tweaked quote handling.  | 
104  | 
            # let exit_state handle next_char
 | 
105  | 
context.seq.pushback(next_char)  | 
|
| 
4913.5.19
by Gordon Tyler
 Moved UnicodeShlex, etc. to a new module, bzrlib.cmdline, and renamed it to Parser.  | 
106  | 
return self.exit_state  | 
| 
5050.1.3
by Vincent Ladeuil
 Delete spurious spaces.  | 
107  | 
|
| 
4913.5.19
by Gordon Tyler
 Moved UnicodeShlex, etc. to a new module, bzrlib.cmdline, and renamed it to Parser.  | 
108  | 
def finish(self, context):  | 
109  | 
if self.count > 0:  | 
|
110  | 
context.token.append(u'\\' * self.count)  | 
|
111  | 
||
112  | 
||
113  | 
class _Word(object):
    """Splitter state used while inside an unquoted word."""

    def process(self, next_char, context):
        """Consume ``next_char`` and return the state for the next character.

        Returning None signals that the current token is complete.
        """
        if _whitespace_match(next_char):
            # Unquoted whitespace terminates the word.
            return None
        if next_char in context.allowed_quote_chars:
            # A quote in the middle of a word opens a quoted section that
            # returns to this state when it closes.
            return _Quotes(next_char, self)
        if next_char == u'\\':
            return _Backslash(self)
        context.token.append(next_char)
        return self
|
124  | 
||
125  | 
||
| 
4913.5.23
by Gordon Tyler
 Renamed cmdline.Parser to Splitter to better match its usage.  | 
126  | 
class Splitter(object):
    """Iterator that splits a command line into ``(quoted, token)`` pairs.

    The instance is also the context passed to the state objects: they read
    ``allowed_quote_chars`` and ``seq`` and update ``token`` and ``quoted``.
    """

    def __init__(self, command_line, single_quotes_allowed):
        """Prepare to split ``command_line``.

        :param command_line: the string to split.
        :param single_quotes_allowed: if true, ``'`` is accepted as a quote
            character in addition to ``"``.
        """
        self.seq = _PushbackSequence(command_line)
        self.allowed_quote_chars = u'"'
        if single_quotes_allowed:
            self.allowed_quote_chars += u"'"

    def __iter__(self):
        return self

    def next(self):
        """Return the next ``(quoted, token)`` pair.

        Raises StopIteration when no further token is available.
        """
        quoted, token = self._get_token()
        if token is None:
            raise StopIteration
        return quoted, token

    # Python 3 spells the iterator protocol method __next__.
    __next__ = next

    def _get_token(self):
        """Scan one token from ``self.seq``.

        :return: ``(quoted, token)``.  ``token`` is None when nothing was
            collected and no quote was seen, otherwise the token text
            (which may be empty if a quote was seen).
        """
        self.quoted = False
        self.token = []
        state = _Whitespace()
        for next_char in self.seq:
            state = state.process(next_char, self)
            if state is None:
                # The state machine reported the end of the token.
                break
        if state is not None:
            # Input ran out mid-token; let the final state flush whatever
            # it was holding back, if it supports that.
            finish = getattr(state, 'finish', None)
            if finish is not None:
                finish(self)
        result = u''.join(self.token)
        if not self.quoted and result == '':
            result = None
        return self.quoted, result
|
| 
4913.5.24
by Gordon Tyler
 Added cmdline.split function, which replaces commands.shlex_split_unicode.  | 
156  | 
|
| 
4913.5.27
by Gordon Tyler
 Code formatting.  | 
157  | 
|
| 
4913.5.24
by Gordon Tyler
 Added cmdline.split function, which replaces commands.shlex_split_unicode.  | 
158  | 
def split(unsplit, single_quotes_allowed=True):
    """Split the command line ``unsplit`` into a list of argument strings.

    :param unsplit: the command line to split.
    :param single_quotes_allowed: passed through to Splitter; controls
        whether ``'`` is treated as a quote character.
    :return: the tokens produced by Splitter, without their quoted flags.
    """
    args = []
    tokens = Splitter(unsplit, single_quotes_allowed=single_quotes_allowed)
    for _was_quoted, arg in tokens:
        args.append(arg)
    return args