/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
5646.1.2 by Martin Pool
Add brief user documentation of command line splitting
1
# Copyright (C) 2010-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
5646.1.2 by Martin Pool
Add brief user documentation of command line splitting
17
"""Unicode-compatible command-line splitter for all platforms.

The user-visible behaviour of this module is described in
configuring_bazaar.txt.
"""
4913.5.19 by Gordon Tyler
Moved UnicodeShlex, etc. to a new module, bzrlib.cmdline, and renamed it to Parser.
22
6379.6.7 by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear.
23
from __future__ import absolute_import
24
4913.5.19 by Gordon Tyler
Moved UnicodeShlex, etc. to a new module, bzrlib.cmdline, and renamed it to Parser.
25
import re
26
27
6798.1.1 by Jelmer Vernooij
Properly escape backslashes.
28
# Bound ``match`` method of a compiled pattern for one whitespace character.
# re.UNICODE asks for Unicode-aware matching of \s rather than ASCII-only.
_whitespace_match = re.compile(u'\\s', re.UNICODE).match
4913.5.19 by Gordon Tyler
Moved UnicodeShlex, etc. to a new module, bzrlib.cmdline, and renamed it to Parser.
29
30
31
class _PushbackSequence(object):
    """Iterator over a sequence that allows items to be pushed back.

    Pushed-back items are returned first, most recently pushed first, before
    iteration resumes on the underlying sequence.
    """

    def __init__(self, orig):
        """Wrap ``orig``, any iterable, for iteration with pushback."""
        self._iter = iter(orig)
        self._pushback_buffer = []

    def __next__(self):
        """Return the next item, preferring pushed-back items.

        :raises StopIteration: when nothing has been pushed back and the
            underlying iterator is exhausted.
        """
        if self._pushback_buffer:
            return self._pushback_buffer.pop()
        return next(self._iter)

    # Python 2 spelling of the iterator protocol method.
    next = __next__

    def pushback(self, char):
        """Queue ``char`` so that it is the next item returned."""
        self._pushback_buffer.append(char)

    def __iter__(self):
        """Return self; the object is its own iterator."""
        return self
49
50
51
class _Whitespace(object):
    """Splitter state used at the start of input and between tokens."""

    def process(self, next_char, context):
        """Consume ``next_char`` and return the state for the next character.

        :param next_char: The character to process.
        :param context: The object driving the state machine; its ``token``
            list, ``quoted`` flag and ``allowed_quote_chars`` are used here.
        :return: The next state object, or None when the current token is
            complete.
        """
        if _whitespace_match(next_char):
            # Whitespace terminates a token that has already collected
            # something (this state is also the exit state of quoted and
            # backslash sequences); otherwise it is simply skipped.
            if context.token:
                return None
            return self
        if next_char in context.allowed_quote_chars:
            # Record that the token entered a quoted section from here.
            context.quoted = True
            return _Quotes(next_char, self)
        if next_char == u'\\':
            return _Backslash(self)
        context.token.append(next_char)
        return _Word()
66
67
68
class _Quotes(object):
    """Splitter state used while inside a quoted section."""

    def __init__(self, quote_char, exit_state):
        """Create the state for a section opened by ``quote_char``.

        :param quote_char: The quote character that opened the section; the
            same character closes it.
        :param exit_state: The state to return to once the section closes.
        """
        self.quote_char = quote_char
        self.exit_state = exit_state

    def process(self, next_char, context):
        """Consume ``next_char`` and return the state for the next character.

        :param next_char: The character to process.
        :param context: The object driving the state machine; characters are
            appended to its ``token`` list.
        :return: The next state object.
        """
        if next_char == u'\\':
            return _Backslash(self)
        if next_char == self.quote_char:
            # The empty string does not change the joined text, but it makes
            # context.token non-empty so that an empty quoted string is still
            # treated as a token by checks on the list's length.
            context.token.append(u'')
            return self.exit_state
        context.token.append(next_char)
        return self
82
83
84
class _Backslash(object):
    """Splitter state used while reading a run of backslashes.

    The handling of backslashes before a quote follows the rules described at
    http://msdn.microsoft.com/en-us/library/bb776391(VS.85).aspx
    """

    def __init__(self, exit_state):
        """Start a run of one backslash.

        :param exit_state: The state to return to once the run ends.
        """
        self.exit_state = exit_state
        # Number of consecutive backslashes seen so far in this run.
        self.count = 1

    def process(self, next_char, context):
        """Consume ``next_char`` and return the state for the next character.

        :param next_char: The character to process.
        :param context: The object driving the state machine; its ``token``
            list, ``allowed_quote_chars`` and ``seq.pushback`` are used here.
        :return: This state while the run continues, otherwise the exit state.
        """
        if next_char == u'\\':
            self.count += 1
            return self
        if next_char in context.allowed_quote_chars:
            # 2N backslashes followed by a quote are N backslashes
            context.token.append(u'\\' * (self.count // 2))
            # 2N+1 backslashes followed by a quote are N backslashes followed
            # by the quote which should not be processed as the start or end
            # of the quoted arg
            if self.count % 2 == 1:
                # odd number of \ escapes the quote
                context.token.append(next_char)
            else:
                # let exit_state handle next_char
                context.seq.pushback(next_char)
            self.count = 0
            return self.exit_state
        # N backslashes not followed by a quote are just N backslashes
        if self.count > 0:
            context.token.append(u'\\' * self.count)
            self.count = 0
        # let exit_state handle next_char
        context.seq.pushback(next_char)
        return self.exit_state

    def finish(self, context):
        """Flush backslashes still pending when the input ends in this state.

        :param context: The object driving the state machine; pending
            backslashes are appended to its ``token`` list.
        """
        if self.count > 0:
            context.token.append(u'\\' * self.count)
120
121
122
class _Word(object):
    """Splitter state used while inside an unquoted word."""

    def process(self, next_char, context):
        """Consume ``next_char`` and return the state for the next character.

        :param next_char: The character to process.
        :param context: The object driving the state machine; its ``token``
            list and ``allowed_quote_chars`` are used here.
        :return: The next state object, or None when whitespace ends the
            token.
        """
        if _whitespace_match(next_char):
            return None
        if next_char in context.allowed_quote_chars:
            # A quoted section in the middle of a word continues the same
            # token; control comes back to this state when it closes.
            return _Quotes(next_char, self)
        if next_char == u'\\':
            return _Backslash(self)
        context.token.append(next_char)
        return self
133
134
4913.5.23 by Gordon Tyler
Renamed cmdline.Parser to Splitter to better match its usage.
135
class Splitter(object):
    """Iterator that splits a command line into ``(quoted, token)`` pairs.

    The instance also serves as the context object handed to the state
    objects: they read ``allowed_quote_chars`` and ``seq`` and update
    ``token`` and ``quoted``.
    """

    def __init__(self, command_line, single_quotes_allowed):
        """Prepare to split ``command_line``.

        :param command_line: The text to split, iterated one character at a
            time.
        :param single_quotes_allowed: If true, ``'`` is accepted as a quote
            character in addition to ``"``.
        """
        self.seq = _PushbackSequence(command_line)
        self.allowed_quote_chars = u'"'
        if single_quotes_allowed:
            self.allowed_quote_chars += u"'"

    def __iter__(self):
        """Return self; the object is its own iterator."""
        return self

    def __next__(self):
        """Return the next ``(quoted, token)`` pair.

        :raises StopIteration: when no further token is available.
        """
        quoted, token = self._get_token()
        if token is None:
            raise StopIteration
        return quoted, token

    # Python 2 spelling of the iterator protocol method.
    next = __next__

    def _get_token(self):
        """Run the state machine until one token has been read.

        :return: A ``(quoted, token)`` tuple. ``token`` is None when the
            collected text is empty and no quoted section set ``quoted``.
        """
        self.quoted = False
        self.token = []
        state = _Whitespace()
        for next_char in self.seq:
            state = state.process(next_char, self)
            if state is None:
                # The state signalled that the token is complete.
                break
        if state is not None:
            # Input ran out mid-token: let the state flush anything it is
            # still holding, if it provides a finish() hook.
            finish = getattr(state, 'finish', None)
            if finish is not None:
                finish(self)
        result = u''.join(self.token)
        if not self.quoted and result == u'':
            result = None
        return self.quoted, result
4913.5.24 by Gordon Tyler
Added cmdline.split function, which replaces commands.shlex_split_unicode.
167
4913.5.27 by Gordon Tyler
Code formatting.
168
4913.5.24 by Gordon Tyler
Added cmdline.split function, which replaces commands.shlex_split_unicode.
169
def split(unsplit, single_quotes_allowed=True):
    """Split a command line into a list of argument strings.

    :param unsplit: The command line text to split.
    :param single_quotes_allowed: If true, ``'`` is accepted as a quote
        character in addition to ``"``.
    :return: A list of the tokens produced by Splitter, with the per-token
        quoted flag discarded.
    """
    splitter = Splitter(unsplit, single_quotes_allowed=single_quotes_allowed)
    return [arg for _quoted, arg in splitter]