/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
2052.3.2 by John Arbash Meinel
Change Copyright .. by Canonical to Copyright ... Canonical
1
# Copyright (C) 2005 Canonical Ltd
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
2
#
2052.3.1 by John Arbash Meinel
Add tests to cleanup the copyright of all source files
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1553.5.6 by Martin Pool
Clean up comments
16
6379.6.3 by Jelmer Vernooij
Use absolute_import.
17
from __future__ import absolute_import
18
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
19
# \subsection{\emph{rio} - simple text metaformat}
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
20
#
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
21
# \emph{r} stands for `restricted', `reproducible', or `rfc822-like'.
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
22
#
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
23
# The stored data consists of a series of \emph{stanzas}, each of which contains
24
# \emph{fields} identified by an ascii name, with Unicode or string contents.
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
25
# The field tag is constrained to alphanumeric characters.
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
26
# There may be more than one field in a stanza with the same name.
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
27
#
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
28
# The format itself does not deal with character encoding issues, though
29
# the result will normally be written in Unicode.
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
30
#
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
31
# The format is intended to be simple enough that there is exactly one character
32
# stream representation of an object and vice versa, and that this relation
33
# will continue to hold for future versions of bzr.
34
35
import re
36
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
37
from . import osutils
38
from .iterablefile import IterableFile
6677.1.1 by Martin
Go back to native str for urls and many other py3 changes
39
from .sixish import (
40
    text_type,
41
    )
1534.10.2 by Aaron Bentley
Implemented rio_file to produce a light file object from stanzas
42
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
43
# XXX: redundancy here: stanzas can be written in isolation as well as
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
44
# through a writer object.
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
45
46
class RioWriter(object):
    """Write a sequence of stanzas to a binary file-like object.

    A single blank line (``b'\\n'``) is emitted between consecutive stanzas;
    nothing is written before the first stanza or after the last one.
    """

    def __init__(self, to_file):
        """Create a writer.

        :param to_file: object with a ``write`` method accepting bytes.
        """
        self._to_file = to_file
        # Becomes True once a stanza has been written, meaning the next
        # stanza must be preceded by a separator line.
        self._soft_nl = False

    def write_stanza(self, stanza):
        """Write *stanza*, preceded by a separator if it is not the first.

        :param stanza: object providing ``write(to_file)``.
        """
        out = self._to_file
        if self._soft_nl:
            out.write(b'\n')
        stanza.write(out)
        self._soft_nl = True
56
57
58
class RioReader(object):
    """Iterate over the stanzas stored in a file.

    ``from_file`` can be anything that can be enumerated as a sequence of
    lines (including their newlines); it is handed to ``read_stanza``
    repeatedly.
    """

    def __init__(self, from_file):
        """Remember the line source to read stanzas from."""
        self._from_file = from_file

    def __iter__(self):
        """Yield stanzas until ``read_stanza`` reports the end with None."""
        while True:
            stanza = read_stanza(self._from_file)
            if stanza is None:
                return
            yield stanza
74
1534.10.2 by Aaron Bentley
Implemented rio_file to produce a light file object from stanzas
75
76
def rio_file(stanzas, header=None):
    """Produce a rio IterableFile from an iterable of stanzas.

    :param stanzas: iterable of objects providing ``to_lines()``.
    :param header: optional bytes emitted first, followed by a newline.
    :return: an IterableFile wrapping the generated lines; stanzas are
        separated from each other by a single ``b'\\n'`` line.
    """
    def str_iter():
        if header is not None:
            yield header + b'\n'
        need_separator = False
        for stanza in stanzas:
            # A blank line goes between stanzas, never before the first.
            if need_separator:
                yield b'\n'
            for line in stanza.to_lines():
                yield line
            need_separator = True
    return IterableFile(str_iter())
89
90
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
91
def read_stanzas(from_file):
    """Generate every stanza readable from *from_file*.

    ``read_stanza`` is called repeatedly on the same line source; iteration
    stops the first time it returns None.
    """
    while True:
        stanza = read_stanza(from_file)
        if stanza is None:
            return
        yield stanza
98
99
class Stanza(object):
    """One stanza for rio: an ordered list of (tag, value) fields.

    Tags are checked with ``valid_tag`` when added through ``add``.  The same
    tag may occur several times within a stanza, tags are case-sensitive and
    the order in which fields were added is preserved.

    Values are stored as text.  ``add`` accepts text, or bytes which are
    decoded as ASCII; any other type is rejected with TypeError.
    """

    __slots__ = ['items']

    def __init__(self, **kwargs):
        """Construct a new Stanza.

        Keyword arguments, if any, become fields and are added in sorted
        order of their names.
        """
        self.items = []
        for name, val in sorted(kwargs.items()):
            self.add(name, val)

    def add(self, tag, value):
        """Append a name and value to the stanza.

        :raises ValueError: if ``valid_tag(tag)`` is false.
        :raises TypeError: if value is neither bytes nor text.
        """
        if not valid_tag(tag):
            raise ValueError("invalid tag %r" % (tag,))
        if isinstance(value, bytes):
            value = value.decode('ascii')
        elif not isinstance(value, text_type):
            raise TypeError("invalid type for rio value: %r of type %s"
                            % (value, type(value)))
        self.items.append((tag, value))

    @classmethod
    def from_pairs(cls, pairs):
        """Build a stanza directly around *pairs*.

        The given object is stored as-is: it is neither copied nor validated.
        """
        stanza = cls()
        stanza.items = pairs
        return stanza

    def __contains__(self, find_tag):
        """True if there is any field in this stanza with the given tag."""
        return any(name == find_tag for name, _ in self.items)

    def __len__(self):
        """Return number of pairs in the stanza."""
        return len(self.items)

    def __eq__(self, other):
        """Stanzas are equal when their field lists are equal."""
        return isinstance(other, Stanza) and self.items == other.items

    def __ne__(self, other):
        equal = self.__eq__(other)
        return not equal

    def __repr__(self):
        return "Stanza(%r)" % self.items

    def iter_pairs(self):
        """Return iterator of tag, value pairs."""
        return iter(self.items)

    def to_lines(self):
        """Generate sequence of lines for external version of this file.

        Tags are encoded as ASCII and values as UTF-8, so the lines are
        always bytes.  Each line of a multi-line value after the first is
        written on its own line prefixed with a tab.
        """
        encoded = []
        for text_tag, text_value in self.items:
            prefix = text_tag.encode('ascii') + b': '
            payload = text_value.encode('utf-8')
            if b'\n' not in payload:
                # Single-line (possibly empty) value.
                encoded.append(prefix + payload + b'\n')
                continue
            # split() rather than splitlines(): empty lines must survive.
            pieces = payload.split(b'\n')
            encoded.append(prefix + pieces[0] + b'\n')
            encoded.extend(b'\t' + cont + b'\n' for cont in pieces[1:])
        return encoded

    def to_string(self):
        """Return stanza as a single (byte) string."""
        return b''.join(self.to_lines())

    def to_unicode(self):
        """Return stanza as a single Unicode string.

        This is most useful when adding a Stanza to a parent Stanza.
        """
        chunks = []
        for tag, value in self.items:
            if u'\n' not in value:
                # Single-line (possibly empty) value.
                chunks.append(tag + u': ' + value + u'\n')
                continue
            # split() rather than splitlines(): empty lines must survive.
            pieces = value.split(u'\n')
            chunks.append(tag + u': ' + pieces[0] + u'\n')
            chunks.extend(u'\t' + cont + u'\n' for cont in pieces[1:])
        return u''.join(chunks)

    def write(self, to_file):
        """Write stanza to a file."""
        to_file.writelines(self.to_lines())

    def get(self, tag):
        """Return the value for a field with given tag.

        If there is more than one value, only the first is returned.  If the
        tag is not present, KeyError is raised.
        """
        for name, val in self.items:
            if name == tag:
                return val
        raise KeyError(tag)

    __getitem__ = get

    def get_all(self, tag):
        """Return a list of every value stored under *tag*, in order."""
        return [val for name, val in self.items if name == tag]

    def as_dict(self):
        """Return a dict mapping each tag to its value.

        When a tag is repeated, the last value added wins.
        """
        return dict(self.items)
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
250
4354.3.1 by Jelmer Vernooij
Move core RIO parsing functionality to _rio_py.py.
251
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
252
def valid_tag(tag):
    """Return whether *tag* is acceptable as a rio field name.

    The check itself is delegated to ``_valid_tag``, which is imported at the
    bottom of this module.
    """
    is_valid = _valid_tag(tag)
    return is_valid
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
254
255
256
def read_stanza(line_iter):
    """Return new Stanza read from list of lines or a file.

    Returns one Stanza that was read, or returns None at end of file.  If a
    blank line follows the stanza, it is consumed.  It's not an error for
    there to be no blank line at end of file.

    Only the stanza lines and the trailing blank (if any) are consumed
    from the line_iter.

    The raw lines must be in utf-8 encoding.

    NOTE(review): what is returned when the input starts with a blank line
    is decided by the underlying ``_read_stanza_utf8`` -- confirm there.
    """
    stanza = _read_stanza_utf8(line_iter)
    return stanza
2030.1.5 by John Arbash Meinel
Create a 'read_stanza_unicode' to handle unicode processing
270
271
272
def read_stanza_unicode(unicode_iter):
    """Read a Stanza from a list of lines or a file.

    The lines should already be in unicode form. This returns a single
    stanza that was read. If there is a blank line at the end of the Stanza,
    it is consumed. It is not an error for there to be no blank line at
    the end of the iterable. If there is a blank line at the beginning,
    this is treated as an empty Stanza and None is returned.

    Only the stanza lines and the trailing blank (if any) are consumed
    from the unicode_iter.

    :param unicode_iter: An iterable, yielding Unicode strings. See
        read_stanza if you have a utf-8 encoded string.
    :return: A Stanza object if there are any lines in the file.
        None otherwise
    """
    stanza = _read_stanza_unicode(unicode_iter)
    return stanza
1551.12.38 by Aaron Bentley
Add docs for MergeDirective and RIO-patch functions
290
291
1551.12.10 by Aaron Bentley
Reduce max width to 72
292
def to_patch_lines(stanza, max_width=72):
    """Convert a stanza into RIO-Patch format lines.

    RIO-Patch is a RIO variant designed to be e-mailed as part of a patch.
    It resists common forms of damage such as newline conversion or the removal
    of trailing whitespace, yet is also reasonably easy to read.

    Every output line starts with ``b'# '``.  Backslashes are doubled and
    carriage returns are written as ``\\r``.  A logical line that is too long
    is split; each piece but the last ends with a backslash and each
    continuation is indented by two spaces.  A line ending in a space gets a
    trailing backslash followed by an extra ``b'#   \\n'`` line so the space
    survives whitespace stripping.

    :param stanza: object whose ``to_lines()`` returns newline-terminated
        byte strings.
    :param max_width: The maximum number of characters per physical line.
    :return: a list of lines (bytes)
    :raises ValueError: if max_width is 6 or less.
    """
    if max_width <= 6:
        raise ValueError(max_width)
    # Reserve room for the '# ' prefix, a trailing backslash and the newline.
    max_rio_width = max_width - 4
    lines = []
    for pline in stanza.to_lines():
        for line in pline.split(b'\n')[:-1]:
            line = re.sub(b'\\\\', b'\\\\\\\\', line)
            # NOTE(review): the width slice below can fall between the two
            # bytes of a doubled backslash when no break character is
            # nearby; _patch_stanza_iter does not appear to rejoin that
            # case correctly -- confirm and handle separately.
            while len(line) > 0:
                partline = line[:max_rio_width]
                line = line[max_rio_width:]
                # Only hunt for a nicer break point when the cut did not
                # already land right before a space.  (This used to compare
                # against the list [b' '], which is never equal to bytes, so
                # the natural break was never taken.)
                if len(line) > 0 and line[:1] != b' ':
                    # Look in the last 20 bytes for a space, then a hyphen
                    # (breaking after it), then a slash.  Indexes below 3
                    # are rejected so the continuation indent is never
                    # chosen as the break.
                    break_index = partline.rfind(b' ', -20)
                    if break_index < 3:
                        break_index = partline.rfind(b'-', -20)
                        break_index += 1
                    if break_index < 3:
                        break_index = partline.rfind(b'/', -20)
                    if break_index >= 3:
                        line = partline[break_index:] + line
                        partline = partline[:break_index]
                if len(line) > 0:
                    # Indent the continuation for readability.
                    line = b'  ' + line
                partline = re.sub(b'\r', b'\\\\r', partline)
                blank_line = False
                if len(line) > 0:
                    partline += b'\\'
                elif re.search(b' $', partline):
                    # Protect trailing whitespace from being stripped.
                    partline += b'\\'
                    blank_line = True
                lines.append(b'# ' + partline + b'\n')
                if blank_line:
                    lines.append(b'#   \n')
    return lines
336
1551.12.23 by Aaron Bentley
Indent line continuations in Patch-RIO, to improve readability
337
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
338
def _patch_stanza_iter(line_iter):
6684.1.6 by Martin
Finish making rio module string usage work on Python 3
339
    map = {b'\\\\': b'\\',
340
           b'\\r' : b'\r',
341
           b'\\\n': b''}
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
342
    def mapget(match):
343
        return map[match.group(0)]
344
1551.12.9 by Aaron Bentley
force patch-rio to 76 characters
345
    last_line = None
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
346
    for line in line_iter:
6684.1.6 by Martin
Finish making rio module string usage work on Python 3
347
        if line.startswith(b'# '):
1551.12.22 by Aaron Bentley
Fix handling of whitespace-stripping without newline munging
348
            line = line[2:]
6684.1.6 by Martin
Finish making rio module string usage work on Python 3
349
        elif line.startswith(b'#'):
1551.12.22 by Aaron Bentley
Fix handling of whitespace-stripping without newline munging
350
            line = line[1:]
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
351
        else:
352
            raise ValueError("bad line %r" % (line,))
1551.12.23 by Aaron Bentley
Indent line continuations in Patch-RIO, to improve readability
353
        if last_line is not None and len(line) > 2:
354
            line = line[2:]
6684.1.6 by Martin
Finish making rio module string usage work on Python 3
355
        line = re.sub(b'\r', b'', line)
356
        line = re.sub(b'\\\\(.|\n)', mapget, line)
1551.12.9 by Aaron Bentley
force patch-rio to 76 characters
357
        if last_line is None:
358
            last_line = line
359
        else:
360
            last_line += line
6684.1.6 by Martin
Finish making rio module string usage work on Python 3
361
        if last_line[-1:] == b'\n':
1551.12.9 by Aaron Bentley
force patch-rio to 76 characters
362
            yield last_line
363
            last_line = None
364
    if last_line is not None:
365
        yield last_line
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
366
1551.12.23 by Aaron Bentley
Indent line continuations in Patch-RIO, to improve readability
367
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
368
def read_patch_stanza(line_iter):
    """Convert an iterable of RIO-Patch format lines into a Stanza.

    RIO-Patch is a RIO variant designed to be e-mailed as part of a patch.
    It resists common forms of damage such as newline conversion or the removal
    of trailing whitespace, yet is also reasonably easy to read.

    The lines are first unwrapped by ``_patch_stanza_iter`` and the result is
    parsed with ``read_stanza``.

    :return: a Stanza
    """
    plain_lines = _patch_stanza_iter(line_iter)
    return read_stanza(plain_lines)
4354.3.1 by Jelmer Vernooij
Move core RIO parsing functionality to _rio_py.py.
378
379
380
try:
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
381
    from ._rio_pyx import (
4354.3.1 by Jelmer Vernooij
Move core RIO parsing functionality to _rio_py.py.
382
        _read_stanza_utf8,
383
        _read_stanza_unicode,
384
        _valid_tag,
385
        )
6619.3.2 by Jelmer Vernooij
Apply 2to3 except fix.
386
except ImportError as e:
4574.3.8 by Martin Pool
Only mutter extension load errors when they occur, and record for later
387
    osutils.failed_to_load_extension(e)
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
388
    from ._rio_py import (
4354.3.1 by Jelmer Vernooij
Move core RIO parsing functionality to _rio_py.py.
389
       _read_stanza_utf8,
390
       _read_stanza_unicode,
391
       _valid_tag,
392
       )