/brz/remove-bazaar

To get this branch, use:
bzr branch http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
2052.3.2 by John Arbash Meinel
Change Copyright .. by Canonical to Copyright ... Canonical
1
# Copyright (C) 2005 Canonical Ltd
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
2
#
2052.3.1 by John Arbash Meinel
Add tests to cleanup the copyright of all source files
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob
update FSF mailing address
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1553.5.6 by Martin Pool
Clean up comments
16
6379.6.3 by Jelmer Vernooij
Use absolute_import.
17
from __future__ import absolute_import
18
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
19
# \subsection{\emph{rio} - simple text metaformat}
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
20
#
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
21
# \emph{r} stands for `restricted', `reproducible', or `rfc822-like'.
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
22
#
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
23
# The stored data consists of a series of \emph{stanzas}, each of which contains
24
# \emph{fields} identified by an ascii name, with Unicode or string contents.
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
25
# The field tag is constrained to alphanumeric characters.
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
26
# There may be more than one field in a stanza with the same name.
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
27
#
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
28
# The format itself does not deal with character encoding issues, though
29
# the result will normally be written in Unicode.
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
30
#
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
31
# The format is intended to be simple enough that there is exactly one character
32
# stream representation of an object and vice versa, and that this relation
33
# will continue to hold for future versions of bzr.
34
35
import re
36
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
37
from . import osutils
38
from .iterablefile import IterableFile
6677.1.1 by Martin
Go back to native str for urls and many other py3 changes
39
from .sixish import (
40
    text_type,
41
    )
1534.10.2 by Aaron Bentley
Implemented rio_file to produce a light file object from stanzas
42
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
43
# XXX: there is some redundancy in allowing stanzas to be written in
# isolation as well as through a writer object.
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
45
46
class RioWriter(object):
    """Write a sequence of stanzas to a file, separated by blank lines.

    :param to_file: A binary file-like object.  It is handed to each
        stanza's ``write()`` method and receives the separator via its own
        ``write()`` method.
    """

    def __init__(self, to_file):
        # True once at least one stanza has been written, meaning the next
        # stanza must be preceded by a blank separator line.
        self._soft_nl = False
        self._to_file = to_file

    def write_stanza(self, stanza):
        """Write one stanza, preceded by a blank line if it is not the first.

        :param stanza: An object with a ``write(to_file)`` method, such as a
            Stanza.
        """
        if self._soft_nl:
            # Stanza lines are byte strings, so the separator must be bytes
            # too; a native '\n' fails on a binary file under Python 3.
            self._to_file.write(b'\n')
        stanza.write(self._to_file)
        self._soft_nl = True
56
57
58
class RioReader(object):
    """Iterate over the stanzas stored in a file.

    from_file can be anything that can be enumerated as a sequence of
    lines (with newlines.)
    """

    def __init__(self, from_file):
        self._from_file = from_file

    def __iter__(self):
        # Keep pulling stanzas until read_stanza signals end of input by
        # returning None.
        stanza = read_stanza(self._from_file)
        while stanza is not None:
            yield stanza
            stanza = read_stanza(self._from_file)
74
1534.10.2 by Aaron Bentley
Implemented rio_file to produce a light file object from stanzas
75
76
def rio_file(stanzas, header=None):
    """Wrap an iterable of stanzas in a rio IterableFile.

    :param stanzas: An iterable of objects providing ``to_lines()``.
    :param header: Optional byte string emitted first, followed by a newline.
    :return: An IterableFile over the header line (if any) and the lines of
        every stanza, with one blank line between consecutive stanzas.
    """
    def _chunks():
        if header is not None:
            yield header + b'\n'
        need_separator = False
        for stanza in stanzas:
            # Every stanza except the first is preceded by a blank line.
            if need_separator:
                yield b'\n'
            need_separator = True
            for stanza_line in stanza.to_lines():
                yield stanza_line
    return IterableFile(_chunks())
89
90
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
91
def read_stanzas(from_file):
    """Yield each stanza read from from_file until read_stanza returns None."""
    stanza = read_stanza(from_file)
    while stanza is not None:
        yield stanza
        stanza = read_stanza(from_file)
98
99
class Stanza(object):
    """One stanza for rio.

    Each stanza contains a set of named fields.

    Names must be non-empty ascii alphanumeric plus _.  Names can be repeated
    within a stanza.  Names are case-sensitive.  The ordering of fields is
    preserved.

    Each field value added through ``add`` must be either a text string or
    an ascii byte string; byte strings are decoded to text when stored.
    """

    __slots__ = ['items']

    def __init__(self, **kwargs):
        """Construct a new Stanza.

        The keyword arguments, if any, are added in sorted order to the stanza.
        """
        self.items = []
        for tag, value in sorted(kwargs.items()):
            self.add(tag, value)

    def add(self, tag, value):
        """Append a name and value to the stanza.

        :param tag: The field name; must satisfy ``valid_tag``.
        :param value: A text string, or a byte string which is decoded as
            ascii before being stored.
        :raises ValueError: if the tag is not valid.
        :raises TypeError: if the value is neither bytes nor text.
        """
        if not valid_tag(tag):
            raise ValueError("invalid tag %r" % (tag,))
        if isinstance(value, bytes):
            value = value.decode('ascii')
        elif isinstance(value, text_type):
            pass
        else:
            raise TypeError("invalid type for rio value: %r of type %s"
                            % (value, type(value)))
        self.items.append((tag, value))

    @classmethod
    def from_pairs(cls, pairs):
        """Build a Stanza directly from a sequence of (tag, value) pairs.

        The pairs are stored as given: they are neither copied nor validated.
        """
        ret = cls()
        ret.items = pairs
        return ret

    def __contains__(self, find_tag):
        """True if there is any field in this stanza with the given tag."""
        for tag, value in self.items:
            if tag == find_tag:
                return True
        return False

    def __len__(self):
        """Return number of pairs in the stanza."""
        return len(self.items)

    def __eq__(self, other):
        if not isinstance(other, Stanza):
            return False
        return self.items == other.items

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "Stanza(%r)" % self.items

    def iter_pairs(self):
        """Return iterator of tag, value pairs."""
        return iter(self.items)

    def to_lines(self):
        """Generate sequence of lines for external version of this file.

        The lines are always utf-8 encoded byte strings.  The first line of
        a value follows ``tag: ``; every further line of a multi-line value
        is emitted on its own line prefixed with a tab.

        :return: A list of byte strings, each ending in a newline.
        """
        result = []
        for tag, value in self.items:
            # Plain split rather than splitlines: empty lines inside the
            # value must be preserved.  An empty or single-line value gives
            # a one-element list, so it needs no special case.
            val_lines = value.split('\n')
            # The tag is always encoded here; adding a native-str tag to
            # bytes raises TypeError on Python 3.
            result.append(tag.encode('ascii') + b': '
                          + val_lines[0].encode('utf-8') + b'\n')
            for line in val_lines[1:]:
                result.append(b'\t' + line.encode('utf-8') + b'\n')
        return result

    def to_string(self):
        """Return stanza as a single utf-8 encoded byte string."""
        return b''.join(self.to_lines())

    def to_unicode(self):
        """Return stanza as a single Unicode string.

        This is most useful when adding a Stanza to a parent Stanza
        """
        if not self.items:
            return u''

        result = []
        for tag, value in self.items:
            # Same layout as to_lines, but without any encoding step.
            val_lines = value.split(u'\n')
            result.append(tag + u': ' + val_lines[0] + u'\n')
            for line in val_lines[1:]:
                result.append(u'\t' + line + u'\n')
        return u''.join(result)

    def write(self, to_file):
        """Write stanza to a file"""
        to_file.writelines(self.to_lines())

    def get(self, tag):
        """Return the value for a field with given tag.

        If there is more than one value, only the first is returned.  If the
        tag is not present, KeyError is raised.
        """
        for t, v in self.items:
            if t == tag:
                return v
        raise KeyError(tag)

    __getitem__ = get

    def get_all(self, tag):
        """Return a list of every value stored under tag, in stanza order."""
        return [v for t, v in self.items if t == tag]

    def as_dict(self):
        """Return a dict mapping each tag to its value.

        When a tag is repeated, the last value in the stanza wins.
        """
        d = {}
        for tag, value in self.items:
            d[tag] = value
        return d
3943.8.1 by Marius Kruger
remove all trailing whitespace from bzr source
248
4354.3.1 by Jelmer Vernooij
Move core RIO parsing functionality to _rio_py.py.
249
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
250
def valid_tag(tag):
    """Return the verdict of the imported ``_valid_tag`` helper for tag."""
    verdict = _valid_tag(tag)
    return verdict
1185.47.1 by Martin Pool
[broken] start converting basic_io to more rfc822-like format
252
253
254
def read_stanza(line_iter):
    """Return new Stanza read from list of lines or a file

    Returns one Stanza that was read, or returns None at end of file.  If a
    blank line follows the stanza, it is consumed.  It's not an error for
    there to be no blank at end of file.  If there is a blank line at the
    start of the input this is really an empty stanza and that is returned.

    Only the stanza lines and the trailing blank (if any) are consumed
    from the line_iter.

    The raw lines must be in utf-8 encoding.
    """
    # The parsing itself is done by the imported _read_stanza_utf8 helper.
    stanza = _read_stanza_utf8(line_iter)
    return stanza
2030.1.5 by John Arbash Meinel
Create a 'read_stanza_unicode' to handle unicode processing
268
269
270
def read_stanza_unicode(unicode_iter):
    """Read a Stanza from a list of lines or a file.

    The lines should already be in unicode form. This returns a single
    stanza that was read. If there is a blank line at the end of the Stanza,
    it is consumed. It is not an error for there to be no blank line at
    the end of the iterable. If there is a blank line at the beginning,
    this is treated as an empty Stanza and None is returned.

    Only the stanza lines and the trailing blank (if any) are consumed
    from the unicode_iter

    :param unicode_iter: An iterable, yielding Unicode strings. See
        read_stanza if you have a utf-8 encoded string.
    :return: A Stanza object if there are any lines in the file.
        None otherwise
    """
    # The parsing itself is done by the imported _read_stanza_unicode helper.
    stanza = _read_stanza_unicode(unicode_iter)
    return stanza
1551.12.38 by Aaron Bentley
Add docs for MergeDirective and RIO-patch functions
288
289
1551.12.10 by Aaron Bentley
Reduce max width to 72
290
def to_patch_lines(stanza, max_width=72):
    """Convert a stanza into RIO-Patch format lines.

    RIO-Patch is a RIO variant designed to be e-mailed as part of a patch.
    It resists common forms of damage such as newline conversion or the removal
    of trailing whitespace, yet is also reasonably easy to read.

    Every output line starts with ``# ``.  A logical line that is too long is
    split over several physical lines; each unfinished physical line ends in
    a backslash and each continuation is indented by two spaces.  Backslashes
    and carriage returns in the data are escaped as ``\\\\`` and ``\\r``.

    :param stanza: An object whose ``to_lines()`` returns newline-terminated
        byte strings, such as a Stanza.
    :param max_width: The maximum number of characters per physical line.
        Must be greater than 6.
    :return: a list of lines (byte strings)
    :raises ValueError: if max_width is 6 or less.
    """
    if max_width <= 6:
        raise ValueError(max_width)
    # Leave room for the leading '# ', a trailing backslash and the newline.
    max_rio_width = max_width - 4
    lines = []
    for pline in stanza.to_lines():
        # to_lines() yields bytes, so every literal below is bytes as well;
        # native-str literals raise TypeError here on Python 3.
        for line in pline.split(b'\n')[:-1]:
            line = line.replace(b'\\', b'\\\\')
            while line:
                partline = line[:max_rio_width]
                line = line[max_rio_width:]
                if line:
                    # NOTE(review): this used to also test
                    # ``line[0] != [' ']``, which is always true (a character
                    # never equals a list).  The search for a nicer break
                    # point therefore runs whenever text remains; that is
                    # kept so the emitted format does not change.
                    break_index = partline.rfind(b' ', -20)
                    if break_index < 3:
                        # Break after a hyphen rather than before it.
                        break_index = partline.rfind(b'-', -20)
                        break_index += 1
                    if break_index < 3:
                        break_index = partline.rfind(b'/', -20)
                    if break_index >= 3:
                        line = partline[break_index:] + line
                        partline = partline[:break_index]
                if line:
                    # Indent the continuation to make it easier to read.
                    line = b'  ' + line
                partline = partline.replace(b'\r', b'\\r')
                blank_line = False
                if line:
                    partline += b'\\'
                elif partline.endswith(b' '):
                    # Protect trailing whitespace: end the line with a
                    # backslash and follow it with an empty continuation.
                    partline += b'\\'
                    blank_line = True
                lines.append(b'# ' + partline + b'\n')
                if blank_line:
                    lines.append(b'#   \n')
    return lines
334
1551.12.23 by Aaron Bentley
Indent line continuations in Patch-RIO, to improve readability
335
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
336
def _patch_stanza_iter(line_iter):
337
    map = {'\\\\': '\\',
1551.12.9 by Aaron Bentley
force patch-rio to 76 characters
338
           '\\r' : '\r',
339
           '\\\n': ''}
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
340
    def mapget(match):
341
        return map[match.group(0)]
342
1551.12.9 by Aaron Bentley
force patch-rio to 76 characters
343
    last_line = None
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
344
    for line in line_iter:
1551.12.22 by Aaron Bentley
Fix handling of whitespace-stripping without newline munging
345
        if line.startswith('# '):
346
            line = line[2:]
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
347
        elif line.startswith('#'):
1551.12.22 by Aaron Bentley
Fix handling of whitespace-stripping without newline munging
348
            line = line[1:]
3376.2.4 by Martin Pool
Remove every assert statement from bzrlib!
349
        else:
350
            raise ValueError("bad line %r" % (line,))
1551.12.23 by Aaron Bentley
Indent line continuations in Patch-RIO, to improve readability
351
        if last_line is not None and len(line) > 2:
352
            line = line[2:]
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
353
        line = re.sub('\r', '', line)
1551.12.9 by Aaron Bentley
force patch-rio to 76 characters
354
        line = re.sub('\\\\(.|\n)', mapget, line)
355
        if last_line is None:
356
            last_line = line
357
        else:
358
            last_line += line
359
        if last_line[-1] == '\n':
360
            yield last_line
361
            last_line = None
362
    if last_line is not None:
363
        yield last_line
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
364
1551.12.23 by Aaron Bentley
Indent line continuations in Patch-RIO, to improve readability
365
1551.12.1 by Aaron Bentley
Basic RIO patch-compatible format is working
366
def read_patch_stanza(line_iter):
    """Convert an iterable of RIO-Patch format lines into a Stanza.

    RIO-Patch is a RIO variant designed to be e-mailed as part of a patch.
    It resists common forms of damage such as newline conversion or the removal
    of trailing whitespace, yet is also reasonably easy to read.

    :param line_iter: An iterable of RIO-Patch format lines.
    :return: a Stanza
    """
    # First strip the patch decoration, then parse the result as plain rio.
    plain_lines = _patch_stanza_iter(line_iter)
    return read_stanza(plain_lines)
4354.3.1 by Jelmer Vernooij
Move core RIO parsing functionality to _rio_py.py.
376
377
378
try:
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
379
    from ._rio_pyx import (
4354.3.1 by Jelmer Vernooij
Move core RIO parsing functionality to _rio_py.py.
380
        _read_stanza_utf8,
381
        _read_stanza_unicode,
382
        _valid_tag,
383
        )
6619.3.2 by Jelmer Vernooij
Apply 2to3 except fix.
384
except ImportError as e:
4574.3.8 by Martin Pool
Only mutter extension load errors when they occur, and record for later
385
    osutils.failed_to_load_extension(e)
6624 by Jelmer Vernooij
Merge Python3 porting work ('py3 pokes')
386
    from ._rio_py import (
4354.3.1 by Jelmer Vernooij
Move core RIO parsing functionality to _rio_py.py.
387
       _read_stanza_utf8,
388
       _read_stanza_unicode,
389
       _valid_tag,
390
       )