bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar
| 
1185.47.1
by Martin Pool
 [broken] start converting basic_io to more rfc822-like format  | 
1  | 
# Copyright (C) 2005 by Canonical Ltd
 | 
2  | 
#
 | 
|
3  | 
# Distributed under the GNU General Public Licence v2
 | 
|
| 
1553.5.6
by Martin Pool
 Clean up comments  | 
4  | 
|
| 
1185.47.1
by Martin Pool
 [broken] start converting basic_io to more rfc822-like format  | 
5  | 
# \subsection{\emph{rio} - simple text metaformat}
 | 
6  | 
# 
 | 
|
7  | 
# \emph{r} stands for `restricted', `reproducible', or `rfc822-like'.
 | 
|
8  | 
# 
 | 
|
9  | 
# The stored data consists of a series of \emph{stanzas}, each of which contains
 | 
|
10  | 
# \emph{fields} identified by an ascii name, with Unicode or string contents.
 | 
|
11  | 
# The field tag is constrained to ascii alphanumeric characters, '-' and '_'.
 | 
|
12  | 
# There may be more than one field in a stanza with the same name.
 | 
|
13  | 
# 
 | 
|
14  | 
# The format itself does not deal with character encoding issues, though
 | 
|
15  | 
# the result will normally be written in Unicode.
 | 
|
16  | 
# 
 | 
|
17  | 
# The format is intended to be simple enough that there is exactly one character
 | 
|
18  | 
# stream representation of an object and vice versa, and that this relation
 | 
|
19  | 
# will continue to hold for future versions of bzr.
 | 
|
20  | 
||
21  | 
import re  | 
|
22  | 
||
23  | 
# XXX: it is somewhat redundant to allow writing stanzas in isolation as well as
 | 
|
24  | 
# through a writer object.  
 | 
|
25  | 
||
26  | 
class RioWriter(object):
    """Write a series of stanzas to a file, separated by blank lines.

    to_file is the destination.  It needs a ``write`` method (used for the
    separator) and is also handed to each stanza's own ``write`` method.
    """

    def __init__(self, to_file):
        # Becomes True once a stanza has been written, so that every later
        # stanza is preceded by a blank separator line.
        self._soft_nl = False
        self._to_file = to_file

    def write_stanza(self, stanza):
        """Write one stanza, preceded by a blank line unless it is the first."""
        if self._soft_nl:
            # An explicit write is used instead of the ``print >>file``
            # statement, which only exists in Python 2; elsewhere it parses
            # as a shift expression and fails at run time.
            self._to_file.write('\n')
        stanza.write(self._to_file)
        self._soft_nl = True
|
36  | 
||
37  | 
||
38  | 
class RioReader(object):
    """Iterate over the stanzas held in a file.

    from_file can be anything that can be enumerated as a sequence of
    lines (with newlines.)
    """

    def __init__(self, from_file):
        self._from_file = from_file

    def __iter__(self):
        """Yield stanzas one at a time until read_stanza returns None."""
        stanza = read_stanza(self._from_file)
        while stanza is not None:
            yield stanza
            stanza = read_stanza(self._from_file)
|
54  | 
||
55  | 
def read_stanzas(from_file):
    """Generate every stanza that read_stanza can read from from_file.

    Iteration stops the first time read_stanza returns None.
    """
    stanza = read_stanza(from_file)
    while stanza is not None:
        yield stanza
        stanza = read_stanza(from_file)
|
62  | 
||
63  | 
class Stanza(object):
    """One stanza for rio.

    Each stanza contains a set of named fields.

    Names must be accepted by valid_tag().  Names can be repeated within a
    stanza.  Names are case-sensitive.  The ordering of fields is preserved.

    Each field value must be a str or unicode string; values are stored as
    unicode.  Other types (including ints) are rejected with TypeError.
    """

    # 'items' is the ordered list of (tag, value) pairs.
    __slots__ = ['items']

    def __init__(self, **kwargs):
        """Construct a new Stanza.

        The keyword arguments, if any, are added in sorted order to the stanza.
        """
        self.items = []
        for tag, value in sorted(kwargs.items()):
            self.add(tag, value)

    def add(self, tag, value):
        """Append a name and value to the stanza.

        Raises ValueError if tag is rejected by valid_tag(), and TypeError
        if value is neither a str nor a unicode string.
        """
        # A real exception rather than an assert: asserts vanish under -O,
        # which would let invalid tags into the stanza.
        if not valid_tag(tag):
            raise ValueError("invalid tag %r" % (tag,))
        if isinstance(value, str):
            value = unicode(value)
        elif not isinstance(value, unicode):
            raise TypeError("invalid type for rio value: %r of type %s"
                            % (value, type(value)))
        self.items.append((tag, value))

    def __contains__(self, find_tag):
        """True if there is any field in this stanza with the given tag."""
        for tag, value in self.items:
            if tag == find_tag:
                return True
        return False

    def __len__(self):
        """Return number of pairs in the stanza."""
        return len(self.items)

    def __eq__(self, other):
        """Stanzas are equal when they hold the same pairs in the same order."""
        return isinstance(other, Stanza) and self.items == other.items

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "Stanza(%r)" % self.items

    def iter_pairs(self):
        """Return iterator of tag, value pairs."""
        return iter(self.items)

    def to_lines(self):
        """Generate sequence of lines for external version of this file.

        The lines are always utf-8 encoded strings.  The first line of a
        value follows 'tag: '; each further line of a multi-line value is
        written on its own line prefixed with a tab.
        """
        result = []
        for tag, value in self.items:
            assert isinstance(tag, str)
            assert isinstance(value, unicode)
            # split() rather than splitlines(): empty lines inside the value
            # (and an empty value itself) must each produce an output line.
            val_lines = value.encode('utf-8').split('\n')
            result.append(tag + ': ' + val_lines[0] + '\n')
            for line in val_lines[1:]:
                result.append('\t' + line + '\n')
        return result

    def to_string(self):
        """Return stanza as a single string"""
        return ''.join(self.to_lines())

    def write(self, to_file):
        """Write stanza to a file"""
        to_file.writelines(self.to_lines())

    def get(self, tag):
        """Return the value for a field with given tag.

        If there is more than one value, only the first is returned.  If the
        tag is not present, KeyError is raised.
        """
        for t, v in self.items:
            if t == tag:
                return v
        raise KeyError(tag)

    __getitem__ = get

    def get_all(self, tag):
        """Return a list of every value stored under tag, in stanza order.

        The list is empty if the tag is not present.
        """
        return [v for t, v in self.items if t == tag]

    def as_dict(self):
        """Return a dict containing the unique values of the stanza.

        Every tag is expected to occur only once; a repeated tag trips an
        assertion.
        """
        d = {}
        for tag, value in self.items:
            assert tag not in d
            d[tag] = value
        return d
|
| 
1185.47.1
by Martin Pool
 [broken] start converting basic_io to more rfc822-like format  | 
189  | 
|
190  | 
# A tag is one or more ASCII letters, digits, '-' or '_'.  The pattern is
# anchored with \Z rather than $, because $ also matches just before a
# trailing newline and would accept a tag such as 'name\n'.
_tag_re = re.compile(r'^[-a-zA-Z0-9_]+\Z')


def valid_tag(tag):
    """Return True if tag is an acceptable rio field name, else False."""
    return bool(_tag_re.match(tag))
|
193  | 
||
194  | 
||
195  | 
def read_stanza(line_iter):
    """Return new Stanza read from list of lines or a file

    Returns one Stanza that was read, or returns None at end of file.  If a
    blank line follows the stanza, it is consumed.  It's not an error for
    there to be no blank at end of file.  If the first line read is blank,
    no field has been seen and None is returned, exactly as at end of file.

    Only the stanza lines and the trailing blank (if any) are consumed
    from the line_iter.

    The raw lines must be in utf-8 encoding, and each must end with a
    newline.

    Raises ValueError if a line lacks its trailing newline, if a
    continuation (tab-prefixed) line appears before any field, if a field
    line has no ': ' separator, or if the tag is rejected by valid_tag().
    """
    stanza = Stanza()
    tag = None
    # Lines of the value currently being read; joined with newlines when the
    # field is complete, which avoids repeated string concatenation.
    value_parts = None
    for line in line_iter:
        if line is None or line == '':
            break  # end of file
        if line == '\n':
            break  # end of stanza
        line = line.decode('utf-8')
        # Raised rather than asserted: without this check the slices below
        # would silently drop the last character of an unterminated line.
        if line[-1] != '\n':
            raise ValueError('missing trailing newline in line %r' % line)
        if line[0] == '\t':  # continues previous value
            if tag is None:
                raise ValueError('invalid continuation line %r' % line)
            value_parts.append(line[1:-1])
        else:  # new tag:value line
            if tag is not None:
                stanza.add(tag, u'\n'.join(value_parts))
            try:
                colon_index = line.index(': ')
            except ValueError:
                raise ValueError('tag/value separator not found in line %r'
                                 % line)
            name = line[:colon_index]
            if not valid_tag(name):
                raise ValueError("invalid rio tag %r" % (name,))
            # The decoded line is unicode, but Stanza.to_lines requires str
            # tags.  A valid tag is pure ASCII, so the conversion is safe.
            tag = str(name)
            value_parts = [line[colon_index + 2:-1]]
    if tag is None:  # didn't see any content
        return None
    # add last tag-value
    stanza.add(tag, u'\n'.join(value_parts))
    return stanza