13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
17
# \subsection{\emph{rio} - simple text metaformat}
19
19
# \emph{r} stands for `restricted', `reproducible', or `rfc822-like'.
21
21
# The stored data consists of a series of \emph{stanzas}, each of which contains
22
22
# \emph{fields} identified by an ascii name, with Unicode or string contents.
23
# The field tag is constrained to alphanumeric characters.
23
# The field tag is constrained to alphanumeric characters.
24
24
# There may be more than one field in a stanza with the same name.
26
26
# The format itself does not deal with character encoding issues, though
27
27
# the result will normally be written in Unicode.
29
29
# The format is intended to be simple enough that there is exactly one character
30
30
# stream representation of an object and vice versa, and that this relation
31
31
# will continue to hold for future versions of bzr.
36
from .iterablefile import IterableFile
35
from bzrlib.iterablefile import IterableFile
38
37
# XXX: there is some redundancy in allowing stanzas to be written in isolation as well as
39
# through a writer object.
38
# through a writer object.
42
40
class RioWriter(object):
44
41
def __init__(self, to_file):
45
42
self._soft_nl = False
46
43
self._to_file = to_file
48
45
def write_stanza(self, stanza):
50
self._to_file.write(b'\n')
51
48
stanza.write(self._to_file)
52
49
self._soft_nl = True
55
52
class RioReader(object):
56
53
"""Read stanzas from a file as a sequence
58
from_file can be anything that can be enumerated as a sequence of
55
from_file can be anything that can be enumerated as a sequence of
59
56
lines (with newlines.)
62
58
def __init__(self, from_file):
63
59
self._from_file = from_file
89
85
def read_stanzas(from_file):
92
87
s = read_stanza(from_file)
98
def read_stanzas_unicode(from_file):
101
s = read_stanza_unicode(from_file)
107
93
class Stanza(object):
108
94
"""One stanza for rio.
110
Each stanza contains a set of named fields.
96
Each stanza contains a set of named fields.
112
98
Names must be non-empty ascii alphanumeric plus _. Names can be repeated
113
99
within a stanza. Names are case-sensitive. The ordering of fields is
131
117
def add(self, tag, value):
132
118
"""Append a name and value to the stanza."""
133
if not valid_tag(tag):
134
raise ValueError("invalid tag %r" % (tag,))
135
if isinstance(value, bytes):
136
value = value.decode('ascii')
137
elif isinstance(value, str):
119
assert valid_tag(tag), \
120
("invalid tag %r" % tag)
121
if isinstance(value, str):
122
value = unicode(value)
123
elif isinstance(value, unicode):
125
## elif isinstance(value, (int, long)):
126
## value = str(value) # XXX: python2.4 without L-suffix
140
128
raise TypeError("invalid type for rio value: %r of type %s"
141
129
% (value, type(value)))
142
130
self.items.append((tag, value))
145
def from_pairs(cls, pairs):
150
132
def __contains__(self, find_tag):
151
133
"""True if there is any field in this stanza with the given tag."""
152
134
for tag, value in self.items:
176
158
def to_lines(self):
177
159
"""Generate sequence of lines for external version of this file.
179
161
The lines are always utf-8 encoded strings.
181
163
if not self.items:
182
164
# max() complains if sequence is empty
185
for text_tag, text_value in self.items:
186
tag = text_tag.encode('ascii')
187
value = text_value.encode('utf-8', 'surrogateescape')
189
result.append(tag + b': \n')
167
for tag, value in self.items:
168
assert isinstance(tag, str), type(tag)
169
assert isinstance(value, unicode)
171
result.append(tag + ': \n')
191
173
# don't want splitlines behaviour on empty lines
192
val_lines = value.split(b'\n')
193
result.append(tag + b': ' + val_lines[0] + b'\n')
174
val_lines = value.split('\n')
175
result.append(tag + ': ' + val_lines[0].encode('utf-8') + '\n')
194
176
for line in val_lines[1:]:
195
result.append(b'\t' + line + b'\n')
177
result.append('\t' + line.encode('utf-8') + '\n')
197
result.append(tag + b': ' + value + b'\n')
179
result.append(tag + ': ' + value.encode('utf-8') + '\n')
200
182
def to_string(self):
201
183
"""Return stanza as a single string"""
202
return b''.join(self.to_lines())
184
return ''.join(self.to_lines())
204
186
def to_unicode(self):
205
187
"""Return stanza as a single Unicode string.
213
195
for tag, value in self.items:
215
result.append(tag + u': \n')
197
result.append(tag + ': \n')
217
199
# don't want splitlines behaviour on empty lines
218
val_lines = value.split(u'\n')
219
result.append(tag + u': ' + val_lines[0] + u'\n')
200
val_lines = value.split('\n')
201
result.append(tag + ': ' + val_lines[0] + '\n')
220
202
for line in val_lines[1:]:
221
result.append(u'\t' + line + u'\n')
203
result.append('\t' + line + '\n')
223
result.append(tag + u': ' + value + u'\n')
205
result.append(tag + ': ' + value + '\n')
224
206
return u''.join(result)
226
208
def write(self, to_file):
255
237
for tag, value in self.items:
242
_tag_re = re.compile(r'^[-a-zA-Z0-9_]+$')
260
243
def valid_tag(tag):
261
return _valid_tag(tag)
244
return bool(_tag_re.match(tag))
264
247
def read_stanza(line_iter):
265
248
"""Return new Stanza read from list of lines or a file
267
250
Returns one Stanza that was read, or returns None at end of file. If a
268
251
blank line follows the stanza, it is consumed. It's not an error for
269
252
there to be no blank at end of file. If there is a blank line at the
270
start of the input this is really an empty stanza and that is returned.
253
start of the input this is really an empty stanza and that is returned.
272
255
Only the stanza lines and the trailing blank (if any) are consumed
273
256
from the line_iter.
275
258
The raw lines must be in utf-8 encoding.
277
return _read_stanza_utf8(line_iter)
260
unicode_iter = (line.decode('utf-8') for line in line_iter)
261
return read_stanza_unicode(unicode_iter)
280
264
def read_stanza_unicode(unicode_iter):
294
278
:return: A Stanza object if there are any lines in the file.
297
return _read_stanza_unicode(unicode_iter)
285
# TODO: jam 20060922 This code should raise real errors rather than
286
# using 'assert' to process user input, or raising ValueError
287
# rather than a more specific error.
289
for line in unicode_iter:
290
if line is None or line == '':
293
break # end of stanza
294
assert line.endswith('\n')
296
if line[0] == '\t': # continues previous value
298
raise ValueError('invalid continuation line %r' % real_l)
299
accum_value += '\n' + line[1:-1]
300
else: # new tag:value line
302
stanza.add(tag, accum_value)
304
colon_index = line.index(': ')
306
raise ValueError('tag/value separator not found in line %r'
308
tag = str(line[:colon_index])
309
assert valid_tag(tag), \
310
"invalid rio tag %r" % tag
311
accum_value = line[colon_index+2:-1]
313
if tag is not None: # add last tag-value
314
stanza.add(tag, accum_value)
316
else: # didn't see any content
300
320
def to_patch_lines(stanza, max_width=72):
307
327
:param max_width: The maximum number of characters per physical line.
308
328
:return: a list of lines
311
raise ValueError(max_width)
312
331
max_rio_width = max_width - 4
314
333
for pline in stanza.to_lines():
315
for line in pline.split(b'\n')[:-1]:
316
line = re.sub(b'\\\\', b'\\\\\\\\', line)
334
for line in pline.split('\n')[:-1]:
335
line = re.sub('\\\\', '\\\\\\\\', line)
317
336
while len(line) > 0:
318
337
partline = line[:max_rio_width]
319
338
line = line[max_rio_width:]
320
if len(line) > 0 and line[:1] != [b' ']:
339
if len(line) > 0 and line[0] != [' ']:
322
break_index = partline.rfind(b' ', -20)
341
break_index = partline.rfind(' ', -20)
323
342
if break_index < 3:
324
break_index = partline.rfind(b'-', -20)
343
break_index = partline.rfind('-', -20)
326
345
if break_index < 3:
327
break_index = partline.rfind(b'/', -20)
346
break_index = partline.rfind('/', -20)
328
347
if break_index >= 3:
329
348
line = partline[break_index:] + line
330
349
partline = partline[:break_index]
331
350
if len(line) > 0:
333
partline = re.sub(b'\r', b'\\\\r', partline)
352
partline = re.sub('\r', '\\\\r', partline)
334
353
blank_line = False
335
354
if len(line) > 0:
337
elif re.search(b' $', partline):
356
elif re.search(' $', partline):
339
358
blank_line = True
340
lines.append(b'# ' + partline + b'\n')
359
lines.append('# ' + partline + '\n')
342
lines.append(b'# \n')
346
365
def _patch_stanza_iter(line_iter):
347
map = {b'\\\\': b'\\',
351
369
def mapget(match):
352
370
return map[match.group(0)]
355
373
for line in line_iter:
356
if line.startswith(b'# '):
374
if line.startswith('# '):
358
elif line.startswith(b'#'):
377
assert line.startswith('#')
361
raise ValueError("bad line %r" % (line,))
362
379
if last_line is not None and len(line) > 2:
364
line = re.sub(b'\r', b'', line)
365
line = re.sub(b'\\\\(.|\n)', mapget, line)
381
line = re.sub('\r', '', line)
382
line = re.sub('\\\\(.|\n)', mapget, line)
366
383
if last_line is None:
369
386
last_line += line
370
if last_line[-1:] == b'\n':
387
if last_line[-1] == '\n':
373
390
if last_line is not None: