# Copyright (C) 2011 Canonical Ltd
#
# UTextWrapper._handle_long_word, UTextWrapper._wrap_chunks,
# UTextWrapper._fix_sentence_endings, wrap and fill is copied from Python's
# textwrap module (under PSF license) and modified for support CJK.
# Original Copyright for these functions:
#
# Copyright (C) 1999-2001 Gregory P. Ward.
# Copyright (C) 2002, 2003 Python Software Foundation.
#
# Written by Greg Ward <gward@python.net>
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
from __future__ import absolute_import

import textwrap
from unicodedata import east_asian_width as _eawidth
33
__all__ = ["UTextWrapper", "fill", "wrap"]
36
class UTextWrapper(textwrap.TextWrapper):
    """Extend TextWrapper for Unicode.

    This text wrapper handles East Asian double-width characters and
    splits a word even if break_long_words is false when the word contains
    double-width characters.

    :param ambiguous_width: (keyword argument) width for a character when
        unicodedata.east_asian_width(c) == 'A' (default: 1)

    Limitations:

    * expand_tabs is not fixed. It uses len() for calculating the width
      of the string on the left of a TAB.
    * Handles one code unit as a single character having width 1 or 2.
      This is not correct when there are surrogate pairs, combining
      characters or zero-width characters.
    * Treats every double-width character as line breakable. That is not
      true because line breaking is prohibited around some characters.
      (For example, breaking before a punctuation mark is prohibited.)
      See UAX # 14 "UNICODE LINE BREAKING ALGORITHM"
    """

    def __init__(self, width=None, **kwargs):
        """Create a wrapper.

        :param width: Maximum line width in columns. When None, it is
            derived from ``osutils.terminal_width()`` (falling back to
            ``osutils.default_terminal_width``) minus one.
        :param kwargs: ``ambiguous_width`` (1 or 2) is consumed here; every
            other keyword is forwarded to ``textwrap.TextWrapper``.
        :raises ValueError: if ``ambiguous_width`` is neither 1 nor 2.
        """
        if width is None:
            width = (osutils.terminal_width() or
                     osutils.default_terminal_width) - 1

        # Set of east_asian_width() categories counted as two columns.
        ambi_width = kwargs.pop('ambiguous_width', 1)
        if ambi_width == 1:
            self._east_asian_doublewidth = 'FW'
        elif ambi_width == 2:
            self._east_asian_doublewidth = 'FWA'
        else:
            raise ValueError("ambiguous_width should be 1 or 2")

        # Recorded before delegating so _wrap_chunks can always read it.
        self.max_lines = kwargs.get('max_lines', None)
        textwrap.TextWrapper.__init__(self, width, **kwargs)

    def _unicode_char_width(self, uc):
        """Return width of character `uc`.

        :param uc: Single unicode character.
        :return: 2 when the East Asian width category of `uc` is one of the
            categories treated as double width, otherwise 1.
        """
        # 'F' and 'W' are always double width. 'A' (ambiguous) is counted
        # as double width only when the wrapper was created with
        # ambiguous_width=2.
        return 2 if _eawidth(uc) in self._east_asian_doublewidth else 1

    def _width(self, s):
        """Return the display width of `s`.

        The width is the sum of the per-character widths, so East Asian
        double-width characters count as two columns.
        """
        charwidth = self._unicode_char_width
        return sum(charwidth(c) for c in s)

    def _cut(self, s, width):
        """Return head and rest of `s` (head + rest == s).

        Head is as long as possible while _width(head) <= width.
        """
        used = 0
        charwidth = self._unicode_char_width
        for pos, c in enumerate(s):
            used += charwidth(c)
            if used > width:
                return s[:pos], s[pos:]
        return s, u''

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'. E.g. when the
        original text contains "... foo." followed by a newline and
        "Bar ...", munge_whitespace() and split() will convert that to
        [..., "foo.", " ", "Bar", ...] which has one too few spaces; this
        method simply changes the one space to two. `chunks` is modified
        in place.

        Note: This function is copied from textwrap.TextWrapper and
        modified to use unicode always.
        """
        i = 0
        last = len(chunks) - 1
        patsearch = self.sentence_end_re.search
        while i < last:
            if chunks[i + 1] == u" " and patsearch(chunks[i]):
                chunks[i + 1] = u"  "
                i += 2
            else:
                i += 1

    def _handle_long_word(self, chunks, cur_line, cur_len, width):
        """Handle a chunk that is too wide to fit on any line.

        :param chunks: Reversed list of remaining chunks; the chunk to
            process is chunks[-1]. Modified in place.
        :param cur_line: Chunks already placed on the current line.
            Modified in place.
        :param cur_len: Display width already used on the current line.
        :param width: Display width available for the current line.
        """
        # Figure out when indent is larger than the specified width, and make
        # sure at least one character is stripped off on every pass
        if width < 2:
            space_left = self._width(chunks[-1][0]) if chunks[-1] else 1
        else:
            space_left = width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            head, rest = self._cut(chunks[-1], space_left)
            cur_line.append(head)
            if rest:
                chunks[-1] = rest
            else:
                del chunks[-1]

        # Otherwise, we have to preserve the long word intact. Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing. Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """Wrap a sequence of text chunks and return a list of lines.

        Widths are measured with _width(), so double-width characters
        count as two columns.

        :param chunks: List of chunks. It is reversed and consumed in
            place.
        :return: List of wrapped lines, each prefixed by the appropriate
            indent.
        :raises ValueError: if self.width <= 0, or if the placeholder does
            not fit in the width when max_lines is set.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)
        if self.max_lines is not None:
            if self.max_lines > 1:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            if (self._width(indent) + self._width(self.placeholder.lstrip())
                    > self.width):
                raise ValueError("placeholder too large for max width")

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is the display width of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line. The indent is measured with
            # _width so a double-width indent takes the columns it
            # actually occupies.
            width = self.width - self._width(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                # Use _width instead of len for east asian width
                chunk_width = self._width(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_width <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_width

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and self._width(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)
                # Recompute in display columns, not code points, so the
                # max_lines/placeholder checks below stay accurate.
                cur_len = sum(map(self._width, cur_line))

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and not cur_line[-1].strip():
                del cur_line[-1]

            if cur_line:
                if (self.max_lines is None or
                        len(lines) + 1 < self.max_lines or
                        (not chunks or
                         self.drop_whitespace and
                         len(chunks) == 1 and
                         not chunks[0].strip()) and cur_len <= width):
                    # Convert current line back to a string and store it in
                    # list of all lines (return value).
                    lines.append(indent + u''.join(cur_line))
                else:
                    # This is the last permitted line but text remains:
                    # drop trailing chunks until the placeholder fits.
                    while cur_line:
                        if (cur_line[-1].strip() and
                                cur_len + self._width(self.placeholder)
                                <= width):
                            cur_line.append(self.placeholder)
                            lines.append(indent + ''.join(cur_line))
                            break
                        cur_len -= self._width(cur_line[-1])
                        del cur_line[-1]
                    else:
                        # Nothing left on this line; try to attach the
                        # placeholder to the previous line instead.
                        if lines:
                            prev_line = lines[-1].rstrip()
                            if (self._width(prev_line) +
                                    self._width(self.placeholder) <=
                                    self.width):
                                lines[-1] = prev_line + self.placeholder
                                break
                        lines.append(indent + self.placeholder.lstrip())
                    break

        return lines

    def _split(self, text):
        """Split `text` into chunks, isolating double-width characters.

        The chunks produced by textwrap.TextWrapper._split are split
        further so that every double-width character becomes a chunk of
        its own, which makes a line break possible on either side of it.
        """
        chunks = textwrap.TextWrapper._split(self, osutils.safe_unicode(text))
        cjk_split_chunks = []
        for chunk in chunks:
            prev_pos = 0
            for pos, char in enumerate(chunk):
                if self._unicode_char_width(char) == 2:
                    # Flush the run of single-width characters before it.
                    if prev_pos < pos:
                        cjk_split_chunks.append(chunk[prev_pos:pos])
                    cjk_split_chunks.append(char)
                    prev_pos = pos + 1
            if prev_pos < len(chunk):
                cjk_split_chunks.append(chunk[prev_pos:])
        return cjk_split_chunks

    def wrap(self, text):
        """Wrap `text` and return a list of wrapped lines."""
        # ensure text is unicode
        return textwrap.TextWrapper.wrap(self, osutils.safe_unicode(text))
272
# -- Convenience interface ---------------------------------------------
275
def wrap(text, width=None, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Reformat the single paragraph in 'text' so it fits in lines of no
    more than 'width' columns, and return a list of wrapped lines. By
    default, tabs in 'text' are expanded with string.expandtabs(), and
    all other whitespace characters (including newline) are converted to
    space. See TextWrapper class for available keyword args to customize
    wrapping behaviour.

    :param text: The paragraph to wrap.
    :param width: Maximum line width in columns; passed to UTextWrapper
        unchanged (including None).
    :param kwargs: Extra keyword arguments passed to UTextWrapper.
    """
    return UTextWrapper(width=width, **kwargs).wrap(text)
288
def fill(text, width=None, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Reformat the single paragraph in 'text' to fit in lines of no more
    than 'width' columns, and return a new string containing the entire
    wrapped paragraph. As with wrap(), tabs are expanded and other
    whitespace characters converted to space. See TextWrapper class for
    available keyword args to customize wrapping behaviour.

    :param text: The paragraph to fill.
    :param width: Maximum line width in columns; passed to UTextWrapper
        unchanged (including None).
    :param kwargs: Extra keyword arguments passed to UTextWrapper.
    """
    return UTextWrapper(width=width, **kwargs).fill(text)