/brz/remove-bazaar : contents of breezy/utextwrap.py at revision 7166

: (revision 7166)

To get this branch, use:

bzr branch
http://gegoxaren.bato24.eu/bzr/brz/remove-bazaar

5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	1	# Copyright (C) 2011 Canonical Ltd
	2	#
5820.1.18 by INADA Naoki Add copyright for some function.	3	# UTextWrapper._handle_long_word, UTextWrapper._wrap_chunks,
5820.1.27 by INADA Naoki Fix error when fix_sentence_endings=True.	4	# UTextWrapper._fix_sentence_endings, wrap and fill are copied from Python's
	5	# textwrap module (under PSF license) and modified to support CJK.
5820.1.18 by INADA Naoki Add copyright for some function.	6	# Original Copyright for these functions:
	7	#
	8	# Copyright (C) 1999-2001 Gregory P. Ward.
	9	# Copyright (C) 2002, 2003 Python Software Foundation.
	10	#
	11	# Written by Greg Ward <gward@python.net>
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	12	# This program is free software; you can redistribute it and/or modify
	13	# it under the terms of the GNU General Public License as published by
	14	# the Free Software Foundation; either version 2 of the License, or
	15	# (at your option) any later version.
	16	#
	17	# This program is distributed in the hope that it will be useful,
	18	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	19	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	20	# GNU General Public License for more details.
	21	#
	22	# You should have received a copy of the GNU General Public License
	23	# along with this program; if not, write to the Free Software
	24	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	25
6379.6.3 by Jelmer Vernooij Use absolute_import.	26	from __future__ import absolute_import
6379.6.3 by Jelmer Vernooij Use absolute_import.	27
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	28	import textwrap
	29	from unicodedata import east_asian_width as _eawidth
	30
6624 by Jelmer Vernooĳ Merge Python3 porting work ('py3 pokes')	31	from . import osutils
5820.1.10 by INADA Naoki utextwrap: Change a way to split between CJK characters.	32
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	33	__all__ = ["UTextWrapper", "fill", "wrap"]
	34
	35	class UTextWrapper(textwrap.TextWrapper):
	36	"""
	37	Extend TextWrapper for Unicode.
	38
	39	This textwrapper handles east asian double width and split word
	40	even if !break_long_words when word contains double width
	41	characters.
5820.1.19 by INADA Naoki Add keyword parameter 'ambiguous_width' that specifies width for character	42
	43	:param ambiguous_width: (keyword argument) width for character when
	44	unicodedata.east_asian_width(c) == 'A'
5820.1.21 by INADA Naoki Change default value of ambiguous_width from 2 to 1.	45	(default: 1)
5820.1.22 by INADA Naoki Add document of some limitations in docstring.	46
	47	Limitations:
	48	* expand_tabs doesn't fixed. It uses len() for calculating width
	49	of string on left of TAB.
	50	* Handles one codeunit as a single character having 1 or 2 width.
	51	This is not correct when there are surrogate pairs, combined
	52	characters or zero-width characters.
	53	* Treats all asian character are line breakable. But it is not
	54	true because line breaking is prohibited around some characters.
	55	(For example, breaking before punctation mark is prohibited.)
	56	See UAX # 14 "UNICODE LINE BREAKING ALGORITHM"
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	57	"""
5820.1.15 by Martin Cope with lack of TextWrapper.drop_whitespace before Python 2.6	58
5820.1.9 by INADA Naoki Default width of UTextWrapper is also osutils.terminal_widtth() and	59	def __init__(self, width=None, **kwargs):
	60	if width is None:
	61	width = (osutils.terminal_width() or
	62	osutils.default_terminal_width) - 1
5820.1.19 by INADA Naoki Add keyword parameter 'ambiguous_width' that specifies width for character	63
5820.1.21 by INADA Naoki Change default value of ambiguous_width from 2 to 1.	64	ambi_width = kwargs.pop('ambiguous_width', 1)
5820.1.19 by INADA Naoki Add keyword parameter 'ambiguous_width' that specifies width for character	65	if ambi_width == 1:
	66	self._east_asian_doublewidth = 'FW'
	67	elif ambi_width == 2:
	68	self._east_asian_doublewidth = 'FWA'
	69	else:
	70	raise ValueError("ambiguous_width should be 1 or 2")
	71
7112.1.2 by Jelmer Vernooĳ Fix test on python2.	72	self.max_lines = kwargs.get('max_lines', None)
5820.1.9 by INADA Naoki Default width of UTextWrapper is also osutils.terminal_widtth() and	73	textwrap.TextWrapper.__init__(self, width, **kwargs)
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	74
5820.1.19 by INADA Naoki Add keyword parameter 'ambiguous_width' that specifies width for character	75	def _unicode_char_width(self, uc):
	76	"""Return width of character `uc`.
	77
	78	:param: uc Single unicode character.
	79	"""
	80	# 'A' means width of the character is not be able to determine.
	81	# We assume that it's width is 2 because longer wrap may over
	82	# terminal width but shorter wrap may be acceptable.
	83	return (_eawidth(uc) in self._east_asian_doublewidth and 2) or 1
	84
	85	def _width(self, s):
	86	"""Returns width for s.
5820.1.26 by INADA Naoki Cleanup. Remove spaces in empty line and shorten line having 80 characters.	87
5820.1.19 by INADA Naoki Add keyword parameter 'ambiguous_width' that specifies width for character	88	When s is unicode, take care of east asian width.
	89	When s is bytes, treat all byte is single width character.
	90	"""
	91	charwidth = self._unicode_char_width
	92	return sum(charwidth(c) for c in s)
	93
	94	def _cut(self, s, width):
	95	"""Returns head and rest of s. (head+rest == s)
	96
	97	Head is large as long as _width(head) <= width.
	98	"""
	99	w = 0
	100	charwidth = self._unicode_char_width
	101	for pos, c in enumerate(s):
	102	w += charwidth(c)
	103	if w > width:
	104	return s[:pos], s[pos:]
	105	return s, u''
	106
5820.1.27 by INADA Naoki Fix error when fix_sentence_endings=True.	107	def _fix_sentence_endings(self, chunks):
	108	"""_fix_sentence_endings(chunks : [string])
	109
	110	Correct for sentence endings buried in 'chunks'. Eg. when the
	111	original text contains "... foo.\nBar ...", munge_whitespace()
	112	and split() will convert that to [..., "foo.", " ", "Bar", ...]
	113	which has one too few spaces; this method simply changes the one
	114	space to two.
	115
	116	Note: This function is copied from textwrap.TextWrap and modified
	117	to use unicode always.
	118	"""
	119	i = 0
	120	L = len(chunks)-1
	121	patsearch = self.sentence_end_re.search
	122	while i < L:
	123	if chunks[i+1] == u" " and patsearch(chunks[i]):
	124	chunks[i+1] = u" "
	125	i += 2
	126	else:
	127	i += 1
	128
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	129	def _handle_long_word(self, chunks, cur_line, cur_len, width):
5820.1.10 by INADA Naoki utextwrap: Change a way to split between CJK characters.	130	# Figure out when indent is larger than the specified width, and make
	131	# sure at least one character is stripped off on every pass
	132	if width < 2:
5820.1.19 by INADA Naoki Add keyword parameter 'ambiguous_width' that specifies width for character	133	space_left = chunks[-1] and self._width(chunks[-1][0]) or 1
5820.1.10 by INADA Naoki utextwrap: Change a way to split between CJK characters.	134	else:
	135	space_left = width - cur_len
	136
	137	# If we're allowed to break long words, then do so: put as much
	138	# of the next chunk onto the current line as will fit.
	139	if self.break_long_words:
5820.1.19 by INADA Naoki Add keyword parameter 'ambiguous_width' that specifies width for character	140	head, rest = self._cut(chunks[-1], space_left)
5820.1.10 by INADA Naoki utextwrap: Change a way to split between CJK characters.	141	cur_line.append(head)
5820.1.5 by INADA Naoki Make UTextWrapper support byte string and add tests including Python's	142	if rest:
5820.1.10 by INADA Naoki utextwrap: Change a way to split between CJK characters.	143	chunks[-1] = rest
	144	else:
	145	del chunks[-1]
	146
	147	# Otherwise, we have to preserve the long word intact. Only add
	148	# it to the current line if there's nothing already there --
	149	# that minimizes how much we violate the width constraint.
	150	elif not cur_line:
	151	cur_line.append(chunks.pop())
	152
	153	# If we're not allowed to break long words, and there's already
	154	# text on the current line, do nothing. Next time through the
	155	# main loop of _wrap_chunks(), we'll wind up here again, but
	156	# cur_len will be zero, so the next line will be entirely
	157	# devoted to the long word that we can't handle right now.
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	158
	159	def _wrap_chunks(self, chunks):
	160	lines = []
	161	if self.width <= 0:
	162	raise ValueError("invalid width %r (must be > 0)" % self.width)
7112.1.1 by Jelmer Vernooĳ Fix utextwrap tests on Python 3.5.	163	if self.max_lines is not None:
	164	if self.max_lines > 1:
	165	indent = self.subsequent_indent
	166	else:
	167	indent = self.initial_indent
	168	if self._width(indent) + self._width(self.placeholder.lstrip()) > self.width:
	169	raise ValueError("placeholder too large for max width")
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	170
	171	# Arrange in reverse order so items can be efficiently popped
	172	# from a stack of chucks.
	173	chunks.reverse()
	174
	175	while chunks:
	176
	177	# Start the list of chunks that will make up the current line.
	178	# cur_len is just the length of all the chunks in cur_line.
	179	cur_line = []
	180	cur_len = 0
	181
	182	# Figure out which static string will prefix this line.
	183	if lines:
	184	indent = self.subsequent_indent
	185	else:
	186	indent = self.initial_indent
	187
	188	# Maximum width for this line.
	189	width = self.width - len(indent)
	190
	191	# First chunk on line is whitespace -- drop it, unless this
	192	# is the very beginning of the text (ie. no lines started yet).
	193	if self.drop_whitespace and chunks[-1].strip() == '' and lines:
	194	del chunks[-1]
	195
	196	while chunks:
	197	# Use _width instead of len for east asian width
5820.1.19 by INADA Naoki Add keyword parameter 'ambiguous_width' that specifies width for character	198	l = self._width(chunks[-1])
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	199
	200	# Can at least squeeze this chunk onto the current line.
	201	if cur_len + l <= width:
	202	cur_line.append(chunks.pop())
	203	cur_len += l
	204
	205	# Nope, this line is full.
	206	else:
	207	break
	208
	209	# The current line is full, and the next chunk is too big to
	210	# fit on any line (not just this one).
5820.1.19 by INADA Naoki Add keyword parameter 'ambiguous_width' that specifies width for character	211	if chunks and self._width(chunks[-1]) > width:
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	212	self._handle_long_word(chunks, cur_line, cur_len, width)
7112.1.1 by Jelmer Vernooĳ Fix utextwrap tests on Python 3.5.	213	cur_len = sum(map(len, cur_line))
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	214
	215	# If the last chunk on this line is all whitespace, drop it.
5820.1.26 by INADA Naoki Cleanup. Remove spaces in empty line and shorten line having 80 characters.	216	if self.drop_whitespace and cur_line and not cur_line[-1].strip():
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	217	del cur_line[-1]
	218
	219	# Convert current line back to a string and store it in list
	220	# of all lines (return value).
	221	if cur_line:
7112.1.1 by Jelmer Vernooĳ Fix utextwrap tests on Python 3.5.	222	if (self.max_lines is None or
	223	len(lines) + 1 < self.max_lines or
	224	(not chunks or
	225	self.drop_whitespace and
	226	len(chunks) == 1 and
	227	not chunks[0].strip()) and cur_len <= width):
	228	# Convert current line back to a string and store it in
	229	# list of all lines (return value).
	230	lines.append(indent + u''.join(cur_line))
	231	else:
	232	while cur_line:
	233	if (cur_line[-1].strip() and
	234	cur_len + self._width(self.placeholder) <= width):
	235	cur_line.append(self.placeholder)
	236	lines.append(indent + ''.join(cur_line))
	237	break
	238	cur_len -= self._width(cur_line[-1])
	239	del cur_line[-1]
	240	else:
	241	if lines:
	242	prev_line = lines[-1].rstrip()
	243	if (self._width(prev_line) + self._width(self.placeholder) <=
	244	self.width):
	245	lines[-1] = prev_line + self.placeholder
	246	break
	247	lines.append(indent + self.placeholder.lstrip())
	248	break
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	249
	250	return lines
	251
5820.1.10 by INADA Naoki utextwrap: Change a way to split between CJK characters.	252	def _split(self, text):
7012.2.1 by Jelmer Vernooĳ Always pass in unicode to utextwrap.	253	chunks = textwrap.TextWrapper._split(self, osutils.safe_unicode(text))
5820.1.10 by INADA Naoki utextwrap: Change a way to split between CJK characters.	254	cjk_split_chunks = []
	255	for chunk in chunks:
	256	prev_pos = 0
	257	for pos, char in enumerate(chunk):
5820.1.27 by INADA Naoki Fix error when fix_sentence_endings=True.	258	if self._unicode_char_width(char) == 2:
5820.1.10 by INADA Naoki utextwrap: Change a way to split between CJK characters.	259	if prev_pos < pos:
	260	cjk_split_chunks.append(chunk[prev_pos:pos])
	261	cjk_split_chunks.append(char)
	262	prev_pos = pos+1
	263	if prev_pos < len(chunk):
	264	cjk_split_chunks.append(chunk[prev_pos:])
	265	return cjk_split_chunks
	266
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	267	def wrap(self, text):
	268	# ensure text is unicode
7112.1.2 by Jelmer Vernooĳ Fix test on python2.	269	return textwrap.TextWrapper.wrap(self, osutils.safe_unicode(text))
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	270
	271	# -- Convenience interface ---------------------------------------------
	272
def wrap(text, width=None, **kwargs):
    """Wrap one paragraph of text and return the wrapped lines as a list.

    A UTextWrapper is created with 'width' and the given keyword
    arguments, and its wrap() method is applied to 'text'.  See the
    UTextWrapper and TextWrapper classes for the keyword arguments
    available to customize wrapping behaviour.
    """
    wrapper = UTextWrapper(width=width, **kwargs)
    return wrapper.wrap(text)
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	284
def fill(text, width=None, **kwargs):
    """Fill one paragraph of text and return it as a single new string.

    A UTextWrapper is created with 'width' and the given keyword
    arguments, and its fill() method is applied to 'text'.  See the
    UTextWrapper and TextWrapper classes for the keyword arguments
    available to customize wrapping behaviour.
    """
    wrapper = UTextWrapper(width=width, **kwargs)
    return wrapper.fill(text)
5820.1.1 by INADA Naoki Add utextwrap that is same to textwrap but supports double width characters in east asia.	295