1
# Copyright (C) 2006 Canonical Ltd
2
# -*- coding: utf-8 -*-
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
"""Adapter for running test cases against multiple encodings."""
20
from copy import deepcopy
22
from bzrlib.tests import TestSuite
25
# Single-character fixtures used to build the scenario strings further down.
# Each one is chosen for which legacy code pages can (or cannot) encode it.

# prefix for micro (1/1000000): the MICRO SIGN, U+00B5
_mu = u'\xb5'

# Greek capital letter omega, not to be confused with the Ohm sign,
# u'\u2126'. Though they probably look identical, cp437 can handle the
# first, but not the second.
_omega = u'\u03a9'

# smallest error possible, epsilon
# cp437 handles u'\u03b5', but not u'\u2208', the 'element of' operator
_epsilon = u'\u03b5'
38
# Non-ASCII text fixtures. Every value is written with escapes only, so the
# constants do not depend on how this source file itself is encoded.

# Personal name containing one non-ASCII character, U+00E5.
_erik = u'Erik B\xe5gfors'

# Swedish 'räksmörgås' means shrimp sandwich
_shrimp_sandwich = u'r\xe4ksm\xf6rg\xe5s'

# Arabic, probably only Unicode encodings can handle this one
_juju = u'\u062c\u0648\u062c\u0648'

# iso-8859-1 alternative for juju
_juju_alt = u'j\xfbj\xfa'

# 'Alexander' in Russian
_alexander = u'\u0410\u043b\u0435\u043a\u0441\u0430\u043d\u0434\u0440'
# The word 'test' in Russian
_russian_test = u'\u0422\u0435\u0441\u0442'

# It is a kanji sequence for nihonjin, or Japanese in English.
# '\u65e5' being sun, '\u672c' origin and '\u4eba' person, i.e.
# sun-origin-person, 'native from the land where the sun rises'. Note, I'm
# not a fluent speaker, so this is just my crude breakdown.
_nihonjin = u'\u65e5\u672c\u4eba'

# A Czech sentence that is usually used for showing how fonts look, because
# it contains most accented characters, i.e. in places where English speakers
# use 'Quick brown fox jumped over a lazy dog'. The literal translation of the
# Czech version would be something like 'Yellow horse groaned devilish codes'.
# Actually originally the last word used to be 'ódy' (odes). The 'k' was added
# as a pun when using the sentence to check whether one has properly set
# encoding.
_yellow_horse = (u'\u017dlu\u0165ou\u010dk\xfd k\u016f\u0148'
                 u' \xfap\u011bl \u010f\xe1belsk\xe9 k\xf3dy')
# First word of the sentence above.
_yellow = u'\u017dlu\u0165ou\u010dk\xfd'
# ASCII 'Some' followed by three of the accented letters used above.
_someone = u'Some\u016f\u0148\u011b'
# Tail of the first word of the sentence above.
_something = u'\u0165ou\u010dk\xfd'

# Shalom -> 'hello' or 'peace', used as a common greeting
_shalom = u'\u05e9\u05dc\u05d5\u05dd'
82
encoding_scenarios = [
83
# Permutation 1 of utf-8
84
('utf-8,1', {'committer':_erik
85
, 'message':_yellow_horse
86
, 'filename':_shrimp_sandwich
87
, 'directory':_nihonjin}),
88
# Permutation 2 of utf-8
89
('utf-8,2', {'committer':_alexander
90
, 'message':u'Testing ' + _mu
92
, 'directory':_juju}),
93
('iso-8859-1', {'committer':_erik
94
, 'message':u'Testing ' + _mu
95
, 'filename':_juju_alt
96
, 'directory':_shrimp_sandwich}),
97
('iso-8859-2', {'committer':_someone
98
, 'message':_yellow_horse
100
, 'directory':_something}),
101
('cp1251', {'committer':_alexander
102
, 'message':u'Testing ' + _mu
103
, 'filename':_russian_test
104
, 'directory':_russian_test + 'dir'}),
105
# The iso-8859-1 tests run on a default windows cp437 installation
106
# and it takes a long time to run an extra permutation of the tests
107
# But just in case we want to add this back in:
108
# ('cp437', {'committer':_erik
109
# , 'message':u'Testing ' + _mu
110
# , 'filename':'file_' + _omega
111
# , 'directory':_epsilon + '_dir'}),