1
# Copyright (C) 2006, 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Black-box tests for bzr handling non-ascii characters."""
22
from bzrlib import osutils, urlutils
23
from bzrlib.tests import TestCaseWithTransport, TestSkipped
24
from bzrlib.trace import mutter, note
27
class TestNonAscii(TestCaseWithTransport):
28
"""Test that bzr handles files/committers/etc which are non-ascii."""
31
super(TestNonAscii, self).setUp()
32
self._orig_email = os.environ.get('BZR_EMAIL', None)
33
self._orig_encoding = osutils._cached_user_encoding
35
osutils._cached_user_encoding = self.encoding
36
email = self.info['committer'] + ' <joe@foo.com>'
37
os.environ['BZR_EMAIL'] = email.encode(osutils.get_user_encoding())
41
if self._orig_email is not None:
42
os.environ['BZR_EMAIL'] = self._orig_email
44
if os.environ.get('BZR_EMAIL', None) is not None:
45
del os.environ['BZR_EMAIL']
46
osutils._cached_user_encoding = self._orig_encoding
47
super(TestNonAscii, self).tearDown()
49
def run_bzr_decode(self, args, encoding=None, fail=False, retcode=None,
51
"""Run bzr and decode the output into a particular encoding.
53
Returns a string containing the stdout output from bzr.
55
:param fail: If true, the operation is expected to fail with
59
encoding = osutils.get_user_encoding()
61
out = self.run_bzr(args, output_encoding=encoding, encoding=encoding,
62
retcode=retcode, working_dir=working_dir)[0]
63
return out.decode(encoding)
64
except UnicodeError, e:
68
# This command, run from the regular command line, will give a
69
# traceback to the user. That's not really good for a situation
70
# that can be provoked just by the interaction of their input data
71
# and locale, as some of these are. What would be better?
73
self.fail("Expected UnicodeError not raised")
75
def create_base(self):
76
fs_enc = sys.getfilesystemencoding()
77
terminal_enc = osutils.get_terminal_encoding()
78
fname = self.info['filename']
79
dir_name = self.info['directory']
80
for thing in [fname, dir_name]:
83
except UnicodeEncodeError:
84
raise TestSkipped(('Unable to represent path %r'
85
' in filesystem encoding "%s"')
88
thing.encode(terminal_enc)
89
except UnicodeEncodeError:
90
raise TestSkipped(('Unable to represent path %r'
91
' in terminal encoding "%s"'
92
' (even though it is valid in'
93
' filesystem encoding "%s")')
94
% (thing, terminal_enc, fs_enc))
96
wt = self.make_branch_and_tree('.')
97
self.build_tree_contents([('a', 'foo\n')])
101
self.build_tree_contents(
102
[('b', 'non-ascii \xFF\xFF\xFC\xFB\x00 in b\n')])
104
wt.commit(self.info['message'])
106
self.build_tree_contents([(fname, 'unicode filename\n')])
108
wt.commit(u'And a unicode file\n')
111
def test_status(self):
    """Check 'bzr status' output for a modified non-ascii filename.

    The path is expected verbatim with the default encoding, and in its
    'replace'-encoded form when the output encoding is ascii.
    """
    filename = self.info['filename']
    self.build_tree_contents([(filename, 'changed something\n')])

    # Default encoding: the filename should come back unchanged.
    output = self.run_bzr_decode('status')
    self.assertEqual(u'modified:\n %s\n' % (filename,), output)

    # ascii: the command should still succeed, with the characters that
    # cannot be represented substituted by the 'replace' error handler.
    output = self.run_bzr_decode('status', encoding='ascii')
    replaced = filename.encode('ascii', 'replace')
    expected = u'modified:\n %s\n' % (replaced,)
    self.assertEqual(expected, output)
123
# bzr cat shouldn't change the contents
124
# using run_bzr since that doesn't decode
125
txt = self.run_bzr('cat b')[0]
126
self.assertEqual('non-ascii \xFF\xFF\xFC\xFB\x00 in b\n', txt)
128
txt = self.run_bzr(['cat', self.info['filename']])[0]
129
self.assertEqual('unicode filename\n', txt)
131
def test_cat_revision(self):
    """Check that 'bzr cat-revision' output contains non-ascii metadata.

    Revision 1 is searched for the committer name and revision 2 for the
    commit message, both taken from self.info.
    """
    lookups = (
        ('cat-revision -r 1', self.info['committer']),
        ('cat-revision -r 2', self.info['message']),
    )
    for command, needle in lookups:
        txt = self.run_bzr_decode(command)
        self.failUnless(needle in txt,
                        'failed to find %r in %r' % (needle, txt))
141
def test_mkdir(self):
142
txt = self.run_bzr_decode(['mkdir', self.info['directory']])
143
self.assertEqual(u'added %s\n' % self.info['directory'], txt)
145
# The text should be garbled, but the command should succeed
146
txt = self.run_bzr_decode(['mkdir', self.info['directory'] + '2'],
148
expected = u'added %s2\n' % (self.info['directory'],)
149
expected = expected.encode('ascii', 'replace')
150
self.assertEqual(expected, txt)
152
def test_relpath(self):
    """Check 'bzr relpath' with a non-ascii filename.

    With the default encoding the filename is echoed back followed by a
    newline; with ascii the command is run with fail=True, i.e. it is
    expected not to succeed.
    """
    filename = self.info['filename']

    output = self.run_bzr_decode(['relpath', filename])
    self.assertEqual(filename + '\n', output)

    self.run_bzr_decode(['relpath', filename], encoding='ascii', fail=True)
159
def test_inventory(self):
160
txt = self.run_bzr_decode('inventory')
161
self.assertEqual(['a', 'b', self.info['filename']],
164
# inventory should fail if unable to encode
165
self.run_bzr_decode('inventory', encoding='ascii', fail=True)
167
# We don't really care about the ids themselves,
168
# but the command shouldn't fail
169
txt = self.run_bzr_decode('inventory --show-ids')
171
def test_revno(self):
    """Check that 'bzr revno' prints '3' regardless of the encoding."""
    # There is little to test here: the expected output is the same
    # plain number for the default encoding and for ascii.
    for options in ({}, {'encoding': 'ascii'}):
        self.assertEqual('3\n', self.run_bzr_decode('revno', **options))
177
def test_revision_info(self):
    """Run 'bzr revision-info -r 1' with the default and ascii encodings.

    Only successful completion is checked; the output is not inspected.
    """
    command = 'revision-info -r 1'
    self.run_bzr_decode(command)

    # TODO: jam 20060105 If we support revisions with non-ascii characters,
    #       this should be strict and fail.
    self.run_bzr_decode(command, encoding='ascii')
185
fname1 = self.info['filename']
186
fname2 = self.info['filename'] + '2'
187
dirname = self.info['directory']
189
# fname1 already exists
190
self.run_bzr_decode(['mv', 'a', fname1], fail=True)
192
txt = self.run_bzr_decode(['mv', 'a', fname2])
193
self.assertEqual(u'a => %s\n' % fname2, txt)
194
self.failIfExists('a')
195
self.failUnlessExists(fname2)
197
# After 'mv' we need to re-open the working tree
198
self.wt = self.wt.bzrdir.open_workingtree()
199
self.wt.commit('renamed to non-ascii')
203
txt = self.run_bzr_decode(['mv', fname1, fname2, dirname])
204
self.assertEqual([u'%s => %s/%s' % (fname1, dirname, fname1),
205
u'%s => %s/%s' % (fname2, dirname, fname2)]
208
# The rename should still succeed
209
newpath = u'%s/%s' % (dirname, fname2)
210
txt = self.run_bzr_decode(['mv', newpath, 'a'], encoding='ascii')
211
self.failUnlessExists('a')
212
self.assertEqual(newpath.encode('ascii', 'replace') + ' => a\n', txt)
214
def test_branch(self):
215
# We should be able to branch into a directory that
216
# has a unicode name, even if we can't display the name
217
self.run_bzr_decode(['branch', u'.', self.info['directory']])
218
self.run_bzr_decode(['branch', u'.', self.info['directory'] + '2'],
222
# Make sure we can pull from paths that can't be encoded
223
dirname1 = self.info['directory']
224
dirname2 = self.info['directory'] + '2'
225
url1 = urlutils.local_path_to_url(dirname1)
226
url2 = urlutils.local_path_to_url(dirname2)
227
out_bzrdir = self.wt.bzrdir.sprout(url1)
228
out_bzrdir.sprout(url2)
230
self.build_tree_contents(
231
[(osutils.pathjoin(dirname1, "a"), 'different text\n')])
232
self.wt.commit('mod a')
234
txt = self.run_bzr_decode('pull', working_dir=dirname2)
236
expected = osutils.pathjoin(osutils.getcwd(), dirname1)
237
self.assertEqual(u'Using saved parent location: %s/\n'
238
'No revisions to pull.\n' % (expected,), txt)
240
self.build_tree_contents(
241
[(osutils.pathjoin(dirname1, 'a'), 'and yet more\n')])
242
self.wt.commit(u'modifying a by ' + self.info['committer'])
244
# We should be able to pull, even if our encoding is bad
245
self.run_bzr_decode('pull --verbose', encoding='ascii',
246
working_dir=dirname2)
249
# TODO: Test push to an SFTP location
250
# Make sure we can pull from paths that can't be encoded
251
# TODO: jam 20060427 For drastically improving performance, we probably
252
# could create a local repository, so it wouldn't have to copy
253
# the files around as much.
255
dirname = self.info['directory']
256
self.run_bzr_decode(['push', dirname])
258
self.build_tree_contents([('a', 'adding more text\n')])
259
self.wt.commit('added some stuff')
261
# TODO: check the output text is properly encoded
262
self.run_bzr_decode('push')
264
self.build_tree_contents(
265
[('a', 'and a bit more: \n%s\n' % (dirname.encode('utf-8'),))])
267
self.wt.commit('Added some ' + dirname)
268
self.run_bzr_decode('push --verbose', encoding='ascii')
270
self.run_bzr_decode(['push', '--verbose', dirname + '2'])
272
self.run_bzr_decode(['push', '--verbose', dirname + '3'],
275
self.run_bzr_decode(['push', '--verbose', '--create-prefix',
276
dirname + '4/' + dirname + '5'])
277
self.run_bzr_decode(['push', '--verbose', '--create-prefix',
278
dirname + '6/' + dirname + '7'], encoding='ascii')
280
def test_renames(self):
    """Check 'bzr renames' after renaming 'a' to a non-ascii name.

    The rename is listed with the default encoding; with ascii the command
    is run with fail=True, i.e. it is expected not to succeed.
    """
    new_name = self.info['filename'] + '2'
    self.wt.rename_one('a', new_name)

    output = self.run_bzr_decode('renames')
    expected = u'a => %s\n' % new_name
    self.assertEqual(expected, output)

    self.run_bzr_decode('renames', fail=True, encoding='ascii')
288
def test_remove(self):
    """Run 'bzr remove' on the non-ascii file with an ascii encoding.

    Only successful completion is checked; the output is not inspected.
    """
    self.run_bzr_decode(['remove', self.info['filename']], encoding='ascii')
292
def test_remove_verbose(self):
293
fname = self.info['filename']
294
txt = self.run_bzr_decode(['remove', '--verbose', fname],
297
def test_file_id(self):
    """Run 'bzr file-id' on the non-ascii file in both encodings.

    Only successful completion is checked; the output is not inspected.
    """
    path = self.info['filename']
    self.run_bzr_decode(['file-id', path])

    # TODO: jam 20060106 We don't support non-ascii file ids yet,
    #       so there is nothing which would fail in ascii encoding
    #       This *should* be retcode=3
    self.run_bzr_decode(['file-id', path], encoding='ascii')
306
def test_file_path(self):
307
# Create a directory structure
308
fname = self.info['filename']
309
dirname = self.info['directory']
310
self.build_tree_contents([
312
(osutils.pathjoin('base', '%s/' % (dirname,)), )])
314
self.wt.add('base/'+dirname)
315
path = osutils.pathjoin('base', dirname, fname)
316
self.wt.rename_one(fname, path)
317
self.wt.commit('moving things around')
319
txt = self.run_bzr_decode(['file-path', path])
321
# TODO: jam 20060106 We don't support non-ascii file ids yet,
322
# so there is nothing which would fail in ascii encoding
323
# This *should* be retcode=3
324
txt = self.run_bzr_decode(['file-path', path], encoding='ascii')
326
def test_revision_history(self):
    """Run 'bzr revision-history'; only successful completion is checked."""
    # TODO: jam 20060106 We don't support non-ascii revision ids yet,
    #       so there is nothing which would fail in ascii encoding
    self.run_bzr_decode('revision-history')
331
def test_ancestry(self):
    """Run 'bzr ancestry'; only successful completion is checked."""
    # TODO: jam 20060106 We don't support non-ascii revision ids yet,
    #       so there is nothing which would fail in ascii encoding
    self.run_bzr_decode('ancestry')
337
# TODO: jam 20060106 diff is a difficult one to test, because it
338
# shouldn't encode the file contents, but it needs some sort
339
# of encoding for the paths, etc which are displayed.
340
self.build_tree_contents([(self.info['filename'], 'newline\n')])
341
txt = self.run_bzr('diff', retcode=1)[0]
343
def test_deleted(self):
344
fname = self.info['filename']
346
self.wt.remove(fname)
348
txt = self.run_bzr_decode('deleted')
349
self.assertEqual(fname+'\n', txt)
351
txt = self.run_bzr_decode('deleted --show-ids')
352
self.failUnless(txt.startswith(fname))
354
# Deleted should fail if cannot decode
355
# Because it is giving the exact paths
356
# which might be used by a front end
357
self.run_bzr_decode('deleted', encoding='ascii', fail=True)
359
def test_modified(self):
    """Check 'bzr modified' after changing the non-ascii file.

    The path is expected wrapped in double quotes with the default
    encoding; with ascii the command is run with fail=True, i.e. it is
    expected not to succeed.
    """
    path = self.info['filename']
    self.build_tree_contents([(path, 'modified\n')])

    output = self.run_bzr_decode('modified')
    quoted = '"%s"\n' % (path,)
    self.assertEqual(quoted, output)

    self.run_bzr_decode('modified', encoding='ascii', fail=True)
368
def test_added(self):
369
fname = self.info['filename'] + '2'
370
self.build_tree_contents([(fname, 'added\n')])
373
txt = self.run_bzr_decode('added')
374
self.assertEqual('"'+fname+'"'+'\n', txt)
376
self.run_bzr_decode('added', encoding='ascii', fail=True)
379
dirname = self.info['directory']
380
url = urlutils.local_path_to_url(dirname)
381
self.run_bzr_decode('root')
383
self.wt.bzrdir.sprout(url)
385
txt = self.run_bzr_decode('root', working_dir=dirname)
386
self.failUnless(txt.endswith(dirname+'\n'))
388
txt = self.run_bzr_decode('root', encoding='ascii', fail=True,
392
fname = self.info['filename']
394
txt = self.run_bzr_decode('log')
395
self.assertNotEqual(-1, txt.find(self.info['committer']))
396
self.assertNotEqual(-1, txt.find(self.info['message']))
398
txt = self.run_bzr_decode('log --verbose')
399
self.assertNotEqual(-1, txt.find(fname))
401
# Make sure log doesn't fail even if we can't write out
402
txt = self.run_bzr_decode('log --verbose', encoding='ascii')
403
self.assertEqual(-1, txt.find(fname))
404
self.assertNotEqual(-1, txt.find(fname.encode('ascii', 'replace')))
406
def test_touching_revisions(self):
407
fname = self.info['filename']
408
txt = self.run_bzr_decode(['touching-revisions', fname])
409
self.assertEqual(u' 3 added %s\n' % (fname,), txt)
411
fname2 = self.info['filename'] + '2'
412
self.wt.rename_one(fname, fname2)
413
self.wt.commit(u'Renamed %s => %s' % (fname, fname2))
415
txt = self.run_bzr_decode(['touching-revisions', fname2])
416
expected_txt = (u' 3 added %s\n'
417
u' 4 renamed %s => %s\n'
418
% (fname, fname, fname2))
419
self.assertEqual(expected_txt, txt)
421
self.run_bzr_decode(['touching-revisions', fname2], encoding='ascii',
425
txt = self.run_bzr_decode('ls')
426
self.assertEqual(sorted(['a', 'b', self.info['filename']]),
427
sorted(txt.splitlines()))
428
txt = self.run_bzr_decode('ls --null')
429
self.assertEqual(sorted(['', 'a', 'b', self.info['filename']]),
430
sorted(txt.split('\0')))
432
txt = self.run_bzr_decode('ls', encoding='ascii', fail=True)
433
txt = self.run_bzr_decode('ls --null', encoding='ascii', fail=True)
435
def test_unknowns(self):
    """Check 'bzr unknowns' for a new, unversioned non-ascii file.

    The path is expected wrapped in double quotes with the default
    encoding; with ascii the command is run with fail=True, i.e. it is
    expected not to succeed.
    """
    unknown_name = self.info['filename'] + '2'
    self.build_tree_contents([(unknown_name, 'unknown\n')])

    # TODO: jam 20060112 bzr unknowns is the only one which
    #       quotes paths do we really want it to?
    #       awilkins 20080521 added and modified do it now as well
    output = self.run_bzr_decode('unknowns')
    expected = u'"%s"\n' % (unknown_name,)
    self.assertEqual(expected, output)

    self.run_bzr_decode('unknowns', encoding='ascii', fail=True)
447
def test_ignore(self):
448
fname2 = self.info['filename'] + '2.txt'
449
self.build_tree_contents([(fname2, 'ignored\n')])
451
def check_unknowns(expected):
452
self.assertEqual(expected, list(self.wt.unknowns()))
454
check_unknowns([fname2])
456
self.run_bzr_decode(['ignore', './' + fname2])
459
fname3 = self.info['filename'] + '3.txt'
460
self.build_tree_contents([(fname3, 'unknown 3\n')])
461
check_unknowns([fname3])
463
# Ignore should not care what the encoding is
464
# (right now it doesn't print anything)
465
self.run_bzr_decode(['ignore', fname3], encoding='ascii')
468
# Now try a wildcard match
469
fname4 = self.info['filename'] + '4.txt'
470
self.build_tree_contents([(fname4, 'unknown 4\n')])
471
self.run_bzr_decode('ignore *.txt')
474
# and a different wildcard that matches everything
475
os.remove('.bzrignore')
476
self.run_bzr_decode(['ignore', self.info['filename'] + '*'])
479
def test_missing(self):
    """Check 'bzr missing' against an empty reference tree.

    With the default encoding the output must contain the committer and the
    commit message. With ascii the command must still succeed, showing the
    message in its 'replace'-encoded form instead of the original text.
    """
    # An empty tree serves as the reference to compare against.
    self.make_branch_and_tree('empty-tree')

    message = self.info['message']
    command = 'missing empty-tree'

    output = self.run_bzr_decode(command)
    self.assertNotEqual(-1, output.find(self.info['committer']))
    self.assertNotEqual(-1, output.find(message))

    # Make sure missing doesn't fail even if we can't write out
    output = self.run_bzr_decode(command, encoding='ascii')
    self.assertEqual(-1, output.find(message))
    replaced = message.encode('ascii', 'replace')
    self.assertNotEqual(-1, output.find(replaced))
495
self.run_bzr_decode(['branch', u'.', self.info['directory']])
496
self.run_bzr_decode(['info', self.info['directory']])
497
self.run_bzr_decode(['info', self.info['directory']],
500
def test_ignored(self):
    """Check 'bzr ignored' for a non-ascii file that was ignored by name.

    The expected line is the name left-justified to 50 columns followed by
    the same name again; under ascii both appear 'replace'-encoded.
    """
    line_format = '%-50s %s\n'
    name = self.info['filename'] + '1.txt'
    self.build_tree_contents([(name, 'ignored\n')])
    self.run_bzr(['ignore', name])

    output = self.run_bzr_decode(['ignored'])
    self.assertEqual(output, line_format % (name, name))

    output = self.run_bzr_decode(['ignored'], encoding='ascii')
    replaced = name.encode('ascii', 'replace')
    self.assertEqual(output, line_format % (replaced, replaced))