33
31
The file id should be ascii, and should be an 8-bit string
35
33
file_id = generate_ids.gen_file_id(filename)
36
self.assertContainsRe(file_id, b'^' + regex + b'$')
34
self.assertContainsRe(file_id, '^'+regex+'$')
37
35
# It should be a utf8 file_id, not a unicode one
38
self.assertIsInstance(file_id, bytes)
36
self.assertIsInstance(file_id, str)
39
37
# gen_file_id should always return ascii file ids.
40
38
file_id.decode('ascii')
43
41
gen_file_id = generate_ids.gen_file_id
45
43
# We try to use the filename if possible
46
self.assertStartsWith(gen_file_id('bar'), b'bar-')
44
self.assertStartsWith(gen_file_id(b'bar'), b'bar-')
48
46
# but we squash capitalization, and remove non-word characters
49
self.assertStartsWith(gen_file_id('Mwoo oof\t m'), b'mwoooofm-')
47
self.assertStartsWith(gen_file_id(b'Mwoo oof\t m'), b'mwoooofm-')
51
49
# We also remove leading '.' characters to prevent hidden file-ids
52
self.assertStartsWith(gen_file_id('..gam.py'), b'gam.py-')
53
self.assertStartsWith(gen_file_id('..Mwoo oof\t m'), b'mwoooofm-')
50
self.assertStartsWith(gen_file_id(b'..gam.py'), b'gam.py-')
51
self.assertStartsWith(gen_file_id(b'..Mwoo oof\t m'), b'mwoooofm-')
55
53
# we remove unicode characters, and still don't end up with a
62
60
# be <= 20 characters, so the maximum length should now be approx < 60
64
62
# Test both case squashing and length restriction
65
fid = gen_file_id('A' * 50 + '.txt')
66
self.assertStartsWith(fid, b'a' * 20 + b'-')
63
fid = gen_file_id('A'*50 + '.txt')
64
self.assertStartsWith(fid, 'a'*20 + '-')
67
65
self.assertTrue(len(fid) < 60)
69
67
# restricting length happens after the other actions, so
70
68
# we preserve as much as possible
71
69
fid = gen_file_id('\xe5\xb5..aBcd\tefGhijKLMnop\tqrstuvwxyz')
72
self.assertStartsWith(fid, b'abcdefghijklmnopqrst-')
70
self.assertStartsWith(fid, 'abcdefghijklmnopqrst-')
73
71
self.assertTrue(len(fid) < 60)
75
73
def test_file_ids_are_ascii(self):
    """File ids generated from str filenames match ascii bytes patterns.

    Every expected pattern is a bytes regex: a prefix taken from the
    filename followed by ``tail`` (14 digits, 16 lowercase alphanumerics
    and a decimal number, each introduced by a dash). The patterns must be
    bytes because assertGenFileId anchors them with ``b'^'`` / ``b'$'``;
    mixing in a str pattern would raise TypeError on Python 3.
    """
    tail = br'-\d{14}-[a-z0-9]{16}-\d+'
    self.assertGenFileId(b'foo' + tail, 'foo')
    self.assertGenFileId(b'foo' + tail, u'foo')
    self.assertGenFileId(b'bar' + tail, u'bar')
    # The non-ascii character is expected to be dropped from the id
    # entirely, leaving only the ascii letters around it.
    self.assertGenFileId(b'br' + tail, u'b\xe5r')
82
80
def test__next_id_suffix_sets_suffix(self):
83
81
generate_ids._gen_file_id_suffix = None
115
113
def assertGenRevisionId(self, regex, username, timestamp=None):
    """gen_revision_id should create a revision id matching the regex

    :param regex: bytes pattern that the complete revision id must match;
        it is anchored with ``^`` and ``$`` before matching.
    :param username: forwarded to generate_ids.gen_revision_id.
    :param timestamp: forwarded to generate_ids.gen_revision_id unchanged
        (None when the caller does not supply one).
    """
    revision_id = generate_ids.gen_revision_id(username, timestamp)
    self.assertContainsRe(revision_id, b'^' + regex + b'$')
    # It should be a utf8 revision_id, not a unicode one
    self.assertIsInstance(revision_id, bytes)
    # gen_revision_id should always return ascii revision ids.
    # decode() raises UnicodeDecodeError if any non-ascii byte slipped in,
    # mirroring the equivalent check made for file ids.
    revision_id.decode('ascii')
124
122
def test_timestamp(self):
125
123
"""passing a timestamp should cause it to be used"""
126
self.assertGenRevisionId(
127
br'user@host-\d{14}-[a-z0-9]{16}', 'user@host')
124
self.assertGenRevisionId(br'user@host-\d{14}-[a-z0-9]{16}', 'user@host')
128
125
self.assertGenRevisionId(b'user@host-20061102205056-[a-z0-9]{16}',
129
126
'user@host', 1162500656.688)
130
127
self.assertGenRevisionId(br'user@host-20061102205024-[a-z0-9]{16}',
137
134
self.assertGenRevisionId(regex, '<user+joe_bar@foo-bar.com>')
138
135
self.assertGenRevisionId(regex, 'Joe Bar <user+joe_bar@foo-bar.com>')
139
136
self.assertGenRevisionId(regex, 'Joe Bar <user+Joe_Bar@Foo-Bar.com>')
140
self.assertGenRevisionId(
141
regex, u'Joe B\xe5r <user+Joe_Bar@Foo-Bar.com>')
137
self.assertGenRevisionId(regex, u'Joe B\xe5r <user+Joe_Bar@Foo-Bar.com>')
143
139
def test_gen_revision_id_user(self):
144
140
"""If there is no email, fall back to the whole username"""