41
43
gen_file_id = generate_ids.gen_file_id
43
45
# We try to use the filename if possible
44
self.assertStartsWith(gen_file_id('bar'), 'bar-')
46
self.assertStartsWith(gen_file_id('bar'), b'bar-')
46
48
# but we squash capitalization, and remove non word characters
47
self.assertStartsWith(gen_file_id('Mwoo oof\t m'), 'mwoooofm-')
49
self.assertStartsWith(gen_file_id('Mwoo oof\t m'), b'mwoooofm-')
49
51
# We also remove leading '.' characters to prevent hidden file-ids
50
self.assertStartsWith(gen_file_id('..gam.py'), 'gam.py-')
51
self.assertStartsWith(gen_file_id('..Mwoo oof\t m'), 'mwoooofm-')
52
self.assertStartsWith(gen_file_id('..gam.py'), b'gam.py-')
53
self.assertStartsWith(gen_file_id('..Mwoo oof\t m'), b'mwoooofm-')
53
55
# we remove unicode characters, and still don't end up with a hidden file id
55
self.assertStartsWith(gen_file_id(u'\xe5\xb5.txt'), 'txt-')
57
self.assertStartsWith(gen_file_id(u'\xe5\xb5.txt'), b'txt-')
57
59
# Our current method of generating unique ids adds 33 characters
58
60
# plus a serial number (log10(N) characters)
60
62
# The filename portion is restricted to be <= 20 characters, so the maximum length should now be approx < 60
62
64
# Test both case squashing and length restriction
63
fid = gen_file_id('A'*50 + '.txt')
64
self.assertStartsWith(fid, 'a'*20 + '-')
65
fid = gen_file_id('A' * 50 + '.txt')
66
self.assertStartsWith(fid, b'a' * 20 + b'-')
65
67
self.assertTrue(len(fid) < 60)
67
69
# restricting length happens after the other actions, so
68
70
# we preserve as much as possible
69
71
fid = gen_file_id('\xe5\xb5..aBcd\tefGhijKLMnop\tqrstuvwxyz')
70
self.assertStartsWith(fid, 'abcdefghijklmnopqrst-')
72
self.assertStartsWith(fid, b'abcdefghijklmnopqrst-')
71
73
self.assertTrue(len(fid) < 60)
73
75
def test_file_ids_are_ascii(self):
    """File ids for str and unicode filenames should match ascii patterns.

    The expected patterns are bytes regexes: the filename portion followed
    by a 14-digit field, a 16-character [a-z0-9] field and a serial number.
    """
    tail = br'-\d{14}-[a-z0-9]{16}-\d+'
    self.assertGenFileId(b'foo' + tail, 'foo')
    self.assertGenFileId(b'foo' + tail, u'foo')
    self.assertGenFileId(b'bar' + tail, u'bar')
    # The non-ascii character is expected to be dropped from the file id.
    self.assertGenFileId(b'br' + tail, u'b\xe5r')
80
82
def test__next_id_suffix_sets_suffix(self):
    """Calling _next_id_suffix() with no cached suffix should create one."""
    generate_ids._gen_file_id_suffix = None
    # The function under test has to run between clearing the suffix and
    # checking it; without this call the assertion below can never pass.
    generate_ids._next_id_suffix()
    self.assertIsNotNone(generate_ids._gen_file_id_suffix)
85
87
def test__next_id_suffix_increments(self):
    """Each _next_id_suffix() call should return the suffix plus the next serial.

    The module-level suffix and serial are forced to known values for the
    duration of the test and restored afterwards.
    """
    saved_suffix = generate_ids._gen_file_id_suffix
    saved_serial = generate_ids._gen_file_id_serial
    generate_ids._gen_file_id_suffix = b"foo-"
    generate_ids._gen_file_id_serial = 1
    try:
        # Starting from serial 1, successive calls should yield 2 through 10.
        for serial in range(2, 11):
            expected = ("foo-%d" % serial).encode("ascii")
            self.assertEqual(expected, generate_ids._next_id_suffix())
    finally:
        # Restore the previous state so that all future ids generated in
        # the test suite don't end in 'foo-XXX'
        generate_ids._gen_file_id_suffix = saved_suffix
        generate_ids._gen_file_id_serial = saved_serial
113
115
def assertGenRevisionId(self, regex, username, timestamp=None):
    """gen_revision_id should create a revision id matching the regex

    :param regex: bytes regular expression the whole revision id must
        match; it is anchored with ^ and $ here.
    :param username: username passed to generate_ids.gen_revision_id.
    :param timestamp: optional timestamp passed to
        generate_ids.gen_revision_id.
    """
    revision_id = generate_ids.gen_revision_id(username, timestamp)
    self.assertContainsRe(revision_id, b'^' + regex + b'$')
    # It should be a utf8 revision_id, not a unicode one
    self.assertIsInstance(revision_id, bytes)
    # gen_revision_id should always return ascii revision ids; decoding
    # raises if any non-ascii byte slipped through.
    revision_id.decode('ascii')
122
124
def test_timestamp(self):
    """passing a timestamp should cause it to be used"""
    # Without a timestamp only the shape of the date field is checked.
    self.assertGenRevisionId(
        br'user@host-\d{14}-[a-z0-9]{16}', 'user@host')
    # With a timestamp, the 14-digit field must be exactly the value
    # derived from it.
    self.assertGenRevisionId(b'user@host-20061102205056-[a-z0-9]{16}',
                             'user@host', 1162500656.688)
    self.assertGenRevisionId(br'user@host-20061102205024-[a-z0-9]{16}',
                             'user@host', 1162500624.000)
130
133
def test_gen_revision_id_email(self):
    """gen_revision_id uses email address if present"""
    regex = br'user\+joe_bar@foo-bar\.com-\d{14}-[a-z0-9]{16}'
    self.assertGenRevisionId(regex, 'user+joe_bar@foo-bar.com')
    self.assertGenRevisionId(regex, '<user+joe_bar@foo-bar.com>')
    self.assertGenRevisionId(regex, 'Joe Bar <user+joe_bar@foo-bar.com>')
    # Mixed-case addresses are expected to match the lower-case pattern.
    self.assertGenRevisionId(regex, 'Joe Bar <user+Joe_Bar@Foo-Bar.com>')
    # A non-ascii display name must not affect the generated id.
    self.assertGenRevisionId(
        regex, u'Joe B\xe5r <user+Joe_Bar@Foo-Bar.com>')
139
143
def test_gen_revision_id_user(self):
    """If there is no email, fall back to the whole username"""
    tail = br'-\d{14}-[a-z0-9]{16}'
    self.assertGenRevisionId(b'joe_bar' + tail, 'Joe Bar')
    self.assertGenRevisionId(b'joebar' + tail, 'joebar')
    self.assertGenRevisionId(b'joe_br' + tail, u'Joe B\xe5r')
    # The bracketed part contains no '@', so the expected id is built
    # from the whole username rather than from an email address.
    self.assertGenRevisionId(br'joe_br_user\+joe_bar_foo-bar.com' + tail,
                             u'Joe B\xe5r <user+Joe_Bar_Foo-Bar.com>')
148
152
def test_revision_ids_are_ascii(self):
    """gen_revision_id should always return an ascii revision id."""
    tail = br'-\d{14}-[a-z0-9]{16}'
    self.assertGenRevisionId(b'joe_bar' + tail, 'Joe Bar')
    self.assertGenRevisionId(b'joe_bar' + tail, u'Joe Bar')
    self.assertGenRevisionId(b'joe@foo' + tail, u'Joe Bar <joe@foo>')
    # We cheat a little with this one, because email-addresses shouldn't
    # contain non-ascii characters, but generate_ids should strip them
    # anyway.
    self.assertGenRevisionId(b'joe@f' + tail, u'Joe Bar <joe@f\xb6>')