19
19
"""A collection of function for handling URL operations."""
22
from posixpath import split as _posix_split, normpath as _posix_normpath
25
from bzrlib.lazy_import import lazy_import
26
lazy_import(globals(), """
27
from posixpath import split as _posix_split, normpath as _posix_normpath
27
import bzrlib.errors as errors
37
31
def basename(url, exclude_trailing_slash=True):
117
111
join('http://foo', 'bar') => 'http://foo/bar'
118
112
join('http://foo', 'bar', '../baz') => 'http://foo/baz'
122
match = _url_scheme_re.match(base)
114
m = _url_scheme_re.match(base)
125
scheme = match.group('scheme')
126
path = match.group('path').split('/')
117
scheme = m.group('scheme')
118
path = m.group('path').split('/')
127
119
if path[-1:] == ['']:
128
120
# Strip off a trailing slash
129
121
# This helps both when we are at the root, and when
133
125
path = base.split('/')
135
if scheme is not None and len(path) >= 1:
137
# the path should be represented as an abs path.
138
# we know this must be absolute because of the presence of a URL scheme.
140
path = [''] + path[1:]
142
# create an empty host, but don't alter the path - this might be a
143
# relative url fragment.
148
match = _url_scheme_re.match(arg)
128
m = _url_scheme_re.match(arg)
151
scheme = match.group('scheme')
131
scheme = m.group('scheme')
152
132
# this skips .. normalisation, making http://host/../../..
153
133
# be rather strange.
154
path = match.group('path').split('/')
155
# set the host and path according to new absolute URL, discarding
156
# any previous values.
157
# XXX: duplicates mess from earlier in this function. This URL
158
# manipulation code needs some cleaning up.
159
if scheme is not None and len(path) >= 1:
162
# url scheme implies absolute path.
165
# no url scheme we take the path as is.
134
path = m.group('path').split('/')
168
path = '/'.join(path)
169
path = joinpath(path, arg)
170
path = path.split('/')
171
if remove_root and path[0:1] == ['']:
174
# Remove the leading slash from the path, so long as it isn't also the
175
# trailing slash, which we want to keep if present.
176
if path and path[0] == '' and len(path) > 1:
136
for chunk in arg.split('/'):
141
# Don't pop off the host portion
144
raise errors.InvalidURLJoin('Cannot go above root',
180
149
if scheme is None:
181
150
return '/'.join(path)
182
151
return scheme + '://' + '/'.join(path)
185
def joinpath(base, *args):
186
"""Join URL path segments to a URL path segment.
188
This is somewhat like osutils.joinpath, but intended for URLs.
190
XXX: this duplicates some normalisation logic, and also duplicates a lot of
191
path handling logic that already exists in some Transport implementations.
192
We really should try to have exactly one place in the code base responsible
193
for combining paths of URLs.
195
path = base.split('/')
196
if len(path) > 1 and path[-1] == '':
197
#If the path ends in a trailing /, remove it.
200
if arg.startswith('/'):
202
for chunk in arg.split('/'):
207
raise errors.InvalidURLJoin('Cannot go above root',
215
return '/'.join(path)
218
154
# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
219
155
def _posix_local_path_from_url(url):
220
156
"""Convert a url like file:///path/to/foo into /path/to/foo"""
232
168
# importing directly from posixpath allows us to test this
233
169
# on non-posix platforms
234
170
return 'file://' + escape(_posix_normpath(
235
osutils._posix_abspath(path)))
171
bzrlib.osutils._posix_abspath(path)))
238
174
def _win32_local_path_from_url(url):
239
175
"""Convert a url like file:///C:/path/to/foo into C:/path/to/foo"""
240
if not url.startswith('file://'):
241
raise errors.InvalidURL(url, 'local urls must start with file:///, '
242
'UNC path urls must start with file://')
176
if not url.startswith('file:///'):
177
raise errors.InvalidURL(url, 'local urls must start with file:///')
243
178
# We strip off all 3 slashes
244
win32_url = url[len('file:'):]
245
# check for UNC path: //HOST/path
246
if not win32_url.startswith('///'):
247
if (win32_url[2] == '/'
248
or win32_url[3] in '|:'):
249
raise errors.InvalidURL(url, 'Win32 UNC path urls'
250
' have form file://HOST/path')
251
return unescape(win32_url)
252
# usual local path with drive letter
253
if (win32_url[3] not in ('abcdefghijklmnopqrstuvwxyz'
179
win32_url = url[len('file:///'):]
180
if (win32_url[0] not in ('abcdefghijklmnopqrstuvwxyz'
254
181
'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
255
or win32_url[4] not in '|:'
256
or win32_url[5] != '/'):
182
or win32_url[1] not in '|:'
183
or win32_url[2] != '/'):
257
184
raise errors.InvalidURL(url, 'Win32 file urls start with'
258
185
' file:///x:/, where x is a valid drive letter')
259
return win32_url[3].upper() + u':' + unescape(win32_url[5:])
186
return win32_url[0].upper() + u':' + unescape(win32_url[2:])
262
189
def _win32_local_path_to_url(path):
270
197
# which actually strips trailing space characters.
271
198
# The worst part is that under linux ntpath.abspath has different
272
199
# semantics, since 'nt' is not an available module.
273
win32_path = osutils._win32_abspath(path)
274
# check for UNC path \\HOST\path
275
if win32_path.startswith('//'):
276
return 'file:' + escape(win32_path)
200
win32_path = bzrlib.osutils._nt_normpath(
201
bzrlib.osutils._win32_abspath(path)).replace('\\', '/')
277
202
return 'file:///' + win32_path[0].upper() + ':' + escape(win32_path[2:])
328
253
if path[i] not in _url_safe_characters:
329
254
chars = path[i].encode('utf-8')
330
255
path[i] = ''.join(['%%%02X' % ord(c) for c in path[i].encode('utf-8')])
331
return str(scheme + '://' + ''.join(path))
256
return scheme + '://' + ''.join(path)
334
259
def relative_url(base, other):
515
440
_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)
517
442
# These characters should not be escaped
518
_url_safe_characters = set(
519
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
520
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
521
"0123456789" # Numbers
522
"_.-!~*'()" # Unreserved characters
523
"/;?:@&=+$," # Reserved characters
524
"%#" # Extra reserved characters
443
_url_safe_characters = set('abcdefghijklmnopqrstuvwxyz'
444
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
527
449
def unescape_for_display(url, encoding):
528
450
"""Decode what you can for a URL, so that we get a nice looking path.