99
88
return split(url, exclude_trailing_slash=exclude_trailing_slash)[0]
103
quote_from_bytes = urlparse.quote_from_bytes
104
quote = urlparse.quote
105
unquote_to_bytes = urlparse.unquote_to_bytes
107
# Private copies of quote and unquote, copied from Python's urllib module
108
# because urllib unconditionally imports socket, which imports ssl.
110
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
111
'abcdefghijklmnopqrstuvwxyz'
114
for i, c in zip(range(256), ''.join(map(chr, range(256)))):
115
_safe_map[c] = c if (
116
i < 128 and c in always_safe) else '%{0:02X}'.format(i)
119
def quote_from_bytes(s, safe='/'):
120
"""quote('abc def') -> 'abc%20def'
122
Each part of a URL, e.g. the path info, the query, etc., has a
123
different set of reserved characters that must be quoted.
125
RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
126
the following reserved characters.
128
reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
131
Each of these characters is reserved in some component of a URL,
132
but not necessarily in all of them.
134
By default, the quote function is intended for quoting the path
135
section of a URL. Thus, it will not encode '/'. This character
136
is reserved, but in typical usage the quote function is being
137
called on a path where the existing slash characters are used as
143
raise TypeError('None object cannot be quoted')
145
cachekey = (safe, always_safe)
147
(quoter, safe) = _safe_quoters[cachekey]
149
safe_map = _safe_map.copy()
150
safe_map.update([(c, c) for c in safe])
151
quoter = safe_map.__getitem__
152
safe = always_safe + safe
153
_safe_quoters[cachekey] = (quoter, safe)
154
if not s.rstrip(safe):
156
return ''.join(map(quoter, s))
158
quote = quote_from_bytes
159
unquote_to_bytes = urlparse.unquote
91
quote_from_bytes = urlparse.quote_from_bytes
92
quote = urlparse.quote
93
unquote_to_bytes = urlparse.unquote_to_bytes
162
94
unquote = urlparse.unquote
97
def escape(relpath, safe='/~'):
    """Escape relpath to be a valid url.

    :param relpath: The relative path to percent-encode.
    :param safe: Characters that must be left unescaped in addition to the
        ones ``quote`` never escapes. Defaults to ``'/~'`` so that path
        separators and home-directory markers survive unchanged.
    :return: The percent-encoded form of ``relpath``.
    """
    # Pass the caller's ``safe`` set through; a hard-coded '/~' here would
    # silently ignore the parameter.
    return quote(relpath, safe=safe)
172
102
def file_relpath(base, path):
281
211
# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
282
212
def _posix_local_path_from_url(url):
283
213
"""Convert a url like file:///path/to/foo into /path/to/foo"""
284
url = split_segment_parameters_raw(url)[0]
214
url = strip_segment_parameters(url)
285
215
file_localhost_prefix = 'file://localhost/'
286
216
if url.startswith(file_localhost_prefix):
287
217
path = url[len(file_localhost_prefix) - 1:]
309
239
if not url.startswith('file://'):
310
240
raise InvalidURL(url, 'local urls must start with file:///, '
311
241
'UNC path urls must start with file://')
312
url = split_segment_parameters_raw(url)[0]
242
url = strip_segment_parameters(url)
313
243
# We strip off all 3 slashes
314
244
win32_url = url[len('file:'):]
315
245
# check for UNC path: //HOST/path
561
491
(base_url, subsegments) = split_segment_parameters_raw(url)
563
493
for subsegment in subsegments:
564
(key, value) = subsegment.split("=", 1)
495
(key, value) = subsegment.split("=", 1)
497
raise InvalidURL(url, "missing = in subsegment")
565
498
if not isinstance(key, str):
566
499
raise TypeError(key)
567
500
if not isinstance(value, str):
570
503
return (base_url, parameters)
506
def strip_segment_parameters(url):
507
"""Strip the segment parameters from a URL.
509
:param url: A relative or absolute URL
512
base_url, subsegments = split_segment_parameters_raw(url)
573
516
def join_segment_parameters_raw(base, *subsegments):
574
517
"""Create a new URL by adding subsegments to an existing one.
677
620
# try to encode the UNICODE => ASCII, and then decode
681
if isinstance(url, text_type):
684
except UnicodeError as e:
686
url, 'URL was not a plain ASCII url: %s' % (e,))
687
return urlparse.unquote(url)
689
if isinstance(url, text_type):
691
url = url.encode("ascii")
692
except UnicodeError as e:
694
url, 'URL was not a plain ASCII url: %s' % (e,))
695
unquoted = unquote(url)
623
if isinstance(url, str):
697
unicode_path = unquoted.decode('utf-8')
698
626
except UnicodeError as e:
699
627
raise InvalidURL(
700
url, 'Unable to encode the URL as utf-8: %s' % (e,))
628
url, 'URL was not a plain ASCII url: %s' % (e,))
629
return urlparse.unquote(url)
704
632
# These are characters that if escaped, should stay that way
706
634
_no_decode_ords = [ord(c) for c in _no_decode_chars]
# Both the lower- and upper-case hex spelling of every protected character.
_no_decode_hex = (['%02x' % o for o in _no_decode_ords]
                  + ['%02X' % o for o in _no_decode_ords])
# Two-digit hex string (either case) -> the single byte it encodes.
_hex_display_map = {fmt % o: bytes([o])
                    for fmt in ('%02x', '%02X')
                    for o in range(256)}
# These entries get mapped to themselves
_hex_display_map.update((hex_pair, b'%' + hex_pair.encode('ascii'))
                        for hex_pair in _no_decode_hex)
751
679
escaped_chunks[j] = _hex_display_map[item[:2]]
753
681
# Put back the percent symbol
754
escaped_chunks[j] = b'%' + \
755
(item[:2].encode('utf-8') if PY3 else item[:2])
682
escaped_chunks[j] = b'%' + (item[:2].encode('utf-8'))
756
683
except UnicodeDecodeError:
757
escaped_chunks[j] = unichr(int(item[:2], 16)).encode('utf-8')
758
escaped_chunks[j] += (item[2:].encode('utf-8') if PY3 else item[2:])
684
escaped_chunks[j] = chr(int(item[:2], 16)).encode('utf-8')
685
escaped_chunks[j] += (item[2:].encode('utf-8'))
759
686
unescaped = b''.join(escaped_chunks)
761
688
decoded = unescaped.decode('utf-8')
818
745
is used without a path, e.g. c:foo-bar => foo-bar.
819
746
If no /, path separator or : is found, the from_location is returned.
821
from_location, unused_params = split_segment_parameters(from_location)
748
from_location = strip_segment_parameters(from_location)
822
749
if from_location.find("/") >= 0 or from_location.find(os.sep) >= 0:
823
750
return os.path.basename(from_location.rstrip("/\\"))