99
96
return split(url, exclude_trailing_slash=exclude_trailing_slash)[0]
103
quote_from_bytes = urlparse.quote_from_bytes
104
quote = urlparse.quote
105
unquote_to_bytes = urlparse.unquote_to_bytes
107
# Private copies of quote and unquote, copied from Python's urllib module
108
# because urllib unconditionally imports socket, which imports ssl.
110
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
111
'abcdefghijklmnopqrstuvwxyz'
114
for i, c in zip(range(256), ''.join(map(chr, range(256)))):
115
_safe_map[c] = c if (
116
i < 128 and c in always_safe) else '%{0:02X}'.format(i)
119
def quote_from_bytes(s, safe='/'):
120
"""quote('abc def') -> 'abc%20def'
122
Each part of a URL, e.g. the path info, the query, etc., has a
123
different set of reserved characters that must be quoted.
125
RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
126
the following reserved characters.
128
reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
131
Each of these characters is reserved in some component of a URL,
132
but not necessarily in all of them.
134
By default, the quote function is intended for quoting the path
135
section of a URL. Thus, it will not encode '/'. This character
136
is reserved, but in typical usage the quote function is being
137
called on a path where the existing slash characters are used as
143
raise TypeError('None object cannot be quoted')
145
cachekey = (safe, always_safe)
147
(quoter, safe) = _safe_quoters[cachekey]
149
safe_map = _safe_map.copy()
150
safe_map.update([(c, c) for c in safe])
151
quoter = safe_map.__getitem__
152
safe = always_safe + safe
153
_safe_quoters[cachekey] = (quoter, safe)
154
if not s.rstrip(safe):
156
return ''.join(map(quoter, s))
158
quote = quote_from_bytes
159
unquote_to_bytes = urlparse.unquote
99
# Private copies of quote and unquote, copied from Python's
100
# urllib module because urllib unconditionally imports socket, which imports
103
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
104
'abcdefghijklmnopqrstuvwxyz'
107
for i, c in zip(range(256), ''.join(map(chr, range(256)))):
108
_safe_map[c] = c if (i < 128 and c in always_safe) else '%{0:02X}'.format(i)
112
def quote(s, safe='/'):
113
"""quote('abc def') -> 'abc%20def'
115
Each part of a URL, e.g. the path info, the query, etc., has a
116
different set of reserved characters that must be quoted.
118
RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
119
the following reserved characters.
121
reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
124
Each of these characters is reserved in some component of a URL,
125
but not necessarily in all of them.
127
By default, the quote function is intended for quoting the path
128
section of a URL. Thus, it will not encode '/'. This character
129
is reserved, but in typical usage the quote function is being
130
called on a path where the existing slash characters are used as
136
raise TypeError('None object cannot be quoted')
138
cachekey = (safe, always_safe)
140
(quoter, safe) = _safe_quoters[cachekey]
142
safe_map = _safe_map.copy()
143
safe_map.update([(c, c) for c in safe])
144
quoter = safe_map.__getitem__
145
safe = always_safe + safe
146
_safe_quoters[cachekey] = (quoter, safe)
147
if not s.rstrip(safe):
149
return ''.join(map(quoter, s))
162
152
unquote = urlparse.unquote
165
def escape(relpath, safe='/~'):
166
156
"""Escape relpath to be a valid url."""
167
157
if not isinstance(relpath, str) and sys.version_info[0] == 2:
168
# GZ 2019-06-16: Should use _fs_enc instead here really?
169
158
relpath = relpath.encode('utf-8')
170
return quote(relpath, safe=safe)
159
return quote(relpath, safe='/~')
173
162
def file_relpath(base, path):
309
298
"""Convert a url like file:///C:/path/to/foo into C:/path/to/foo"""
310
299
if not url.startswith('file://'):
311
300
raise InvalidURL(url, 'local urls must start with file:///, '
312
'UNC path urls must start with file://')
313
url = strip_segment_parameters(url)
301
'UNC path urls must start with file://')
302
url = split_segment_parameters_raw(url)[0]
314
303
# Drop only the 'file:' scheme prefix here; the slashes are kept so the UNC check below can inspect them
315
304
win32_url = url[len('file:'):]
316
305
# check for UNC path: //HOST/path
317
306
if not win32_url.startswith('///'):
318
307
if (win32_url[2] == '/'
319
or win32_url[3] in '|:'):
308
or win32_url[3] in '|:'):
320
309
raise InvalidURL(url, 'Win32 UNC path urls'
321
' have form file://HOST/path')
310
' have form file://HOST/path')
322
311
return unescape(win32_url)
324
313
# allow empty paths so we can serve all roots
544
534
# GZ 2011-11-18: Dodgy removing the terminal slash like this, function
545
535
# operates on urls not url+segments, and Transport classes
546
# should not be blindly adding slashes in the first place.
536
# should not be blindly adding slashes in the first place.
547
537
lurl = strip_trailing_slash(url)
548
538
# Segments begin at first comma after last forward slash, if one exists
549
segment_start = lurl.find(",", lurl.rfind("/") + 1)
539
segment_start = lurl.find(",", lurl.rfind("/")+1)
550
540
if segment_start == -1:
552
return (lurl[:segment_start],
553
[str(s) for s in lurl[segment_start + 1:].split(",")])
542
return (lurl[:segment_start], [str(s) for s in lurl[segment_start+1:].split(",")])
556
545
def split_segment_parameters(url):
690
666
# plain ASCII strings, or the final .decode will
691
667
# try to encode the UNICODE => ASCII, and then decode
669
if isinstance(url, text_type):
671
url = url.encode("ascii")
672
except UnicodeError as e:
673
raise InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))
695
if isinstance(url, text_type):
698
except UnicodeError as e:
700
url, 'URL was not a plain ASCII url: %s' % (e,))
701
return urlparse.unquote(url)
675
unquoted = urlparse.unquote_to_bytes(url)
703
if isinstance(url, text_type):
705
url = url.encode("ascii")
706
except UnicodeError as e:
708
url, 'URL was not a plain ASCII url: %s' % (e,))
709
677
unquoted = unquote(url)
711
unicode_path = unquoted.decode('utf-8')
712
except UnicodeError as e:
714
url, 'Unable to encode the URL as utf-8: %s' % (e,))
679
unicode_path = unquoted.decode('utf-8')
680
except UnicodeError as e:
681
raise InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,))
718
685
# These are characters that if escaped, should stay that way
719
686
_no_decode_chars = ';/?:@&=+$,#'
720
687
_no_decode_ords = [ord(c) for c in _no_decode_chars]
721
688
_no_decode_hex = (['%02x' % o for o in _no_decode_ords]
722
+ ['%02X' % o for o in _no_decode_ords])
723
_hex_display_map = dict(([('%02x' % o, int2byte(o)) for o in range(256)]
724
+ [('%02X' % o, int2byte(o)) for o in range(256)]))
725
# These entries get mapped to themselves
726
_hex_display_map.update((hex, b'%' + hex.encode('ascii'))
727
for hex in _no_decode_hex)
689
+ ['%02X' % o for o in _no_decode_ords])
690
_hex_display_map = dict(([('%02x' % o, chr(o)) for o in range(256)]
691
+ [('%02X' % o, chr(o)) for o in range(256)]))
692
# These entries get mapped to themselves
693
_hex_display_map.update((hex, '%'+hex) for hex in _no_decode_hex)
729
695
# These characters shouldn't be percent-encoded, and it's always safe to
730
696
# unencode them if they are.
731
697
_url_dont_escape_characters = set(
732
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
733
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
734
"0123456789" # Numbers
735
"-._~" # Unreserved characters
698
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
699
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
700
"0123456789" # Numbers
701
"-._~" # Unreserved characters
738
704
# These characters should not be escaped
739
705
_url_safe_characters = set(
740
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
741
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
742
"0123456789" # Numbers
743
"_.-!~*'()" # Unreserved characters
744
"/;?:@&=+$," # Reserved characters
745
"%#" # Extra reserved characters
706
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
707
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
708
"0123456789" # Numbers
709
"_.-!~*'()" # Unreserved characters
710
"/;?:@&=+$," # Reserved characters
711
"%#" # Extra reserved characters
749
def _unescape_segment_for_display(segment, encoding):
750
"""Unescape a segment for display.
752
Helper for unescape_for_display
754
:param segment: A 7-bit ASCII URL segment
755
:param encoding: The final output encoding
757
:return: A unicode string which can be safely encoded into the
760
escaped_chunks = segment.split('%')
761
escaped_chunks[0] = escaped_chunks[0].encode('utf-8')
762
for j in range(1, len(escaped_chunks)):
763
item = escaped_chunks[j]
765
escaped_chunks[j] = _hex_display_map[item[:2]]
767
# Put back the percent symbol
768
escaped_chunks[j] = b'%' + \
769
(item[:2].encode('utf-8') if PY3 else item[:2])
770
except UnicodeDecodeError:
771
escaped_chunks[j] = unichr(int(item[:2], 16)).encode('utf-8')
772
escaped_chunks[j] += (item[2:].encode('utf-8') if PY3 else item[2:])
773
unescaped = b''.join(escaped_chunks)
775
decoded = unescaped.decode('utf-8')
776
except UnicodeDecodeError:
777
# If this path segment cannot be properly utf-8 decoded
778
# after doing unescaping we will just leave it alone
782
decoded.encode(encoding)
783
except UnicodeEncodeError:
784
# If this chunk cannot be encoded in the local
785
# encoding, then we should leave it alone
788
# Otherwise take the url decoded one
792
714
def unescape_for_display(url, encoding):
793
715
"""Decode what you can for a URL, so that we get a nice looking path.
817
739
# Split into sections to try to decode utf-8
818
740
res = url.split('/')
819
741
for i in range(1, len(res)):
820
res[i] = _unescape_segment_for_display(res[i], encoding)
742
escaped_chunks = res[i].split('%')
743
for j in range(1, len(escaped_chunks)):
744
item = escaped_chunks[j]
746
escaped_chunks[j] = _hex_display_map[item[:2]] + item[2:]
748
# Put back the percent symbol
749
escaped_chunks[j] = '%' + item
750
except UnicodeDecodeError:
751
escaped_chunks[j] = unichr(int(item[:2], 16)) + item[2:]
752
unescaped = ''.join(escaped_chunks)
753
if sys.version_info[0] == 2:
755
decoded = unescaped.decode('utf-8')
756
except UnicodeDecodeError:
757
# If this path segment cannot be properly utf-8 decoded
758
# after doing unescaping we will just leave it alone
762
decoded.encode(encoding)
763
except UnicodeEncodeError:
764
# If this chunk cannot be encoded in the local
765
# encoding, then we should leave it alone
768
# Otherwise take the url decoded one
821
772
return u'/'.join(res)