67
68
def __init__(self, from_, to):
70
errors.PathError.__init__(self, from_, 'URLs differ by more than path.')
71
errors.PathError.__init__(
72
self, from_, 'URLs differ by more than path.')
73
75
def basename(url, exclude_trailing_slash=True):
102
104
quote = urlparse.quote
103
105
unquote_to_bytes = urlparse.unquote_to_bytes
105
# Private copies of quote and unquote, copied from Python's
106
# urllib module because urllib unconditionally imports socket, which imports
107
# Private copies of quote and unquote, copied from Python's urllib module
108
# because urllib unconditionally imports socket, which imports ssl.
109
110
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
110
111
'abcdefghijklmnopqrstuvwxyz'
111
112
'0123456789' '_.-')
113
114
for i, c in zip(range(256), ''.join(map(chr, range(256)))):
114
_safe_map[c] = c if (i < 128 and c in always_safe) else '%{0:02X}'.format(i)
115
_safe_map[c] = c if (
116
i < 128 and c in always_safe) else '%{0:02X}'.format(i)
115
117
_safe_quoters = {}
117
119
def quote_from_bytes(s, safe='/'):
175
177
if len(base) < MIN_ABS_FILEURL_LENGTH:
176
178
raise ValueError('Length of base (%r) must equal or'
177
' exceed the platform minimum url length (which is %d)' %
178
(base, MIN_ABS_FILEURL_LENGTH))
179
' exceed the platform minimum url length (which is %d)' %
180
(base, MIN_ABS_FILEURL_LENGTH))
179
181
base = osutils.normpath(local_path_from_url(base))
180
182
path = osutils.normpath(local_path_from_url(path))
181
183
return escape(osutils.relpath(base, path))
199
201
first_path_slash = path.find('/')
200
202
if first_path_slash == -1:
201
203
return len(scheme), None
202
return len(scheme), first_path_slash+m.start('path')
204
return len(scheme), first_path_slash + m.start('path')
306
308
"""Convert a url like file:///C:/path/to/foo into C:/path/to/foo"""
307
309
if not url.startswith('file://'):
308
310
raise InvalidURL(url, 'local urls must start with file:///, '
309
'UNC path urls must start with file://')
311
'UNC path urls must start with file://')
310
312
url = split_segment_parameters_raw(url)[0]
311
313
# We strip off all 3 slashes
312
314
win32_url = url[len('file:'):]
313
315
# check for UNC path: //HOST/path
314
316
if not win32_url.startswith('///'):
315
317
if (win32_url[2] == '/'
316
or win32_url[3] in '|:'):
318
or win32_url[3] in '|:'):
317
319
raise InvalidURL(url, 'Win32 UNC path urls'
318
' have form file://HOST/path')
320
' have form file://HOST/path')
319
321
return unescape(win32_url)
321
323
# allow empty paths so we can serve all roots
325
327
# usual local path with drive letter
326
328
if (len(win32_url) < 6
327
329
or win32_url[3] not in ('abcdefghijklmnopqrstuvwxyz'
328
'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
329
or win32_url[4] not in '|:'
330
or win32_url[5] != '/'):
330
'ABCDEFGHIJKLMNOPQRSTUVWXYZ') or
331
win32_url[4] not in '|:'
332
or win32_url[5] != '/'):
331
333
raise InvalidURL(url, 'Win32 file urls start with'
332
' file:///x:/, where x is a valid drive letter')
334
' file:///x:/, where x is a valid drive letter')
333
335
return win32_url[3].upper() + u':' + unescape(win32_url[5:])
413
415
if c not in _url_safe_characters:
414
416
raise InvalidURL(url, 'URLs can only contain specific'
415
' safe characters (not %r)' % c)
417
' safe characters (not %r)' % c)
416
418
path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
417
419
return str(prefix + ''.join(path))
422
424
for i in range(len(path_chars)):
423
425
if path_chars[i] not in _url_safe_characters:
424
chars = path_chars[i].encode('utf-8')
425
426
path_chars[i] = ''.join(
426
427
['%%%02X' % c for c in bytearray(path_chars[i].encode('utf-8'))])
427
428
path = ''.join(path_chars)
449
450
if base_scheme != other_scheme:
451
452
elif sys.platform == 'win32' and base_scheme == 'file://':
452
base_drive = base[base_first_slash+1:base_first_slash+3]
453
other_drive = other[other_first_slash+1:other_first_slash+3]
453
base_drive = base[base_first_slash + 1:base_first_slash + 3]
454
other_drive = other[other_first_slash + 1:other_first_slash + 3]
454
455
if base_drive != other_drive:
457
base_path = base[base_first_slash+1:]
458
other_path = other[other_first_slash+1:]
458
base_path = base[base_first_slash + 1:]
459
other_path = other[other_first_slash + 1:]
460
461
if base_path.endswith('/'):
461
462
base_path = base_path[:-1]
487
488
# path is currently /C:/foo
488
489
if len(path) < 4 or path[2] not in ':|' or path[3] != '/':
489
490
raise InvalidURL(url_base + path,
490
'win32 file:/// paths need a drive letter')
491
url_base += path[0:3] # file:// + /C:
492
path = path[3:] # /foo
491
'win32 file:/// paths need a drive letter')
492
url_base += path[0:3] # file:// + /C:
493
path = path[3:] # /foo
493
494
return url_base, path
500
501
:param exclude_trailing_slash: Strip off a final '/' if it is part
501
502
of the path (but not if it is part of the protocol specification)
503
:return: (parent_url, child_dir). child_dir may be the empty string if we're at
504
:return: (parent_url, child_dir). child_dir may be the empty string if
506
507
scheme_loc, first_path_slash = _find_scheme_and_separator(url)
519
520
# We have a fully defined path
520
url_base = url[:first_path_slash] # http://host, file://
521
path = url[first_path_slash:] # /file/foo
521
url_base = url[:first_path_slash] # http://host, file://
522
path = url[first_path_slash:] # /file/foo
523
524
if sys.platform == 'win32' and url.startswith('file:///'):
524
525
# Strip off the drive letter
542
543
# GZ 2011-11-18: Dodgy removing the terminal slash like this, function
543
544
# operates on urls not url+segments, and Transport classes
544
# should not be blindly adding slashes in the first place.
545
# should not be blindly adding slashes in the first place.
545
546
lurl = strip_trailing_slash(url)
546
547
# Segments begin at first comma after last forward slash, if one exists
547
segment_start = lurl.find(",", lurl.rfind("/")+1)
548
segment_start = lurl.find(",", lurl.rfind("/") + 1)
548
549
if segment_start == -1:
550
return (lurl[:segment_start], [str(s) for s in lurl[segment_start+1:].split(",")])
551
return (lurl[:segment_start],
552
[str(s) for s in lurl[segment_start + 1:].split(",")])
553
555
def split_segment_parameters(url):
571
573
def join_segment_parameters_raw(base, *subsegments):
572
"""Create a new URL by adding subsegments to an existing one.
574
"""Create a new URL by adding subsegments to an existing one.
574
576
This adds the specified subsegments to the last path in the specified
575
577
base URL. The subsegments should be bytestrings.
583
585
raise TypeError("Subsegment %r is not a bytestring" % subsegment)
584
586
if "," in subsegment:
585
587
raise InvalidURLJoin(", exists in subsegments",
587
589
return ",".join((base,) + subsegments)
604
606
raise TypeError("parameter key %r is not a str" % key)
605
607
if not isinstance(value, str):
606
608
raise TypeError("parameter value %r for %r is not a str" %
609
611
raise InvalidURLJoin("= exists in parameter key", url,
611
613
new_parameters[key] = value
612
return join_segment_parameters_raw(base,
613
*["%s=%s" % item for item in sorted(new_parameters.items())])
614
return join_segment_parameters_raw(
615
base, *["%s=%s" % item for item in sorted(new_parameters.items())])
616
618
def _win32_strip_local_trailing_slash(url):
681
683
url.encode("ascii")
682
684
except UnicodeError as e:
683
raise InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))
686
url, 'URL was not a plain ASCII url: %s' % (e,))
684
687
return urlparse.unquote(url)
686
689
if isinstance(url, text_type):
688
691
url = url.encode("ascii")
689
692
except UnicodeError as e:
690
raise InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))
694
url, 'URL was not a plain ASCII url: %s' % (e,))
691
695
unquoted = unquote(url)
693
697
unicode_path = unquoted.decode('utf-8')
694
698
except UnicodeError as e:
695
raise InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,))
700
url, 'Unable to encode the URL as utf-8: %s' % (e,))
696
701
return unicode_path
700
705
_no_decode_chars = ';/?:@&=+$,#'
701
706
_no_decode_ords = [ord(c) for c in _no_decode_chars]
702
707
_no_decode_hex = (['%02x' % o for o in _no_decode_ords]
703
+ ['%02X' % o for o in _no_decode_ords])
708
+ ['%02X' % o for o in _no_decode_ords])
704
709
_hex_display_map = dict(([('%02x' % o, int2byte(o)) for o in range(256)]
705
+ [('%02X' % o, int2byte(o)) for o in range(256)]))
706
#These entries get mapped to themselves
707
_hex_display_map.update((hex, b'%'+hex.encode('ascii')) for hex in _no_decode_hex)
710
+ [('%02X' % o, int2byte(o)) for o in range(256)]))
711
# These entries get mapped to themselves
712
_hex_display_map.update((hex, b'%' + hex.encode('ascii'))
713
for hex in _no_decode_hex)
709
715
# These characters shouldn't be percent-encoded, and it's always safe to
710
716
# unencode them if they are.
711
717
_url_dont_escape_characters = set(
712
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
713
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
714
"0123456789" # Numbers
715
"-._~" # Unreserved characters
718
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
719
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
720
"0123456789" # Numbers
721
"-._~" # Unreserved characters
718
724
# These characters should not be escaped
719
725
_url_safe_characters = set(
720
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
721
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
722
"0123456789" # Numbers
723
"_.-!~*'()" # Unreserved characters
724
"/;?:@&=+$," # Reserved characters
725
"%#" # Extra reserved characters
726
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
727
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
728
"0123456789" # Numbers
729
"_.-!~*'()" # Unreserved characters
730
"/;?:@&=+$," # Reserved characters
731
"%#" # Extra reserved characters
745
751
escaped_chunks[j] = _hex_display_map[item[:2]]
747
753
# Put back the percent symbol
748
escaped_chunks[j] = b'%' + (item[:2].encode('utf-8') if PY3 else item[:2])
754
escaped_chunks[j] = b'%' + \
755
(item[:2].encode('utf-8') if PY3 else item[:2])
749
756
except UnicodeDecodeError:
750
757
escaped_chunks[j] = unichr(int(item[:2], 16)).encode('utf-8')
751
escaped_chunks[j] += (item[2:].encode('utf-8') if PY3 else item[2:])
758
escaped_chunks[j] += (item[2:].encode('utf-8') if PY3 else item[2:])
752
759
unescaped = b''.join(escaped_chunks)
754
761
decoded = unescaped.decode('utf-8')
851
858
to_segments = osutils.splitpath(to_path)
853
860
for count, (from_element, to_element) in enumerate(zip(from_segments,
855
862
if from_element != to_element:
868
875
"""Parsed URL."""
870
877
def __init__(self, scheme, quoted_user, quoted_password, quoted_host,
872
879
self.scheme = scheme
873
880
self.quoted_host = quoted_host
874
881
self.host = unquote(self.quoted_host)
884
891
self.password = None
886
self.quoted_path = _url_hex_escapes_re.sub(_unescape_safe_chars, quoted_path)
893
self.quoted_path = _url_hex_escapes_re.sub(
894
_unescape_safe_chars, quoted_path)
887
895
self.path = unquote(self.quoted_path)
889
897
def __eq__(self, other):
907
915
:param url: URL as bytestring
909
917
# GZ 2017-06-09: Actually validate ascii-ness
910
# pad.lv/1696545: For the moment, accept both native strings and unicode.
918
# pad.lv/1696545: For the moment, accept both native strings and
911
920
if isinstance(url, str):
913
922
elif isinstance(url, text_type):
941
if host != "" and host[0] == '[' and host[-1] == ']': #IPv6
950
if host != "" and host[0] == '[' and host[-1] == ']': # IPv6
942
951
host = host[1:-1]
944
953
return cls(scheme, user, password, host, port, path)
977
986
:param relpath: relative url string for relative part of remote path.
978
987
:return: urlencoded string for final path.
980
# pad.lv/1696545: For the moment, accept both native strings and unicode.
989
# pad.lv/1696545: For the moment, accept both native strings and
981
991
if isinstance(relpath, str):
983
993
elif isinstance(relpath, text_type):
1026
1036
path = self.quoted_path
1027
1037
return self.__class__(self.scheme, self.quoted_user,
1028
self.quoted_password, self.quoted_host, self.port,
1038
self.quoted_password, self.quoted_host, self.port,
1032
1042
def parse_url(url):
1042
1052
parsed_url = URL.from_string(url)
1043
1053
return (parsed_url.scheme, parsed_url.user, parsed_url.password,
1044
parsed_url.host, parsed_url.port, parsed_url.path)
1054
parsed_url.host, parsed_url.port, parsed_url.path)