67
67
def __init__(self, from_, to):
70
errors.PathError.__init__(self, from_, 'URLs differ by more than path.')
70
errors.PathError.__init__(
71
self, from_, 'URLs differ by more than path.')
73
74
def basename(url, exclude_trailing_slash=True):
111
112
'0123456789' '_.-')
113
114
for i, c in zip(range(256), ''.join(map(chr, range(256)))):
114
_safe_map[c] = c if (i < 128 and c in always_safe) else '%{0:02X}'.format(i)
115
_safe_map[c] = c if (
116
i < 128 and c in always_safe) else '%{0:02X}'.format(i)
115
117
_safe_quoters = {}
117
119
def quote_from_bytes(s, safe='/'):
175
177
if len(base) < MIN_ABS_FILEURL_LENGTH:
176
178
raise ValueError('Length of base (%r) must equal or'
177
' exceed the platform minimum url length (which is %d)' %
178
(base, MIN_ABS_FILEURL_LENGTH))
179
' exceed the platform minimum url length (which is %d)' %
180
(base, MIN_ABS_FILEURL_LENGTH))
179
181
base = osutils.normpath(local_path_from_url(base))
180
182
path = osutils.normpath(local_path_from_url(path))
181
183
return escape(osutils.relpath(base, path))
199
201
first_path_slash = path.find('/')
200
202
if first_path_slash == -1:
201
203
return len(scheme), None
202
return len(scheme), first_path_slash+m.start('path')
204
return len(scheme), first_path_slash + m.start('path')
306
308
"""Convert a url like file:///C:/path/to/foo into C:/path/to/foo"""
307
309
if not url.startswith('file://'):
308
310
raise InvalidURL(url, 'local urls must start with file:///, '
309
'UNC path urls must start with file://')
311
'UNC path urls must start with file://')
310
312
url = split_segment_parameters_raw(url)[0]
311
313
# We strip off all 3 slashes
312
314
win32_url = url[len('file:'):]
313
315
# check for UNC path: //HOST/path
314
316
if not win32_url.startswith('///'):
315
317
if (win32_url[2] == '/'
316
or win32_url[3] in '|:'):
318
or win32_url[3] in '|:'):
317
319
raise InvalidURL(url, 'Win32 UNC path urls'
318
' have form file://HOST/path')
320
' have form file://HOST/path')
319
321
return unescape(win32_url)
321
323
# allow empty paths so we can serve all roots
325
327
# usual local path with drive letter
326
328
if (len(win32_url) < 6
327
329
or win32_url[3] not in ('abcdefghijklmnopqrstuvwxyz'
328
'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
329
or win32_url[4] not in '|:'
330
or win32_url[5] != '/'):
330
'ABCDEFGHIJKLMNOPQRSTUVWXYZ') or
331
win32_url[4] not in '|:'
332
or win32_url[5] != '/'):
331
333
raise InvalidURL(url, 'Win32 file urls start with'
332
' file:///x:/, where x is a valid drive letter')
334
' file:///x:/, where x is a valid drive letter')
333
335
return win32_url[3].upper() + u':' + unescape(win32_url[5:])
413
415
if c not in _url_safe_characters:
414
416
raise InvalidURL(url, 'URLs can only contain specific'
415
' safe characters (not %r)' % c)
417
' safe characters (not %r)' % c)
416
418
path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
417
419
return str(prefix + ''.join(path))
449
451
if base_scheme != other_scheme:
451
453
elif sys.platform == 'win32' and base_scheme == 'file://':
452
base_drive = base[base_first_slash+1:base_first_slash+3]
453
other_drive = other[other_first_slash+1:other_first_slash+3]
454
base_drive = base[base_first_slash + 1:base_first_slash + 3]
455
other_drive = other[other_first_slash + 1:other_first_slash + 3]
454
456
if base_drive != other_drive:
457
base_path = base[base_first_slash+1:]
458
other_path = other[other_first_slash+1:]
459
base_path = base[base_first_slash + 1:]
460
other_path = other[other_first_slash + 1:]
460
462
if base_path.endswith('/'):
461
463
base_path = base_path[:-1]
487
489
# path is currently /C:/foo
488
490
if len(path) < 4 or path[2] not in ':|' or path[3] != '/':
489
491
raise InvalidURL(url_base + path,
490
'win32 file:/// paths need a drive letter')
491
url_base += path[0:3] # file:// + /C:
492
path = path[3:] # /foo
492
'win32 file:/// paths need a drive letter')
493
url_base += path[0:3] # file:// + /C:
494
path = path[3:] # /foo
493
495
return url_base, path
519
521
# We have a fully defined path
520
url_base = url[:first_path_slash] # http://host, file://
521
path = url[first_path_slash:] # /file/foo
522
url_base = url[:first_path_slash] # http://host, file://
523
path = url[first_path_slash:] # /file/foo
523
525
if sys.platform == 'win32' and url.startswith('file:///'):
524
526
# Strip off the drive letter
542
544
# GZ 2011-11-18: Dodgy removing the terminal slash like this, function
543
545
# operates on urls not url+segments, and Transport classes
544
# should not be blindly adding slashes in the first place.
546
# should not be blindly adding slashes in the first place.
545
547
lurl = strip_trailing_slash(url)
546
548
# Segments begin at first comma after last forward slash, if one exists
547
segment_start = lurl.find(",", lurl.rfind("/")+1)
549
segment_start = lurl.find(",", lurl.rfind("/") + 1)
548
550
if segment_start == -1:
550
return (lurl[:segment_start], [str(s) for s in lurl[segment_start+1:].split(",")])
552
return (lurl[:segment_start], [str(s) for s in lurl[segment_start + 1:].split(",")])
553
555
def split_segment_parameters(url):
583
585
raise TypeError("Subsegment %r is not a bytestring" % subsegment)
584
586
if "," in subsegment:
585
587
raise InvalidURLJoin(", exists in subsegments",
587
589
return ",".join((base,) + subsegments)
604
606
raise TypeError("parameter key %r is not a str" % key)
605
607
if not isinstance(value, str):
606
608
raise TypeError("parameter value %r for %r is not a str" %
609
611
raise InvalidURLJoin("= exists in parameter key", url,
611
613
new_parameters[key] = value
612
614
return join_segment_parameters_raw(base,
613
*["%s=%s" % item for item in sorted(new_parameters.items())])
615
*["%s=%s" % item for item in sorted(new_parameters.items())])
616
618
def _win32_strip_local_trailing_slash(url):
681
683
url.encode("ascii")
682
684
except UnicodeError as e:
683
raise InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))
686
url, 'URL was not a plain ASCII url: %s' % (e,))
684
687
return urlparse.unquote(url)
686
689
if isinstance(url, text_type):
688
691
url = url.encode("ascii")
689
692
except UnicodeError as e:
690
raise InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))
694
url, 'URL was not a plain ASCII url: %s' % (e,))
691
695
unquoted = unquote(url)
693
697
unicode_path = unquoted.decode('utf-8')
694
698
except UnicodeError as e:
695
raise InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,))
700
url, 'Unable to encode the URL as utf-8: %s' % (e,))
696
701
return unicode_path
700
705
_no_decode_chars = ';/?:@&=+$,#'
701
706
_no_decode_ords = [ord(c) for c in _no_decode_chars]
702
707
_no_decode_hex = (['%02x' % o for o in _no_decode_ords]
703
+ ['%02X' % o for o in _no_decode_ords])
708
+ ['%02X' % o for o in _no_decode_ords])
704
709
_hex_display_map = dict(([('%02x' % o, int2byte(o)) for o in range(256)]
705
+ [('%02X' % o, int2byte(o)) for o in range(256)]))
706
#These entries get mapped to themselves
707
_hex_display_map.update((hex, b'%'+hex.encode('ascii')) for hex in _no_decode_hex)
710
+ [('%02X' % o, int2byte(o)) for o in range(256)]))
711
# These entries get mapped to themselves
712
_hex_display_map.update((hex, b'%' + hex.encode('ascii'))
713
for hex in _no_decode_hex)
709
715
# These characters shouldn't be percent-encoded, and it's always safe to
710
716
# unencode them if they are.
711
717
_url_dont_escape_characters = set(
712
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
713
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
714
"0123456789" # Numbers
715
"-._~" # Unreserved characters
718
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
719
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
720
"0123456789" # Numbers
721
"-._~" # Unreserved characters
718
724
# These characters should not be escaped
719
725
_url_safe_characters = set(
720
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
721
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
722
"0123456789" # Numbers
723
"_.-!~*'()" # Unreserved characters
724
"/;?:@&=+$," # Reserved characters
725
"%#" # Extra reserved characters
726
"abcdefghijklmnopqrstuvwxyz" # Lowercase alpha
727
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha
728
"0123456789" # Numbers
729
"_.-!~*'()" # Unreserved characters
730
"/;?:@&=+$," # Reserved characters
731
"%#" # Extra reserved characters
745
751
escaped_chunks[j] = _hex_display_map[item[:2]]
747
753
# Put back the percent symbol
748
escaped_chunks[j] = b'%' + (item[:2].encode('utf-8') if PY3 else item[:2])
754
escaped_chunks[j] = b'%' + \
755
(item[:2].encode('utf-8') if PY3 else item[:2])
749
756
except UnicodeDecodeError:
750
757
escaped_chunks[j] = unichr(int(item[:2], 16)).encode('utf-8')
751
escaped_chunks[j] += (item[2:].encode('utf-8') if PY3 else item[2:])
758
escaped_chunks[j] += (item[2:].encode('utf-8') if PY3 else item[2:])
752
759
unescaped = b''.join(escaped_chunks)
754
761
decoded = unescaped.decode('utf-8')
851
858
to_segments = osutils.splitpath(to_path)
853
860
for count, (from_element, to_element) in enumerate(zip(from_segments,
855
862
if from_element != to_element:
868
875
"""Parsed URL."""
870
877
def __init__(self, scheme, quoted_user, quoted_password, quoted_host,
872
879
self.scheme = scheme
873
880
self.quoted_host = quoted_host
874
881
self.host = unquote(self.quoted_host)
884
891
self.password = None
886
self.quoted_path = _url_hex_escapes_re.sub(_unescape_safe_chars, quoted_path)
893
self.quoted_path = _url_hex_escapes_re.sub(
894
_unescape_safe_chars, quoted_path)
887
895
self.path = unquote(self.quoted_path)
889
897
def __eq__(self, other):
941
if host != "" and host[0] == '[' and host[-1] == ']': #IPv6
949
if host != "" and host[0] == '[' and host[-1] == ']': # IPv6
942
950
host = host[1:-1]
944
952
return cls(scheme, user, password, host, port, path)
1026
1034
path = self.quoted_path
1027
1035
return self.__class__(self.scheme, self.quoted_user,
1028
self.quoted_password, self.quoted_host, self.port,
1036
self.quoted_password, self.quoted_host, self.port,
1032
1040
def parse_url(url):
1042
1050
parsed_url = URL.from_string(url)
1043
1051
return (parsed_url.scheme, parsed_url.user, parsed_url.password,
1044
parsed_url.host, parsed_url.port, parsed_url.path)
1052
parsed_url.host, parsed_url.port, parsed_url.path)